Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

devlink: Add health recover notifications on devlink flows

Devlink health recover notifications were previously sent only when a
driver updated health_state directly through
devlink_health_reporter_state_update(). Also send notifications when
health_state is updated by the devlink report and recover flows.

Move functions devlink_nl_health_reporter_fill() and
devlink_recover_notify() above their new callers to avoid forward
declarations.

Fixes: 97ff3bd37fac ("devlink: add devink notification when reporter update health state")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Moshe Shemesh and committed by David S. Miller
6ec8b6cd 53c67700

+176 -174
+176 -174
net/core/devlink.c
··· 4843 4843 } 4844 4844 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); 4845 4845 4846 - void 4847 - devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) 4848 - { 4849 - reporter->recovery_count++; 4850 - reporter->last_recovery_ts = jiffies; 4851 - } 4852 - EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); 4853 - 4854 - static int 4855 - devlink_health_reporter_recover(struct devlink_health_reporter *reporter, 4856 - void *priv_ctx, struct netlink_ext_ack *extack) 4857 - { 4858 - int err; 4859 - 4860 - if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) 4861 - return 0; 4862 - 4863 - if (!reporter->ops->recover) 4864 - return -EOPNOTSUPP; 4865 - 4866 - err = reporter->ops->recover(reporter, priv_ctx, extack); 4867 - if (err) 4868 - return err; 4869 - 4870 - devlink_health_reporter_recovery_done(reporter); 4871 - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; 4872 - 4873 - return 0; 4874 - } 4875 - 4876 - static void 4877 - devlink_health_dump_clear(struct devlink_health_reporter *reporter) 4878 - { 4879 - if (!reporter->dump_fmsg) 4880 - return; 4881 - devlink_fmsg_free(reporter->dump_fmsg); 4882 - reporter->dump_fmsg = NULL; 4883 - } 4884 - 4885 - static int devlink_health_do_dump(struct devlink_health_reporter *reporter, 4886 - void *priv_ctx, 4887 - struct netlink_ext_ack *extack) 4888 - { 4889 - int err; 4890 - 4891 - if (!reporter->ops->dump) 4892 - return 0; 4893 - 4894 - if (reporter->dump_fmsg) 4895 - return 0; 4896 - 4897 - reporter->dump_fmsg = devlink_fmsg_alloc(); 4898 - if (!reporter->dump_fmsg) { 4899 - err = -ENOMEM; 4900 - return err; 4901 - } 4902 - 4903 - err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); 4904 - if (err) 4905 - goto dump_err; 4906 - 4907 - err = reporter->ops->dump(reporter, reporter->dump_fmsg, 4908 - priv_ctx, extack); 4909 - if (err) 4910 - goto dump_err; 4911 - 4912 - err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); 4913 - if (err) 4914 - 
goto dump_err; 4915 - 4916 - reporter->dump_ts = jiffies; 4917 - reporter->dump_real_ts = ktime_get_real_ns(); 4918 - 4919 - return 0; 4920 - 4921 - dump_err: 4922 - devlink_health_dump_clear(reporter); 4923 - return err; 4924 - } 4925 - 4926 - int devlink_health_report(struct devlink_health_reporter *reporter, 4927 - const char *msg, void *priv_ctx) 4928 - { 4929 - enum devlink_health_reporter_state prev_health_state; 4930 - struct devlink *devlink = reporter->devlink; 4931 - 4932 - /* write a log message of the current error */ 4933 - WARN_ON(!msg); 4934 - trace_devlink_health_report(devlink, reporter->ops->name, msg); 4935 - reporter->error_count++; 4936 - prev_health_state = reporter->health_state; 4937 - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; 4938 - 4939 - /* abort if the previous error wasn't recovered */ 4940 - if (reporter->auto_recover && 4941 - (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || 4942 - jiffies - reporter->last_recovery_ts < 4943 - msecs_to_jiffies(reporter->graceful_period))) { 4944 - trace_devlink_health_recover_aborted(devlink, 4945 - reporter->ops->name, 4946 - reporter->health_state, 4947 - jiffies - 4948 - reporter->last_recovery_ts); 4949 - return -ECANCELED; 4950 - } 4951 - 4952 - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; 4953 - 4954 - mutex_lock(&reporter->dump_lock); 4955 - /* store current dump of current error, for later analysis */ 4956 - devlink_health_do_dump(reporter, priv_ctx, NULL); 4957 - mutex_unlock(&reporter->dump_lock); 4958 - 4959 - if (reporter->auto_recover) 4960 - return devlink_health_reporter_recover(reporter, 4961 - priv_ctx, NULL); 4962 - 4963 - return 0; 4964 - } 4965 - EXPORT_SYMBOL_GPL(devlink_health_report); 4966 - 4967 - static struct devlink_health_reporter * 4968 - devlink_health_reporter_get_from_attrs(struct devlink *devlink, 4969 - struct nlattr **attrs) 4970 - { 4971 - struct devlink_health_reporter *reporter; 4972 - char *reporter_name; 4973 - 
4974 - if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) 4975 - return NULL; 4976 - 4977 - reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); 4978 - mutex_lock(&devlink->reporters_lock); 4979 - reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); 4980 - if (reporter) 4981 - refcount_inc(&reporter->refcount); 4982 - mutex_unlock(&devlink->reporters_lock); 4983 - return reporter; 4984 - } 4985 - 4986 - static struct devlink_health_reporter * 4987 - devlink_health_reporter_get_from_info(struct devlink *devlink, 4988 - struct genl_info *info) 4989 - { 4990 - return devlink_health_reporter_get_from_attrs(devlink, info->attrs); 4991 - } 4992 - 4993 - static struct devlink_health_reporter * 4994 - devlink_health_reporter_get_from_cb(struct netlink_callback *cb) 4995 - { 4996 - const struct genl_dumpit_info *info = genl_dumpit_info(cb); 4997 - struct devlink_health_reporter *reporter; 4998 - struct nlattr **attrs = info->attrs; 4999 - struct devlink *devlink; 5000 - 5001 - mutex_lock(&devlink_mutex); 5002 - devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); 5003 - if (IS_ERR(devlink)) 5004 - goto unlock; 5005 - 5006 - reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); 5007 - mutex_unlock(&devlink_mutex); 5008 - return reporter; 5009 - unlock: 5010 - mutex_unlock(&devlink_mutex); 5011 - return NULL; 5012 - } 5013 - 5014 - static void 5015 - devlink_health_reporter_put(struct devlink_health_reporter *reporter) 5016 - { 5017 - refcount_dec(&reporter->refcount); 5018 - } 5019 - 5020 4846 static int 5021 4847 devlink_nl_health_reporter_fill(struct sk_buff *msg, 5022 4848 struct devlink *devlink, ··· 4928 5102 genlmsg_multicast_netns(&devlink_nl_family, 4929 5103 devlink_net(reporter->devlink), 4930 5104 msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); 5105 + } 5106 + 5107 + void 5108 + devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) 5109 + { 5110 + reporter->recovery_count++; 5111 + 
reporter->last_recovery_ts = jiffies; 5112 + } 5113 + EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); 5114 + 5115 + static int 5116 + devlink_health_reporter_recover(struct devlink_health_reporter *reporter, 5117 + void *priv_ctx, struct netlink_ext_ack *extack) 5118 + { 5119 + int err; 5120 + 5121 + if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) 5122 + return 0; 5123 + 5124 + if (!reporter->ops->recover) 5125 + return -EOPNOTSUPP; 5126 + 5127 + err = reporter->ops->recover(reporter, priv_ctx, extack); 5128 + if (err) 5129 + return err; 5130 + 5131 + devlink_health_reporter_recovery_done(reporter); 5132 + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; 5133 + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); 5134 + 5135 + return 0; 5136 + } 5137 + 5138 + static void 5139 + devlink_health_dump_clear(struct devlink_health_reporter *reporter) 5140 + { 5141 + if (!reporter->dump_fmsg) 5142 + return; 5143 + devlink_fmsg_free(reporter->dump_fmsg); 5144 + reporter->dump_fmsg = NULL; 5145 + } 5146 + 5147 + static int devlink_health_do_dump(struct devlink_health_reporter *reporter, 5148 + void *priv_ctx, 5149 + struct netlink_ext_ack *extack) 5150 + { 5151 + int err; 5152 + 5153 + if (!reporter->ops->dump) 5154 + return 0; 5155 + 5156 + if (reporter->dump_fmsg) 5157 + return 0; 5158 + 5159 + reporter->dump_fmsg = devlink_fmsg_alloc(); 5160 + if (!reporter->dump_fmsg) { 5161 + err = -ENOMEM; 5162 + return err; 5163 + } 5164 + 5165 + err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); 5166 + if (err) 5167 + goto dump_err; 5168 + 5169 + err = reporter->ops->dump(reporter, reporter->dump_fmsg, 5170 + priv_ctx, extack); 5171 + if (err) 5172 + goto dump_err; 5173 + 5174 + err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); 5175 + if (err) 5176 + goto dump_err; 5177 + 5178 + reporter->dump_ts = jiffies; 5179 + reporter->dump_real_ts = ktime_get_real_ns(); 5180 + 5181 + return 0; 5182 + 5183 + dump_err: 5184 
+ devlink_health_dump_clear(reporter); 5185 + return err; 5186 + } 5187 + 5188 + int devlink_health_report(struct devlink_health_reporter *reporter, 5189 + const char *msg, void *priv_ctx) 5190 + { 5191 + enum devlink_health_reporter_state prev_health_state; 5192 + struct devlink *devlink = reporter->devlink; 5193 + 5194 + /* write a log message of the current error */ 5195 + WARN_ON(!msg); 5196 + trace_devlink_health_report(devlink, reporter->ops->name, msg); 5197 + reporter->error_count++; 5198 + prev_health_state = reporter->health_state; 5199 + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; 5200 + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); 5201 + 5202 + /* abort if the previous error wasn't recovered */ 5203 + if (reporter->auto_recover && 5204 + (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || 5205 + jiffies - reporter->last_recovery_ts < 5206 + msecs_to_jiffies(reporter->graceful_period))) { 5207 + trace_devlink_health_recover_aborted(devlink, 5208 + reporter->ops->name, 5209 + reporter->health_state, 5210 + jiffies - 5211 + reporter->last_recovery_ts); 5212 + return -ECANCELED; 5213 + } 5214 + 5215 + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; 5216 + 5217 + mutex_lock(&reporter->dump_lock); 5218 + /* store current dump of current error, for later analysis */ 5219 + devlink_health_do_dump(reporter, priv_ctx, NULL); 5220 + mutex_unlock(&reporter->dump_lock); 5221 + 5222 + if (reporter->auto_recover) 5223 + return devlink_health_reporter_recover(reporter, 5224 + priv_ctx, NULL); 5225 + 5226 + return 0; 5227 + } 5228 + EXPORT_SYMBOL_GPL(devlink_health_report); 5229 + 5230 + static struct devlink_health_reporter * 5231 + devlink_health_reporter_get_from_attrs(struct devlink *devlink, 5232 + struct nlattr **attrs) 5233 + { 5234 + struct devlink_health_reporter *reporter; 5235 + char *reporter_name; 5236 + 5237 + if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) 5238 + return NULL; 5239 + 5240 
+ reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); 5241 + mutex_lock(&devlink->reporters_lock); 5242 + reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); 5243 + if (reporter) 5244 + refcount_inc(&reporter->refcount); 5245 + mutex_unlock(&devlink->reporters_lock); 5246 + return reporter; 5247 + } 5248 + 5249 + static struct devlink_health_reporter * 5250 + devlink_health_reporter_get_from_info(struct devlink *devlink, 5251 + struct genl_info *info) 5252 + { 5253 + return devlink_health_reporter_get_from_attrs(devlink, info->attrs); 5254 + } 5255 + 5256 + static struct devlink_health_reporter * 5257 + devlink_health_reporter_get_from_cb(struct netlink_callback *cb) 5258 + { 5259 + const struct genl_dumpit_info *info = genl_dumpit_info(cb); 5260 + struct devlink_health_reporter *reporter; 5261 + struct nlattr **attrs = info->attrs; 5262 + struct devlink *devlink; 5263 + 5264 + mutex_lock(&devlink_mutex); 5265 + devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); 5266 + if (IS_ERR(devlink)) 5267 + goto unlock; 5268 + 5269 + reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); 5270 + mutex_unlock(&devlink_mutex); 5271 + return reporter; 5272 + unlock: 5273 + mutex_unlock(&devlink_mutex); 5274 + return NULL; 5275 + } 5276 + 5277 + static void 5278 + devlink_health_reporter_put(struct devlink_health_reporter *reporter) 5279 + { 5280 + refcount_dec(&reporter->refcount); 4931 5281 } 4932 5282 4933 5283 void