/* Listing header: at v2.6.18, 365 lines, 9.0 kB (view raw) */
1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This Software is licensed under one of the following licenses: 8 * 9 * 1) under the terms of the "Common Public License 1.0" a copy of which is 10 * available from the Open Source Initiative, see 11 * http://www.opensource.org/licenses/cpl.php. 12 * 13 * 2) under the terms of the "The BSD License" a copy of which is 14 * available from the Open Source Initiative, see 15 * http://www.opensource.org/licenses/bsd-license.php. 16 * 17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 18 * copy of which is available from the Open Source Initiative, see 19 * http://www.opensource.org/licenses/gpl-license.php. 20 * 21 * Licensee has the right to choose one of the above licenses. 22 * 23 * Redistributions of source code must retain the above copyright 24 * notice and one of the license notices. 25 * 26 * Redistributions in binary form must reproduce both the above copyright 27 * notice, one of the license notices in the documentation 28 * and/or other materials provided with the distribution. 
29 */ 30 31#include <linux/mutex.h> 32#include <linux/inetdevice.h> 33#include <linux/workqueue.h> 34#include <linux/if_arp.h> 35#include <net/arp.h> 36#include <net/neighbour.h> 37#include <net/route.h> 38#include <net/netevent.h> 39#include <rdma/ib_addr.h> 40 41MODULE_AUTHOR("Sean Hefty"); 42MODULE_DESCRIPTION("IB Address Translation"); 43MODULE_LICENSE("Dual BSD/GPL"); 44 45struct addr_req { 46 struct list_head list; 47 struct sockaddr src_addr; 48 struct sockaddr dst_addr; 49 struct rdma_dev_addr *addr; 50 void *context; 51 void (*callback)(int status, struct sockaddr *src_addr, 52 struct rdma_dev_addr *addr, void *context); 53 unsigned long timeout; 54 int status; 55}; 56 57static void process_req(void *data); 58 59static DEFINE_MUTEX(lock); 60static LIST_HEAD(req_list); 61static DECLARE_WORK(work, process_req, NULL); 62static struct workqueue_struct *addr_wq; 63 64static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 65 unsigned char *dst_dev_addr) 66{ 67 switch (dev->type) { 68 case ARPHRD_INFINIBAND: 69 dev_addr->dev_type = IB_NODE_CA; 70 break; 71 default: 72 return -EADDRNOTAVAIL; 73 } 74 75 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 76 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 77 if (dst_dev_addr) 78 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 79 return 0; 80} 81 82int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 83{ 84 struct net_device *dev; 85 u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; 86 int ret; 87 88 dev = ip_dev_find(ip); 89 if (!dev) 90 return -EADDRNOTAVAIL; 91 92 ret = copy_addr(dev_addr, dev, NULL); 93 dev_put(dev); 94 return ret; 95} 96EXPORT_SYMBOL(rdma_translate_ip); 97 98static void set_timeout(unsigned long time) 99{ 100 unsigned long delay; 101 102 cancel_delayed_work(&work); 103 104 delay = time - jiffies; 105 if ((long)delay <= 0) 106 delay = 1; 107 108 queue_delayed_work(addr_wq, &work, delay); 109} 110 111static void 
queue_req(struct addr_req *req) 112{ 113 struct addr_req *temp_req; 114 115 mutex_lock(&lock); 116 list_for_each_entry_reverse(temp_req, &req_list, list) { 117 if (time_after(req->timeout, temp_req->timeout)) 118 break; 119 } 120 121 list_add(&req->list, &temp_req->list); 122 123 if (req_list.next == &req->list) 124 set_timeout(req->timeout); 125 mutex_unlock(&lock); 126} 127 128static void addr_send_arp(struct sockaddr_in *dst_in) 129{ 130 struct rtable *rt; 131 struct flowi fl; 132 u32 dst_ip = dst_in->sin_addr.s_addr; 133 134 memset(&fl, 0, sizeof fl); 135 fl.nl_u.ip4_u.daddr = dst_ip; 136 if (ip_route_output_key(&rt, &fl)) 137 return; 138 139 arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, 140 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL); 141 ip_rt_put(rt); 142} 143 144static int addr_resolve_remote(struct sockaddr_in *src_in, 145 struct sockaddr_in *dst_in, 146 struct rdma_dev_addr *addr) 147{ 148 u32 src_ip = src_in->sin_addr.s_addr; 149 u32 dst_ip = dst_in->sin_addr.s_addr; 150 struct flowi fl; 151 struct rtable *rt; 152 struct neighbour *neigh; 153 int ret; 154 155 memset(&fl, 0, sizeof fl); 156 fl.nl_u.ip4_u.daddr = dst_ip; 157 fl.nl_u.ip4_u.saddr = src_ip; 158 ret = ip_route_output_key(&rt, &fl); 159 if (ret) 160 goto out; 161 162 /* If the device does ARP internally, return 'done' */ 163 if (rt->idev->dev->flags & IFF_NOARP) { 164 copy_addr(addr, rt->idev->dev, NULL); 165 goto put; 166 } 167 168 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 169 if (!neigh) { 170 ret = -ENODATA; 171 goto put; 172 } 173 174 if (!(neigh->nud_state & NUD_VALID)) { 175 ret = -ENODATA; 176 goto release; 177 } 178 179 if (!src_ip) { 180 src_in->sin_family = dst_in->sin_family; 181 src_in->sin_addr.s_addr = rt->rt_src; 182 } 183 184 ret = copy_addr(addr, neigh->dev, neigh->ha); 185release: 186 neigh_release(neigh); 187put: 188 ip_rt_put(rt); 189out: 190 return ret; 191} 192 193static void process_req(void *data) 194{ 195 struct addr_req 
*req, *temp_req; 196 struct sockaddr_in *src_in, *dst_in; 197 struct list_head done_list; 198 199 INIT_LIST_HEAD(&done_list); 200 201 mutex_lock(&lock); 202 list_for_each_entry_safe(req, temp_req, &req_list, list) { 203 if (req->status) { 204 src_in = (struct sockaddr_in *) &req->src_addr; 205 dst_in = (struct sockaddr_in *) &req->dst_addr; 206 req->status = addr_resolve_remote(src_in, dst_in, 207 req->addr); 208 } 209 if (req->status && time_after(jiffies, req->timeout)) 210 req->status = -ETIMEDOUT; 211 else if (req->status == -ENODATA) 212 continue; 213 214 list_del(&req->list); 215 list_add_tail(&req->list, &done_list); 216 } 217 218 if (!list_empty(&req_list)) { 219 req = list_entry(req_list.next, struct addr_req, list); 220 set_timeout(req->timeout); 221 } 222 mutex_unlock(&lock); 223 224 list_for_each_entry_safe(req, temp_req, &done_list, list) { 225 list_del(&req->list); 226 req->callback(req->status, &req->src_addr, req->addr, 227 req->context); 228 kfree(req); 229 } 230} 231 232static int addr_resolve_local(struct sockaddr_in *src_in, 233 struct sockaddr_in *dst_in, 234 struct rdma_dev_addr *addr) 235{ 236 struct net_device *dev; 237 u32 src_ip = src_in->sin_addr.s_addr; 238 u32 dst_ip = dst_in->sin_addr.s_addr; 239 int ret; 240 241 dev = ip_dev_find(dst_ip); 242 if (!dev) 243 return -EADDRNOTAVAIL; 244 245 if (ZERONET(src_ip)) { 246 src_in->sin_family = dst_in->sin_family; 247 src_in->sin_addr.s_addr = dst_ip; 248 ret = copy_addr(addr, dev, dev->dev_addr); 249 } else if (LOOPBACK(src_ip)) { 250 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); 251 if (!ret) 252 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 253 } else { 254 ret = rdma_translate_ip((struct sockaddr *)src_in, addr); 255 if (!ret) 256 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 257 } 258 259 dev_put(dev); 260 return ret; 261} 262 263int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, 264 struct rdma_dev_addr *addr, int timeout_ms, 265 
void (*callback)(int status, struct sockaddr *src_addr, 266 struct rdma_dev_addr *addr, void *context), 267 void *context) 268{ 269 struct sockaddr_in *src_in, *dst_in; 270 struct addr_req *req; 271 int ret = 0; 272 273 req = kmalloc(sizeof *req, GFP_KERNEL); 274 if (!req) 275 return -ENOMEM; 276 memset(req, 0, sizeof *req); 277 278 if (src_addr) 279 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); 280 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); 281 req->addr = addr; 282 req->callback = callback; 283 req->context = context; 284 285 src_in = (struct sockaddr_in *) &req->src_addr; 286 dst_in = (struct sockaddr_in *) &req->dst_addr; 287 288 req->status = addr_resolve_local(src_in, dst_in, addr); 289 if (req->status == -EADDRNOTAVAIL) 290 req->status = addr_resolve_remote(src_in, dst_in, addr); 291 292 switch (req->status) { 293 case 0: 294 req->timeout = jiffies; 295 queue_req(req); 296 break; 297 case -ENODATA: 298 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 299 queue_req(req); 300 addr_send_arp(dst_in); 301 break; 302 default: 303 ret = req->status; 304 kfree(req); 305 break; 306 } 307 return ret; 308} 309EXPORT_SYMBOL(rdma_resolve_ip); 310 311void rdma_addr_cancel(struct rdma_dev_addr *addr) 312{ 313 struct addr_req *req, *temp_req; 314 315 mutex_lock(&lock); 316 list_for_each_entry_safe(req, temp_req, &req_list, list) { 317 if (req->addr == addr) { 318 req->status = -ECANCELED; 319 req->timeout = jiffies; 320 list_del(&req->list); 321 list_add(&req->list, &req_list); 322 set_timeout(req->timeout); 323 break; 324 } 325 } 326 mutex_unlock(&lock); 327} 328EXPORT_SYMBOL(rdma_addr_cancel); 329 330static int netevent_callback(struct notifier_block *self, unsigned long event, 331 void *ctx) 332{ 333 if (event == NETEVENT_NEIGH_UPDATE) { 334 struct neighbour *neigh = ctx; 335 336 if (neigh->dev->type == ARPHRD_INFINIBAND && 337 (neigh->nud_state & NUD_VALID)) { 338 set_timeout(jiffies); 339 } 340 } 341 return 0; 342} 343 344static struct 
notifier_block nb = { 345 .notifier_call = netevent_callback 346}; 347 348static int addr_init(void) 349{ 350 addr_wq = create_singlethread_workqueue("ib_addr_wq"); 351 if (!addr_wq) 352 return -ENOMEM; 353 354 register_netevent_notifier(&nb); 355 return 0; 356} 357 358static void addr_cleanup(void) 359{ 360 unregister_netevent_notifier(&nb); 361 destroy_workqueue(addr_wq); 362} 363 364module_init(addr_init); 365module_exit(addr_cleanup);