Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v3.16-rc5 1727 lines 43 kB view raw
1/* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * Copyright(c) 2013 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License, version 2, as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * The full GNU General Public License is included in this distribution in 16 * the file called "COPYING". 17 * 18 * Intel MIC User Space Tools. 19 */ 20 21#define _GNU_SOURCE 22 23#include <stdlib.h> 24#include <fcntl.h> 25#include <getopt.h> 26#include <assert.h> 27#include <unistd.h> 28#include <stdbool.h> 29#include <signal.h> 30#include <poll.h> 31#include <features.h> 32#include <sys/types.h> 33#include <sys/stat.h> 34#include <sys/mman.h> 35#include <sys/socket.h> 36#include <linux/virtio_ring.h> 37#include <linux/virtio_net.h> 38#include <linux/virtio_console.h> 39#include <linux/virtio_blk.h> 40#include <linux/version.h> 41#include "mpssd.h" 42#include <linux/mic_ioctl.h> 43#include <linux/mic_common.h> 44 45static void init_mic(struct mic_info *mic); 46 47static FILE *logfp; 48static struct mic_info mic_list; 49 50#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 51 52#define min_t(type, x, y) ({ \ 53 type __min1 = (x); \ 54 type __min2 = (y); \ 55 __min1 < __min2 ? 
__min1 : __min2; }) 56 57/* align addr on a size boundary - adjust address up/down if needed */ 58#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) 59#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) 60 61/* align addr on a size boundary - adjust address up if needed */ 62#define _ALIGN(addr, size) _ALIGN_UP(addr, size) 63 64/* to align the pointer to the (next) page boundary */ 65#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) 66 67#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) 68 69#define GSO_ENABLED 1 70#define MAX_GSO_SIZE (64 * 1024) 71#define ETH_H_LEN 14 72#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) 73#define MIC_DEVICE_PAGE_END 0x1000 74 75#ifndef VIRTIO_NET_HDR_F_DATA_VALID 76#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ 77#endif 78 79static struct { 80 struct mic_device_desc dd; 81 struct mic_vqconfig vqconfig[2]; 82 __u32 host_features, guest_acknowledgements; 83 struct virtio_console_config cons_config; 84} virtcons_dev_page = { 85 .dd = { 86 .type = VIRTIO_ID_CONSOLE, 87 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), 88 .feature_len = sizeof(virtcons_dev_page.host_features), 89 .config_len = sizeof(virtcons_dev_page.cons_config), 90 }, 91 .vqconfig[0] = { 92 .num = htole16(MIC_VRING_ENTRIES), 93 }, 94 .vqconfig[1] = { 95 .num = htole16(MIC_VRING_ENTRIES), 96 }, 97}; 98 99static struct { 100 struct mic_device_desc dd; 101 struct mic_vqconfig vqconfig[2]; 102 __u32 host_features, guest_acknowledgements; 103 struct virtio_net_config net_config; 104} virtnet_dev_page = { 105 .dd = { 106 .type = VIRTIO_ID_NET, 107 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), 108 .feature_len = sizeof(virtnet_dev_page.host_features), 109 .config_len = sizeof(virtnet_dev_page.net_config), 110 }, 111 .vqconfig[0] = { 112 .num = htole16(MIC_VRING_ENTRIES), 113 }, 114 .vqconfig[1] = { 115 .num = htole16(MIC_VRING_ENTRIES), 116 }, 117#if GSO_ENABLED 118 .host_features = htole32( 119 1 << VIRTIO_NET_F_CSUM | 120 
1 << VIRTIO_NET_F_GSO | 121 1 << VIRTIO_NET_F_GUEST_TSO4 | 122 1 << VIRTIO_NET_F_GUEST_TSO6 | 123 1 << VIRTIO_NET_F_GUEST_ECN | 124 1 << VIRTIO_NET_F_GUEST_UFO), 125#else 126 .host_features = 0, 127#endif 128}; 129 130static const char *mic_config_dir = "/etc/sysconfig/mic"; 131static const char *virtblk_backend = "VIRTBLK_BACKEND"; 132static struct { 133 struct mic_device_desc dd; 134 struct mic_vqconfig vqconfig[1]; 135 __u32 host_features, guest_acknowledgements; 136 struct virtio_blk_config blk_config; 137} virtblk_dev_page = { 138 .dd = { 139 .type = VIRTIO_ID_BLOCK, 140 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), 141 .feature_len = sizeof(virtblk_dev_page.host_features), 142 .config_len = sizeof(virtblk_dev_page.blk_config), 143 }, 144 .vqconfig[0] = { 145 .num = htole16(MIC_VRING_ENTRIES), 146 }, 147 .host_features = 148 htole32(1<<VIRTIO_BLK_F_SEG_MAX), 149 .blk_config = { 150 .seg_max = htole32(MIC_VRING_ENTRIES - 2), 151 .capacity = htole64(0), 152 } 153}; 154 155static char *myname; 156 157static int 158tap_configure(struct mic_info *mic, char *dev) 159{ 160 pid_t pid; 161 char *ifargv[7]; 162 char ipaddr[IFNAMSIZ]; 163 int ret = 0; 164 165 pid = fork(); 166 if (pid == 0) { 167 ifargv[0] = "ip"; 168 ifargv[1] = "link"; 169 ifargv[2] = "set"; 170 ifargv[3] = dev; 171 ifargv[4] = "up"; 172 ifargv[5] = NULL; 173 mpsslog("Configuring %s\n", dev); 174 ret = execvp("ip", ifargv); 175 if (ret < 0) { 176 mpsslog("%s execvp failed errno %s\n", 177 mic->name, strerror(errno)); 178 return ret; 179 } 180 } 181 if (pid < 0) { 182 mpsslog("%s fork failed errno %s\n", 183 mic->name, strerror(errno)); 184 return ret; 185 } 186 187 ret = waitpid(pid, NULL, 0); 188 if (ret < 0) { 189 mpsslog("%s waitpid failed errno %s\n", 190 mic->name, strerror(errno)); 191 return ret; 192 } 193 194 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id); 195 196 pid = fork(); 197 if (pid == 0) { 198 ifargv[0] = "ip"; 199 ifargv[1] = "addr"; 200 ifargv[2] = "add"; 201 ifargv[3] = 
ipaddr; 202 ifargv[4] = "dev"; 203 ifargv[5] = dev; 204 ifargv[6] = NULL; 205 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); 206 ret = execvp("ip", ifargv); 207 if (ret < 0) { 208 mpsslog("%s execvp failed errno %s\n", 209 mic->name, strerror(errno)); 210 return ret; 211 } 212 } 213 if (pid < 0) { 214 mpsslog("%s fork failed errno %s\n", 215 mic->name, strerror(errno)); 216 return ret; 217 } 218 219 ret = waitpid(pid, NULL, 0); 220 if (ret < 0) { 221 mpsslog("%s waitpid failed errno %s\n", 222 mic->name, strerror(errno)); 223 return ret; 224 } 225 mpsslog("MIC name %s %s %d DONE!\n", 226 mic->name, __func__, __LINE__); 227 return 0; 228} 229 230static int tun_alloc(struct mic_info *mic, char *dev) 231{ 232 struct ifreq ifr; 233 int fd, err; 234#if GSO_ENABLED 235 unsigned offload; 236#endif 237 fd = open("/dev/net/tun", O_RDWR); 238 if (fd < 0) { 239 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); 240 goto done; 241 } 242 243 memset(&ifr, 0, sizeof(ifr)); 244 245 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 246 if (*dev) 247 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 248 249 err = ioctl(fd, TUNSETIFF, (void *)&ifr); 250 if (err < 0) { 251 mpsslog("%s %s %d TUNSETIFF failed %s\n", 252 mic->name, __func__, __LINE__, strerror(errno)); 253 close(fd); 254 return err; 255 } 256#if GSO_ENABLED 257 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | 258 TUN_F_TSO_ECN | TUN_F_UFO; 259 260 err = ioctl(fd, TUNSETOFFLOAD, offload); 261 if (err < 0) { 262 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", 263 mic->name, __func__, __LINE__, strerror(errno)); 264 close(fd); 265 return err; 266 } 267#endif 268 strcpy(dev, ifr.ifr_name); 269 mpsslog("Created TAP %s\n", dev); 270done: 271 return fd; 272} 273 274#define NET_FD_VIRTIO_NET 0 275#define NET_FD_TUN 1 276#define MAX_NET_FD 2 277 278static void set_dp(struct mic_info *mic, int type, void *dp) 279{ 280 switch (type) { 281 case VIRTIO_ID_CONSOLE: 282 mic->mic_console.console_dp = dp; 283 return; 284 case 
VIRTIO_ID_NET: 285 mic->mic_net.net_dp = dp; 286 return; 287 case VIRTIO_ID_BLOCK: 288 mic->mic_virtblk.block_dp = dp; 289 return; 290 } 291 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 292 assert(0); 293} 294 295static void *get_dp(struct mic_info *mic, int type) 296{ 297 switch (type) { 298 case VIRTIO_ID_CONSOLE: 299 return mic->mic_console.console_dp; 300 case VIRTIO_ID_NET: 301 return mic->mic_net.net_dp; 302 case VIRTIO_ID_BLOCK: 303 return mic->mic_virtblk.block_dp; 304 } 305 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 306 assert(0); 307 return NULL; 308} 309 310static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) 311{ 312 struct mic_device_desc *d; 313 int i; 314 void *dp = get_dp(mic, type); 315 316 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; 317 i += mic_total_desc_size(d)) { 318 d = dp + i; 319 320 /* End of list */ 321 if (d->type == 0) 322 break; 323 324 if (d->type == -1) 325 continue; 326 327 mpsslog("%s %s d-> type %d d %p\n", 328 mic->name, __func__, d->type, d); 329 330 if (d->type == (__u8)type) 331 return d; 332 } 333 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 334 assert(0); 335 return NULL; 336} 337 338/* See comments in vhost.c for explanation of next_desc() */ 339static unsigned next_desc(struct vring_desc *desc) 340{ 341 unsigned int next; 342 343 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) 344 return -1U; 345 next = le16toh(desc->next); 346 return next; 347} 348 349/* Sum up all the IOVEC length */ 350static ssize_t 351sum_iovec_len(struct mic_copy_desc *copy) 352{ 353 ssize_t sum = 0; 354 int i; 355 356 for (i = 0; i < copy->iovcnt; i++) 357 sum += copy->iov[i].iov_len; 358 return sum; 359} 360 361static inline void verify_out_len(struct mic_info *mic, 362 struct mic_copy_desc *copy) 363{ 364 if (copy->out_len != sum_iovec_len(copy)) { 365 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", 366 mic->name, __func__, __LINE__, 367 copy->out_len, 
sum_iovec_len(copy)); 368 assert(copy->out_len == sum_iovec_len(copy)); 369 } 370} 371 372/* Display an iovec */ 373static void 374disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, 375 const char *s, int line) 376{ 377 int i; 378 379 for (i = 0; i < copy->iovcnt; i++) 380 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", 381 mic->name, s, line, i, 382 copy->iov[i].iov_base, copy->iov[i].iov_len); 383} 384 385static inline __u16 read_avail_idx(struct mic_vring *vr) 386{ 387 return ACCESS_ONCE(vr->info->avail_idx); 388} 389 390static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, 391 struct mic_copy_desc *copy, ssize_t len) 392{ 393 copy->vr_idx = tx ? 0 : 1; 394 copy->update_used = true; 395 if (type == VIRTIO_ID_NET) 396 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); 397 else 398 copy->iov[0].iov_len = len; 399} 400 401/* Central API which triggers the copies */ 402static int 403mic_virtio_copy(struct mic_info *mic, int fd, 404 struct mic_vring *vr, struct mic_copy_desc *copy) 405{ 406 int ret; 407 408 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); 409 if (ret) { 410 mpsslog("%s %s %d errno %s ret %d\n", 411 mic->name, __func__, __LINE__, 412 strerror(errno), ret); 413 } 414 return ret; 415} 416 417/* 418 * This initialization routine requires at least one 419 * vring i.e. vr0. vr1 is optional. 
420 */ 421static void * 422init_vr(struct mic_info *mic, int fd, int type, 423 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) 424{ 425 int vr_size; 426 char *va; 427 428 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, 429 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); 430 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, 431 PROT_READ, MAP_SHARED, fd, 0); 432 if (MAP_FAILED == va) { 433 mpsslog("%s %s %d mmap failed errno %s\n", 434 mic->name, __func__, __LINE__, 435 strerror(errno)); 436 goto done; 437 } 438 set_dp(mic, type, va); 439 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; 440 vr0->info = vr0->va + 441 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); 442 vring_init(&vr0->vr, 443 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); 444 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", 445 __func__, mic->name, vr0->va, vr0->info, vr_size, 446 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); 447 mpsslog("magic 0x%x expected 0x%x\n", 448 le32toh(vr0->info->magic), MIC_MAGIC + type); 449 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); 450 if (vr1) { 451 vr1->va = (struct mic_vring *) 452 &va[MIC_DEVICE_PAGE_END + vr_size]; 453 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES, 454 MIC_VIRTIO_RING_ALIGN); 455 vring_init(&vr1->vr, 456 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); 457 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", 458 __func__, mic->name, vr1->va, vr1->info, vr_size, 459 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); 460 mpsslog("magic 0x%x expected 0x%x\n", 461 le32toh(vr1->info->magic), MIC_MAGIC + type + 1); 462 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); 463 } 464done: 465 return va; 466} 467 468static void 469wait_for_card_driver(struct mic_info *mic, int fd, int type) 470{ 471 struct pollfd pollfd; 472 int err; 473 struct mic_device_desc *desc = get_device_desc(mic, type); 474 475 pollfd.fd = fd; 476 mpsslog("%s %s Waiting .... 
desc-> type %d status 0x%x\n", 477 mic->name, __func__, type, desc->status); 478 while (1) { 479 pollfd.events = POLLIN; 480 pollfd.revents = 0; 481 err = poll(&pollfd, 1, -1); 482 if (err < 0) { 483 mpsslog("%s %s poll failed %s\n", 484 mic->name, __func__, strerror(errno)); 485 continue; 486 } 487 488 if (pollfd.revents) { 489 mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n", 490 mic->name, __func__, type, desc->status); 491 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { 492 mpsslog("%s %s poll.revents %d\n", 493 mic->name, __func__, pollfd.revents); 494 mpsslog("%s %s desc-> type %d status 0x%x\n", 495 mic->name, __func__, type, 496 desc->status); 497 break; 498 } 499 } 500 } 501} 502 503/* Spin till we have some descriptors */ 504static void 505spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) 506{ 507 __u16 avail_idx = read_avail_idx(vr); 508 509 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { 510#ifdef DEBUG 511 mpsslog("%s %s waiting for desc avail %d info_avail %d\n", 512 mic->name, __func__, 513 le16toh(vr->vr.avail->idx), vr->info->avail_idx); 514#endif 515 sched_yield(); 516 } 517} 518 519static void * 520virtio_net(void *arg) 521{ 522 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; 523 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); 524 struct iovec vnet_iov[2][2] = { 525 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, 526 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, 527 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, 528 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, 529 }; 530 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; 531 struct mic_info *mic = (struct mic_info *)arg; 532 char if_name[IFNAMSIZ]; 533 struct pollfd net_poll[MAX_NET_FD]; 534 struct mic_vring tx_vr, rx_vr; 535 struct mic_copy_desc copy; 536 struct mic_device_desc *desc; 537 int err; 538 539 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); 540 
mic->mic_net.tap_fd = tun_alloc(mic, if_name); 541 if (mic->mic_net.tap_fd < 0) 542 goto done; 543 544 if (tap_configure(mic, if_name)) 545 goto done; 546 mpsslog("MIC name %s id %d\n", mic->name, mic->id); 547 548 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; 549 net_poll[NET_FD_VIRTIO_NET].events = POLLIN; 550 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; 551 net_poll[NET_FD_TUN].events = POLLIN; 552 553 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, 554 VIRTIO_ID_NET, &tx_vr, &rx_vr, 555 virtnet_dev_page.dd.num_vq)) { 556 mpsslog("%s init_vr failed %s\n", 557 mic->name, strerror(errno)); 558 goto done; 559 } 560 561 copy.iovcnt = 2; 562 desc = get_device_desc(mic, VIRTIO_ID_NET); 563 564 while (1) { 565 ssize_t len; 566 567 net_poll[NET_FD_VIRTIO_NET].revents = 0; 568 net_poll[NET_FD_TUN].revents = 0; 569 570 /* Start polling for data from tap and virtio net */ 571 err = poll(net_poll, 2, -1); 572 if (err < 0) { 573 mpsslog("%s poll failed %s\n", 574 __func__, strerror(errno)); 575 continue; 576 } 577 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) 578 wait_for_card_driver(mic, mic->mic_net.virtio_net_fd, 579 VIRTIO_ID_NET); 580 /* 581 * Check if there is data to be read from TUN and write to 582 * virtio net fd if there is. 
583 */ 584 if (net_poll[NET_FD_TUN].revents & POLLIN) { 585 copy.iov = iov0; 586 len = readv(net_poll[NET_FD_TUN].fd, 587 copy.iov, copy.iovcnt); 588 if (len > 0) { 589 struct virtio_net_hdr *hdr 590 = (struct virtio_net_hdr *)vnet_hdr[0]; 591 592 /* Disable checksums on the card since we are on 593 a reliable PCIe link */ 594 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; 595#ifdef DEBUG 596 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, 597 __func__, __LINE__, hdr->flags); 598 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", 599 copy.out_len, hdr->gso_type); 600#endif 601#ifdef DEBUG 602 disp_iovec(mic, copy, __func__, __LINE__); 603 mpsslog("%s %s %d read from tap 0x%lx\n", 604 mic->name, __func__, __LINE__, 605 len); 606#endif 607 spin_for_descriptors(mic, &tx_vr); 608 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy, 609 len); 610 611 err = mic_virtio_copy(mic, 612 mic->mic_net.virtio_net_fd, &tx_vr, 613 &copy); 614 if (err < 0) { 615 mpsslog("%s %s %d mic_virtio_copy %s\n", 616 mic->name, __func__, __LINE__, 617 strerror(errno)); 618 } 619 if (!err) 620 verify_out_len(mic, &copy); 621#ifdef DEBUG 622 disp_iovec(mic, copy, __func__, __LINE__); 623 mpsslog("%s %s %d wrote to net 0x%lx\n", 624 mic->name, __func__, __LINE__, 625 sum_iovec_len(&copy)); 626#endif 627 /* Reinitialize IOV for next run */ 628 iov0[1].iov_len = MAX_NET_PKT_SIZE; 629 } else if (len < 0) { 630 disp_iovec(mic, &copy, __func__, __LINE__); 631 mpsslog("%s %s %d read failed %s ", mic->name, 632 __func__, __LINE__, strerror(errno)); 633 mpsslog("cnt %d sum %zd\n", 634 copy.iovcnt, sum_iovec_len(&copy)); 635 } 636 } 637 638 /* 639 * Check if there is data to be read from virtio net and 640 * write to TUN if there is. 
641 */ 642 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { 643 while (rx_vr.info->avail_idx != 644 le16toh(rx_vr.vr.avail->idx)) { 645 copy.iov = iov1; 646 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy, 647 MAX_NET_PKT_SIZE 648 + sizeof(struct virtio_net_hdr)); 649 650 err = mic_virtio_copy(mic, 651 mic->mic_net.virtio_net_fd, &rx_vr, 652 &copy); 653 if (!err) { 654#ifdef DEBUG 655 struct virtio_net_hdr *hdr 656 = (struct virtio_net_hdr *) 657 vnet_hdr[1]; 658 659 mpsslog("%s %s %d hdr->flags 0x%x, ", 660 mic->name, __func__, __LINE__, 661 hdr->flags); 662 mpsslog("out_len %d gso_type 0x%x\n", 663 copy.out_len, 664 hdr->gso_type); 665#endif 666 /* Set the correct output iov_len */ 667 iov1[1].iov_len = copy.out_len - 668 sizeof(struct virtio_net_hdr); 669 verify_out_len(mic, &copy); 670#ifdef DEBUG 671 disp_iovec(mic, copy, __func__, 672 __LINE__); 673 mpsslog("%s %s %d ", 674 mic->name, __func__, __LINE__); 675 mpsslog("read from net 0x%lx\n", 676 sum_iovec_len(copy)); 677#endif 678 len = writev(net_poll[NET_FD_TUN].fd, 679 copy.iov, copy.iovcnt); 680 if (len != sum_iovec_len(&copy)) { 681 mpsslog("Tun write failed %s ", 682 strerror(errno)); 683 mpsslog("len 0x%zx ", len); 684 mpsslog("read_len 0x%zx\n", 685 sum_iovec_len(&copy)); 686 } else { 687#ifdef DEBUG 688 disp_iovec(mic, &copy, __func__, 689 __LINE__); 690 mpsslog("%s %s %d ", 691 mic->name, __func__, 692 __LINE__); 693 mpsslog("wrote to tap 0x%lx\n", 694 len); 695#endif 696 } 697 } else { 698 mpsslog("%s %s %d mic_virtio_copy %s\n", 699 mic->name, __func__, __LINE__, 700 strerror(errno)); 701 break; 702 } 703 } 704 } 705 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) 706 mpsslog("%s: %s: POLLERR\n", __func__, mic->name); 707 } 708done: 709 pthread_exit(NULL); 710} 711 712/* virtio_console */ 713#define VIRTIO_CONSOLE_FD 0 714#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) 715#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ 716#define MAX_BUFFER_SIZE PAGE_SIZE 717 718static void * 
719virtio_console(void *arg) 720{ 721 static __u8 vcons_buf[2][PAGE_SIZE]; 722 struct iovec vcons_iov[2] = { 723 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, 724 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, 725 }; 726 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; 727 struct mic_info *mic = (struct mic_info *)arg; 728 int err; 729 struct pollfd console_poll[MAX_CONSOLE_FD]; 730 int pty_fd; 731 char *pts_name; 732 ssize_t len; 733 struct mic_vring tx_vr, rx_vr; 734 struct mic_copy_desc copy; 735 struct mic_device_desc *desc; 736 737 pty_fd = posix_openpt(O_RDWR); 738 if (pty_fd < 0) { 739 mpsslog("can't open a pseudoterminal master device: %s\n", 740 strerror(errno)); 741 goto _return; 742 } 743 pts_name = ptsname(pty_fd); 744 if (pts_name == NULL) { 745 mpsslog("can't get pts name\n"); 746 goto _close_pty; 747 } 748 printf("%s console message goes to %s\n", mic->name, pts_name); 749 mpsslog("%s console message goes to %s\n", mic->name, pts_name); 750 err = grantpt(pty_fd); 751 if (err < 0) { 752 mpsslog("can't grant access: %s %s\n", 753 pts_name, strerror(errno)); 754 goto _close_pty; 755 } 756 err = unlockpt(pty_fd); 757 if (err < 0) { 758 mpsslog("can't unlock a pseudoterminal: %s %s\n", 759 pts_name, strerror(errno)); 760 goto _close_pty; 761 } 762 console_poll[MONITOR_FD].fd = pty_fd; 763 console_poll[MONITOR_FD].events = POLLIN; 764 765 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; 766 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; 767 768 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, 769 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, 770 virtcons_dev_page.dd.num_vq)) { 771 mpsslog("%s init_vr failed %s\n", 772 mic->name, strerror(errno)); 773 goto _close_pty; 774 } 775 776 copy.iovcnt = 1; 777 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); 778 779 for (;;) { 780 console_poll[MONITOR_FD].revents = 0; 781 console_poll[VIRTIO_CONSOLE_FD].revents = 0; 782 err = 
poll(console_poll, MAX_CONSOLE_FD, -1); 783 if (err < 0) { 784 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, 785 strerror(errno)); 786 continue; 787 } 788 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) 789 wait_for_card_driver(mic, 790 mic->mic_console.virtio_console_fd, 791 VIRTIO_ID_CONSOLE); 792 793 if (console_poll[MONITOR_FD].revents & POLLIN) { 794 copy.iov = iov0; 795 len = readv(pty_fd, copy.iov, copy.iovcnt); 796 if (len > 0) { 797#ifdef DEBUG 798 disp_iovec(mic, copy, __func__, __LINE__); 799 mpsslog("%s %s %d read from tap 0x%lx\n", 800 mic->name, __func__, __LINE__, 801 len); 802#endif 803 spin_for_descriptors(mic, &tx_vr); 804 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, 805 &copy, len); 806 807 err = mic_virtio_copy(mic, 808 mic->mic_console.virtio_console_fd, 809 &tx_vr, &copy); 810 if (err < 0) { 811 mpsslog("%s %s %d mic_virtio_copy %s\n", 812 mic->name, __func__, __LINE__, 813 strerror(errno)); 814 } 815 if (!err) 816 verify_out_len(mic, &copy); 817#ifdef DEBUG 818 disp_iovec(mic, copy, __func__, __LINE__); 819 mpsslog("%s %s %d wrote to net 0x%lx\n", 820 mic->name, __func__, __LINE__, 821 sum_iovec_len(copy)); 822#endif 823 /* Reinitialize IOV for next run */ 824 iov0->iov_len = PAGE_SIZE; 825 } else if (len < 0) { 826 disp_iovec(mic, &copy, __func__, __LINE__); 827 mpsslog("%s %s %d read failed %s ", 828 mic->name, __func__, __LINE__, 829 strerror(errno)); 830 mpsslog("cnt %d sum %zd\n", 831 copy.iovcnt, sum_iovec_len(&copy)); 832 } 833 } 834 835 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { 836 while (rx_vr.info->avail_idx != 837 le16toh(rx_vr.vr.avail->idx)) { 838 copy.iov = iov1; 839 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, 840 &copy, PAGE_SIZE); 841 842 err = mic_virtio_copy(mic, 843 mic->mic_console.virtio_console_fd, 844 &rx_vr, &copy); 845 if (!err) { 846 /* Set the correct output iov_len */ 847 iov1->iov_len = copy.out_len; 848 verify_out_len(mic, &copy); 849#ifdef DEBUG 850 disp_iovec(mic, copy, __func__, 851 
__LINE__); 852 mpsslog("%s %s %d ", 853 mic->name, __func__, __LINE__); 854 mpsslog("read from net 0x%lx\n", 855 sum_iovec_len(copy)); 856#endif 857 len = writev(pty_fd, 858 copy.iov, copy.iovcnt); 859 if (len != sum_iovec_len(&copy)) { 860 mpsslog("Tun write failed %s ", 861 strerror(errno)); 862 mpsslog("len 0x%zx ", len); 863 mpsslog("read_len 0x%zx\n", 864 sum_iovec_len(&copy)); 865 } else { 866#ifdef DEBUG 867 disp_iovec(mic, copy, __func__, 868 __LINE__); 869 mpsslog("%s %s %d ", 870 mic->name, __func__, 871 __LINE__); 872 mpsslog("wrote to tap 0x%lx\n", 873 len); 874#endif 875 } 876 } else { 877 mpsslog("%s %s %d mic_virtio_copy %s\n", 878 mic->name, __func__, __LINE__, 879 strerror(errno)); 880 break; 881 } 882 } 883 } 884 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) 885 mpsslog("%s: %s: POLLERR\n", __func__, mic->name); 886 } 887_close_pty: 888 close(pty_fd); 889_return: 890 pthread_exit(NULL); 891} 892 893static void 894add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) 895{ 896 char path[PATH_MAX]; 897 int fd, err; 898 899 snprintf(path, PATH_MAX, "/dev/mic%d", mic->id); 900 fd = open(path, O_RDWR); 901 if (fd < 0) { 902 mpsslog("Could not open %s %s\n", path, strerror(errno)); 903 return; 904 } 905 906 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); 907 if (err < 0) { 908 mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); 909 close(fd); 910 return; 911 } 912 switch (dd->type) { 913 case VIRTIO_ID_NET: 914 mic->mic_net.virtio_net_fd = fd; 915 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); 916 break; 917 case VIRTIO_ID_CONSOLE: 918 mic->mic_console.virtio_console_fd = fd; 919 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); 920 break; 921 case VIRTIO_ID_BLOCK: 922 mic->mic_virtblk.virtio_block_fd = fd; 923 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); 924 break; 925 } 926} 927 928static bool 929set_backend_file(struct mic_info *mic) 930{ 931 FILE *config; 932 char buff[PATH_MAX], *line, *evv, *p; 933 
934 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); 935 config = fopen(buff, "r"); 936 if (config == NULL) 937 return false; 938 do { /* look for "virtblk_backend=XXXX" */ 939 line = fgets(buff, PATH_MAX, config); 940 if (line == NULL) 941 break; 942 if (*line == '#') 943 continue; 944 p = strchr(line, '\n'); 945 if (p) 946 *p = '\0'; 947 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); 948 fclose(config); 949 if (line == NULL) 950 return false; 951 evv = strchr(line, '='); 952 if (evv == NULL) 953 return false; 954 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); 955 if (mic->mic_virtblk.backend_file == NULL) { 956 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); 957 return false; 958 } 959 strcpy(mic->mic_virtblk.backend_file, evv + 1); 960 return true; 961} 962 963#define SECTOR_SIZE 512 964static bool 965set_backend_size(struct mic_info *mic) 966{ 967 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, 968 SEEK_END); 969 if (mic->mic_virtblk.backend_size < 0) { 970 mpsslog("%s: can't seek: %s\n", 971 mic->name, mic->mic_virtblk.backend_file); 972 return false; 973 } 974 virtblk_dev_page.blk_config.capacity = 975 mic->mic_virtblk.backend_size / SECTOR_SIZE; 976 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) 977 virtblk_dev_page.blk_config.capacity++; 978 979 virtblk_dev_page.blk_config.capacity = 980 htole64(virtblk_dev_page.blk_config.capacity); 981 982 return true; 983} 984 985static bool 986open_backend(struct mic_info *mic) 987{ 988 if (!set_backend_file(mic)) 989 goto _error_exit; 990 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); 991 if (mic->mic_virtblk.backend < 0) { 992 mpsslog("%s: can't open: %s\n", mic->name, 993 mic->mic_virtblk.backend_file); 994 goto _error_free; 995 } 996 if (!set_backend_size(mic)) 997 goto _error_close; 998 mic->mic_virtblk.backend_addr = mmap(NULL, 999 mic->mic_virtblk.backend_size, 1000 PROT_READ|PROT_WRITE, 
MAP_SHARED, 1001 mic->mic_virtblk.backend, 0L); 1002 if (mic->mic_virtblk.backend_addr == MAP_FAILED) { 1003 mpsslog("%s: can't map: %s %s\n", 1004 mic->name, mic->mic_virtblk.backend_file, 1005 strerror(errno)); 1006 goto _error_close; 1007 } 1008 return true; 1009 1010 _error_close: 1011 close(mic->mic_virtblk.backend); 1012 _error_free: 1013 free(mic->mic_virtblk.backend_file); 1014 _error_exit: 1015 return false; 1016} 1017 1018static void 1019close_backend(struct mic_info *mic) 1020{ 1021 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); 1022 close(mic->mic_virtblk.backend); 1023 free(mic->mic_virtblk.backend_file); 1024} 1025 1026static bool 1027start_virtblk(struct mic_info *mic, struct mic_vring *vring) 1028{ 1029 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { 1030 mpsslog("%s: blk_config is not 8 byte aligned.\n", 1031 mic->name); 1032 return false; 1033 } 1034 add_virtio_device(mic, &virtblk_dev_page.dd); 1035 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, 1036 VIRTIO_ID_BLOCK, vring, NULL, 1037 virtblk_dev_page.dd.num_vq)) { 1038 mpsslog("%s init_vr failed %s\n", 1039 mic->name, strerror(errno)); 1040 return false; 1041 } 1042 return true; 1043} 1044 1045static void 1046stop_virtblk(struct mic_info *mic) 1047{ 1048 int vr_size, ret; 1049 1050 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, 1051 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); 1052 ret = munmap(mic->mic_virtblk.block_dp, 1053 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); 1054 if (ret < 0) 1055 mpsslog("%s munmap errno %d\n", mic->name, errno); 1056 close(mic->mic_virtblk.virtio_block_fd); 1057} 1058 1059static __u8 1060header_error_check(struct vring_desc *desc) 1061{ 1062 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { 1063 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", 1064 __func__, __LINE__); 1065 return -EIO; 1066 } 1067 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { 1068 
mpsslog("%s() %d: alone\n", 1069 __func__, __LINE__); 1070 return -EIO; 1071 } 1072 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { 1073 mpsslog("%s() %d: not read\n", 1074 __func__, __LINE__); 1075 return -EIO; 1076 } 1077 return 0; 1078} 1079 1080static int 1081read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) 1082{ 1083 struct iovec iovec; 1084 struct mic_copy_desc copy; 1085 1086 iovec.iov_len = sizeof(*hdr); 1087 iovec.iov_base = hdr; 1088 copy.iov = &iovec; 1089 copy.iovcnt = 1; 1090 copy.vr_idx = 0; /* only one vring on virtio_block */ 1091 copy.update_used = false; /* do not update used index */ 1092 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); 1093} 1094 1095static int 1096transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) 1097{ 1098 struct mic_copy_desc copy; 1099 1100 copy.iov = iovec; 1101 copy.iovcnt = iovcnt; 1102 copy.vr_idx = 0; /* only one vring on virtio_block */ 1103 copy.update_used = false; /* do not update used index */ 1104 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); 1105} 1106 1107static __u8 1108status_error_check(struct vring_desc *desc) 1109{ 1110 if (le32toh(desc->len) != sizeof(__u8)) { 1111 mpsslog("%s() %d: length is not sizeof(status)\n", 1112 __func__, __LINE__); 1113 return -EIO; 1114 } 1115 return 0; 1116} 1117 1118static int 1119write_status(int fd, __u8 *status) 1120{ 1121 struct iovec iovec; 1122 struct mic_copy_desc copy; 1123 1124 iovec.iov_base = status; 1125 iovec.iov_len = sizeof(*status); 1126 copy.iov = &iovec; 1127 copy.iovcnt = 1; 1128 copy.vr_idx = 0; /* only one vring on virtio_block */ 1129 copy.update_used = true; /* Update used index */ 1130 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); 1131} 1132 1133static void * 1134virtio_block(void *arg) 1135{ 1136 struct mic_info *mic = (struct mic_info *)arg; 1137 int ret; 1138 struct pollfd block_poll; 1139 struct mic_vring vring; 1140 __u16 avail_idx; 1141 __u32 desc_idx; 1142 struct vring_desc *desc; 1143 struct iovec *iovec, *piov; 1144 
/*
 * Thread body for the virtio block backend of one MIC card (started via
 * pthread_create() with the card's struct mic_info as argument).
 *
 * The outer loop never terminates.  Each iteration:
 *   1. opens the backend file; if that fails, sleeps in 1 second steps until
 *      mic_virtblk.signaled becomes non-zero and then retries,
 *   2. registers the block device and its vring (start_virtblk),
 *   3. allocates an iovec array with blk_config.seg_max entries,
 *   4. polls the virtio block fd with a 1 second timeout until signaled is
 *      set, servicing every available request on POLLIN,
 *   5. frees the iovec array, stops the device and closes the backend.
 */
static void *
virtio_block(void *arg)
{
	struct mic_info *mic = (struct mic_info *)arg;
	int ret;
	struct pollfd block_poll;
	struct mic_vring vring;
	__u16 avail_idx;
	__u32 desc_idx;
	struct vring_desc *desc;
	struct iovec *iovec, *piov;
	__u8 status;
	__u32 buffer_desc_idx;
	struct virtio_blk_outhdr hdr;
	void *fos;

	for (;;) { /* forever */
		if (!open_backend(mic)) { /* No virtblk */
			/* Wait for the signaled flag before trying again. */
			for (mic->mic_virtblk.signaled = 0;
				!mic->mic_virtblk.signaled;)
				sleep(1);
			continue;
		}

		/* backend file is specified. */
		if (!start_virtblk(mic, &vring))
			goto _close_backend;
		/* One iovec slot per segment advertised in blk_config. */
		iovec = malloc(sizeof(*iovec) *
			le32toh(virtblk_dev_page.blk_config.seg_max));
		if (!iovec) {
			mpsslog("%s: can't alloc iovec: %s\n",
				mic->name, strerror(ENOMEM));
			goto _stop_virtblk;
		}

		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
		block_poll.events = POLLIN;
		/* Serve requests until the signaled flag is raised. */
		for (mic->mic_virtblk.signaled = 0;
		     !mic->mic_virtblk.signaled;) {
			block_poll.revents = 0;
					/* timeout in 1 sec to see signaled */
			ret = poll(&block_poll, 1, 1000);
			if (ret < 0) {
				mpsslog("%s %d: poll failed: %s\n",
					__func__, __LINE__,
					strerror(errno));
				continue;
			}

			if (!(block_poll.revents & POLLIN)) {
#ifdef DEBUG
				mpsslog("%s %d: block_poll.revents=0x%x\n",
					__func__, __LINE__, block_poll.revents);
#endif
				continue;
			}

			/* POLLIN */
			/*
			 * NOTE(review): nothing in this loop advances
			 * vring.info->avail_idx; presumably the
			 * MIC_VIRTIO_COPY_DESC ioctl does so - confirm.
			 */
			while (vring.info->avail_idx !=
				le16toh(vring.vr.avail->idx)) {
				/* read header element */
				avail_idx =
					vring.info->avail_idx &
					(vring.vr.num - 1);
				desc_idx = le16toh(
					vring.vr.avail->ring[avail_idx]);
				desc = &vring.vr.desc[desc_idx];
#ifdef DEBUG
				mpsslog("%s() %d: avail_idx=%d ",
					__func__, __LINE__,
					vring.info->avail_idx);
				mpsslog("vring.vr.num=%d desc=%p\n",
					vring.vr.num, desc);
#endif
				/*
				 * NOTE(review): this result is overwritten by
				 * "status = 0" below before it is ever read,
				 * so a failed header check only logs.
				 */
				status = header_error_check(desc);
				ret = read_header(
					mic->mic_virtblk.virtio_block_fd,
					&hdr, desc_idx);
				if (ret < 0) {
					mpsslog("%s() %d %s: ret=%d %s\n",
						__func__, __LINE__,
						mic->name, ret,
						strerror(errno));
					/* Back to poll(); no status is written. */
					break;
				}
				/* buffer element */
				piov = iovec;
				status = 0;
				/* Backend address of the first requested sector. */
				fos = mic->mic_virtblk.backend_addr +
					(hdr.sector * SECTOR_SIZE);
				buffer_desc_idx = next_desc(desc);
				desc_idx = buffer_desc_idx;
				/*
				 * Build one iovec per chained data descriptor;
				 * the walk stops at the first descriptor
				 * without VRING_DESC_F_NEXT, which is left in
				 * desc for the status step below.
				 *
				 * NOTE(review): desc->flags and desc->len are
				 * read here without le16toh()/le32toh(), unlike
				 * header_error_check(); the number of entries
				 * is not checked against seg_max and
				 * hdr.sector is not checked against the
				 * backend size - confirm whether that is
				 * guaranteed elsewhere.
				 */
				for (desc = &vring.vr.desc[buffer_desc_idx];
				     desc->flags & VRING_DESC_F_NEXT;
				     desc_idx = next_desc(desc),
					     desc = &vring.vr.desc[desc_idx]) {
					piov->iov_len = desc->len;
					piov->iov_base = fos;
					piov++;
					fos += desc->len;
				}
				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
					VIRTIO_BLK_T_GET_ID)) {
					/*
					  VIRTIO_BLK_T_IN - does not do
					  anything. Probably for documenting.
					  VIRTIO_BLK_T_SCSI_CMD - for
					  virtio_scsi.
					  VIRTIO_BLK_T_FLUSH - turned off in
					  config space.
					  VIRTIO_BLK_T_BARRIER - defined but not
					  used in anywhere.
					*/
					mpsslog("%s() %d: type %x ",
						__func__, __LINE__,
						hdr.type);
					mpsslog("is not supported\n");
					status = -ENOTSUP;

				} else {
					ret = transfer_blocks(
					mic->mic_virtblk.virtio_block_fd,
						iovec,
						piov - iovec);
					/*
					 * NOTE(review): status is always 0 on
					 * this path, so this condition can
					 * never be true and a failed transfer
					 * is not reflected in status.
					 */
					if (ret < 0 &&
					    status != 0)
						status = ret;
				}
				/* write status and update used pointer */
				/*
				 * NOTE(review): the check runs only when
				 * status is already non-zero and then replaces
				 * it, so -ENOTSUP becomes 0 whenever the
				 * status descriptor has the right length -
				 * confirm the intended condition.
				 */
				if (status != 0)
					status = status_error_check(desc);
				ret = write_status(
					mic->mic_virtblk.virtio_block_fd,
					&status);
#ifdef DEBUG
				mpsslog("%s() %d: write status=%d on desc=%p\n",
					__func__, __LINE__,
					status, desc);
#endif
			}
		}
		free(iovec);
_stop_virtblk:
		stop_virtblk(mic);
_close_backend:
		close_backend(mic);
	}  /* forever */

	/* Not reached: the loop above has no exit. */
	pthread_exit(NULL);
}
__LINE__, state); 1298 1299 /* 1300 * If the shutdown was initiated by OSPM, the state stays 1301 * in "suspended" which is also a valid condition for reset. 1302 */ 1303 if ((!strcmp(state, "offline")) || 1304 (!strcmp(state, "suspended"))) { 1305 free(state); 1306 break; 1307 } 1308 free(state); 1309retry: 1310 sleep(1); 1311 i--; 1312 } 1313} 1314 1315static int 1316get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) 1317{ 1318 if (!strcmp(shutdown_status, "nop")) 1319 return MIC_NOP; 1320 if (!strcmp(shutdown_status, "crashed")) 1321 return MIC_CRASHED; 1322 if (!strcmp(shutdown_status, "halted")) 1323 return MIC_HALTED; 1324 if (!strcmp(shutdown_status, "poweroff")) 1325 return MIC_POWER_OFF; 1326 if (!strcmp(shutdown_status, "restart")) 1327 return MIC_RESTART; 1328 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); 1329 /* Invalid state */ 1330 assert(0); 1331}; 1332 1333static int get_mic_state(struct mic_info *mic, char *state) 1334{ 1335 if (!strcmp(state, "offline")) 1336 return MIC_OFFLINE; 1337 if (!strcmp(state, "online")) 1338 return MIC_ONLINE; 1339 if (!strcmp(state, "shutting_down")) 1340 return MIC_SHUTTING_DOWN; 1341 if (!strcmp(state, "reset_failed")) 1342 return MIC_RESET_FAILED; 1343 if (!strcmp(state, "suspending")) 1344 return MIC_SUSPENDING; 1345 if (!strcmp(state, "suspended")) 1346 return MIC_SUSPENDED; 1347 mpsslog("%s: BUG invalid state %s\n", mic->name, state); 1348 /* Invalid state */ 1349 assert(0); 1350}; 1351 1352static void mic_handle_shutdown(struct mic_info *mic) 1353{ 1354#define SHUTDOWN_TIMEOUT 60 1355 int i = SHUTDOWN_TIMEOUT, ret, stat = 0; 1356 char *shutdown_status; 1357 while (i) { 1358 shutdown_status = readsysfs(mic->name, "shutdown_status"); 1359 if (!shutdown_status) 1360 continue; 1361 mpsslog("%s: %s %d shutdown_status %s\n", 1362 mic->name, __func__, __LINE__, shutdown_status); 1363 switch (get_mic_shutdown_status(mic, shutdown_status)) { 1364 case MIC_RESTART: 1365 mic->restart 
= 1; 1366 case MIC_HALTED: 1367 case MIC_POWER_OFF: 1368 case MIC_CRASHED: 1369 free(shutdown_status); 1370 goto reset; 1371 default: 1372 break; 1373 } 1374 free(shutdown_status); 1375 sleep(1); 1376 i--; 1377 } 1378reset: 1379 ret = kill(mic->pid, SIGTERM); 1380 mpsslog("%s: %s %d kill pid %d ret %d\n", 1381 mic->name, __func__, __LINE__, 1382 mic->pid, ret); 1383 if (!ret) { 1384 ret = waitpid(mic->pid, &stat, 1385 WIFSIGNALED(stat)); 1386 mpsslog("%s: %s %d waitpid ret %d pid %d\n", 1387 mic->name, __func__, __LINE__, 1388 ret, mic->pid); 1389 } 1390 if (ret == mic->pid) 1391 reset(mic); 1392} 1393 1394static void * 1395mic_config(void *arg) 1396{ 1397 struct mic_info *mic = (struct mic_info *)arg; 1398 char *state = NULL; 1399 char pathname[PATH_MAX]; 1400 int fd, ret; 1401 struct pollfd ufds[1]; 1402 char value[4096]; 1403 1404 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", 1405 MICSYSFSDIR, mic->name, "state"); 1406 1407 fd = open(pathname, O_RDONLY); 1408 if (fd < 0) { 1409 mpsslog("%s: opening file %s failed %s\n", 1410 mic->name, pathname, strerror(errno)); 1411 goto error; 1412 } 1413 1414 do { 1415 ret = lseek(fd, 0, SEEK_SET); 1416 if (ret < 0) { 1417 mpsslog("%s: Failed to seek to file start '%s': %s\n", 1418 mic->name, pathname, strerror(errno)); 1419 goto close_error1; 1420 } 1421 ret = read(fd, value, sizeof(value)); 1422 if (ret < 0) { 1423 mpsslog("%s: Failed to read sysfs entry '%s': %s\n", 1424 mic->name, pathname, strerror(errno)); 1425 goto close_error1; 1426 } 1427retry: 1428 state = readsysfs(mic->name, "state"); 1429 if (!state) 1430 goto retry; 1431 mpsslog("%s: %s %d state %s\n", 1432 mic->name, __func__, __LINE__, state); 1433 switch (get_mic_state(mic, state)) { 1434 case MIC_SHUTTING_DOWN: 1435 mic_handle_shutdown(mic); 1436 goto close_error; 1437 case MIC_SUSPENDING: 1438 mic->boot_on_resume = 1; 1439 setsysfs(mic->name, "state", "suspend"); 1440 mic_handle_shutdown(mic); 1441 goto close_error; 1442 case MIC_OFFLINE: 1443 if 
(mic->boot_on_resume) { 1444 setsysfs(mic->name, "state", "boot"); 1445 mic->boot_on_resume = 0; 1446 } 1447 break; 1448 default: 1449 break; 1450 } 1451 free(state); 1452 1453 ufds[0].fd = fd; 1454 ufds[0].events = POLLERR | POLLPRI; 1455 ret = poll(ufds, 1, -1); 1456 if (ret < 0) { 1457 mpsslog("%s: poll failed %s\n", 1458 mic->name, strerror(errno)); 1459 goto close_error1; 1460 } 1461 } while (1); 1462close_error: 1463 free(state); 1464close_error1: 1465 close(fd); 1466error: 1467 init_mic(mic); 1468 pthread_exit(NULL); 1469} 1470 1471static void 1472set_cmdline(struct mic_info *mic) 1473{ 1474 char buffer[PATH_MAX]; 1475 int len; 1476 1477 len = snprintf(buffer, PATH_MAX, 1478 "clocksource=tsc highres=off nohz=off "); 1479 len += snprintf(buffer + len, PATH_MAX, 1480 "cpufreq_on;corec6_off;pc3_off;pc6_off "); 1481 len += snprintf(buffer + len, PATH_MAX, 1482 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", 1483 mic->id); 1484 1485 setsysfs(mic->name, "cmdline", buffer); 1486 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); 1487 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id); 1488 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); 1489} 1490 1491static void 1492set_log_buf_info(struct mic_info *mic) 1493{ 1494 int fd; 1495 off_t len; 1496 char system_map[] = "/lib/firmware/mic/System.map"; 1497 char *map, *temp, log_buf[17] = {'\0'}; 1498 1499 fd = open(system_map, O_RDONLY); 1500 if (fd < 0) { 1501 mpsslog("%s: Opening System.map failed: %d\n", 1502 mic->name, errno); 1503 return; 1504 } 1505 len = lseek(fd, 0, SEEK_END); 1506 if (len < 0) { 1507 mpsslog("%s: Reading System.map size failed: %d\n", 1508 mic->name, errno); 1509 close(fd); 1510 return; 1511 } 1512 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); 1513 if (map == MAP_FAILED) { 1514 mpsslog("%s: mmap of System.map failed: %d\n", 1515 mic->name, errno); 1516 close(fd); 1517 return; 1518 } 1519 temp = strstr(map, "__log_buf"); 1520 if (!temp) { 1521 mpsslog("%s: __log_buf 
not found: %d\n", mic->name, errno); 1522 munmap(map, len); 1523 close(fd); 1524 return; 1525 } 1526 strncpy(log_buf, temp - 19, 16); 1527 setsysfs(mic->name, "log_buf_addr", log_buf); 1528 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); 1529 temp = strstr(map, "log_buf_len"); 1530 if (!temp) { 1531 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); 1532 munmap(map, len); 1533 close(fd); 1534 return; 1535 } 1536 strncpy(log_buf, temp - 19, 16); 1537 setsysfs(mic->name, "log_buf_len", log_buf); 1538 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); 1539 munmap(map, len); 1540 close(fd); 1541} 1542 1543static void init_mic(struct mic_info *mic); 1544 1545static void 1546change_virtblk_backend(int x, siginfo_t *siginfo, void *p) 1547{ 1548 struct mic_info *mic; 1549 1550 for (mic = mic_list.next; mic != NULL; mic = mic->next) 1551 mic->mic_virtblk.signaled = 1/* true */; 1552} 1553 1554static void 1555init_mic(struct mic_info *mic) 1556{ 1557 struct sigaction ignore = { 1558 .sa_flags = 0, 1559 .sa_handler = SIG_IGN 1560 }; 1561 struct sigaction act = { 1562 .sa_flags = SA_SIGINFO, 1563 .sa_sigaction = change_virtblk_backend, 1564 }; 1565 char buffer[PATH_MAX]; 1566 int err; 1567 1568 /* 1569 * Currently, one virtio block device is supported for each MIC card 1570 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. 1571 * The signal informs the virtio block backend about a change in the 1572 * configuration file which specifies the virtio backend file name on 1573 * the host. Virtio block backend then re-reads the configuration file 1574 * and switches to the new block device. This signalling mechanism may 1575 * not be required once multiple virtio block devices are supported by 1576 * the MIC daemon. 
1577 */ 1578 sigaction(SIGUSR1, &ignore, NULL); 1579 1580 mic->pid = fork(); 1581 switch (mic->pid) { 1582 case 0: 1583 set_log_buf_info(mic); 1584 set_cmdline(mic); 1585 add_virtio_device(mic, &virtcons_dev_page.dd); 1586 add_virtio_device(mic, &virtnet_dev_page.dd); 1587 err = pthread_create(&mic->mic_console.console_thread, NULL, 1588 virtio_console, mic); 1589 if (err) 1590 mpsslog("%s virtcons pthread_create failed %s\n", 1591 mic->name, strerror(err)); 1592 err = pthread_create(&mic->mic_net.net_thread, NULL, 1593 virtio_net, mic); 1594 if (err) 1595 mpsslog("%s virtnet pthread_create failed %s\n", 1596 mic->name, strerror(err)); 1597 err = pthread_create(&mic->mic_virtblk.block_thread, NULL, 1598 virtio_block, mic); 1599 if (err) 1600 mpsslog("%s virtblk pthread_create failed %s\n", 1601 mic->name, strerror(err)); 1602 sigemptyset(&act.sa_mask); 1603 err = sigaction(SIGUSR1, &act, NULL); 1604 if (err) 1605 mpsslog("%s sigaction SIGUSR1 failed %s\n", 1606 mic->name, strerror(errno)); 1607 while (1) 1608 sleep(60); 1609 case -1: 1610 mpsslog("fork failed MIC name %s id %d errno %d\n", 1611 mic->name, mic->id, errno); 1612 break; 1613 default: 1614 if (mic->restart) { 1615 snprintf(buffer, PATH_MAX, "boot"); 1616 setsysfs(mic->name, "state", buffer); 1617 mpsslog("%s restarting mic %d\n", 1618 mic->name, mic->restart); 1619 mic->restart = 0; 1620 } 1621 pthread_create(&mic->config_thread, NULL, mic_config, mic); 1622 } 1623} 1624 1625static void 1626start_daemon(void) 1627{ 1628 struct mic_info *mic; 1629 1630 for (mic = mic_list.next; mic != NULL; mic = mic->next) 1631 init_mic(mic); 1632 1633 while (1) 1634 sleep(60); 1635} 1636 1637static int 1638init_mic_list(void) 1639{ 1640 struct mic_info *mic = &mic_list; 1641 struct dirent *file; 1642 DIR *dp; 1643 int cnt = 0; 1644 1645 dp = opendir(MICSYSFSDIR); 1646 if (!dp) 1647 return 0; 1648 1649 while ((file = readdir(dp)) != NULL) { 1650 if (!strncmp(file->d_name, "mic", 3)) { 1651 mic->next = calloc(1, 
sizeof(struct mic_info)); 1652 if (mic->next) { 1653 mic = mic->next; 1654 mic->id = atoi(&file->d_name[3]); 1655 mic->name = malloc(strlen(file->d_name) + 16); 1656 if (mic->name) 1657 strcpy(mic->name, file->d_name); 1658 mpsslog("MIC name %s id %d\n", mic->name, 1659 mic->id); 1660 cnt++; 1661 } 1662 } 1663 } 1664 1665 closedir(dp); 1666 return cnt; 1667} 1668 1669void 1670mpsslog(char *format, ...) 1671{ 1672 va_list args; 1673 char buffer[4096]; 1674 char ts[52], *ts1; 1675 time_t t; 1676 1677 if (logfp == NULL) 1678 return; 1679 1680 va_start(args, format); 1681 vsprintf(buffer, format, args); 1682 va_end(args); 1683 1684 time(&t); 1685 ts1 = ctime_r(&t, ts); 1686 ts1[strlen(ts1) - 1] = '\0'; 1687 fprintf(logfp, "%s: %s", ts1, buffer); 1688 1689 fflush(logfp); 1690} 1691 1692int 1693main(int argc, char *argv[]) 1694{ 1695 int cnt; 1696 pid_t pid; 1697 1698 myname = argv[0]; 1699 1700 logfp = fopen(LOGFILE_NAME, "a+"); 1701 if (!logfp) { 1702 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); 1703 exit(1); 1704 } 1705 pid = fork(); 1706 switch (pid) { 1707 case 0: 1708 break; 1709 case -1: 1710 exit(2); 1711 default: 1712 exit(0); 1713 } 1714 1715 mpsslog("MIC Daemon start\n"); 1716 1717 cnt = init_mic_list(); 1718 if (cnt == 0) { 1719 mpsslog("MIC module not loaded\n"); 1720 exit(3); 1721 } 1722 mpsslog("MIC found %d devices\n", cnt); 1723 1724 start_daemon(); 1725 1726 exit(0); 1727}