Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 fixes from Martin Schwidefsky:
"Bug fixes for 3.6-rc7, including some important patches for large page
related memory management issues."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
s390/dasd: fix read unit address configuration loop
s390/dasd: fix pathgroup race
s390/mm: fix user access page-table walk code
s390/hwcaps: do not report high gprs for 31 bit kernel
s390/cio: invalidate cdev pointer before deregistration
s390/cio: fix IO subchannel event race
s390/dasd: move wake_up call
s390/hugetlb: use direct TLB flushing for hugetlbfs pages
s390/mm: fix deadlock in unmap_hugepage_range()

+146 -107
+10 -14
arch/s390/include/asm/hugetlb.h
··· 66 66 return pte; 67 67 } 68 68 69 - static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 70 - unsigned long addr, pte_t *ptep) 71 - { 72 - pte_t pte = huge_ptep_get(ptep); 73 - 74 - mm->context.flush_mm = 1; 75 - pmd_clear((pmd_t *) ptep); 76 - return pte; 77 - } 78 - 79 69 static inline void __pmd_csp(pmd_t *pmdp) 80 70 { 81 71 register unsigned long reg2 asm("2") = pmd_val(*pmdp); ··· 107 117 __pmd_csp(pmdp); 108 118 } 109 119 120 + static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 121 + unsigned long addr, pte_t *ptep) 122 + { 123 + pte_t pte = huge_ptep_get(ptep); 124 + 125 + huge_ptep_invalidate(mm, addr, ptep); 126 + return pte; 127 + } 128 + 110 129 #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ 111 130 ({ \ 112 131 int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ ··· 130 131 ({ \ 131 132 pte_t __pte = huge_ptep_get(__ptep); \ 132 133 if (pte_write(__pte)) { \ 133 - (__mm)->context.flush_mm = 1; \ 134 - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ 135 - (__mm) != current->active_mm) \ 136 - huge_ptep_invalidate(__mm, __addr, __ptep); \ 134 + huge_ptep_invalidate(__mm, __addr, __ptep); \ 137 135 set_huge_pte_at(__mm, __addr, __ptep, \ 138 136 huge_pte_wrprotect(__pte)); \ 139 137 } \
-2
arch/s390/include/asm/tlbflush.h
··· 90 90 91 91 static inline void __tlb_flush_mm_cond(struct mm_struct * mm) 92 92 { 93 - spin_lock(&mm->page_table_lock); 94 93 if (mm->context.flush_mm) { 95 94 __tlb_flush_mm(mm); 96 95 mm->context.flush_mm = 0; 97 96 } 98 - spin_unlock(&mm->page_table_lock); 99 97 } 100 98 101 99 /*
+2
arch/s390/kernel/setup.c
··· 974 974 if (MACHINE_HAS_HPAGE) 975 975 elf_hwcap |= HWCAP_S390_HPAGE; 976 976 977 + #if defined(CONFIG_64BIT) 977 978 /* 978 979 * 64-bit register support for 31-bit processes 979 980 * HWCAP_S390_HIGH_GPRS is bit 9. 980 981 */ 981 982 elf_hwcap |= HWCAP_S390_HIGH_GPRS; 983 + #endif 982 984 983 985 get_cpu_id(&cpu_id); 984 986 switch (cpu_id.machine) {
+63 -79
arch/s390/lib/uaccess_pt.c
··· 2 2 * User access functions based on page table walks for enhanced 3 3 * system layout without hardware support. 4 4 * 5 - * Copyright IBM Corp. 2006 5 + * Copyright IBM Corp. 2006, 2012 6 6 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) 7 7 */ 8 8 9 9 #include <linux/errno.h> 10 10 #include <linux/hardirq.h> 11 11 #include <linux/mm.h> 12 + #include <linux/hugetlb.h> 12 13 #include <asm/uaccess.h> 13 14 #include <asm/futex.h> 14 15 #include "uaccess.h" 15 16 16 - static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) 17 + 18 + /* 19 + * Returns kernel address for user virtual address. If the returned address is 20 + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address 21 + * contains the (negative) exception code. 22 + */ 23 + static __always_inline unsigned long follow_table(struct mm_struct *mm, 24 + unsigned long addr, int write) 17 25 { 18 26 pgd_t *pgd; 19 27 pud_t *pud; 20 28 pmd_t *pmd; 29 + pte_t *ptep; 21 30 22 31 pgd = pgd_offset(mm, addr); 23 32 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) 24 - return (pte_t *) 0x3a; 33 + return -0x3aUL; 25 34 26 35 pud = pud_offset(pgd, addr); 27 36 if (pud_none(*pud) || unlikely(pud_bad(*pud))) 28 - return (pte_t *) 0x3b; 37 + return -0x3bUL; 29 38 30 39 pmd = pmd_offset(pud, addr); 31 - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 32 - return (pte_t *) 0x10; 40 + if (pmd_none(*pmd)) 41 + return -0x10UL; 42 + if (pmd_huge(*pmd)) { 43 + if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) 44 + return -0x04UL; 45 + return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); 46 + } 47 + if (unlikely(pmd_bad(*pmd))) 48 + return -0x10UL; 33 49 34 - return pte_offset_map(pmd, addr); 50 + ptep = pte_offset_map(pmd, addr); 51 + if (!pte_present(*ptep)) 52 + return -0x11UL; 53 + if (write && !pte_write(*ptep)) 54 + return -0x04UL; 55 + 56 + return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); 35 57 } 36 58 37 59 static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, 38 60 size_t n, int write_user) 39 61 { 40 62 struct mm_struct *mm = current->mm; 41 - unsigned long offset, pfn, done, size; 42 - pte_t *pte; 63 + unsigned long offset, done, size, kaddr; 43 64 void *from, *to; 44 65 45 66 done = 0; 46 67 retry: 47 68 spin_lock(&mm->page_table_lock); 48 69 do { 49 - pte = follow_table(mm, uaddr); 50 - if ((unsigned long) pte < 0x1000) 70 + kaddr = follow_table(mm, uaddr, write_user); 71 + if (IS_ERR_VALUE(kaddr)) 51 72 goto fault; 52 - if (!pte_present(*pte)) { 53 - pte = (pte_t *) 0x11; 54 - goto fault; 55 - } else if (write_user && !pte_write(*pte)) { 56 - pte = (pte_t *) 0x04; 57 - goto fault; 58 - } 59 73 60 - pfn = pte_pfn(*pte); 61 - offset = uaddr & (PAGE_SIZE - 1); 74 + offset = uaddr & ~PAGE_MASK; 62 75 size = min(n - done, PAGE_SIZE - offset); 63 76 if (write_user) { 64 - to = (void *)((pfn << PAGE_SHIFT) + offset); 77 + to = (void *) kaddr; 65 78 from = kptr + done; 66 79 } else { 67 - from = (void *)((pfn << PAGE_SHIFT) + offset); 80 + from = (void *) kaddr; 68 81 to = kptr + done; 69 82 } 70 83 memcpy(to, from, size); ··· 88 75 return n - done; 89 76 fault: 90 77 spin_unlock(&mm->page_table_lock); 91 - if (__handle_fault(uaddr, (unsigned long) pte, write_user)) 78 + if (__handle_fault(uaddr, -kaddr, write_user)) 92 79 return n - done; 93 80 goto retry; 94 81 } ··· 97 84 * Do DAT for user address by page table walk, return kernel address. 98 85 * This function needs to be called with current->mm->page_table_lock held. 99 86 */ 100 - static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) 87 + static __always_inline unsigned long __dat_user_addr(unsigned long uaddr, 88 + int write) 101 89 { 102 90 struct mm_struct *mm = current->mm; 103 - unsigned long pfn; 104 - pte_t *pte; 91 + unsigned long kaddr; 105 92 int rc; 106 93 107 94 retry: 108 - pte = follow_table(mm, uaddr); 109 - if ((unsigned long) pte < 0x1000) 95 + kaddr = follow_table(mm, uaddr, write); 96 + if (IS_ERR_VALUE(kaddr)) 110 97 goto fault; 111 - if (!pte_present(*pte)) { 112 - pte = (pte_t *) 0x11; 113 - goto fault; 114 - } 115 98 116 - pfn = pte_pfn(*pte); 117 - return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); 99 + return kaddr; 118 100 fault: 119 101 spin_unlock(&mm->page_table_lock); 120 - rc = __handle_fault(uaddr, (unsigned long) pte, 0); 102 + rc = __handle_fault(uaddr, -kaddr, write); 121 103 spin_lock(&mm->page_table_lock); 122 104 if (!rc) 123 105 goto retry; ··· 167 159 168 160 static size_t strnlen_user_pt(size_t count, const char __user *src) 169 161 { 170 - char *addr; 171 162 unsigned long uaddr = (unsigned long) src; 172 163 struct mm_struct *mm = current->mm; 173 - unsigned long offset, pfn, done, len; 174 - pte_t *pte; 164 + unsigned long offset, done, len, kaddr; 175 165 size_t len_str; 176 166 177 167 if (segment_eq(get_fs(), KERNEL_DS)) ··· 178 172 retry: 179 173 spin_lock(&mm->page_table_lock); 180 174 do { 181 - pte = follow_table(mm, uaddr); 182 - if ((unsigned long) pte < 0x1000) 175 + kaddr = follow_table(mm, uaddr, 0); 176 + if (IS_ERR_VALUE(kaddr)) 183 177 goto fault; 184 - if (!pte_present(*pte)) { 185 - pte = (pte_t *) 0x11; 186 - goto fault; 187 - } 188 178 189 - pfn = pte_pfn(*pte); 190 - offset = uaddr & (PAGE_SIZE-1); 191 - addr = (char *)(pfn << PAGE_SHIFT) + offset; 179 + offset = uaddr & ~PAGE_MASK; 192 180 len = min(count - done, PAGE_SIZE - offset); 193 - len_str = strnlen(addr, len); 181 + len_str = strnlen((char *) kaddr, len); 194 182 done += len_str; 195 183 uaddr += len_str; 196 184 } while ((len_str == len) && (done < count)); ··· 192 192 return done + 1; 193 193 fault: 194 194 spin_unlock(&mm->page_table_lock); 195 - if (__handle_fault(uaddr, (unsigned long) pte, 0)) 195 + if (__handle_fault(uaddr, -kaddr, 0)) 196 196 return 0; 197 197 goto retry; 198 198 } ··· 225 225 const void __user *from) 226 226 { 227 227 struct mm_struct *mm = current->mm; 228 - unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, 229 - uaddr, done, size, error_code; 228 + unsigned long offset_max, uaddr, done, size, error_code; 230 229 unsigned long uaddr_from = (unsigned long) from; 231 230 unsigned long uaddr_to = (unsigned long) to; 232 - pte_t *pte_from, *pte_to; 231 + unsigned long kaddr_to, kaddr_from; 233 232 int write_user; 234 233 235 234 if (segment_eq(get_fs(), KERNEL_DS)) { ··· 241 242 do { 242 243 write_user = 0; 243 244 uaddr = uaddr_from; 244 - pte_from = follow_table(mm, uaddr_from); 245 - error_code = (unsigned long) pte_from; 246 - if (error_code < 0x1000) 245 + kaddr_from = follow_table(mm, uaddr_from, 0); 246 + error_code = kaddr_from; 247 + if (IS_ERR_VALUE(error_code)) 247 248 goto fault; 248 - if (!pte_present(*pte_from)) { 249 - error_code = 0x11; 250 - goto fault; 251 - } 252 249 253 250 write_user = 1; 254 251 uaddr = uaddr_to; 255 - pte_to = follow_table(mm, uaddr_to); 256 - error_code = (unsigned long) pte_to; 257 - if (error_code < 0x1000) 252 + kaddr_to = follow_table(mm, uaddr_to, 1); 253 + error_code = (unsigned long) kaddr_to; 254 + if (IS_ERR_VALUE(error_code)) 258 255 goto fault; 259 - if (!pte_present(*pte_to)) { 260 - error_code = 0x11; 261 - goto fault; 262 - } else if (!pte_write(*pte_to)) { 263 - error_code = 0x04; 264 - goto fault; 265 - } 266 256 267 - pfn_from = pte_pfn(*pte_from); 268 - pfn_to = pte_pfn(*pte_to); 269 - offset_from = uaddr_from & (PAGE_SIZE-1); 270 - offset_to = uaddr_from & (PAGE_SIZE-1); 271 - offset_max = max(offset_from, offset_to); 257 + offset_max = max(uaddr_from & ~PAGE_MASK, 258 + uaddr_to & ~PAGE_MASK); 272 259 size = min(n - done, PAGE_SIZE - offset_max); 273 260 274 - memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, 275 - (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); 261 + memcpy((void *) kaddr_to, (void *) kaddr_from, size); 276 262 done += size; 277 263 uaddr_from += size; 278 264 uaddr_to += size; ··· 266 282 return n - done; 267 283 fault: 268 284 spin_unlock(&mm->page_table_lock); 269 - if (__handle_fault(uaddr, error_code, write_user)) 285 + if (__handle_fault(uaddr, -error_code, write_user)) 270 286 return n - done; 271 287 goto retry; 272 288 } ··· 325 341 return __futex_atomic_op_pt(op, uaddr, oparg, old); 326 342 spin_lock(&current->mm->page_table_lock); 327 343 uaddr = (u32 __force __user *) 328 - __dat_user_addr((__force unsigned long) uaddr); 344 + __dat_user_addr((__force unsigned long) uaddr, 1); 329 345 if (!uaddr) { 330 346 spin_unlock(&current->mm->page_table_lock); 331 347 return -EFAULT; ··· 362 378 return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); 363 379 spin_lock(&current->mm->page_table_lock); 364 380 uaddr = (u32 __force __user *) 365 - __dat_user_addr((__force unsigned long) uaddr); 381 + __dat_user_addr((__force unsigned long) uaddr, 1); 366 382 if (!uaddr) { 367 383 spin_unlock(&current->mm->page_table_lock); 368 384 return -EFAULT;
+14 -3
drivers/s390/block/dasd.c
··· 534 534 if (rc) 535 535 device->target = device->state; 536 536 537 - if (device->state == device->target) 538 - wake_up(&dasd_init_waitq); 539 - 540 537 /* let user-space know that the device status changed */ 541 538 kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); 539 + 540 + if (device->state == device->target) 541 + wake_up(&dasd_init_waitq); 542 542 } 543 543 544 544 /* ··· 2157 2157 test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && 2158 2158 (!dasd_eer_enabled(device))) { 2159 2159 cqr->status = DASD_CQR_FAILED; 2160 + cqr->intrc = -EAGAIN; 2160 2161 continue; 2161 2162 } 2162 2163 /* Don't try to start requests if device is stopped */ ··· 3271 3270 dasd_schedule_device_bh(device); 3272 3271 } 3273 3272 if (path_event[chp] & PE_PATHGROUP_ESTABLISHED) { 3273 + if (!(device->path_data.opm & eventlpm) && 3274 + !(device->path_data.tbvpm & eventlpm)) { 3275 + /* 3276 + * we can not establish a pathgroup on an 3277 + * unavailable path, so trigger a path 3278 + * verification first 3279 + */ 3280 + device->path_data.tbvpm |= eventlpm; 3281 + dasd_schedule_device_bh(device); 3282 + } 3274 3283 DBF_DEV_EVENT(DBF_WARNING, device, "%s", 3275 3284 "Pathgroup re-established\n"); 3276 3285 if (device->discipline->kick_validate)
+26 -1
drivers/s390/block/dasd_alias.c
··· 384 384 group->next = NULL; 385 385 }; 386 386 387 + static int 388 + suborder_not_supported(struct dasd_ccw_req *cqr) 389 + { 390 + char *sense; 391 + char reason; 392 + char msg_format; 393 + char msg_no; 394 + 395 + sense = dasd_get_sense(&cqr->irb); 396 + if (!sense) 397 + return 0; 398 + 399 + reason = sense[0]; 400 + msg_format = (sense[7] & 0xF0); 401 + msg_no = (sense[7] & 0x0F); 402 + 403 + /* command reject, Format 0 MSG 4 - invalid parameter */ 404 + if ((reason == 0x80) && (msg_format == 0x00) && (msg_no == 0x04)) 405 + return 1; 406 + 407 + return 0; 408 + } 409 + 387 410 static int read_unit_address_configuration(struct dasd_device *device, 388 411 struct alias_lcu *lcu) 389 412 { ··· 458 435 459 436 do { 460 437 rc = dasd_sleep_on(cqr); 438 + if (rc && suborder_not_supported(cqr)) 439 + return -EOPNOTSUPP; 461 440 } while (rc && (cqr->retries > 0)); 462 441 if (rc) { 463 442 spin_lock_irqsave(&lcu->lock, flags); ··· 546 521 * processing the data 547 522 */ 548 523 spin_lock_irqsave(&lcu->lock, flags); 549 - if (rc || (lcu->flags & NEED_UAC_UPDATE)) { 524 + if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) { 550 525 DBF_DEV_EVENT(DBF_WARNING, device, "could not update" 551 526 " alias data in lcu (rc = %d), retry later", rc); 552 527 schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ);
+25 -7
drivers/s390/block/dasd_eckd.c
··· 1507 1507 * call might change behaviour of DASD devices. 1508 1508 */ 1509 1509 static int 1510 - dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) 1510 + dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav, 1511 + unsigned long flags) 1511 1512 { 1512 1513 struct dasd_ccw_req *cqr; 1513 1514 int rc; ··· 1517 1516 if (IS_ERR(cqr)) 1518 1517 return PTR_ERR(cqr); 1519 1518 1519 + /* 1520 + * set flags e.g. turn on failfast, to prevent blocking 1521 + * the calling function should handle failed requests 1522 + */ 1523 + cqr->flags |= flags; 1524 + 1520 1525 rc = dasd_sleep_on(cqr); 1521 1526 if (!rc) 1522 1527 /* trigger CIO to reprobe devices */ 1523 1528 css_schedule_reprobe(); 1529 + else if (cqr->intrc == -EAGAIN) 1530 + rc = -EAGAIN; 1531 + 1524 1532 dasd_sfree_request(cqr, cqr->memdev); 1525 1533 return rc; 1526 1534 } ··· 1537 1527 /* 1538 1528 * Valide storage server of current device. 1539 1529 */ 1540 - static void dasd_eckd_validate_server(struct dasd_device *device) 1530 + static int dasd_eckd_validate_server(struct dasd_device *device, 1531 + unsigned long flags) 1541 1532 { 1542 1533 int rc; 1543 1534 struct dasd_eckd_private *private; ··· 1547 1536 private = (struct dasd_eckd_private *) device->private; 1548 1537 if (private->uid.type == UA_BASE_PAV_ALIAS || 1549 1538 private->uid.type == UA_HYPER_PAV_ALIAS) 1550 - return; 1539 + return 0; 1551 1540 if (dasd_nopav || MACHINE_IS_VM) 1552 1541 enable_pav = 0; 1553 1542 else 1554 1543 enable_pav = 1; 1555 - rc = dasd_eckd_psf_ssc(device, enable_pav); 1544 + rc = dasd_eckd_psf_ssc(device, enable_pav, flags); 1556 1545 1557 1546 /* may be requested feature is not available on server, 1558 1547 * therefore just report error and go ahead */ 1559 1548 DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "PSF-SSC for SSID %04x " 1560 1549 "returned rc=%d", private->uid.ssid, rc); 1550 + return rc; 1561 1551 } 1562 1552 1563 1553 /* ··· 1568 1556 { 1569 1557 struct dasd_device *device = container_of(work, struct dasd_device, 1570 1558 kick_validate); 1571 - dasd_eckd_validate_server(device); 1559 + if (dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST) 1560 + == -EAGAIN) { 1561 + /* schedule worker again if failed */ 1562 + schedule_work(&device->kick_validate); 1563 + return; 1564 + } 1565 + 1572 1566 dasd_put_device(device); 1573 1567 } 1574 1568 ··· 1703 1685 if (rc) 1704 1686 goto out_err2; 1705 1687 1706 - dasd_eckd_validate_server(device); 1688 + dasd_eckd_validate_server(device, 0); 1707 1689 1708 1690 /* device may report different configuration data after LCU setup */ 1709 1691 rc = dasd_eckd_read_conf(device); ··· 4171 4153 rc = dasd_alias_make_device_known_to_lcu(device); 4172 4154 if (rc) 4173 4155 return rc; 4174 - dasd_eckd_validate_server(device); 4156 + dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST); 4175 4157 4176 4158 /* RE-Read Configuration Data */ 4177 4159 rc = dasd_eckd_read_conf(device);
+6 -1
drivers/s390/cio/device.c
··· 1426 1426 return IO_SCH_REPROBE; 1427 1427 if (cdev->online) 1428 1428 return IO_SCH_VERIFY; 1429 + if (cdev->private->state == DEV_STATE_NOT_OPER) 1430 + return IO_SCH_UNREG_ATTACH; 1429 1431 return IO_SCH_NOP; 1430 1432 } 1431 1433 ··· 1521 1519 goto out; 1522 1520 break; 1523 1521 case IO_SCH_UNREG_ATTACH: 1522 + spin_lock_irqsave(sch->lock, flags); 1524 1523 if (cdev->private->flags.resuming) { 1525 1524 /* Device will be handled later. */ 1526 1525 rc = 0; 1527 - goto out; 1526 + goto out_unlock; 1528 1527 } 1528 + sch_set_cdev(sch, NULL); 1529 + spin_unlock_irqrestore(sch->lock, flags); 1529 1530 /* Unregister ccw device. */ 1530 1531 ccw_device_unregister(cdev); 1531 1532 break;