Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next

Pull sparc updates from David Miller:

1) Use register window state adjustment instructions when available,
from Anthony Yznaga.

2) Add VCC console concentrator driver, from Jag Raman.

3) Add 16GB hugepage support, from Nitin Gupta.

4) Support cpu 'poke' hypercall, from Vijay Kumar.

5) Add M7/M8 optimized memcpy/memset/copy_{to,from}_user, from Babu
Moger.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next: (33 commits)
sparc64: Handle additional cases of no fault loads
sparc64: speed up etrap/rtrap on NG2 and later processors
sparc64: vcc: make ktermios const
sparc: leon: grpci1: constify of_device_id
sparc: leon: grpci2: constify of_device_id
sparc64: vcc: Check for IS_ERR() instead of NULL
sparc64: Cleanup hugepage table walk functions
sparc64: Add 16GB hugepage support
sparc64: Support huge PUD case in get_user_pages
sparc64: vcc: Add install & cleanup TTY operations
sparc64: vcc: Add break_ctl TTY operation
sparc64: vcc: Add chars_in_buffer TTY operation
sparc64: vcc: Add write & write_room TTY operations
sparc64: vcc: Add hangup TTY operation
sparc64: vcc: Add open & close TTY operations
sparc64: vcc: Enable LDC event processing engine
sparc64: vcc: Add RX & TX timer for delayed LDC operation
sparc64: vcc: Create sysfs attribute group
sparc64: vcc: Enable VCC port probe and removal
sparc64: vcc: TTY driver initialization and cleanup
...

+3489 -298
+1
MAINTAINERS
··· 12489 12489 F: drivers/tty/serial/sunsu.c 12490 12490 F: drivers/tty/serial/sunzilog.c 12491 12491 F: drivers/tty/serial/sunzilog.h 12492 + F: drivers/tty/vcc.c 12492 12493 12493 12494 SPARSE CHECKER 12494 12495 M: "Christopher Li" <sparse@chrisli.org>
+1
arch/sparc/configs/sparc64_defconfig
··· 238 238 # CONFIG_CRYPTO_ANSI_CPRNG is not set 239 239 CONFIG_CRC16=m 240 240 CONFIG_LIBCRC32C=m 241 + CONFIG_VCC=m
+7
arch/sparc/include/asm/hugetlb.h
··· 4 4 #include <asm/page.h> 5 5 #include <asm-generic/hugetlb.h> 6 6 7 + #ifdef CONFIG_HUGETLB_PAGE 8 + struct pud_huge_patch_entry { 9 + unsigned int addr; 10 + unsigned int insn; 11 + }; 12 + extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end; 13 + #endif 7 14 8 15 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 9 16 pte_t *ptep, pte_t pte);
+18
arch/sparc/include/asm/hypervisor.h
··· 298 298 unsigned long sun4v_cpu_yield(void); 299 299 #endif 300 300 301 + /* cpu_poke() 302 + * TRAP: HV_FAST_TRAP 303 + * FUNCTION: HV_FAST_CPU_POKE 304 + * RET0: status 305 + * ERRORS: ENOCPU cpuid refers to a CPU that does not exist 306 + * EINVAL cpuid is current CPU 307 + * 308 + * Poke CPU cpuid. If the target CPU is currently suspended having 309 + * invoked the cpu-yield service, that vCPU will be resumed. 310 + * Poke interrupts may only be sent to valid, non-local CPUs. 311 + * It is not legal to poke the current vCPU. 312 + */ 313 + #define HV_FAST_CPU_POKE 0x13 314 + 315 + #ifndef __ASSEMBLY__ 316 + unsigned long sun4v_cpu_poke(unsigned long cpuid); 317 + #endif 318 + 301 319 /* cpu_qconf() 302 320 * TRAP: HV_FAST_TRAP 303 321 * FUNCTION: HV_FAST_CPU_QCONF
+2 -1
arch/sparc/include/asm/page_64.h
··· 17 17 18 18 #define HPAGE_SHIFT 23 19 19 #define REAL_HPAGE_SHIFT 22 20 + #define HPAGE_16GB_SHIFT 34 20 21 #define HPAGE_2GB_SHIFT 31 21 22 #define HPAGE_256MB_SHIFT 28 22 23 #define HPAGE_64K_SHIFT 16 ··· 29 28 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 30 29 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 31 30 #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) 32 - #define HUGE_MAX_HSTATE 4 31 + #define HUGE_MAX_HSTATE 5 33 32 #endif 34 33 35 34 #ifndef __ASSEMBLY__
+18 -2
arch/sparc/include/asm/pgtable_64.h
··· 414 414 return !!(pmd_val(pmd) & _PAGE_PMD_HUGE); 415 415 } 416 416 417 + static inline bool is_hugetlb_pud(pud_t pud) 418 + { 419 + return !!(pud_val(pud) & _PAGE_PUD_HUGE); 420 + } 421 + 417 422 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 418 423 static inline pmd_t pmd_mkhuge(pmd_t pmd) 419 424 { ··· 692 687 return pte_write(pte); 693 688 } 694 689 690 + #define pud_write(pud) pte_write(__pte(pud_val(pud))) 691 + 695 692 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 696 693 static inline unsigned long pmd_dirty(pmd_t pmd) 697 694 { ··· 830 823 831 824 return ((unsigned long) __va(pfn << PAGE_SHIFT)); 832 825 } 826 + 827 + static inline unsigned long pud_page_vaddr(pud_t pud) 828 + { 829 + pte_t pte = __pte(pud_val(pud)); 830 + unsigned long pfn; 831 + 832 + pfn = pte_pfn(pte); 833 + 834 + return ((unsigned long) __va(pfn << PAGE_SHIFT)); 835 + } 836 + 833 837 #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) 834 - #define pud_page_vaddr(pud) \ 835 - ((unsigned long) __va(pud_val(pud))) 836 838 #define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) 837 839 #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) 838 840 #define pud_present(pud) (pud_val(pud) != 0U)
+5
arch/sparc/include/asm/smp_64.h
··· 33 33 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 34 34 extern cpumask_t cpu_core_map[NR_CPUS]; 35 35 36 + void smp_init_cpu_poke(void); 37 + void scheduler_poke(void); 38 + 36 39 void arch_send_call_function_single_ipi(int cpu); 37 40 void arch_send_call_function_ipi_mask(const struct cpumask *mask); 38 41 ··· 77 74 #define smp_fetch_global_regs() do { } while (0) 78 75 #define smp_fetch_global_pmu() do { } while (0) 79 76 #define smp_fill_in_cpu_possible_map() do { } while (0) 77 + #define smp_init_cpu_poke() do { } while (0) 78 + #define scheduler_poke() do { } while (0) 80 79 81 80 #endif /* !(CONFIG_SMP) */ 82 81
+2
arch/sparc/include/asm/trap_block.h
··· 73 73 }; 74 74 extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch, 75 75 __sun4v_1insn_patch_end; 76 + extern struct sun4v_1insn_patch_entry __fast_win_ctrl_1insn_patch, 77 + __fast_win_ctrl_1insn_patch_end; 76 78 77 79 struct sun4v_2insn_patch_entry { 78 80 unsigned int addr;
+36
arch/sparc/include/asm/tsb.h
··· 195 195 nop; \ 196 196 699: 197 197 198 + /* PUD has been loaded into REG1, interpret the value, seeing 199 + * if it is a HUGE PUD or a normal one. If it is not valid 200 + * then jump to FAIL_LABEL. If it is a HUGE PUD, and it 201 + * translates to a valid PTE, branch to PTE_LABEL. 202 + * 203 + * We have to propagate bits [32:22] from the virtual address 204 + * to resolve at 4M granularity. 205 + */ 206 + #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) 207 + #define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ 208 + 700: ba 700f; \ 209 + nop; \ 210 + .section .pud_huge_patch, "ax"; \ 211 + .word 700b; \ 212 + nop; \ 213 + .previous; \ 214 + brz,pn REG1, FAIL_LABEL; \ 215 + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ 216 + sllx REG2, 32, REG2; \ 217 + andcc REG1, REG2, %g0; \ 218 + be,pt %xcc, 700f; \ 219 + sethi %hi(0x1ffc0000), REG2; \ 220 + sllx REG2, 1, REG2; \ 221 + brgez,pn REG1, FAIL_LABEL; \ 222 + andn REG1, REG2, REG1; \ 223 + and VADDR, REG2, REG2; \ 224 + brlz,pt REG1, PTE_LABEL; \ 225 + or REG1, REG2, REG1; \ 226 + 700: 227 + #else 228 + #define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ 229 + brz,pn REG1, FAIL_LABEL; \ 230 + nop; 231 + #endif 232 + 198 233 /* PMD has been loaded into REG1, interpret the value, seeing 199 234 * if it is a HUGE PMD or a normal one. If it is not valid 200 235 * then jump to FAIL_LABEL. If it is a HUGE PMD, and it ··· 277 242 srlx REG2, 64 - PAGE_SHIFT, REG2; \ 278 243 andn REG2, 0x7, REG2; \ 279 244 ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ 245 + USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ 280 246 brz,pn REG1, FAIL_LABEL; \ 281 247 sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ 282 248 srlx REG2, 64 - PAGE_SHIFT, REG2; \
+9
arch/sparc/include/asm/vio.h
··· 52 52 #define VDEV_NETWORK_SWITCH 0x02 53 53 #define VDEV_DISK 0x03 54 54 #define VDEV_DISK_SERVER 0x04 55 + #define VDEV_CONSOLE_CON 0x05 55 56 56 57 u8 resv1[3]; 57 58 u64 resv2[5]; ··· 281 280 u32 pending; 282 281 int ncookies; 283 282 struct ldc_trans_cookie cookies[VIO_MAX_RING_COOKIES]; 283 + }; 284 + 285 + #define VIO_TAG_SIZE ((int)sizeof(struct vio_msg_tag)) 286 + #define VIO_VCC_MTU_SIZE (LDC_PACKET_SIZE - VIO_TAG_SIZE) 287 + 288 + struct vio_vcc { 289 + struct vio_msg_tag tag; 290 + char data[VIO_VCC_MTU_SIZE]; 284 291 }; 285 292 286 293 static inline void *vio_dring_cur(struct vio_dring_state *dr)
+22 -4
arch/sparc/kernel/etrap_64.S
··· 38 38 or %g1, %g3, %g1 39 39 bne,pn %xcc, 1f 40 40 sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2 41 - wrpr %g0, 7, %cleanwin 41 + 661: wrpr %g0, 7, %cleanwin 42 + .section .fast_win_ctrl_1insn_patch, "ax" 43 + .word 661b 44 + .word 0x85880000 ! allclean 45 + .previous 42 46 43 47 sethi %hi(TASK_REGOFF), %g2 44 48 sethi %hi(TSTATE_PEF), %g3 ··· 92 88 93 89 bne,pn %xcc, 3f 94 90 mov PRIMARY_CONTEXT, %l4 95 - rdpr %canrestore, %g3 91 + 661: rdpr %canrestore, %g3 92 + .section .fast_win_ctrl_1insn_patch, "ax" 93 + .word 661b 94 + nop 95 + .previous 96 + 96 97 rdpr %wstate, %g2 97 - wrpr %g0, 0, %canrestore 98 + 661: wrpr %g0, 0, %canrestore 99 + .section .fast_win_ctrl_1insn_patch, "ax" 100 + .word 661b 101 + nop 102 + .previous 98 103 sll %g2, 3, %g2 99 104 100 105 /* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */ 101 106 mov 1, %l5 102 107 sth %l5, [%l6 + TI_SYS_NOERROR] 103 108 104 - wrpr %g3, 0, %otherwin 109 + 661: wrpr %g3, 0, %otherwin 110 + .section .fast_win_ctrl_1insn_patch, "ax" 111 + .word 661b 112 + .word 0x87880000 ! otherw 113 + .previous 114 + 105 115 wrpr %g2, 0, %wstate 106 116 sethi %hi(sparc64_kern_pri_context), %g2 107 117 ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
+15 -3
arch/sparc/kernel/head_64.S
··· 603 603 be,pt %xcc, niagara4_patch 604 604 nop 605 605 cmp %g1, SUN4V_CHIP_SPARC_M7 606 - be,pt %xcc, niagara4_patch 606 + be,pt %xcc, sparc_m7_patch 607 607 nop 608 608 cmp %g1, SUN4V_CHIP_SPARC_M8 609 - be,pt %xcc, niagara4_patch 609 + be,pt %xcc, sparc_m7_patch 610 610 nop 611 611 cmp %g1, SUN4V_CHIP_SPARC_SN 612 612 be,pt %xcc, niagara4_patch ··· 621 621 622 622 ba,a,pt %xcc, 80f 623 623 nop 624 + 625 + sparc_m7_patch: 626 + call m7_patch_copyops 627 + nop 628 + call m7_patch_bzero 629 + nop 630 + call m7_patch_pageops 631 + nop 632 + 633 + ba,a,pt %xcc, 80f 634 + nop 635 + 624 636 niagara4_patch: 625 637 call niagara4_patch_copyops 626 638 nop ··· 893 881 #include "misctrap.S" 894 882 #include "syscalls.S" 895 883 #include "helpers.S" 896 - #include "hvcalls.S" 897 884 #include "sun4v_tlb_miss.S" 898 885 #include "sun4v_ivec.S" 899 886 #include "ktlb.S" ··· 937 926 938 927 ! 0x0000000000428000 939 928 929 + #include "hvcalls.S" 940 930 #include "systbls_64.S" 941 931 942 932 .data
+1 -1
arch/sparc/kernel/hvapi.c
··· 189 189 190 190 group = HV_GRP_CORE; 191 191 major = 1; 192 - minor = 1; 192 + minor = 6; 193 193 if (sun4v_hvapi_register(group, major, &minor)) 194 194 goto bad; 195 195
+11
arch/sparc/kernel/hvcalls.S
··· 106 106 nop 107 107 ENDPROC(sun4v_cpu_yield) 108 108 109 + /* %o0: cpuid 110 + * 111 + * returns %o0: status 112 + */ 113 + ENTRY(sun4v_cpu_poke) 114 + mov HV_FAST_CPU_POKE, %o5 115 + ta HV_FAST_TRAP 116 + retl 117 + nop 118 + ENDPROC(sun4v_cpu_poke) 119 + 109 120 /* %o0: type 110 121 * %o1: queue paddr 111 122 * %o2: num queue entries
+2
arch/sparc/kernel/ldc.c
··· 1480 1480 { 1481 1481 return __set_rx_head(lp, lp->rx_tail); 1482 1482 } 1483 + EXPORT_SYMBOL(ldc_rx_reset); 1483 1484 1484 1485 void __ldc_print(struct ldc_channel *lp, const char *caller) 1485 1486 { ··· 1494 1493 lp->tx_head, lp->tx_tail, lp->tx_num_entries, 1495 1494 lp->rcv_nxt, lp->snd_nxt); 1496 1495 } 1496 + EXPORT_SYMBOL(__ldc_print); 1497 1497 1498 1498 static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size) 1499 1499 {
+1 -1
arch/sparc/kernel/leon_pci_grpci1.c
··· 695 695 return err; 696 696 } 697 697 698 - static struct of_device_id grpci1_of_match[] = { 698 + static const struct of_device_id grpci1_of_match[] __initconst = { 699 699 { 700 700 .name = "GAISLER_PCIFBRG", 701 701 },
+1 -1
arch/sparc/kernel/leon_pci_grpci2.c
··· 886 886 return err; 887 887 } 888 888 889 - static struct of_device_id grpci2_of_match[] = { 889 + static const struct of_device_id grpci2_of_match[] __initconst = { 890 890 { 891 891 .name = "GAISLER_GRPCI2", 892 892 },
+6 -1
arch/sparc/kernel/process_64.c
··· 77 77 : "=&r" (pstate) 78 78 : "i" (PSTATE_IE)); 79 79 80 - if (!need_resched() && !cpu_is_offline(smp_processor_id())) 80 + if (!need_resched() && !cpu_is_offline(smp_processor_id())) { 81 81 sun4v_cpu_yield(); 82 + /* If resumed by cpu_poke then we need to explicitly 83 + * call scheduler_ipi(). 84 + */ 85 + scheduler_poke(); 86 + } 82 87 83 88 /* Re-enable interrupts. */ 84 89 __asm__ __volatile__(
+11 -2
arch/sparc/kernel/rtrap_64.S
··· 224 224 rdpr %otherwin, %l2 225 225 srl %l1, 3, %l1 226 226 227 - wrpr %l2, %g0, %canrestore 227 + 661: wrpr %l2, %g0, %canrestore 228 + .section .fast_win_ctrl_1insn_patch, "ax" 229 + .word 661b 230 + .word 0x89880000 ! normalw 231 + .previous 232 + 228 233 wrpr %l1, %g0, %wstate 229 234 brnz,pt %l2, user_rtt_restore 230 - wrpr %g0, %g0, %otherwin 235 + 661: wrpr %g0, %g0, %otherwin 236 + .section .fast_win_ctrl_1insn_patch, "ax" 237 + .word 661b 238 + nop 239 + .previous 231 240 232 241 ldx [%g6 + TI_FLAGS], %g3 233 242 wr %g0, ASI_AIUP, %asi
+6
arch/sparc/kernel/setup_64.c
··· 300 300 break; 301 301 } 302 302 303 + if (sun4v_chip_type != SUN4V_CHIP_NIAGARA1) { 304 + sun4v_patch_1insn_range(&__fast_win_ctrl_1insn_patch, 305 + &__fast_win_ctrl_1insn_patch_end); 306 + } 307 + 303 308 sun4v_hvapi_init(); 304 309 } 305 310 ··· 368 363 check_if_starfire(); 369 364 per_cpu_patch(); 370 365 sun4v_patch(); 366 + smp_init_cpu_poke(); 371 367 372 368 cpu = hard_smp_processor_id(); 373 369 if (cpu >= NR_CPUS) {
+77 -3
arch/sparc/kernel/smp_64.c
··· 74 74 75 75 static cpumask_t smp_commenced_mask; 76 76 77 + static DEFINE_PER_CPU(bool, poke); 78 + static bool cpu_poke; 79 + 77 80 void smp_info(struct seq_file *m) 78 81 { 79 82 int i; ··· 1442 1439 { 1443 1440 } 1444 1441 1442 + static void send_cpu_ipi(int cpu) 1443 + { 1444 + xcall_deliver((u64) &xcall_receive_signal, 1445 + 0, 0, cpumask_of(cpu)); 1446 + } 1447 + 1448 + void scheduler_poke(void) 1449 + { 1450 + if (!cpu_poke) 1451 + return; 1452 + 1453 + if (!__this_cpu_read(poke)) 1454 + return; 1455 + 1456 + __this_cpu_write(poke, false); 1457 + set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); 1458 + } 1459 + 1460 + static unsigned long send_cpu_poke(int cpu) 1461 + { 1462 + unsigned long hv_err; 1463 + 1464 + per_cpu(poke, cpu) = true; 1465 + hv_err = sun4v_cpu_poke(cpu); 1466 + if (hv_err != HV_EOK) { 1467 + per_cpu(poke, cpu) = false; 1468 + pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n", 1469 + __func__, hv_err); 1470 + } 1471 + 1472 + return hv_err; 1473 + } 1474 + 1445 1475 void smp_send_reschedule(int cpu) 1446 1476 { 1447 1477 if (cpu == smp_processor_id()) { 1448 1478 WARN_ON_ONCE(preemptible()); 1449 1479 set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); 1450 - } else { 1451 - xcall_deliver((u64) &xcall_receive_signal, 1452 - 0, 0, cpumask_of(cpu)); 1480 + return; 1453 1481 } 1482 + 1483 + /* Use cpu poke to resume idle cpu if supported. 
*/ 1484 + if (cpu_poke && idle_cpu(cpu)) { 1485 + unsigned long ret; 1486 + 1487 + ret = send_cpu_poke(cpu); 1488 + if (ret == HV_EOK) 1489 + return; 1490 + } 1491 + 1492 + /* Use IPI in following cases: 1493 + * - cpu poke not supported 1494 + * - cpu not idle 1495 + * - send_cpu_poke() returns with error 1496 + */ 1497 + send_cpu_ipi(cpu); 1498 + } 1499 + 1500 + void smp_init_cpu_poke(void) 1501 + { 1502 + unsigned long major; 1503 + unsigned long minor; 1504 + int ret; 1505 + 1506 + if (tlb_type != hypervisor) 1507 + return; 1508 + 1509 + ret = sun4v_hvapi_get(HV_GRP_CORE, &major, &minor); 1510 + if (ret) { 1511 + pr_debug("HV_GRP_CORE is not registered\n"); 1512 + return; 1513 + } 1514 + 1515 + if (major == 1 && minor >= 6) { 1516 + /* CPU POKE is registered. */ 1517 + cpu_poke = true; 1518 + return; 1519 + } 1520 + 1521 + pr_debug("CPU_POKE not supported\n"); 1454 1522 } 1455 1523 1456 1524 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
+51
arch/sparc/kernel/traps_64.c
··· 265 265 sun4v_insn_access_exception(regs, addr, type_ctx); 266 266 } 267 267 268 + bool is_no_fault_exception(struct pt_regs *regs) 269 + { 270 + unsigned char asi; 271 + u32 insn; 272 + 273 + if (get_user(insn, (u32 __user *)regs->tpc) == -EFAULT) 274 + return false; 275 + 276 + /* 277 + * Must do a little instruction decoding here in order to 278 + * decide on a course of action. The bits of interest are: 279 + * insn[31:30] = op, where 3 indicates the load/store group 280 + * insn[24:19] = op3, which identifies individual opcodes 281 + * insn[13] indicates an immediate offset 282 + * op3[4]=1 identifies alternate space instructions 283 + * op3[5:4]=3 identifies floating point instructions 284 + * op3[2]=1 identifies stores 285 + * See "Opcode Maps" in the appendix of any Sparc V9 286 + * architecture spec for full details. 287 + */ 288 + if ((insn & 0xc0800000) == 0xc0800000) { /* op=3, op3[4]=1 */ 289 + if (insn & 0x2000) /* immediate offset */ 290 + asi = (regs->tstate >> 24); /* saved %asi */ 291 + else 292 + asi = (insn >> 5); /* immediate asi */ 293 + if ((asi & 0xf2) == ASI_PNF) { 294 + if (insn & 0x1000000) { /* op3[5:4]=3 */ 295 + handle_ldf_stq(insn, regs); 296 + return true; 297 + } else if (insn & 0x200000) { /* op3[2], stores */ 298 + return false; 299 + } 300 + handle_ld_nf(insn, regs); 301 + return true; 302 + } 303 + } 304 + return false; 305 + } 306 + 268 307 void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) 269 308 { 270 309 enum ctx_state prev_state = exception_enter(); ··· 334 295 "SFAR[%016lx], going.\n", sfsr, sfar); 335 296 die_if_kernel("Dax", regs); 336 297 } 298 + 299 + if (is_no_fault_exception(regs)) 300 + return; 337 301 338 302 info.si_signo = SIGSEGV; 339 303 info.si_errno = 0; ··· 394 352 regs->tpc &= 0xffffffff; 395 353 regs->tnpc &= 0xffffffff; 396 354 } 355 + if (is_no_fault_exception(regs)) 356 + return; 357 + 397 358 info.si_signo = SIGSEGV; 398 359 info.si_errno = 0; 399 360 
info.si_code = SEGV_MAPERR; ··· 2620 2575 kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); 2621 2576 goto out; 2622 2577 } 2578 + if (is_no_fault_exception(regs)) 2579 + return; 2580 + 2623 2581 info.si_signo = SIGBUS; 2624 2582 info.si_errno = 0; 2625 2583 info.si_code = BUS_ADRALN; ··· 2645 2597 kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); 2646 2598 return; 2647 2599 } 2600 + if (is_no_fault_exception(regs)) 2601 + return; 2602 + 2648 2603 info.si_signo = SIGBUS; 2649 2604 info.si_errno = 0; 2650 2605 info.si_code = BUS_ADRALN;
+1 -1
arch/sparc/kernel/tsb.S
··· 117 117 /* Valid PTE is now in %g5. */ 118 118 119 119 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) 120 - sethi %uhi(_PAGE_PMD_HUGE), %g7 120 + sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7 121 121 sllx %g7, 32, %g7 122 122 123 123 andcc %g5, %g7, %g0
+1
arch/sparc/kernel/vio.c
··· 246 246 247 247 return node; 248 248 } 249 + EXPORT_SYMBOL(vio_vdev_node); 249 250 250 251 static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, 251 252 struct vio_dev *vdev)
+9 -3
arch/sparc/kernel/viohs.c
··· 814 814 case VDEV_NETWORK_SWITCH: 815 815 case VDEV_DISK: 816 816 case VDEV_DISK_SERVER: 817 + case VDEV_CONSOLE_CON: 817 818 break; 818 819 819 820 default: 820 821 return -EINVAL; 821 822 } 822 823 823 - if (!ops || !ops->send_attr || !ops->handle_attr || 824 - !ops->handshake_complete) 825 - return -EINVAL; 824 + if (dev_class == VDEV_NETWORK || 825 + dev_class == VDEV_NETWORK_SWITCH || 826 + dev_class == VDEV_DISK || 827 + dev_class == VDEV_DISK_SERVER) { 828 + if (!ops || !ops->send_attr || !ops->handle_attr || 829 + !ops->handshake_complete) 830 + return -EINVAL; 831 + } 826 832 827 833 if (!ver_table || ver_table_size < 0) 828 834 return -EINVAL;
+10
arch/sparc/kernel/vmlinux.lds.S
··· 154 154 *(.get_tick_patch) 155 155 __get_tick_patch_end = .; 156 156 } 157 + .pud_huge_patch : { 158 + __pud_huge_patch = .; 159 + *(.pud_huge_patch) 160 + __pud_huge_patch_end = .; 161 + } 162 + .fast_win_ctrl_1insn_patch : { 163 + __fast_win_ctrl_1insn_patch = .; 164 + *(.fast_win_ctrl_1insn_patch) 165 + __fast_win_ctrl_1insn_patch_end = .; 166 + } 157 167 PERCPU_SECTION(SMP_CACHE_BYTES) 158 168 159 169 #ifdef CONFIG_JUMP_LABEL
+40
arch/sparc/lib/M7copy_from_user.S
··· 1 + /* 2 + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. 3 + * 4 + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 5 + */ 6 + 7 + 8 + #define EX_LD(x, y) \ 9 + 98: x; \ 10 + .section __ex_table,"a"; \ 11 + .align 4; \ 12 + .word 98b, y; \ 13 + .text; \ 14 + .align 4; 15 + 16 + #define EX_LD_FP(x, y) \ 17 + 98: x; \ 18 + .section __ex_table,"a"; \ 19 + .align 4; \ 20 + .word 98b, y##_fp; \ 21 + .text; \ 22 + .align 4; 23 + 24 + #ifndef ASI_AIUS 25 + #define ASI_AIUS 0x11 26 + #endif 27 + 28 + #define FUNC_NAME M7copy_from_user 29 + #define LOAD(type,addr,dest) type##a [addr] %asi, dest 30 + #define EX_RETVAL(x) 0 31 + 32 + #ifdef __KERNEL__ 33 + #define PREAMBLE \ 34 + rd %asi, %g1; \ 35 + cmp %g1, ASI_AIUS; \ 36 + bne,pn %icc, raw_copy_in_user; \ 37 + nop 38 + #endif 39 + 40 + #include "M7memcpy.S"
+51
arch/sparc/lib/M7copy_to_user.S
··· 1 + /* 2 + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. 3 + * 4 + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 5 + */ 6 + 7 + 8 + #define EX_ST(x, y) \ 9 + 98: x; \ 10 + .section __ex_table,"a"; \ 11 + .align 4; \ 12 + .word 98b, y; \ 13 + .text; \ 14 + .align 4; 15 + 16 + #define EX_ST_FP(x, y) \ 17 + 98: x; \ 18 + .section __ex_table,"a"; \ 19 + .align 4; \ 20 + .word 98b, y##_fp; \ 21 + .text; \ 22 + .align 4; 23 + 24 + 25 + #ifndef ASI_AIUS 26 + #define ASI_AIUS 0x11 27 + #endif 28 + 29 + #ifndef ASI_BLK_INIT_QUAD_LDD_AIUS 30 + #define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 31 + #endif 32 + 33 + #define FUNC_NAME M7copy_to_user 34 + #define STORE(type,src,addr) type##a src, [addr] %asi 35 + #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS 36 + #define STORE_MRU_ASI ASI_ST_BLKINIT_MRU_S 37 + #define EX_RETVAL(x) 0 38 + 39 + #ifdef __KERNEL__ 40 + /* Writing to %asi is _expensive_ so we hardcode it. 41 + * Reading %asi to check for KERNEL_DS is comparatively 42 + * cheap. 43 + */ 44 + #define PREAMBLE \ 45 + rd %asi, %g1; \ 46 + cmp %g1, ASI_AIUS; \ 47 + bne,pn %icc, raw_copy_in_user; \ 48 + nop 49 + #endif 50 + 51 + #include "M7memcpy.S"
+923
arch/sparc/lib/M7memcpy.S
··· 1 + /* 2 + * M7memcpy: Optimized SPARC M7 memcpy 3 + * 4 + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 5 + */ 6 + 7 + .file "M7memcpy.S" 8 + 9 + /* 10 + * memcpy(s1, s2, len) 11 + * 12 + * Copy s2 to s1, always copy n bytes. 13 + * Note: this C code does not work for overlapped copies. 14 + * 15 + * Fast assembler language version of the following C-program for memcpy 16 + * which represents the `standard' for the C-library. 17 + * 18 + * void * 19 + * memcpy(void *s, const void *s0, size_t n) 20 + * { 21 + * if (n != 0) { 22 + * char *s1 = s; 23 + * const char *s2 = s0; 24 + * do { 25 + * *s1++ = *s2++; 26 + * } while (--n != 0); 27 + * } 28 + * return (s); 29 + * } 30 + * 31 + * 32 + * SPARC T7/M7 Flow : 33 + * 34 + * if (count < SMALL_MAX) { 35 + * if count < SHORTCOPY (SHORTCOPY=3) 36 + * copy bytes; exit with dst addr 37 + * if src & dst aligned on word boundary but not long word boundary, 38 + * copy with ldw/stw; branch to finish_up 39 + * if src & dst aligned on long word boundary 40 + * copy with ldx/stx; branch to finish_up 41 + * if src & dst not aligned and length <= SHORTCHECK (SHORTCHECK=14) 42 + * copy bytes; exit with dst addr 43 + * move enough bytes to get src to word boundary 44 + * if dst now on word boundary 45 + * move_words: 46 + * copy words; branch to finish_up 47 + * if dst now on half word boundary 48 + * load words, shift half words, store words; branch to finish_up 49 + * if dst on byte 1 50 + * load words, shift 3 bytes, store words; branch to finish_up 51 + * if dst on byte 3 52 + * load words, shift 1 byte, store words; branch to finish_up 53 + * finish_up: 54 + * copy bytes; exit with dst addr 55 + * } else { More than SMALL_MAX bytes 56 + * move bytes until dst is on long word boundary 57 + * if( src is on long word boundary ) { 58 + * if (count < MED_MAX) { 59 + * finish_long: src/dst aligned on 8 bytes 60 + * copy with ldx/stx in 8-way unrolled loop; 61 + * copy final 0-63 bytes; exit with dst 
addr 62 + * } else { src/dst aligned; count > MED_MAX 63 + * align dst on 64 byte boundary; for main data movement: 64 + * prefetch src data to L2 cache; let HW prefetch move data to L1 cache 65 + * Use BIS (block initializing store) to avoid copying store cache 66 + * lines from memory. But pre-store first element of each cache line 67 + * ST_CHUNK lines in advance of the rest of that cache line. That 68 + * gives time for replacement cache lines to be written back without 69 + * excess STQ and Miss Buffer filling. Repeat until near the end, 70 + * then finish up storing before going to finish_long. 71 + * } 72 + * } else { src/dst not aligned on 8 bytes 73 + * if src is word aligned and count < MED_WMAX 74 + * move words in 8-way unrolled loop 75 + * move final 0-31 bytes; exit with dst addr 76 + * if count < MED_UMAX 77 + * use alignaddr/faligndata combined with ldd/std in 8-way 78 + * unrolled loop to move data. 79 + * go to unalign_done 80 + * else 81 + * setup alignaddr for faligndata instructions 82 + * align dst on 64 byte boundary; prefetch src data to L1 cache 83 + * loadx8, falign, block-store, prefetch loop 84 + * (only use block-init-store when src/dst on 8 byte boundaries.) 85 + * unalign_done: 86 + * move remaining bytes for unaligned cases. exit with dst addr. 
87 + * } 88 + * 89 + */ 90 + 91 + #include <asm/visasm.h> 92 + #include <asm/asi.h> 93 + 94 + #if !defined(EX_LD) && !defined(EX_ST) 95 + #define NON_USER_COPY 96 + #endif 97 + 98 + #ifndef EX_LD 99 + #define EX_LD(x,y) x 100 + #endif 101 + #ifndef EX_LD_FP 102 + #define EX_LD_FP(x,y) x 103 + #endif 104 + 105 + #ifndef EX_ST 106 + #define EX_ST(x,y) x 107 + #endif 108 + #ifndef EX_ST_FP 109 + #define EX_ST_FP(x,y) x 110 + #endif 111 + 112 + #ifndef EX_RETVAL 113 + #define EX_RETVAL(x) x 114 + #endif 115 + 116 + #ifndef LOAD 117 + #define LOAD(type,addr,dest) type [addr], dest 118 + #endif 119 + 120 + #ifndef STORE 121 + #define STORE(type,src,addr) type src, [addr] 122 + #endif 123 + 124 + /* 125 + * ASI_BLK_INIT_QUAD_LDD_P/ASI_BLK_INIT_QUAD_LDD_S marks the cache 126 + * line as "least recently used" which means if many threads are 127 + * active, it has a high probability of being pushed out of the cache 128 + * between the first initializing store and the final stores. 129 + * Thus, we use ASI_ST_BLKINIT_MRU_P/ASI_ST_BLKINIT_MRU_S which 130 + * marks the cache line as "most recently used" for all 131 + * but the last cache line 132 + */ 133 + #ifndef STORE_ASI 134 + #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA 135 + #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P 136 + #else 137 + #define STORE_ASI 0x80 /* ASI_P */ 138 + #endif 139 + #endif 140 + 141 + #ifndef STORE_MRU_ASI 142 + #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA 143 + #define STORE_MRU_ASI ASI_ST_BLKINIT_MRU_P 144 + #else 145 + #define STORE_MRU_ASI 0x80 /* ASI_P */ 146 + #endif 147 + #endif 148 + 149 + #ifndef STORE_INIT 150 + #define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI 151 + #endif 152 + 153 + #ifndef STORE_INIT_MRU 154 + #define STORE_INIT_MRU(src,addr) stxa src, [addr] STORE_MRU_ASI 155 + #endif 156 + 157 + #ifndef FUNC_NAME 158 + #define FUNC_NAME M7memcpy 159 + #endif 160 + 161 + #ifndef PREAMBLE 162 + #define PREAMBLE 163 + #endif 164 + 165 + #define BLOCK_SIZE 64 166 + #define SHORTCOPY 3 167 + 
#define SHORTCHECK 14
#define SHORT_LONG 64 /* max copy for short longword-aligned case */
			/* must be at least 64 */
#define SMALL_MAX 128
#define MED_UMAX 1024 /* max copy for medium un-aligned case */
#define MED_WMAX 1024 /* max copy for medium word-aligned case */
#define MED_MAX 1024 /* max copy for medium longword-aligned case */
#define ST_CHUNK 24 /* ST_CHUNK - block of values for BIS Store */
#define ALIGN_PRE 24 /* distance for aligned prefetch loop */

/*
 * FUNC_NAME(dst [%o0], src [%o1], len [%o2])
 *
 * Returns the original dst pointer: %o0 is saved in %g1 on entry and
 * restored through EX_RETVAL() at every exit.  When this file is built
 * with EX_LD/EX_ST defined (copy_{to,from}_user flavors), each load and
 * store carries an exception-table fixup label (memcpy_retl_*) that
 * encodes how many bytes remain uncopied; in the plain memcpy build the
 * EX_* macros expand to the bare instruction (NON_USER_COPY).
 *
 * Scratch registers used below: %o3/%o4/%o5/%g2/%g7 for integer paths,
 * %f0-%f30 (via VISEntryHalf) for the unaligned block-copy loops.
 */
	.register %g2,#scratch

	.section ".text"
	.global FUNC_NAME
	.type FUNC_NAME, #function
	.align 16
FUNC_NAME:
	! sanity check: trap (tne 5) if len >= 2^31, i.e. a negative or
	! absurdly huge length was passed in
	srlx %o2, 31, %g2
	cmp %g2, 0
	tne %xcc, 5
	PREAMBLE
	mov %o0, %g1 ! save %o0
	brz,pn %o2, .Lsmallx
	cmp %o2, 3
	ble,pn %icc, .Ltiny_cp
	cmp %o2, 19
	ble,pn %icc, .Lsmall_cp
	or %o0, %o1, %g2
	cmp %o2, SMALL_MAX
	bl,pn %icc, .Lmedium_cp
	nop

.Lmedium:
	neg %o0, %o5
	andcc %o5, 7, %o5 ! bytes till DST 8 byte aligned
	brz,pt %o5, .Ldst_aligned_on_8

	! %o5 has the bytes to be written in partial store.
	sub %o2, %o5, %o2
	sub %o1, %o0, %o1 ! %o1 gets the difference
7:	! dst aligning loop
	add %o1, %o0, %o4
	EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte
	subcc %o5, 1, %o5
	EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1)
	bgu,pt %xcc, 7b
	add %o0, 1, %o0 ! advance dst
	add %o1, %o0, %o1 ! restore %o1
.Ldst_aligned_on_8:
	andcc %o1, 7, %o5
	brnz,pt %o5, .Lsrc_dst_unaligned_on_8
	nop

.Lsrc_dst_aligned_on_8:
	! check if we are copying MED_MAX or more bytes
	set MED_MAX, %o3
	cmp %o2, %o3 ! limit to store buffer size
	bgu,pn %xcc, .Llarge_align8_copy
	nop

/*
 * Special case for handling when src and dest are both long word aligned
 * and total data to move is less than MED_MAX bytes
 */
.Lmedlong:
	subcc %o2, 63, %o2 ! adjust length to allow cc test
	ble,pn %xcc, .Lmedl63 ! skip big loop if less than 64 bytes
	nop
.Lmedl64:
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63) ! load
	subcc %o2, 64, %o2 ! decrement length count
	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64) ! and store
	EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64
	EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56)
	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_63_48)
	EX_ST(STORE(stx, %o4, %o0+16), memcpy_retl_o2_plus_63_48)
	EX_LD(LOAD(ldx, %o1+24, %o3), memcpy_retl_o2_plus_63_40)
	EX_ST(STORE(stx, %o3, %o0+24), memcpy_retl_o2_plus_63_40)
	EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_63_32)! load and store
	EX_ST(STORE(stx, %o4, %o0+32), memcpy_retl_o2_plus_63_32)
	EX_LD(LOAD(ldx, %o1+40, %o3), memcpy_retl_o2_plus_63_24)! a block of 64
	add %o1, 64, %o1 ! increase src ptr by 64
	EX_ST(STORE(stx, %o3, %o0+40), memcpy_retl_o2_plus_63_24)
	EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_63_16)
	add %o0, 64, %o0 ! increase dst ptr by 64
	EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_63_16)
	EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_63_8)
	bgu,pt %xcc, .Lmedl64 ! repeat if at least 64 bytes left
	EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_63_8)
.Lmedl63:
	addcc %o2, 32, %o2 ! adjust remaining count
	ble,pt %xcc, .Lmedl31 ! to skip if 31 or fewer bytes left
	nop
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_31) ! load
	sub %o2, 32, %o2 ! decrement length count
	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_31_32) ! and store
	EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_31_24) ! a block of 32
	add %o1, 32, %o1 ! increase src ptr by 32
	EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_31_24)
	EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
	add %o0, 32, %o0 ! increase dst ptr by 32
	EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_31_16)
	EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_31_8)
	EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_31_8)
.Lmedl31:
	addcc %o2, 16, %o2 ! adjust remaining count
	ble,pt %xcc, .Lmedl15 ! skip if 15 or fewer bytes left
	nop !
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_15)
	add %o1, 16, %o1 ! increase src ptr by 16
	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_15)
	sub %o2, 16, %o2 ! decrease count by 16
	EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_15_8)
	add %o0, 16, %o0 ! increase dst ptr by 16
	EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_15_8)
.Lmedl15:
	addcc %o2, 15, %o2 ! restore count
	bz,pt %xcc, .Lsmallx ! exit if finished
	cmp %o2, 8
	blt,pt %xcc, .Lmedw7 ! skip if 7 or fewer bytes left
	tst %o2
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2) ! load 8 bytes
	add %o1, 8, %o1 ! increase src ptr by 8
	add %o0, 8, %o0 ! increase dst ptr by 8
	subcc %o2, 8, %o2 ! decrease count by 8
	bnz,pn %xcc, .Lmedw7
	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) ! and store 8
	retl
	mov EX_RETVAL(%g1), %o0 ! restore %o0

	.align 16
.Lsrc_dst_unaligned_on_8:
	! DST is 8-byte aligned, src is not
2:
	andcc %o1, 0x3, %o5 ! test word alignment
	bnz,pt %xcc, .Lunalignsetup ! branch to skip if not word aligned
	nop

/*
 * Handle all cases where src and dest are aligned on word
 * boundaries. Use unrolled loops for better performance.
 * This option wins over standard large data move when
 * source and destination is in cache for .Lmedium
 * to short data moves.
 */
	set MED_WMAX, %o3
	cmp %o2, %o3 ! limit to store buffer size
	bge,pt %xcc, .Lunalignrejoin ! otherwise rejoin main loop
	nop

	subcc %o2, 31, %o2 ! adjust length to allow cc test
			! for end of loop
	ble,pt %xcc, .Lmedw31 ! skip big loop if fewer than 32 bytes left
.Lmedw32:
	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_31)! move a block of 32
	sllx %o4, 32, %o5
	EX_LD(LOAD(ld, %o1+4, %o4), memcpy_retl_o2_plus_31)
	or %o4, %o5, %o5
	EX_ST(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_31)
	subcc %o2, 32, %o2 ! decrement length count
	EX_LD(LOAD(ld, %o1+8, %o4), memcpy_retl_o2_plus_31_24)
	sllx %o4, 32, %o5
	EX_LD(LOAD(ld, %o1+12, %o4), memcpy_retl_o2_plus_31_24)
	or %o4, %o5, %o5
	EX_ST(STORE(stx, %o5, %o0+8), memcpy_retl_o2_plus_31_24)
	add %o1, 32, %o1 ! increase src ptr by 32
	EX_LD(LOAD(ld, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
	sllx %o4, 32, %o5
	EX_LD(LOAD(ld, %o1-12, %o4), memcpy_retl_o2_plus_31_16)
	or %o4, %o5, %o5
	EX_ST(STORE(stx, %o5, %o0+16), memcpy_retl_o2_plus_31_16)
	add %o0, 32, %o0 ! increase dst ptr by 32
	EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_31_8)
	sllx %o4, 32, %o5
	EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_31_8)
	or %o4, %o5, %o5
	bgu,pt %xcc, .Lmedw32 ! repeat if at least 32 bytes left
	EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_31_8)
.Lmedw31:
	addcc %o2, 31, %o2 ! restore count

	bz,pt %xcc, .Lsmallx ! exit if finished
	nop
	cmp %o2, 16
	blt,pt %xcc, .Lmedw15
	nop
	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2)! move a block of 16 bytes
	sllx %o4, 32, %o5
	subcc %o2, 16, %o2 ! decrement length count
	EX_LD(LOAD(ld, %o1+4, %o4), memcpy_retl_o2_plus_16)
	or %o4, %o5, %o5
	EX_ST(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_16)
	add %o1, 16, %o1 ! increase src ptr by 16
	EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_8)
	add %o0, 16, %o0 ! increase dst ptr by 16
	sllx %o4, 32, %o5
	EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_8)
	or %o4, %o5, %o5
	EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_8)
.Lmedw15:
	bz,pt %xcc, .Lsmallx ! exit if finished
	cmp %o2, 8
	blt,pn %xcc, .Lmedw7 ! skip if 7 or fewer bytes left
	tst %o2
	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2) ! load 4 bytes
	subcc %o2, 8, %o2 ! decrease count by 8
	EX_ST(STORE(stw, %o4, %o0), memcpy_retl_o2_plus_8)! and store 4 bytes
	add %o1, 8, %o1 ! increase src ptr by 8
	EX_LD(LOAD(ld, %o1-4, %o3), memcpy_retl_o2_plus_4) ! load 4 bytes
	add %o0, 8, %o0 ! increase dst ptr by 8
	EX_ST(STORE(stw, %o3, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
	bz,pt %xcc, .Lsmallx ! exit if finished
.Lmedw7: ! count is ge 1, less than 8
	cmp %o2, 4 ! check for 4 bytes left
	blt,pn %xcc, .Lsmallleft3 ! skip if 3 or fewer bytes left
	nop !
	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2) ! load 4 bytes
	add %o1, 4, %o1 ! increase src ptr by 4
	add %o0, 4, %o0 ! increase dst ptr by 4
	subcc %o2, 4, %o2 ! decrease count by 4
	bnz .Lsmallleft3
	EX_ST(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
	retl
	mov EX_RETVAL(%g1), %o0

	.align 16
.Llarge_align8_copy: ! Src and dst share 8 byte alignment
	! align dst to 64 byte boundary
	andcc %o0, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned
	brz,pn %o3, .Laligned_to_64
	andcc %o0, 8, %o3 ! odd long words to move?
	brz,pt %o3, .Laligned_to_16
	nop
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)
	sub %o2, 8, %o2
	add %o1, 8, %o1 ! increment src ptr
	add %o0, 8, %o0 ! increment dst ptr
	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
.Laligned_to_16:
	andcc %o0, 16, %o3 ! pair of long words to move?
	brz,pt %o3, .Laligned_to_32
	nop
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)
	sub %o2, 16, %o2
	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_16)
	add %o1, 16, %o1 ! increment src ptr
	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
	add %o0, 16, %o0 ! increment dst ptr
	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
.Laligned_to_32:
	andcc %o0, 32, %o3 ! four long words to move?
	brz,pt %o3, .Laligned_to_64
	nop
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)
	sub %o2, 32, %o2
	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_32)
	EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_24)
	EX_ST(STORE(stx, %o4, %o0+8), memcpy_retl_o2_plus_24)
	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_16)
	EX_ST(STORE(stx, %o4, %o0+16), memcpy_retl_o2_plus_16)
	add %o1, 32, %o1 ! increment src ptr
	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
	add %o0, 32, %o0 ! increment dst ptr
	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
.Laligned_to_64:
	!
	! Using block init store (BIS) instructions to avoid fetching cache
	! lines from memory. Use ST_CHUNK stores to first element of each cache
	! line (similar to prefetching) to avoid overfilling STQ or miss buffers.
	! Gives existing cache lines time to be moved out of L1/L2/L3 cache.
	! Initial stores using MRU version of BIS to keep cache line in
	! cache until we are ready to store final element of cache line.
	! Then store last element using the LRU version of BIS.
	!
	andn %o2, 0x3f, %o5 ! %o5 is multiple of block size
	and %o2, 0x3f, %o2 ! residue bytes in %o2
	!
	! We use STORE_MRU_ASI for the first seven stores to each cache line
	! followed by STORE_ASI (mark as LRU) for the last store. That
	! mixed approach reduces the probability that the cache line is removed
	! before we finish setting it, while minimizing the effects on
	! other cached values during a large memcpy
	!
	! ST_CHUNK batches up initial BIS operations for several cache lines
	! to allow multiple requests to not be blocked by overflowing the
	! store miss buffer. Then the matching stores for all those
	! BIS operations are executed.
	!

	sub %o0, 8, %o0 ! adjust %o0 for ASI alignment
.Lalign_loop:
	cmp %o5, ST_CHUNK*64
	blu,pt %xcc, .Lalign_loop_fin
	mov ST_CHUNK,%o3
.Lalign_loop_start:
	prefetch [%o1 + (ALIGN_PRE * BLOCK_SIZE)], 21
	subcc %o3, 1, %o3
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_o5)
	add %o1, 64, %o1
	add %o0, 8, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	bgu %xcc,.Lalign_loop_start
	add %o0, 56, %o0

	mov ST_CHUNK,%o3
	sllx %o3, 6, %o4 ! ST_CHUNK*64
	sub %o1, %o4, %o1 ! reset %o1
	sub %o0, %o4, %o0 ! reset %o0

.Lalign_loop_rest:
	EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_o5)
	add %o0, 16, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_o5)
	add %o0, 8, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	subcc %o3, 1, %o3
	EX_LD(LOAD(ldx, %o1+24, %o4), memcpy_retl_o2_plus_o5)
	add %o0, 8, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_o5)
	add %o0, 8, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1+40, %o4), memcpy_retl_o2_plus_o5)
	add %o0, 8, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1+48, %o4), memcpy_retl_o2_plus_o5)
	add %o1, 64, %o1
	add %o0, 8, %o0
	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
	add %o0, 8, %o0
	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5)
	sub %o5, 64, %o5
	bgu %xcc,.Lalign_loop_rest
	! mark cache line as LRU
	EX_ST(STORE_INIT(%o4, %o0), memcpy_retl_o2_plus_o5_plus_64)

	cmp %o5, ST_CHUNK*64
	bgu,pt %xcc, .Lalign_loop_start
	mov ST_CHUNK,%o3

	cmp %o5, 0
	beq .Lalign_done
	nop
.Lalign_loop_fin:
	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_o5)
	EX_ST(STORE(stx, %o4, %o0+8), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_o5)
	EX_ST(STORE(stx, %o4, %o0+8+8), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_o5)
	EX_ST(STORE(stx, %o4, %o0+8+16), memcpy_retl_o2_plus_o5)
	subcc %o5, 64, %o5
	EX_LD(LOAD(ldx, %o1+24, %o4), memcpy_retl_o2_plus_o5_64)
	EX_ST(STORE(stx, %o4, %o0+8+24), memcpy_retl_o2_plus_o5_64)
	EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_o5_64)
	EX_ST(STORE(stx, %o4, %o0+8+32), memcpy_retl_o2_plus_o5_64)
	EX_LD(LOAD(ldx, %o1+40, %o4), memcpy_retl_o2_plus_o5_64)
	EX_ST(STORE(stx, %o4, %o0+8+40), memcpy_retl_o2_plus_o5_64)
	EX_LD(LOAD(ldx, %o1+48, %o4), memcpy_retl_o2_plus_o5_64)
	add %o1, 64, %o1
	EX_ST(STORE(stx, %o4, %o0+8+48), memcpy_retl_o2_plus_o5_64)
	add %o0, 64, %o0
	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5_64)
	bgu %xcc,.Lalign_loop_fin
	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_o5_64)

.Lalign_done:
	add %o0, 8, %o0 ! restore %o0 from ASI alignment
	membar #StoreStore
	sub %o2, 63, %o2 ! adjust length to allow cc test
	ba .Lmedl63 ! in .Lmedl63
	nop

	.align 16
	! Dst is on 8 byte boundary; src is not; remaining count > SMALL_MAX
.Lunalignsetup:
.Lunalignrejoin:
	mov %g1, %o3 ! save %g1 as VISEntryHalf clobbers it
#ifdef NON_USER_COPY
	VISEntryHalfFast(.Lmedium_vis_entry_fail_cp)
#else
	VISEntryHalf
#endif
	mov %o3, %g1 ! restore %g1

	set MED_UMAX, %o3
	cmp %o2, %o3 ! check for .Lmedium unaligned limit
	bge,pt %xcc,.Lunalign_large
	prefetch [%o1 + (4 * BLOCK_SIZE)], 20
	andn %o2, 0x3f, %o5 ! %o5 is multiple of block size
	and %o2, 0x3f, %o2 ! residue bytes in %o2
	cmp %o2, 8 ! Ensure we do not load beyond
	bgt .Lunalign_adjust ! end of source buffer
	andn %o1, 0x7, %o4 ! %o4 has long word aligned src address
	add %o2, 64, %o2 ! adjust to leave loop
	sub %o5, 64, %o5 ! early if necessary
.Lunalign_adjust:
	alignaddr %o1, %g0, %g0 ! generate %gsr
	add %o1, %o5, %o1 ! advance %o1 to after blocks
	EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5)
.Lunalign_loop:
	EX_LD_FP(LOAD(ldd, %o4+8, %f2), memcpy_retl_o2_plus_o5)
	faligndata %f0, %f2, %f16
	EX_LD_FP(LOAD(ldd, %o4+16, %f4), memcpy_retl_o2_plus_o5)
	subcc %o5, BLOCK_SIZE, %o5
	EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5_plus_64)
	faligndata %f2, %f4, %f18
	EX_LD_FP(LOAD(ldd, %o4+24, %f6), memcpy_retl_o2_plus_o5_plus_56)
	EX_ST_FP(STORE(std, %f18, %o0+8), memcpy_retl_o2_plus_o5_plus_56)
	faligndata %f4, %f6, %f20
	EX_LD_FP(LOAD(ldd, %o4+32, %f8), memcpy_retl_o2_plus_o5_plus_48)
	EX_ST_FP(STORE(std, %f20, %o0+16), memcpy_retl_o2_plus_o5_plus_48)
	faligndata %f6, %f8, %f22
	EX_LD_FP(LOAD(ldd, %o4+40, %f10), memcpy_retl_o2_plus_o5_plus_40)
	EX_ST_FP(STORE(std, %f22, %o0+24), memcpy_retl_o2_plus_o5_plus_40)
	faligndata %f8, %f10, %f24
	EX_LD_FP(LOAD(ldd, %o4+48, %f12), memcpy_retl_o2_plus_o5_plus_32)
	EX_ST_FP(STORE(std, %f24, %o0+32), memcpy_retl_o2_plus_o5_plus_32)
	faligndata %f10, %f12, %f26
	EX_LD_FP(LOAD(ldd, %o4+56, %f14), memcpy_retl_o2_plus_o5_plus_24)
	add %o4, BLOCK_SIZE, %o4
	EX_ST_FP(STORE(std, %f26, %o0+40), memcpy_retl_o2_plus_o5_plus_24)
	faligndata %f12, %f14, %f28
	EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5_plus_16)
	EX_ST_FP(STORE(std, %f28, %o0+48), memcpy_retl_o2_plus_o5_plus_16)
	faligndata %f14, %f0, %f30
	EX_ST_FP(STORE(std, %f30, %o0+56), memcpy_retl_o2_plus_o5_plus_8)
	add %o0, BLOCK_SIZE, %o0
	bgu,pt %xcc, .Lunalign_loop
	prefetch [%o4 + (5 * BLOCK_SIZE)], 20
	ba .Lunalign_done
	nop

.Lunalign_large:
	andcc %o0, 0x3f, %o3 ! is dst 64-byte block aligned?
	bz %xcc, .Lunalignsrc
	sub %o3, 64, %o3 ! %o3 will be multiple of 8
	neg %o3 ! bytes until dest is 64 byte aligned
	sub %o2, %o3, %o2 ! update cnt with bytes to be moved
	! Move bytes according to source alignment
	andcc %o1, 0x1, %o5
	bnz %xcc, .Lunalignbyte ! check for byte alignment
	nop
	andcc %o1, 2, %o5 ! check for half word alignment
	bnz %xcc, .Lunalignhalf
	nop
	! Src is word aligned
.Lunalignword:
	EX_LD_FP(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_o3) ! load 4 bytes
	add %o1, 8, %o1 ! increase src ptr by 8
	EX_ST_FP(STORE(stw, %o4, %o0), memcpy_retl_o2_plus_o3) ! and store 4
	subcc %o3, 8, %o3 ! decrease count by 8
	EX_LD_FP(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_o3_plus_4)! load 4
	add %o0, 8, %o0 ! increase dst ptr by 8
	bnz %xcc, .Lunalignword
	EX_ST_FP(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_o3_plus_4)
	ba .Lunalignsrc
	nop

	! Src is half-word aligned
.Lunalignhalf:
	EX_LD_FP(LOAD(lduh, %o1, %o4), memcpy_retl_o2_plus_o3) ! load 2 bytes
	sllx %o4, 32, %o5 ! shift left
	EX_LD_FP(LOAD(lduw, %o1+2, %o4), memcpy_retl_o2_plus_o3)
	or %o4, %o5, %o5
	sllx %o5, 16, %o5
	EX_LD_FP(LOAD(lduh, %o1+6, %o4), memcpy_retl_o2_plus_o3)
	or %o4, %o5, %o5
	EX_ST_FP(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_o3)
	add %o1, 8, %o1
	subcc %o3, 8, %o3
	bnz %xcc, .Lunalignhalf
	add %o0, 8, %o0
	ba .Lunalignsrc
	nop

	! Src is Byte aligned
.Lunalignbyte:
	sub %o0, %o1, %o0 ! share pointer advance
.Lunalignbyte_loop:
	EX_LD_FP(LOAD(ldub, %o1, %o4), memcpy_retl_o2_plus_o3)
	sllx %o4, 56, %o5
	EX_LD_FP(LOAD(lduh, %o1+1, %o4), memcpy_retl_o2_plus_o3)
	sllx %o4, 40, %o4
	or %o4, %o5, %o5
	EX_LD_FP(LOAD(lduh, %o1+3, %o4), memcpy_retl_o2_plus_o3)
	sllx %o4, 24, %o4
	or %o4, %o5, %o5
	EX_LD_FP(LOAD(lduh, %o1+5, %o4), memcpy_retl_o2_plus_o3)
	sllx %o4, 8, %o4
	or %o4, %o5, %o5
	EX_LD_FP(LOAD(ldub, %o1+7, %o4), memcpy_retl_o2_plus_o3)
	or %o4, %o5, %o5
	add %o0, %o1, %o0
	EX_ST_FP(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_o3)
	sub %o0, %o1, %o0
	subcc %o3, 8, %o3
	bnz %xcc, .Lunalignbyte_loop
	add %o1, 8, %o1
	add %o0,%o1, %o0 ! restore pointer

	! Destination is now block (64 byte aligned)
.Lunalignsrc:
	andn %o2, 0x3f, %o5 ! %o5 is multiple of block size
	and %o2, 0x3f, %o2 ! residue bytes in %o2
	add %o2, 64, %o2 ! Ensure we do not load beyond
	sub %o5, 64, %o5 ! end of source buffer

	andn %o1, 0x7, %o4 ! %o4 has long word aligned src address
	alignaddr %o1, %g0, %g0 ! generate %gsr
	add %o1, %o5, %o1 ! advance %o1 to after blocks

	EX_LD_FP(LOAD(ldd, %o4, %f14), memcpy_retl_o2_plus_o5)
	add %o4, 8, %o4
.Lunalign_sloop:
	EX_LD_FP(LOAD(ldd, %o4, %f16), memcpy_retl_o2_plus_o5)
	faligndata %f14, %f16, %f0
	EX_LD_FP(LOAD(ldd, %o4+8, %f18), memcpy_retl_o2_plus_o5)
	faligndata %f16, %f18, %f2
	EX_LD_FP(LOAD(ldd, %o4+16, %f20), memcpy_retl_o2_plus_o5)
	faligndata %f18, %f20, %f4
	EX_ST_FP(STORE(std, %f0, %o0), memcpy_retl_o2_plus_o5)
	subcc %o5, 64, %o5
	EX_LD_FP(LOAD(ldd, %o4+24, %f22), memcpy_retl_o2_plus_o5_plus_56)
	faligndata %f20, %f22, %f6
	EX_ST_FP(STORE(std, %f2, %o0+8), memcpy_retl_o2_plus_o5_plus_56)
	EX_LD_FP(LOAD(ldd, %o4+32, %f24), memcpy_retl_o2_plus_o5_plus_48)
	faligndata %f22, %f24, %f8
	EX_ST_FP(STORE(std, %f4, %o0+16), memcpy_retl_o2_plus_o5_plus_48)
	EX_LD_FP(LOAD(ldd, %o4+40, %f26), memcpy_retl_o2_plus_o5_plus_40)
	faligndata %f24, %f26, %f10
	EX_ST_FP(STORE(std, %f6, %o0+24), memcpy_retl_o2_plus_o5_plus_40)
	EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_40)
	faligndata %f26, %f28, %f12
	EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_40)
	add %o4, 64, %o4
	EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40)
	faligndata %f28, %f30, %f14
	EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_40)
	EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_40)
	add %o0, 64, %o0
	EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40)
	fsrc2 %f30, %f14
	bgu,pt %xcc, .Lunalign_sloop
	prefetch [%o4 + (8 * BLOCK_SIZE)], 20

.Lunalign_done:
	! Handle trailing bytes, 64 to 127
	! Dest long word aligned, Src not long word aligned
	cmp %o2, 15
	bleu %xcc, .Lunalign_short

	andn %o2, 0x7, %o5 ! %o5 is multiple of 8
	and %o2, 0x7, %o2 ! residue bytes in %o2
	add %o2, 8, %o2
	sub %o5, 8, %o5 ! insure we do not load past end of src
	andn %o1, 0x7, %o4 ! %o4 has long word aligned src address
	add %o1, %o5, %o1 ! advance %o1 to after multiple of 8
	EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5)! fetch partialword
.Lunalign_by8:
	EX_LD_FP(LOAD(ldd, %o4+8, %f2), memcpy_retl_o2_plus_o5)
	add %o4, 8, %o4
	faligndata %f0, %f2, %f16
	subcc %o5, 8, %o5
	EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5)
	fsrc2 %f2, %f0
	bgu,pt %xcc, .Lunalign_by8
	add %o0, 8, %o0

.Lunalign_short:
#ifdef NON_USER_COPY
	VISExitHalfFast
#else
	VISExitHalf
#endif
	ba .Lsmallrest
	nop

/*
 * This is a special case of nested memcpy. This can happen when kernel
 * calls unaligned memcpy back to back without saving FP registers. We need
 * traps(context switch) to save/restore FP registers. If the kernel calls
 * memcpy without this trap sequence we will hit FP corruption. Let's use
 * the normal integer load/store method in this case.
 */

#ifdef NON_USER_COPY
.Lmedium_vis_entry_fail_cp:
	or %o0, %o1, %g2
#endif
	! Integer-only copy for lengths below SMALL_MAX; also the fallback
	! target when VISEntryHalfFast cannot grab the FP unit (see above).
.Lmedium_cp:
	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
	andcc %g2, 0x7, %g0
	bne,pn %xcc, .Lmedium_unaligned_cp
	nop

.Lmedium_noprefetch_cp:
	andncc %o2, 0x20 - 1, %o5
	be,pn %xcc, 2f
	sub %o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x10, %g7), memcpy_retl_o2_plus_o5)
	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
	add %o1, 0x20, %o1
	subcc %o5, 0x20, %o5
	EX_ST(STORE(stx, %o3, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
	EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
	EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24)
	EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
	bne,pt %xcc, 1b
	add %o0, 0x20, %o0
2:	andcc %o2, 0x18, %o5
	be,pt %xcc, 3f
	sub %o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5)
	add %o1, 0x08, %o1
	add %o0, 0x08, %o0
	subcc %o5, 0x08, %o5
	bne,pt %xcc, 1b
	EX_ST(STORE(stx, %o3, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
3:	brz,pt %o2, .Lexit_cp
	cmp %o2, 0x04
	bl,pn %xcc, .Ltiny_cp
	nop
	EX_LD(LOAD(lduw, %o1 + 0x00, %o3), memcpy_retl_o2)
	add %o1, 0x04, %o1
	add %o0, 0x04, %o0
	subcc %o2, 0x04, %o2
	bne,pn %xcc, .Ltiny_cp
	EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_4)
	ba,a,pt %xcc, .Lexit_cp

.Lmedium_unaligned_cp:
	/* First get dest 8 byte aligned. */
	sub %g0, %o0, %o3
	and %o3, 0x7, %o3
	brz,pt %o3, 2f
	sub %o2, %o3, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
	add %o1, 1, %o1
	subcc %o3, 1, %o3
	add %o0, 1, %o0
	bne,pt %xcc, 1b
	EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
2:
	and %o1, 0x7, %o3
	brz,pn %o3, .Lmedium_noprefetch_cp
	sll %o3, 3, %o3
	mov 64, %g2
	sub %g2, %o3, %g2
	andn %o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
	sllx %o4, %o3, %o4
	andn %o2, 0x08 - 1, %o5
	sub %o2, %o5, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
	add %o1, 0x08, %o1
	subcc %o5, 0x08, %o5
	srlx %g3, %g2, %g7
	or %g7, %o4, %g7
	EX_ST(STORE(stx, %g7, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
	add %o0, 0x08, %o0
	bne,pt %xcc, 1b
	sllx %g3, %o3, %o4
	srl %o3, 3, %o3
	add %o1, %o3, %o1
	brz,pn %o2, .Lexit_cp
	nop
	ba,pt %xcc, .Lsmall_unaligned_cp

.Ltiny_cp:
	EX_LD(LOAD(ldub, %o1 + 0x00, %o3), memcpy_retl_o2)
	subcc %o2, 1, %o2
	be,pn %xcc, .Lexit_cp
	EX_ST(STORE(stb, %o3, %o0 + 0x00), memcpy_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x01, %o3), memcpy_retl_o2)
	subcc %o2, 1, %o2
	be,pn %xcc, .Lexit_cp
	EX_ST(STORE(stb, %o3, %o0 + 0x01), memcpy_retl_o2_plus_1)
	EX_LD(LOAD(ldub, %o1 + 0x02, %o3), memcpy_retl_o2)
	ba,pt %xcc, .Lexit_cp
	EX_ST(STORE(stb, %o3, %o0 + 0x02), memcpy_retl_o2)

.Lsmall_cp:
	andcc %g2, 0x3, %g0
	bne,pn %xcc, .Lsmall_unaligned_cp
	andn %o2, 0x4 - 1, %o5
	sub %o2, %o5, %o2
1:
	EX_LD(LOAD(lduw, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5)
	add %o1, 0x04, %o1
	subcc %o5, 0x04, %o5
	add %o0, 0x04, %o0
	bne,pt %xcc, 1b
	EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
	brz,pt %o2, .Lexit_cp
	nop
	ba,a,pt %xcc, .Ltiny_cp

.Lsmall_unaligned_cp:
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %o3), memcpy_retl_o2)
	add %o1, 1, %o1
	add %o0, 1, %o0
	subcc %o2, 1, %o2
	bne,pt %xcc, 1b
	EX_ST(STORE(stb, %o3, %o0 - 0x01), memcpy_retl_o2_plus_1)
	ba,a,pt %xcc, .Lexit_cp

	! Byte-at-a-time mop-up of the residue left in %o2 after a VIS
	! block loop has exited through .Lunalign_short.
.Lsmallrest:
	tst %o2
	bz,pt %xcc, .Lsmallx
	cmp %o2, 4
	blt,pn %xcc, .Lsmallleft3
	nop
	sub %o2, 3, %o2
.Lsmallnotalign4:
	EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_3)! read byte
	subcc %o2, 4, %o2 ! reduce count by 4
	EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_7)! write byte & repeat
	EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2_plus_6)! for total of 4
	add %o1, 4, %o1 ! advance SRC by 4
	EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_6)
	EX_LD(LOAD(ldub, %o1-2, %o3), memcpy_retl_o2_plus_5)
	add %o0, 4, %o0 ! advance DST by 4
	EX_ST(STORE(stb, %o3, %o0-2), memcpy_retl_o2_plus_5)
	EX_LD(LOAD(ldub, %o1-1, %o3), memcpy_retl_o2_plus_4)
	bgu,pt %xcc, .Lsmallnotalign4 ! loop til 3 or fewer bytes remain
	EX_ST(STORE(stb, %o3, %o0-1), memcpy_retl_o2_plus_4)
	addcc %o2, 3, %o2 ! restore count
	bz,pt %xcc, .Lsmallx
.Lsmallleft3: ! 1, 2, or 3 bytes remain
	subcc %o2, 1, %o2
	EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_1) ! load one byte
	bz,pt %xcc, .Lsmallx
	EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_1) ! store one byte
	EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2) ! load second byte
	subcc %o2, 1, %o2
	bz,pt %xcc, .Lsmallx
	EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_1)! store second byte
	EX_LD(LOAD(ldub, %o1+2, %o3), memcpy_retl_o2) ! load third byte
	EX_ST(STORE(stb, %o3, %o0+2), memcpy_retl_o2) ! store third byte
.Lsmallx:
	retl
	mov EX_RETVAL(%g1), %o0
.Lsmallfin:
	tst %o2
	bnz,pn %xcc, .Lsmallleft3
	nop
	retl
	mov EX_RETVAL(%g1), %o0 ! restore %o0
.Lexit_cp:
	retl
	mov EX_RETVAL(%g1), %o0
	.size FUNC_NAME, .-FUNC_NAME
+352
arch/sparc/lib/M7memset.S
··· 1 + /* 2 + * M7memset.S: SPARC M7 optimized memset. 3 + * 4 + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 5 + */ 6 + 7 + /* 8 + * M7memset.S: M7 optimized memset. 9 + * 10 + * char *memset(sp, c, n) 11 + * 12 + * Set an array of n chars starting at sp to the character c. 13 + * Return sp. 14 + * 15 + * Fast assembler language version of the following C-program for memset 16 + * which represents the `standard' for the C-library. 17 + * 18 + * void * 19 + * memset(void *sp1, int c, size_t n) 20 + * { 21 + * if (n != 0) { 22 + * char *sp = sp1; 23 + * do { 24 + * *sp++ = (char)c; 25 + * } while (--n != 0); 26 + * } 27 + * return (sp1); 28 + * } 29 + * 30 + * The algorithm is as follows : 31 + * 32 + * For small 6 or fewer bytes stores, bytes will be stored. 33 + * 34 + * For less than 32 bytes stores, align the address on 4 byte boundary. 35 + * Then store as many 4-byte chunks, followed by trailing bytes. 36 + * 37 + * For sizes greater than 32 bytes, align the address on 8 byte boundary. 38 + * if (count >= 64) { 39 + * store 8-bytes chunks to align the address on 64 byte boundary 40 + * if (value to be set is zero && count >= MIN_ZERO) { 41 + * Using BIS stores, set the first long word of each 42 + * 64-byte cache line to zero which will also clear the 43 + * other seven long words of the cache line. 44 + * } 45 + * else if (count >= MIN_LOOP) { 46 + * Using BIS stores, set the first long word of each of 47 + * ST_CHUNK cache lines (64 bytes each) before the main 48 + * loop is entered. 49 + * In the main loop, continue pre-setting the first long 50 + * word of each cache line ST_CHUNK lines in advance while 51 + * setting the other seven long words (56 bytes) of each 52 + * cache line until fewer than ST_CHUNK*64 bytes remain. 53 + * Then set the remaining seven long words of each cache 54 + * line that has already had its first long word set. 
55 + * } 56 + * store remaining data in 64-byte chunks until less than 57 + * 64 bytes remain. 58 + * } 59 + * Store as many 8-byte chunks, followed by trailing bytes. 60 + * 61 + * BIS = Block Init Store 62 + * Doing the advance store of the first element of the cache line 63 + * initiates the displacement of a cache line while only using a single 64 + * instruction in the pipeline. That avoids various pipeline delays, 65 + * such as filling the miss buffer. The performance effect is 66 + * similar to prefetching for normal stores. 67 + * The special case for zero fills runs faster and uses fewer instruction 68 + * cycles than the normal memset loop. 69 + * 70 + * We only use BIS for memset of greater than MIN_LOOP bytes because a sequence 71 + * BIS stores must be followed by a membar #StoreStore. The benefit of 72 + * the BIS store must be balanced against the cost of the membar operation. 73 + */ 74 + 75 + /* 76 + * ASI_STBI_P marks the cache line as "least recently used" 77 + * which means if many threads are active, it has a high chance 78 + * of being pushed out of the cache between the first initializing 79 + * store and the final stores. 80 + * Thus, we use ASI_STBIMRU_P which marks the cache line as 81 + * "most recently used" for all but the last store to the cache line. 82 + */ 83 + 84 + #include <asm/asi.h> 85 + #include <asm/page.h> 86 + 87 + #define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P 88 + #define ASI_STBIMRU_P ASI_ST_BLKINIT_MRU_P 89 + 90 + 91 + #define ST_CHUNK 24 /* multiple of 4 due to loop unrolling */ 92 + #define MIN_LOOP 16320 93 + #define MIN_ZERO 512 94 + 95 + .section ".text" 96 + .align 32 97 + 98 + /* 99 + * Define clear_page(dest) as memset(dest, 0, PAGE_SIZE) 100 + * (can create a more optimized version later.) 
101 + */ 102 + .globl M7clear_page 103 + .globl M7clear_user_page 104 + M7clear_page: /* clear_page(dest) */ 105 + M7clear_user_page: 106 + set PAGE_SIZE, %o1 107 + /* fall through into bzero code */ 108 + 109 + .size M7clear_page,.-M7clear_page 110 + .size M7clear_user_page,.-M7clear_user_page 111 + 112 + /* 113 + * Define bzero(dest, n) as memset(dest, 0, n) 114 + * (can create a more optimized version later.) 115 + */ 116 + .globl M7bzero 117 + M7bzero: /* bzero(dest, size) */ 118 + mov %o1, %o2 119 + mov 0, %o1 120 + /* fall through into memset code */ 121 + 122 + .size M7bzero,.-M7bzero 123 + 124 + .global M7memset 125 + .type M7memset, #function 126 + .register %g3, #scratch 127 + M7memset: 128 + mov %o0, %o5 ! copy sp1 before using it 129 + cmp %o2, 7 ! if small counts, just write bytes 130 + bleu,pn %xcc, .wrchar 131 + and %o1, 0xff, %o1 ! o1 is (char)c 132 + 133 + sll %o1, 8, %o3 134 + or %o1, %o3, %o1 ! now o1 has 2 bytes of c 135 + sll %o1, 16, %o3 136 + cmp %o2, 32 137 + blu,pn %xcc, .wdalign 138 + or %o1, %o3, %o1 ! now o1 has 4 bytes of c 139 + 140 + sllx %o1, 32, %o3 141 + or %o1, %o3, %o1 ! now o1 has 8 bytes of c 142 + 143 + .dbalign: 144 + andcc %o5, 7, %o3 ! is sp1 aligned on a 8 byte bound? 145 + bz,pt %xcc, .blkalign ! already long word aligned 146 + sub %o3, 8, %o3 ! -(bytes till long word aligned) 147 + 148 + add %o2, %o3, %o2 ! update o2 with new count 149 + ! Set -(%o3) bytes till sp1 long word aligned 150 + 1: stb %o1, [%o5] ! there is at least 1 byte to set 151 + inccc %o3 ! byte clearing loop 152 + bl,pt %xcc, 1b 153 + inc %o5 154 + 155 + ! Now sp1 is long word aligned (sp1 is found in %o5) 156 + .blkalign: 157 + cmp %o2, 64 ! check if there are 64 bytes to set 158 + blu,pn %xcc, .wrshort 159 + mov %o2, %o3 160 + 161 + andcc %o5, 63, %o3 ! is sp1 block aligned? 162 + bz,pt %xcc, .blkwr ! now block aligned 163 + sub %o3, 64, %o3 ! o3 is -(bytes till block aligned) 164 + add %o2, %o3, %o2 ! o2 is the remainder 165 + 166 + ! 
Store -(%o3) bytes till dst is block (64 byte) aligned. 167 + ! Use long word stores. 168 + ! Recall that dst is already long word aligned 169 + 1: 170 + addcc %o3, 8, %o3 171 + stx %o1, [%o5] 172 + bl,pt %xcc, 1b 173 + add %o5, 8, %o5 174 + 175 + ! Now sp1 is block aligned 176 + .blkwr: 177 + andn %o2, 63, %o4 ! calculate size of blocks in bytes 178 + brz,pn %o1, .wrzero ! special case if c == 0 179 + and %o2, 63, %o3 ! %o3 = bytes left after blk stores. 180 + 181 + set MIN_LOOP, %g1 182 + cmp %o4, %g1 ! check there are enough bytes to set 183 + blu,pn %xcc, .short_set ! to justify cost of membar 184 + ! must be > pre-cleared lines 185 + nop 186 + 187 + ! initial cache-clearing stores 188 + ! get store pipeline moving 189 + rd %asi, %g3 ! save %asi to be restored later 190 + wr %g0, ASI_STBIMRU_P, %asi 191 + 192 + ! Primary memset loop for large memsets 193 + .wr_loop: 194 + sub %o5, 8, %o5 ! adjust %o5 for ASI store alignment 195 + mov ST_CHUNK, %g1 196 + .wr_loop_start: 197 + stxa %o1, [%o5+8]%asi 198 + subcc %g1, 4, %g1 199 + stxa %o1, [%o5+8+64]%asi 200 + add %o5, 256, %o5 201 + stxa %o1, [%o5+8-128]%asi 202 + bgu %xcc, .wr_loop_start 203 + stxa %o1, [%o5+8-64]%asi 204 + 205 + sub %o5, ST_CHUNK*64, %o5 ! reset %o5 206 + mov ST_CHUNK, %g1 207 + 208 + .wr_loop_rest: 209 + stxa %o1, [%o5+8+8]%asi 210 + sub %o4, 64, %o4 211 + stxa %o1, [%o5+16+8]%asi 212 + subcc %g1, 1, %g1 213 + stxa %o1, [%o5+24+8]%asi 214 + stxa %o1, [%o5+32+8]%asi 215 + stxa %o1, [%o5+40+8]%asi 216 + add %o5, 64, %o5 217 + stxa %o1, [%o5-8]%asi 218 + bgu %xcc, .wr_loop_rest 219 + stxa %o1, [%o5]ASI_STBI_P 220 + 221 + ! If more than ST_CHUNK*64 bytes remain to set, continue 222 + ! setting the first long word of each cache line in advance 223 + ! to keep the store pipeline moving. 224 + 225 + cmp %o4, ST_CHUNK*64 226 + bge,pt %xcc, .wr_loop_start 227 + mov ST_CHUNK, %g1 228 + 229 + brz,a,pn %o4, .asi_done 230 + add %o5, 8, %o5 ! 
restore %o5 offset 231 + 232 + .wr_loop_small: 233 + stxa %o1, [%o5+8]%asi 234 + stxa %o1, [%o5+8+8]%asi 235 + stxa %o1, [%o5+16+8]%asi 236 + stxa %o1, [%o5+24+8]%asi 237 + stxa %o1, [%o5+32+8]%asi 238 + subcc %o4, 64, %o4 239 + stxa %o1, [%o5+40+8]%asi 240 + add %o5, 64, %o5 241 + stxa %o1, [%o5-8]%asi 242 + bgu,pt %xcc, .wr_loop_small 243 + stxa %o1, [%o5]ASI_STBI_P 244 + 245 + ba .asi_done 246 + add %o5, 8, %o5 ! restore %o5 offset 247 + 248 + ! Special case loop for zero fill memsets 249 + ! For each 64 byte cache line, single STBI to first element 250 + ! clears line 251 + .wrzero: 252 + cmp %o4, MIN_ZERO ! check if enough bytes to set 253 + ! to pay %asi + membar cost 254 + blu %xcc, .short_set 255 + nop 256 + sub %o4, 256, %o4 257 + 258 + .wrzero_loop: 259 + mov 64, %g3 260 + stxa %o1, [%o5]ASI_STBI_P 261 + subcc %o4, 256, %o4 262 + stxa %o1, [%o5+%g3]ASI_STBI_P 263 + add %o5, 256, %o5 264 + sub %g3, 192, %g3 265 + stxa %o1, [%o5+%g3]ASI_STBI_P 266 + add %g3, 64, %g3 267 + bge,pt %xcc, .wrzero_loop 268 + stxa %o1, [%o5+%g3]ASI_STBI_P 269 + add %o4, 256, %o4 270 + 271 + brz,pn %o4, .bsi_done 272 + nop 273 + 274 + .wrzero_small: 275 + stxa %o1, [%o5]ASI_STBI_P 276 + subcc %o4, 64, %o4 277 + bgu,pt %xcc, .wrzero_small 278 + add %o5, 64, %o5 279 + ba,a .bsi_done 280 + 281 + .asi_done: 282 + wr %g3, 0x0, %asi ! restored saved %asi 283 + .bsi_done: 284 + membar #StoreStore ! required by use of Block Store Init 285 + 286 + .short_set: 287 + cmp %o4, 64 ! check if 64 bytes to set 288 + blu %xcc, 5f 289 + nop 290 + 4: ! set final blocks of 64 bytes 291 + stx %o1, [%o5] 292 + stx %o1, [%o5+8] 293 + stx %o1, [%o5+16] 294 + stx %o1, [%o5+24] 295 + subcc %o4, 64, %o4 296 + stx %o1, [%o5+32] 297 + stx %o1, [%o5+40] 298 + add %o5, 64, %o5 299 + stx %o1, [%o5-16] 300 + bgu,pt %xcc, 4b 301 + stx %o1, [%o5-8] 302 + 303 + 5: 304 + ! Set the remaining long words 305 + .wrshort: 306 + subcc %o3, 8, %o3 ! Can we store any long words? 
307 + blu,pn %xcc, .wrchars 308 + and %o2, 7, %o2 ! calc bytes left after long words 309 + 6: 310 + subcc %o3, 8, %o3 311 + stx %o1, [%o5] ! store the long words 312 + bgeu,pt %xcc, 6b 313 + add %o5, 8, %o5 314 + 315 + .wrchars: ! check for extra chars 316 + brnz %o2, .wrfin 317 + nop 318 + retl 319 + nop 320 + 321 + .wdalign: 322 + andcc %o5, 3, %o3 ! is sp1 aligned on a word boundary 323 + bz,pn %xcc, .wrword 324 + andn %o2, 3, %o3 ! create word sized count in %o3 325 + 326 + dec %o2 ! decrement count 327 + stb %o1, [%o5] ! clear a byte 328 + b .wdalign 329 + inc %o5 ! next byte 330 + 331 + .wrword: 332 + subcc %o3, 4, %o3 333 + st %o1, [%o5] ! 4-byte writing loop 334 + bnz,pt %xcc, .wrword 335 + add %o5, 4, %o5 336 + 337 + and %o2, 3, %o2 ! leftover count, if any 338 + 339 + .wrchar: 340 + ! Set the remaining bytes, if any 341 + brz %o2, .exit 342 + nop 343 + .wrfin: 344 + deccc %o2 345 + stb %o1, [%o5] 346 + bgu,pt %xcc, .wrfin 347 + inc %o5 348 + .exit: 349 + retl ! %o0 was preserved 350 + nop 351 + 352 + .size M7memset,.-M7memset
+51
arch/sparc/lib/M7patch.S
··· 1 + /* 2 + * M7patch.S: Patch generic routines with M7 variant. 3 + * 4 + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 5 + */ 6 + 7 + #include <linux/linkage.h> 8 + 9 + #define BRANCH_ALWAYS 0x10680000 10 + #define NOP 0x01000000 11 + #define NG_DO_PATCH(OLD, NEW) \ 12 + sethi %hi(NEW), %g1; \ 13 + or %g1, %lo(NEW), %g1; \ 14 + sethi %hi(OLD), %g2; \ 15 + or %g2, %lo(OLD), %g2; \ 16 + sub %g1, %g2, %g1; \ 17 + sethi %hi(BRANCH_ALWAYS), %g3; \ 18 + sll %g1, 11, %g1; \ 19 + srl %g1, 11 + 2, %g1; \ 20 + or %g3, %lo(BRANCH_ALWAYS), %g3; \ 21 + or %g3, %g1, %g3; \ 22 + stw %g3, [%g2]; \ 23 + sethi %hi(NOP), %g3; \ 24 + or %g3, %lo(NOP), %g3; \ 25 + stw %g3, [%g2 + 0x4]; \ 26 + flush %g2; 27 + 28 + ENTRY(m7_patch_copyops) 29 + NG_DO_PATCH(memcpy, M7memcpy) 30 + NG_DO_PATCH(raw_copy_from_user, M7copy_from_user) 31 + NG_DO_PATCH(raw_copy_to_user, M7copy_to_user) 32 + retl 33 + nop 34 + ENDPROC(m7_patch_copyops) 35 + 36 + ENTRY(m7_patch_bzero) 37 + NG_DO_PATCH(memset, M7memset) 38 + NG_DO_PATCH(__bzero, M7bzero) 39 + NG_DO_PATCH(__clear_user, NGclear_user) 40 + NG_DO_PATCH(tsb_init, NGtsb_init) 41 + retl 42 + nop 43 + ENDPROC(m7_patch_bzero) 44 + 45 + ENTRY(m7_patch_pageops) 46 + NG_DO_PATCH(copy_user_page, NG4copy_user_page) 47 + NG_DO_PATCH(_clear_page, M7clear_page) 48 + NG_DO_PATCH(clear_user_page, M7clear_user_page) 49 + retl 50 + nop 51 + ENDPROC(m7_patch_pageops)
+5
arch/sparc/lib/Makefile
··· 36 36 lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o 37 37 lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o 38 38 39 + lib-$(CONFIG_SPARC64) += Memcpy_utils.o 40 + 41 + lib-$(CONFIG_SPARC64) += M7memcpy.o M7copy_from_user.o M7copy_to_user.o 42 + lib-$(CONFIG_SPARC64) += M7patch.o M7memset.o 43 + 39 44 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o 40 45 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o 41 46
+345
arch/sparc/lib/Memcpy_utils.S
··· 1 + #ifndef __ASM_MEMCPY_UTILS 2 + #define __ASM_MEMCPY_UTILS 3 + 4 + #include <linux/linkage.h> 5 + #include <asm/asi.h> 6 + #include <asm/visasm.h> 7 + 8 + ENTRY(__restore_asi_fp) 9 + VISExitHalf 10 + retl 11 + wr %g0, ASI_AIUS, %asi 12 + ENDPROC(__restore_asi_fp) 13 + 14 + ENTRY(__restore_asi) 15 + retl 16 + wr %g0, ASI_AIUS, %asi 17 + ENDPROC(__restore_asi) 18 + 19 + ENTRY(memcpy_retl_o2) 20 + ba,pt %xcc, __restore_asi 21 + mov %o2, %o0 22 + ENDPROC(memcpy_retl_o2) 23 + ENTRY(memcpy_retl_o2_plus_1) 24 + ba,pt %xcc, __restore_asi 25 + add %o2, 1, %o0 26 + ENDPROC(memcpy_retl_o2_plus_1) 27 + ENTRY(memcpy_retl_o2_plus_3) 28 + ba,pt %xcc, __restore_asi 29 + add %o2, 3, %o0 30 + ENDPROC(memcpy_retl_o2_plus_3) 31 + ENTRY(memcpy_retl_o2_plus_4) 32 + ba,pt %xcc, __restore_asi 33 + add %o2, 4, %o0 34 + ENDPROC(memcpy_retl_o2_plus_4) 35 + ENTRY(memcpy_retl_o2_plus_5) 36 + ba,pt %xcc, __restore_asi 37 + add %o2, 5, %o0 38 + ENDPROC(memcpy_retl_o2_plus_5) 39 + ENTRY(memcpy_retl_o2_plus_6) 40 + ba,pt %xcc, __restore_asi 41 + add %o2, 6, %o0 42 + ENDPROC(memcpy_retl_o2_plus_6) 43 + ENTRY(memcpy_retl_o2_plus_7) 44 + ba,pt %xcc, __restore_asi 45 + add %o2, 7, %o0 46 + ENDPROC(memcpy_retl_o2_plus_7) 47 + ENTRY(memcpy_retl_o2_plus_8) 48 + ba,pt %xcc, __restore_asi 49 + add %o2, 8, %o0 50 + ENDPROC(memcpy_retl_o2_plus_8) 51 + ENTRY(memcpy_retl_o2_plus_15) 52 + ba,pt %xcc, __restore_asi 53 + add %o2, 15, %o0 54 + ENDPROC(memcpy_retl_o2_plus_15) 55 + ENTRY(memcpy_retl_o2_plus_15_8) 56 + add %o2, 15, %o2 57 + ba,pt %xcc, __restore_asi 58 + add %o2, 8, %o0 59 + ENDPROC(memcpy_retl_o2_plus_15_8) 60 + ENTRY(memcpy_retl_o2_plus_16) 61 + ba,pt %xcc, __restore_asi 62 + add %o2, 16, %o0 63 + ENDPROC(memcpy_retl_o2_plus_16) 64 + ENTRY(memcpy_retl_o2_plus_24) 65 + ba,pt %xcc, __restore_asi 66 + add %o2, 24, %o0 67 + ENDPROC(memcpy_retl_o2_plus_24) 68 + ENTRY(memcpy_retl_o2_plus_31) 69 + ba,pt %xcc, __restore_asi 70 + add %o2, 31, %o0 71 + ENDPROC(memcpy_retl_o2_plus_31) 72 + 
ENTRY(memcpy_retl_o2_plus_32) 73 + ba,pt %xcc, __restore_asi 74 + add %o2, 32, %o0 75 + ENDPROC(memcpy_retl_o2_plus_32) 76 + ENTRY(memcpy_retl_o2_plus_31_32) 77 + add %o2, 31, %o2 78 + ba,pt %xcc, __restore_asi 79 + add %o2, 32, %o0 80 + ENDPROC(memcpy_retl_o2_plus_31_32) 81 + ENTRY(memcpy_retl_o2_plus_31_24) 82 + add %o2, 31, %o2 83 + ba,pt %xcc, __restore_asi 84 + add %o2, 24, %o0 85 + ENDPROC(memcpy_retl_o2_plus_31_24) 86 + ENTRY(memcpy_retl_o2_plus_31_16) 87 + add %o2, 31, %o2 88 + ba,pt %xcc, __restore_asi 89 + add %o2, 16, %o0 90 + ENDPROC(memcpy_retl_o2_plus_31_16) 91 + ENTRY(memcpy_retl_o2_plus_31_8) 92 + add %o2, 31, %o2 93 + ba,pt %xcc, __restore_asi 94 + add %o2, 8, %o0 95 + ENDPROC(memcpy_retl_o2_plus_31_8) 96 + ENTRY(memcpy_retl_o2_plus_63) 97 + ba,pt %xcc, __restore_asi 98 + add %o2, 63, %o0 99 + ENDPROC(memcpy_retl_o2_plus_63) 100 + ENTRY(memcpy_retl_o2_plus_63_64) 101 + add %o2, 63, %o2 102 + ba,pt %xcc, __restore_asi 103 + add %o2, 64, %o0 104 + ENDPROC(memcpy_retl_o2_plus_63_64) 105 + ENTRY(memcpy_retl_o2_plus_63_56) 106 + add %o2, 63, %o2 107 + ba,pt %xcc, __restore_asi 108 + add %o2, 56, %o0 109 + ENDPROC(memcpy_retl_o2_plus_63_56) 110 + ENTRY(memcpy_retl_o2_plus_63_48) 111 + add %o2, 63, %o2 112 + ba,pt %xcc, __restore_asi 113 + add %o2, 48, %o0 114 + ENDPROC(memcpy_retl_o2_plus_63_48) 115 + ENTRY(memcpy_retl_o2_plus_63_40) 116 + add %o2, 63, %o2 117 + ba,pt %xcc, __restore_asi 118 + add %o2, 40, %o0 119 + ENDPROC(memcpy_retl_o2_plus_63_40) 120 + ENTRY(memcpy_retl_o2_plus_63_32) 121 + add %o2, 63, %o2 122 + ba,pt %xcc, __restore_asi 123 + add %o2, 32, %o0 124 + ENDPROC(memcpy_retl_o2_plus_63_32) 125 + ENTRY(memcpy_retl_o2_plus_63_24) 126 + add %o2, 63, %o2 127 + ba,pt %xcc, __restore_asi 128 + add %o2, 24, %o0 129 + ENDPROC(memcpy_retl_o2_plus_63_24) 130 + ENTRY(memcpy_retl_o2_plus_63_16) 131 + add %o2, 63, %o2 132 + ba,pt %xcc, __restore_asi 133 + add %o2, 16, %o0 134 + ENDPROC(memcpy_retl_o2_plus_63_16) 135 + ENTRY(memcpy_retl_o2_plus_63_8) 
136 + add %o2, 63, %o2 137 + ba,pt %xcc, __restore_asi 138 + add %o2, 8, %o0 139 + ENDPROC(memcpy_retl_o2_plus_63_8) 140 + ENTRY(memcpy_retl_o2_plus_o5) 141 + ba,pt %xcc, __restore_asi 142 + add %o2, %o5, %o0 143 + ENDPROC(memcpy_retl_o2_plus_o5) 144 + ENTRY(memcpy_retl_o2_plus_o5_plus_1) 145 + add %o5, 1, %o5 146 + ba,pt %xcc, __restore_asi 147 + add %o2, %o5, %o0 148 + ENDPROC(memcpy_retl_o2_plus_o5_plus_1) 149 + ENTRY(memcpy_retl_o2_plus_o5_plus_4) 150 + add %o5, 4, %o5 151 + ba,pt %xcc, __restore_asi 152 + add %o2, %o5, %o0 153 + ENDPROC(memcpy_retl_o2_plus_o5_plus_4) 154 + ENTRY(memcpy_retl_o2_plus_o5_plus_8) 155 + add %o5, 8, %o5 156 + ba,pt %xcc, __restore_asi 157 + add %o2, %o5, %o0 158 + ENDPROC(memcpy_retl_o2_plus_o5_plus_8) 159 + ENTRY(memcpy_retl_o2_plus_o5_plus_16) 160 + add %o5, 16, %o5 161 + ba,pt %xcc, __restore_asi 162 + add %o2, %o5, %o0 163 + ENDPROC(memcpy_retl_o2_plus_o5_plus_16) 164 + ENTRY(memcpy_retl_o2_plus_o5_plus_24) 165 + add %o5, 24, %o5 166 + ba,pt %xcc, __restore_asi 167 + add %o2, %o5, %o0 168 + ENDPROC(memcpy_retl_o2_plus_o5_plus_24) 169 + ENTRY(memcpy_retl_o2_plus_o5_plus_32) 170 + add %o5, 32, %o5 171 + ba,pt %xcc, __restore_asi 172 + add %o2, %o5, %o0 173 + ENDPROC(memcpy_retl_o2_plus_o5_plus_32) 174 + ENTRY(memcpy_retl_o2_plus_o5_64) 175 + add %o5, 32, %o5 176 + ba,pt %xcc, __restore_asi 177 + add %o2, %o5, %o0 178 + ENDPROC(memcpy_retl_o2_plus_o5_64) 179 + ENTRY(memcpy_retl_o2_plus_g1) 180 + ba,pt %xcc, __restore_asi 181 + add %o2, %g1, %o0 182 + ENDPROC(memcpy_retl_o2_plus_g1) 183 + ENTRY(memcpy_retl_o2_plus_g1_plus_1) 184 + add %g1, 1, %g1 185 + ba,pt %xcc, __restore_asi 186 + add %o2, %g1, %o0 187 + ENDPROC(memcpy_retl_o2_plus_g1_plus_1) 188 + ENTRY(memcpy_retl_o2_plus_g1_plus_8) 189 + add %g1, 8, %g1 190 + ba,pt %xcc, __restore_asi 191 + add %o2, %g1, %o0 192 + ENDPROC(memcpy_retl_o2_plus_g1_plus_8) 193 + ENTRY(memcpy_retl_o2_plus_o4) 194 + ba,pt %xcc, __restore_asi 195 + add %o2, %o4, %o0 196 + 
ENDPROC(memcpy_retl_o2_plus_o4) 197 + ENTRY(memcpy_retl_o2_plus_o4_plus_8) 198 + add %o4, 8, %o4 199 + ba,pt %xcc, __restore_asi 200 + add %o2, %o4, %o0 201 + ENDPROC(memcpy_retl_o2_plus_o4_plus_8) 202 + ENTRY(memcpy_retl_o2_plus_o4_plus_16) 203 + add %o4, 16, %o4 204 + ba,pt %xcc, __restore_asi 205 + add %o2, %o4, %o0 206 + ENDPROC(memcpy_retl_o2_plus_o4_plus_16) 207 + ENTRY(memcpy_retl_o2_plus_o4_plus_24) 208 + add %o4, 24, %o4 209 + ba,pt %xcc, __restore_asi 210 + add %o2, %o4, %o0 211 + ENDPROC(memcpy_retl_o2_plus_o4_plus_24) 212 + ENTRY(memcpy_retl_o2_plus_o4_plus_32) 213 + add %o4, 32, %o4 214 + ba,pt %xcc, __restore_asi 215 + add %o2, %o4, %o0 216 + ENDPROC(memcpy_retl_o2_plus_o4_plus_32) 217 + ENTRY(memcpy_retl_o2_plus_o4_plus_40) 218 + add %o4, 40, %o4 219 + ba,pt %xcc, __restore_asi 220 + add %o2, %o4, %o0 221 + ENDPROC(memcpy_retl_o2_plus_o4_plus_40) 222 + ENTRY(memcpy_retl_o2_plus_o4_plus_48) 223 + add %o4, 48, %o4 224 + ba,pt %xcc, __restore_asi 225 + add %o2, %o4, %o0 226 + ENDPROC(memcpy_retl_o2_plus_o4_plus_48) 227 + ENTRY(memcpy_retl_o2_plus_o4_plus_56) 228 + add %o4, 56, %o4 229 + ba,pt %xcc, __restore_asi 230 + add %o2, %o4, %o0 231 + ENDPROC(memcpy_retl_o2_plus_o4_plus_56) 232 + ENTRY(memcpy_retl_o2_plus_o4_plus_64) 233 + add %o4, 64, %o4 234 + ba,pt %xcc, __restore_asi 235 + add %o2, %o4, %o0 236 + ENDPROC(memcpy_retl_o2_plus_o4_plus_64) 237 + ENTRY(memcpy_retl_o2_plus_o5_plus_64) 238 + add %o5, 64, %o5 239 + ba,pt %xcc, __restore_asi 240 + add %o2, %o5, %o0 241 + ENDPROC(memcpy_retl_o2_plus_o5_plus_64) 242 + ENTRY(memcpy_retl_o2_plus_o3_fp) 243 + ba,pt %xcc, __restore_asi_fp 244 + add %o2, %o3, %o0 245 + ENDPROC(memcpy_retl_o2_plus_o3_fp) 246 + ENTRY(memcpy_retl_o2_plus_o3_plus_1_fp) 247 + add %o3, 1, %o3 248 + ba,pt %xcc, __restore_asi_fp 249 + add %o2, %o3, %o0 250 + ENDPROC(memcpy_retl_o2_plus_o3_plus_1_fp) 251 + ENTRY(memcpy_retl_o2_plus_o3_plus_4_fp) 252 + add %o3, 4, %o3 253 + ba,pt %xcc, __restore_asi_fp 254 + add %o2, %o3, %o0 255 + 
ENDPROC(memcpy_retl_o2_plus_o3_plus_4_fp) 256 + ENTRY(memcpy_retl_o2_plus_o4_fp) 257 + ba,pt %xcc, __restore_asi_fp 258 + add %o2, %o4, %o0 259 + ENDPROC(memcpy_retl_o2_plus_o4_fp) 260 + ENTRY(memcpy_retl_o2_plus_o4_plus_8_fp) 261 + add %o4, 8, %o4 262 + ba,pt %xcc, __restore_asi_fp 263 + add %o2, %o4, %o0 264 + ENDPROC(memcpy_retl_o2_plus_o4_plus_8_fp) 265 + ENTRY(memcpy_retl_o2_plus_o4_plus_16_fp) 266 + add %o4, 16, %o4 267 + ba,pt %xcc, __restore_asi_fp 268 + add %o2, %o4, %o0 269 + ENDPROC(memcpy_retl_o2_plus_o4_plus_16_fp) 270 + ENTRY(memcpy_retl_o2_plus_o4_plus_24_fp) 271 + add %o4, 24, %o4 272 + ba,pt %xcc, __restore_asi_fp 273 + add %o2, %o4, %o0 274 + ENDPROC(memcpy_retl_o2_plus_o4_plus_24_fp) 275 + ENTRY(memcpy_retl_o2_plus_o4_plus_32_fp) 276 + add %o4, 32, %o4 277 + ba,pt %xcc, __restore_asi_fp 278 + add %o2, %o4, %o0 279 + ENDPROC(memcpy_retl_o2_plus_o4_plus_32_fp) 280 + ENTRY(memcpy_retl_o2_plus_o4_plus_40_fp) 281 + add %o4, 40, %o4 282 + ba,pt %xcc, __restore_asi_fp 283 + add %o2, %o4, %o0 284 + ENDPROC(memcpy_retl_o2_plus_o4_plus_40_fp) 285 + ENTRY(memcpy_retl_o2_plus_o4_plus_48_fp) 286 + add %o4, 48, %o4 287 + ba,pt %xcc, __restore_asi_fp 288 + add %o2, %o4, %o0 289 + ENDPROC(memcpy_retl_o2_plus_o4_plus_48_fp) 290 + ENTRY(memcpy_retl_o2_plus_o4_plus_56_fp) 291 + add %o4, 56, %o4 292 + ba,pt %xcc, __restore_asi_fp 293 + add %o2, %o4, %o0 294 + ENDPROC(memcpy_retl_o2_plus_o4_plus_56_fp) 295 + ENTRY(memcpy_retl_o2_plus_o4_plus_64_fp) 296 + add %o4, 64, %o4 297 + ba,pt %xcc, __restore_asi_fp 298 + add %o2, %o4, %o0 299 + ENDPROC(memcpy_retl_o2_plus_o4_plus_64_fp) 300 + ENTRY(memcpy_retl_o2_plus_o5_fp) 301 + ba,pt %xcc, __restore_asi_fp 302 + add %o2, %o5, %o0 303 + ENDPROC(memcpy_retl_o2_plus_o5_fp) 304 + ENTRY(memcpy_retl_o2_plus_o5_plus_64_fp) 305 + add %o5, 64, %o5 306 + ba,pt %xcc, __restore_asi_fp 307 + add %o2, %o5, %o0 308 + ENDPROC(memcpy_retl_o2_plus_o5_plus_64_fp) 309 + ENTRY(memcpy_retl_o2_plus_o5_plus_56_fp) 310 + add %o5, 56, %o5 311 + 
ba,pt %xcc, __restore_asi_fp 312 + add %o2, %o5, %o0 313 + ENDPROC(memcpy_retl_o2_plus_o5_plus_56_fp) 314 + ENTRY(memcpy_retl_o2_plus_o5_plus_48_fp) 315 + add %o5, 48, %o5 316 + ba,pt %xcc, __restore_asi_fp 317 + add %o2, %o5, %o0 318 + ENDPROC(memcpy_retl_o2_plus_o5_plus_48_fp) 319 + ENTRY(memcpy_retl_o2_plus_o5_plus_40_fp) 320 + add %o5, 40, %o5 321 + ba,pt %xcc, __restore_asi_fp 322 + add %o2, %o5, %o0 323 + ENDPROC(memcpy_retl_o2_plus_o5_plus_40_fp) 324 + ENTRY(memcpy_retl_o2_plus_o5_plus_32_fp) 325 + add %o5, 32, %o5 326 + ba,pt %xcc, __restore_asi_fp 327 + add %o2, %o5, %o0 328 + ENDPROC(memcpy_retl_o2_plus_o5_plus_32_fp) 329 + ENTRY(memcpy_retl_o2_plus_o5_plus_24_fp) 330 + add %o5, 24, %o5 331 + ba,pt %xcc, __restore_asi_fp 332 + add %o2, %o5, %o0 333 + ENDPROC(memcpy_retl_o2_plus_o5_plus_24_fp) 334 + ENTRY(memcpy_retl_o2_plus_o5_plus_16_fp) 335 + add %o5, 16, %o5 336 + ba,pt %xcc, __restore_asi_fp 337 + add %o2, %o5, %o0 338 + ENDPROC(memcpy_retl_o2_plus_o5_plus_16_fp) 339 + ENTRY(memcpy_retl_o2_plus_o5_plus_8_fp) 340 + add %o5, 8, %o5 341 + ba,pt %xcc, __restore_asi_fp 342 + add %o2, %o5, %o0 343 + ENDPROC(memcpy_retl_o2_plus_o5_plus_8_fp) 344 + 345 + #endif
+64 -213
arch/sparc/lib/NG4memcpy.S
··· 94 94 .text 95 95 #ifndef EX_RETVAL 96 96 #define EX_RETVAL(x) x 97 - __restore_asi_fp: 98 - VISExitHalf 99 - __restore_asi: 100 - retl 101 - wr %g0, ASI_AIUS, %asi 102 - 103 - ENTRY(NG4_retl_o2) 104 - ba,pt %xcc, __restore_asi 105 - mov %o2, %o0 106 - ENDPROC(NG4_retl_o2) 107 - ENTRY(NG4_retl_o2_plus_1) 108 - ba,pt %xcc, __restore_asi 109 - add %o2, 1, %o0 110 - ENDPROC(NG4_retl_o2_plus_1) 111 - ENTRY(NG4_retl_o2_plus_4) 112 - ba,pt %xcc, __restore_asi 113 - add %o2, 4, %o0 114 - ENDPROC(NG4_retl_o2_plus_4) 115 - ENTRY(NG4_retl_o2_plus_o5) 116 - ba,pt %xcc, __restore_asi 117 - add %o2, %o5, %o0 118 - ENDPROC(NG4_retl_o2_plus_o5) 119 - ENTRY(NG4_retl_o2_plus_o5_plus_4) 120 - add %o5, 4, %o5 121 - ba,pt %xcc, __restore_asi 122 - add %o2, %o5, %o0 123 - ENDPROC(NG4_retl_o2_plus_o5_plus_4) 124 - ENTRY(NG4_retl_o2_plus_o5_plus_8) 125 - add %o5, 8, %o5 126 - ba,pt %xcc, __restore_asi 127 - add %o2, %o5, %o0 128 - ENDPROC(NG4_retl_o2_plus_o5_plus_8) 129 - ENTRY(NG4_retl_o2_plus_o5_plus_16) 130 - add %o5, 16, %o5 131 - ba,pt %xcc, __restore_asi 132 - add %o2, %o5, %o0 133 - ENDPROC(NG4_retl_o2_plus_o5_plus_16) 134 - ENTRY(NG4_retl_o2_plus_o5_plus_24) 135 - add %o5, 24, %o5 136 - ba,pt %xcc, __restore_asi 137 - add %o2, %o5, %o0 138 - ENDPROC(NG4_retl_o2_plus_o5_plus_24) 139 - ENTRY(NG4_retl_o2_plus_o5_plus_32) 140 - add %o5, 32, %o5 141 - ba,pt %xcc, __restore_asi 142 - add %o2, %o5, %o0 143 - ENDPROC(NG4_retl_o2_plus_o5_plus_32) 144 - ENTRY(NG4_retl_o2_plus_g1) 145 - ba,pt %xcc, __restore_asi 146 - add %o2, %g1, %o0 147 - ENDPROC(NG4_retl_o2_plus_g1) 148 - ENTRY(NG4_retl_o2_plus_g1_plus_1) 149 - add %g1, 1, %g1 150 - ba,pt %xcc, __restore_asi 151 - add %o2, %g1, %o0 152 - ENDPROC(NG4_retl_o2_plus_g1_plus_1) 153 - ENTRY(NG4_retl_o2_plus_g1_plus_8) 154 - add %g1, 8, %g1 155 - ba,pt %xcc, __restore_asi 156 - add %o2, %g1, %o0 157 - ENDPROC(NG4_retl_o2_plus_g1_plus_8) 158 - ENTRY(NG4_retl_o2_plus_o4) 159 - ba,pt %xcc, __restore_asi 160 - add %o2, %o4, %o0 161 - 
ENDPROC(NG4_retl_o2_plus_o4) 162 - ENTRY(NG4_retl_o2_plus_o4_plus_8) 163 - add %o4, 8, %o4 164 - ba,pt %xcc, __restore_asi 165 - add %o2, %o4, %o0 166 - ENDPROC(NG4_retl_o2_plus_o4_plus_8) 167 - ENTRY(NG4_retl_o2_plus_o4_plus_16) 168 - add %o4, 16, %o4 169 - ba,pt %xcc, __restore_asi 170 - add %o2, %o4, %o0 171 - ENDPROC(NG4_retl_o2_plus_o4_plus_16) 172 - ENTRY(NG4_retl_o2_plus_o4_plus_24) 173 - add %o4, 24, %o4 174 - ba,pt %xcc, __restore_asi 175 - add %o2, %o4, %o0 176 - ENDPROC(NG4_retl_o2_plus_o4_plus_24) 177 - ENTRY(NG4_retl_o2_plus_o4_plus_32) 178 - add %o4, 32, %o4 179 - ba,pt %xcc, __restore_asi 180 - add %o2, %o4, %o0 181 - ENDPROC(NG4_retl_o2_plus_o4_plus_32) 182 - ENTRY(NG4_retl_o2_plus_o4_plus_40) 183 - add %o4, 40, %o4 184 - ba,pt %xcc, __restore_asi 185 - add %o2, %o4, %o0 186 - ENDPROC(NG4_retl_o2_plus_o4_plus_40) 187 - ENTRY(NG4_retl_o2_plus_o4_plus_48) 188 - add %o4, 48, %o4 189 - ba,pt %xcc, __restore_asi 190 - add %o2, %o4, %o0 191 - ENDPROC(NG4_retl_o2_plus_o4_plus_48) 192 - ENTRY(NG4_retl_o2_plus_o4_plus_56) 193 - add %o4, 56, %o4 194 - ba,pt %xcc, __restore_asi 195 - add %o2, %o4, %o0 196 - ENDPROC(NG4_retl_o2_plus_o4_plus_56) 197 - ENTRY(NG4_retl_o2_plus_o4_plus_64) 198 - add %o4, 64, %o4 199 - ba,pt %xcc, __restore_asi 200 - add %o2, %o4, %o0 201 - ENDPROC(NG4_retl_o2_plus_o4_plus_64) 202 - ENTRY(NG4_retl_o2_plus_o4_fp) 203 - ba,pt %xcc, __restore_asi_fp 204 - add %o2, %o4, %o0 205 - ENDPROC(NG4_retl_o2_plus_o4_fp) 206 - ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) 207 - add %o4, 8, %o4 208 - ba,pt %xcc, __restore_asi_fp 209 - add %o2, %o4, %o0 210 - ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) 211 - ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) 212 - add %o4, 16, %o4 213 - ba,pt %xcc, __restore_asi_fp 214 - add %o2, %o4, %o0 215 - ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) 216 - ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) 217 - add %o4, 24, %o4 218 - ba,pt %xcc, __restore_asi_fp 219 - add %o2, %o4, %o0 220 - ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) 221 - 
ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) 222 - add %o4, 32, %o4 223 - ba,pt %xcc, __restore_asi_fp 224 - add %o2, %o4, %o0 225 - ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) 226 - ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) 227 - add %o4, 40, %o4 228 - ba,pt %xcc, __restore_asi_fp 229 - add %o2, %o4, %o0 230 - ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) 231 - ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) 232 - add %o4, 48, %o4 233 - ba,pt %xcc, __restore_asi_fp 234 - add %o2, %o4, %o0 235 - ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) 236 - ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) 237 - add %o4, 56, %o4 238 - ba,pt %xcc, __restore_asi_fp 239 - add %o2, %o4, %o0 240 - ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) 241 - ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) 242 - add %o4, 64, %o4 243 - ba,pt %xcc, __restore_asi_fp 244 - add %o2, %o4, %o0 245 - ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) 246 97 #endif 247 98 .align 64 248 99 ··· 126 275 sub %o2, %g1, %o2 127 276 128 277 129 - 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) 278 + 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) 130 279 add %o1, 1, %o1 131 280 subcc %g1, 1, %g1 132 281 add %o0, 1, %o0 133 282 bne,pt %icc, 1b 134 - EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 283 + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) 135 284 136 285 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) 137 286 LOAD(prefetch, %o1 + 0x080, #n_reads_strong) ··· 156 305 brz,pt %g1, .Llarge_aligned 157 306 sub %o2, %g1, %o2 158 307 159 - 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) 308 + 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) 160 309 add %o1, 8, %o1 161 310 subcc %g1, 8, %g1 162 311 add %o0, 8, %o0 163 312 bne,pt %icc, 1b 164 - EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) 313 + EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8) 165 314 166 315 .Llarge_aligned: 167 316 /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ 168 317 andn %o2, 0x3f, 
%o4 169 318 sub %o2, %o4, %o2 170 319 171 - 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) 320 + 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4) 172 321 add %o1, 0x40, %o1 173 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) 322 + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4) 174 323 subcc %o4, 0x40, %o4 175 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) 176 - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) 177 - EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) 178 - EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) 324 + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64) 325 + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64) 326 + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64) 327 + EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64) 179 328 add %o0, 0x08, %o0 180 - EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) 329 + EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56) 181 330 add %o0, 0x08, %o0 182 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) 183 - EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) 331 + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48) 332 + EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48) 184 333 add %o0, 0x08, %o0 185 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) 186 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) 334 + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40) 335 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40) 187 336 add %o0, 0x08, %o0 188 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) 189 - EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) 337 + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32) 338 + EX_ST(STORE_INIT(%o5, %o0), 
memcpy_retl_o2_plus_o4_plus_32) 190 339 add %o0, 0x08, %o0 191 - EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) 340 + EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24) 192 341 add %o0, 0x08, %o0 193 - EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) 342 + EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16) 194 343 add %o0, 0x08, %o0 195 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) 344 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8) 196 345 add %o0, 0x08, %o0 197 346 bne,pt %icc, 1b 198 347 LOAD(prefetch, %o1 + 0x200, #n_reads_strong) ··· 218 367 sub %o2, %o4, %o2 219 368 alignaddr %o1, %g0, %g1 220 369 add %o1, %o4, %o1 221 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) 222 - 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) 370 + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4) 371 + 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4) 223 372 subcc %o4, 0x40, %o4 224 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) 225 - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) 226 - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) 227 - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) 228 - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) 229 - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) 373 + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64) 374 + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64) 375 + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64) 376 + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64) 377 + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64) 378 + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64) 230 379 faligndata %f0, %f2, %f16 231 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) 
380 + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64) 232 381 faligndata %f2, %f4, %f18 233 382 add %g1, 0x40, %g1 234 383 faligndata %f4, %f6, %f20 ··· 237 386 faligndata %f10, %f12, %f26 238 387 faligndata %f12, %f14, %f28 239 388 faligndata %f14, %f0, %f30 240 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) 241 - EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) 242 - EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) 243 - EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) 244 - EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) 245 - EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) 246 - EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) 247 - EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) 389 + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64) 390 + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56) 391 + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48) 392 + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40) 393 + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32) 394 + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24) 395 + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16) 396 + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8) 248 397 add %o0, 0x40, %o0 249 398 bne,pt %icc, 1b 250 399 LOAD(prefetch, %g1 + 0x200, #n_reads_strong) ··· 272 421 andncc %o2, 0x20 - 1, %o5 273 422 be,pn %icc, 2f 274 423 sub %o2, %o5, %o2 275 - 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) 276 - EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) 277 - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) 278 - EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) 424 + 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), 
memcpy_retl_o2_plus_o5) 425 + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5) 426 + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5) 427 + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5) 279 428 add %o1, 0x20, %o1 280 429 subcc %o5, 0x20, %o5 281 - EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) 282 - EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) 283 - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) 284 - EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) 430 + EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) 431 + EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) 432 + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) 433 + EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) 285 434 bne,pt %icc, 1b 286 435 add %o0, 0x20, %o0 287 436 2: andcc %o2, 0x18, %o5 288 437 be,pt %icc, 3f 289 438 sub %o2, %o5, %o2 290 439 291 - 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) 440 + 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) 292 441 add %o1, 0x08, %o1 293 442 add %o0, 0x08, %o0 294 443 subcc %o5, 0x08, %o5 295 444 bne,pt %icc, 1b 296 - EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) 445 + EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8) 297 446 3: brz,pt %o2, .Lexit 298 447 cmp %o2, 0x04 299 448 bl,pn %icc, .Ltiny 300 449 nop 301 - EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) 450 + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2) 302 451 add %o1, 0x04, %o1 303 452 add %o0, 0x04, %o0 304 453 subcc %o2, 0x04, %o2 305 454 bne,pn %icc, .Ltiny 306 - EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) 455 + EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4) 307 456 ba,a,pt %icc, .Lexit 308 457 .Lmedium_unaligned: 309 458 /* First get dest 8 byte aligned. 
*/ ··· 312 461 brz,pt %g1, 2f 313 462 sub %o2, %g1, %o2 314 463 315 - 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) 464 + 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) 316 465 add %o1, 1, %o1 317 466 subcc %g1, 1, %g1 318 467 add %o0, 1, %o0 319 468 bne,pt %icc, 1b 320 - EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 469 + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) 321 470 2: 322 471 and %o1, 0x7, %g1 323 472 brz,pn %g1, .Lmedium_noprefetch ··· 325 474 mov 64, %g2 326 475 sub %g2, %g1, %g2 327 476 andn %o1, 0x7, %o1 328 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) 477 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2) 329 478 sllx %o4, %g1, %o4 330 479 andn %o2, 0x08 - 1, %o5 331 480 sub %o2, %o5, %o2 332 - 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) 481 + 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5) 333 482 add %o1, 0x08, %o1 334 483 subcc %o5, 0x08, %o5 335 484 srlx %g3, %g2, GLOBAL_SPARE 336 485 or GLOBAL_SPARE, %o4, GLOBAL_SPARE 337 - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) 486 + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8) 338 487 add %o0, 0x08, %o0 339 488 bne,pt %icc, 1b 340 489 sllx %g3, %g1, %o4 ··· 345 494 ba,pt %icc, .Lsmall_unaligned 346 495 347 496 .Ltiny: 348 - EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) 497 + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2) 349 498 subcc %o2, 1, %o2 350 499 be,pn %icc, .Lexit 351 - EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) 352 - EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) 500 + EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1) 501 + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2) 353 502 subcc %o2, 1, %o2 354 503 be,pn %icc, .Lexit 355 - EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) 356 - EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) 504 + EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1) 505 + 
EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2) 357 506 ba,pt %icc, .Lexit 358 - EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) 507 + EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2) 359 508 360 509 .Lsmall: 361 510 andcc %g2, 0x3, %g0 ··· 363 512 andn %o2, 0x4 - 1, %o5 364 513 sub %o2, %o5, %o2 365 514 1: 366 - EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) 515 + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5) 367 516 add %o1, 0x04, %o1 368 517 subcc %o5, 0x04, %o5 369 518 add %o0, 0x04, %o0 370 519 bne,pt %icc, 1b 371 - EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) 520 + EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4) 372 521 brz,pt %o2, .Lexit 373 522 nop 374 523 ba,a,pt %icc, .Ltiny 375 524 376 525 .Lsmall_unaligned: 377 - 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) 526 + 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2) 378 527 add %o1, 1, %o1 379 528 add %o0, 1, %o0 380 529 subcc %o2, 1, %o2 381 530 bne,pt %icc, 1b 382 - EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) 531 + EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1) 383 532 ba,a,pt %icc, .Lexit 384 533 nop 385 534 .size FUNC_NAME, .-FUNC_NAME
+21 -11
arch/sparc/lib/U3memcpy.S
··· 168 168 FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 169 169 srlx %o2, 31, %g2 170 170 cmp %g2, 0 171 + 172 + /* software trap 5 "Range Check" if dst >= 0x80000000 */ 171 173 tne %xcc, 5 172 174 PREAMBLE 173 175 mov %o0, %o4 176 + 177 + /* if len == 0 */ 174 178 cmp %o2, 0 175 - be,pn %XCC, 85f 179 + be,pn %XCC, end_return 176 180 or %o0, %o1, %o3 181 + 182 + /* if len < 16 */ 177 183 cmp %o2, 16 178 - blu,a,pn %XCC, 80f 184 + blu,a,pn %XCC, less_than_16 179 185 or %o3, %o2, %o3 180 186 187 + /* if len < 192 */ 181 188 cmp %o2, (3 * 64) 182 - blu,pt %XCC, 70f 189 + blu,pt %XCC, less_than_192 183 190 andcc %o3, 0x7, %g0 184 191 185 192 /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve ··· 369 362 cmp %o2, 0 370 363 add %o1, %g1, %o1 371 364 VISExitHalf 372 - be,pn %XCC, 85f 365 + be,pn %XCC, end_return 373 366 sub %o0, %o1, %o3 374 367 375 368 andcc %g1, 0x7, %g0 ··· 399 392 sub %o2, 2, %o2 400 393 401 394 1: andcc %o2, 0x1, %g0 402 - be,pt %icc, 85f 395 + be,pt %icc, end_return 403 396 nop 404 397 EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) 405 - ba,pt %xcc, 85f 398 + ba,pt %xcc, end_return 406 399 EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) 407 400 408 401 .align 64 409 - 70: /* 16 < len <= 64 */ 402 + /* 16 <= len < 192 */ 403 + less_than_192: 410 404 bne,pn %XCC, 75f 411 405 sub %o0, %o1, %o3 412 406 ··· 437 429 EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) 438 430 add %o1, 0x4, %o1 439 431 1: cmp %o2, 0 440 - be,pt %XCC, 85f 432 + be,pt %XCC, end_return 441 433 nop 442 434 ba,pt %xcc, 90f 443 435 nop ··· 483 475 484 476 srl %g1, 3, %g1 485 477 andcc %o2, 0x7, %o2 486 - be,pn %icc, 85f 478 + be,pn %icc, end_return 487 479 add %o1, %g1, %o1 488 480 ba,pt %xcc, 90f 489 481 sub %o0, %o1, %o3 490 482 491 483 .align 64 492 - 80: /* 0 < len <= 16 */ 484 + /* 0 < len < 16 */ 485 + less_than_16: 493 486 andcc %o3, 0x3, %g0 494 487 bne,pn %XCC, 90f 495 488 sub %o0, %o1, %o3 ··· 502 493 bgu,pt %XCC, 1b 503 494 add %o1, 4, %o1 504 495 505 - 85: retl 496 + 
end_return: 497 + retl 506 498 mov EX_RETVAL(%o4), %o0 507 499 508 500 .align 32
+44 -1
arch/sparc/mm/gup.c
··· 103 103 return 1; 104 104 } 105 105 106 + static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, 107 + unsigned long end, int write, struct page **pages, 108 + int *nr) 109 + { 110 + struct page *head, *page; 111 + int refs; 112 + 113 + if (!(pud_val(pud) & _PAGE_VALID)) 114 + return 0; 115 + 116 + if (write && !pud_write(pud)) 117 + return 0; 118 + 119 + refs = 0; 120 + page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); 121 + head = compound_head(page); 122 + do { 123 + VM_BUG_ON(compound_head(page) != head); 124 + pages[*nr] = page; 125 + (*nr)++; 126 + page++; 127 + refs++; 128 + } while (addr += PAGE_SIZE, addr != end); 129 + 130 + if (!page_cache_add_speculative(head, refs)) { 131 + *nr -= refs; 132 + return 0; 133 + } 134 + 135 + if (unlikely(pud_val(pud) != pud_val(*pudp))) { 136 + *nr -= refs; 137 + while (refs--) 138 + put_page(head); 139 + return 0; 140 + } 141 + 142 + return 1; 143 + } 144 + 106 145 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, 107 146 int write, struct page **pages, int *nr) 108 147 { ··· 180 141 next = pud_addr_end(addr, end); 181 142 if (pud_none(pud)) 182 143 return 0; 183 - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) 144 + if (unlikely(pud_large(pud))) { 145 + if (!gup_huge_pud(pudp, pud, addr, next, 146 + write, pages, nr)) 147 + return 0; 148 + } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) 184 149 return 0; 185 150 } while (pudp++, addr = next, addr != end); 186 151
+61 -43
arch/sparc/mm/hugetlbpage.c
··· 143 143 pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; 144 144 145 145 switch (shift) { 146 + case HPAGE_16GB_SHIFT: 147 + hugepage_size = _PAGE_SZ16GB_4V; 148 + pte_val(entry) |= _PAGE_PUD_HUGE; 149 + break; 146 150 case HPAGE_2GB_SHIFT: 147 151 hugepage_size = _PAGE_SZ2GB_4V; 148 152 pte_val(entry) |= _PAGE_PMD_HUGE; ··· 191 187 unsigned int shift; 192 188 193 189 switch (tte_szbits) { 190 + case _PAGE_SZ16GB_4V: 191 + shift = HPAGE_16GB_SHIFT; 192 + break; 194 193 case _PAGE_SZ2GB_4V: 195 194 shift = HPAGE_2GB_SHIFT; 196 195 break; ··· 266 259 pgd_t *pgd; 267 260 pud_t *pud; 268 261 pmd_t *pmd; 269 - pte_t *pte = NULL; 270 262 271 263 pgd = pgd_offset(mm, addr); 272 264 pud = pud_alloc(mm, pgd, addr); 273 - if (pud) { 274 - pmd = pmd_alloc(mm, pud, addr); 275 - if (!pmd) 276 - return NULL; 277 - 278 - if (sz >= PMD_SIZE) 279 - pte = (pte_t *)pmd; 280 - else 281 - pte = pte_alloc_map(mm, pmd, addr); 282 - } 283 - 284 - return pte; 265 + if (!pud) 266 + return NULL; 267 + if (sz >= PUD_SIZE) 268 + return (pte_t *)pud; 269 + pmd = pmd_alloc(mm, pud, addr); 270 + if (!pmd) 271 + return NULL; 272 + if (sz >= PMD_SIZE) 273 + return (pte_t *)pmd; 274 + return pte_alloc_map(mm, pmd, addr); 285 275 } 286 276 287 277 pte_t *huge_pte_offset(struct mm_struct *mm, ··· 287 283 pgd_t *pgd; 288 284 pud_t *pud; 289 285 pmd_t *pmd; 290 - pte_t *pte = NULL; 291 286 292 287 pgd = pgd_offset(mm, addr); 293 - if (!pgd_none(*pgd)) { 294 - pud = pud_offset(pgd, addr); 295 - if (!pud_none(*pud)) { 296 - pmd = pmd_offset(pud, addr); 297 - if (!pmd_none(*pmd)) { 298 - if (is_hugetlb_pmd(*pmd)) 299 - pte = (pte_t *)pmd; 300 - else 301 - pte = pte_offset_map(pmd, addr); 302 - } 303 - } 304 - } 305 - 306 - return pte; 288 + if (pgd_none(*pgd)) 289 + return NULL; 290 + pud = pud_offset(pgd, addr); 291 + if (pud_none(*pud)) 292 + return NULL; 293 + if (is_hugetlb_pud(*pud)) 294 + return (pte_t *)pud; 295 + pmd = pmd_offset(pud, addr); 296 + if (pmd_none(*pmd)) 297 + return NULL; 298 + 
if (is_hugetlb_pmd(*pmd)) 299 + return (pte_t *)pmd; 300 + return pte_offset_map(pmd, addr); 307 301 } 308 302 309 303 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 310 304 pte_t *ptep, pte_t entry) 311 305 { 312 - unsigned int i, nptes, orig_shift, shift; 313 - unsigned long size; 306 + unsigned int nptes, orig_shift, shift; 307 + unsigned long i, size; 314 308 pte_t orig; 315 309 316 310 size = huge_tte_to_size(entry); 317 - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT; 311 + 312 + shift = PAGE_SHIFT; 313 + if (size >= PUD_SIZE) 314 + shift = PUD_SHIFT; 315 + else if (size >= PMD_SIZE) 316 + shift = PMD_SHIFT; 317 + else 318 + shift = PAGE_SHIFT; 319 + 318 320 nptes = size >> shift; 319 321 320 322 if (!pte_present(*ptep) && pte_present(entry)) ··· 343 333 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 344 334 pte_t *ptep) 345 335 { 346 - unsigned int i, nptes, hugepage_shift; 336 + unsigned int i, nptes, orig_shift, shift; 347 337 unsigned long size; 348 338 pte_t entry; 349 339 350 340 entry = *ptep; 351 341 size = huge_tte_to_size(entry); 352 - if (size >= HPAGE_SIZE) 353 - nptes = size >> PMD_SHIFT; 354 - else 355 - nptes = size >> PAGE_SHIFT; 356 342 357 - hugepage_shift = pte_none(entry) ? PAGE_SHIFT : 358 - huge_tte_to_shift(entry); 343 + shift = PAGE_SHIFT; 344 + if (size >= PUD_SIZE) 345 + shift = PUD_SHIFT; 346 + else if (size >= PMD_SIZE) 347 + shift = PMD_SHIFT; 348 + else 349 + shift = PAGE_SHIFT; 350 + 351 + nptes = size >> shift; 352 + orig_shift = pte_none(entry) ? 
PAGE_SHIFT : huge_tte_to_shift(entry); 359 353 360 354 if (pte_present(entry)) 361 355 mm->context.hugetlb_pte_count -= nptes; ··· 368 354 for (i = 0; i < nptes; i++) 369 355 ptep[i] = __pte(0UL); 370 356 371 - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift); 357 + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift); 372 358 /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ 373 359 if (size == HPAGE_SIZE) 374 360 maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0, 375 - hugepage_shift); 361 + orig_shift); 376 362 377 363 return entry; 378 364 } ··· 385 371 386 372 int pud_huge(pud_t pud) 387 373 { 388 - return 0; 374 + return !pud_none(pud) && 375 + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID; 389 376 } 390 377 391 378 static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, ··· 450 435 next = pud_addr_end(addr, end); 451 436 if (pud_none_or_clear_bad(pud)) 452 437 continue; 453 - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, 454 - ceiling); 438 + if (is_hugetlb_pud(*pud)) 439 + pud_clear(pud); 440 + else 441 + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, 442 + ceiling); 455 443 } while (pud++, addr = next, addr != end); 456 444 457 445 start &= PGDIR_MASK;
+47 -7
arch/sparc/mm/init_64.c
··· 348 348 349 349 arch_initcall(hugetlbpage_init); 350 350 351 + static void __init pud_huge_patch(void) 352 + { 353 + struct pud_huge_patch_entry *p; 354 + unsigned long addr; 355 + 356 + p = &__pud_huge_patch; 357 + addr = p->addr; 358 + *(unsigned int *)addr = p->insn; 359 + 360 + __asm__ __volatile__("flush %0" : : "r" (addr)); 361 + } 362 + 351 363 static int __init setup_hugepagesz(char *string) 352 364 { 353 365 unsigned long long hugepage_size; ··· 372 360 hugepage_shift = ilog2(hugepage_size); 373 361 374 362 switch (hugepage_shift) { 363 + case HPAGE_16GB_SHIFT: 364 + hv_pgsz_mask = HV_PGSZ_MASK_16GB; 365 + hv_pgsz_idx = HV_PGSZ_IDX_16GB; 366 + pud_huge_patch(); 367 + break; 375 368 case HPAGE_2GB_SHIFT: 376 369 hv_pgsz_mask = HV_PGSZ_MASK_2GB; 377 370 hv_pgsz_idx = HV_PGSZ_IDX_2GB; ··· 417 400 { 418 401 struct mm_struct *mm; 419 402 unsigned long flags; 403 + bool is_huge_tsb; 420 404 pte_t pte = *ptep; 421 405 422 406 if (tlb_type != hypervisor) { ··· 435 417 436 418 spin_lock_irqsave(&mm->context.lock, flags); 437 419 420 + is_huge_tsb = false; 438 421 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) 439 - if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && 440 - is_hugetlb_pmd(__pmd(pte_val(pte)))) { 441 - /* We are fabricating 8MB pages using 4MB real hw pages. */ 442 - pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); 443 - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, 444 - address, pte_val(pte)); 445 - } else 422 + if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) { 423 + unsigned long hugepage_size = PAGE_SIZE; 424 + 425 + if (is_vm_hugetlb_page(vma)) 426 + hugepage_size = huge_page_size(hstate_vma(vma)); 427 + 428 + if (hugepage_size >= PUD_SIZE) { 429 + unsigned long mask = 0x1ffc00000UL; 430 + 431 + /* Transfer bits [32:22] from address to resolve 432 + * at 4M granularity. 
433 + */ 434 + pte_val(pte) &= ~mask; 435 + pte_val(pte) |= (address & mask); 436 + } else if (hugepage_size >= PMD_SIZE) { 437 + /* We are fabricating 8MB pages using 4MB 438 + * real hw pages. 439 + */ 440 + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); 441 + } 442 + 443 + if (hugepage_size >= PMD_SIZE) { 444 + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, 445 + REAL_HPAGE_SHIFT, address, pte_val(pte)); 446 + is_huge_tsb = true; 447 + } 448 + } 446 449 #endif 450 + if (!is_huge_tsb) 447 451 __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, 448 452 address, pte_val(pte)); 449 453
+5
drivers/tty/Kconfig
··· 458 458 help 459 459 FDC channel number to use for KGDB. 460 460 461 + config VCC 462 + tristate "Sun Virtual Console Concentrator" 463 + depends on SUN_LDOMS 464 + help 465 + Support for Sun logical domain consoles. 461 466 endif # TTY
+1
drivers/tty/Makefile
··· 33 33 obj-$(CONFIG_GOLDFISH_TTY) += goldfish.o 34 34 obj-$(CONFIG_DA_TTY) += metag_da.o 35 35 obj-$(CONFIG_MIPS_EJTAG_FDC_TTY) += mips_ejtag_fdc.o 36 + obj-$(CONFIG_VCC) += vcc.o 36 37 37 38 obj-y += ipwireless/
+1155
drivers/tty/vcc.c
··· 1 + /* vcc.c: sun4v virtual channel concentrator 2 + * 3 + * Copyright (C) 2017 Oracle. All rights reserved. 4 + */ 5 + 6 + #include <linux/delay.h> 7 + #include <linux/interrupt.h> 8 + #include <linux/module.h> 9 + #include <linux/slab.h> 10 + #include <linux/sysfs.h> 11 + #include <linux/tty.h> 12 + #include <linux/tty_flip.h> 13 + #include <asm/vio.h> 14 + #include <asm/ldc.h> 15 + 16 + #define DRV_MODULE_NAME "vcc" 17 + #define DRV_MODULE_VERSION "1.1" 18 + #define DRV_MODULE_RELDATE "July 1, 2017" 19 + 20 + static char version[] = 21 + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")"; 22 + 23 + MODULE_DESCRIPTION("Sun LDOM virtual console concentrator driver"); 24 + MODULE_LICENSE("GPL"); 25 + MODULE_VERSION(DRV_MODULE_VERSION); 26 + 27 + struct vcc_port { 28 + struct vio_driver_state vio; 29 + 30 + spinlock_t lock; 31 + char *domain; 32 + struct tty_struct *tty; /* only populated while dev is open */ 33 + unsigned long index; /* index into the vcc_table */ 34 + 35 + u64 refcnt; 36 + bool excl_locked; 37 + 38 + bool removed; 39 + 40 + /* This buffer is required to support the tty write_room interface 41 + * and guarantee that any characters that the driver accepts will 42 + * be eventually sent, either immediately or later. 
43 + */ 44 + int chars_in_buffer; 45 + struct vio_vcc buffer; 46 + 47 + struct timer_list rx_timer; 48 + struct timer_list tx_timer; 49 + }; 50 + 51 + /* Microseconds that thread will delay waiting for a vcc port ref */ 52 + #define VCC_REF_DELAY 100 53 + 54 + #define VCC_MAX_PORTS 1024 55 + #define VCC_MINOR_START 0 /* must be zero */ 56 + #define VCC_BUFF_LEN VIO_VCC_MTU_SIZE 57 + 58 + #define VCC_CTL_BREAK -1 59 + #define VCC_CTL_HUP -2 60 + 61 + static const char vcc_driver_name[] = "vcc"; 62 + static const char vcc_device_node[] = "vcc"; 63 + static struct tty_driver *vcc_tty_driver; 64 + 65 + static struct vcc_port *vcc_table[VCC_MAX_PORTS]; 66 + static DEFINE_SPINLOCK(vcc_table_lock); 67 + 68 + int vcc_dbg; 69 + int vcc_dbg_ldc; 70 + int vcc_dbg_vio; 71 + 72 + module_param(vcc_dbg, uint, 0664); 73 + module_param(vcc_dbg_ldc, uint, 0664); 74 + module_param(vcc_dbg_vio, uint, 0664); 75 + 76 + #define VCC_DBG_DRV 0x1 77 + #define VCC_DBG_LDC 0x2 78 + #define VCC_DBG_PKT 0x4 79 + 80 + #define vccdbg(f, a...) \ 81 + do { \ 82 + if (vcc_dbg & VCC_DBG_DRV) \ 83 + pr_info(f, ## a); \ 84 + } while (0) \ 85 + 86 + #define vccdbgl(l) \ 87 + do { \ 88 + if (vcc_dbg & VCC_DBG_LDC) \ 89 + ldc_print(l); \ 90 + } while (0) \ 91 + 92 + #define vccdbgp(pkt) \ 93 + do { \ 94 + if (vcc_dbg & VCC_DBG_PKT) { \ 95 + int i; \ 96 + for (i = 0; i < pkt.tag.stype; i++) \ 97 + pr_info("[%c]", pkt.data[i]); \ 98 + } \ 99 + } while (0) \ 100 + 101 + /* Note: Be careful when adding flags to this line discipline. Don't 102 + * add anything that will cause echoing or we'll go into recursive 103 + * loop echoing chars back and forth with the console drivers. 
104 + */ 105 + static const struct ktermios vcc_tty_termios = { 106 + .c_iflag = IGNBRK | IGNPAR, 107 + .c_oflag = OPOST, 108 + .c_cflag = B38400 | CS8 | CREAD | HUPCL, 109 + .c_cc = INIT_C_CC, 110 + .c_ispeed = 38400, 111 + .c_ospeed = 38400 112 + }; 113 + 114 + /** 115 + * vcc_table_add() - Add VCC port to the VCC table 116 + * @port: pointer to the VCC port 117 + * 118 + * Return: index of the port in the VCC table on success, 119 + * -1 on failure 120 + */ 121 + static int vcc_table_add(struct vcc_port *port) 122 + { 123 + unsigned long flags; 124 + int i; 125 + 126 + spin_lock_irqsave(&vcc_table_lock, flags); 127 + for (i = VCC_MINOR_START; i < VCC_MAX_PORTS; i++) { 128 + if (!vcc_table[i]) { 129 + vcc_table[i] = port; 130 + break; 131 + } 132 + } 133 + spin_unlock_irqrestore(&vcc_table_lock, flags); 134 + 135 + if (i < VCC_MAX_PORTS) 136 + return i; 137 + else 138 + return -1; 139 + } 140 + 141 + /** 142 + * vcc_table_remove() - Removes a VCC port from the VCC table 143 + * @index: Index into the VCC table 144 + */ 145 + static void vcc_table_remove(unsigned long index) 146 + { 147 + unsigned long flags; 148 + 149 + if (WARN_ON(index >= VCC_MAX_PORTS)) 150 + return; 151 + 152 + spin_lock_irqsave(&vcc_table_lock, flags); 153 + vcc_table[index] = NULL; 154 + spin_unlock_irqrestore(&vcc_table_lock, flags); 155 + } 156 + 157 + /** 158 + * vcc_get() - Gets a reference to VCC port 159 + * @index: Index into the VCC table 160 + * @excl: Indicates if an exclusive access is requested 161 + * 162 + * Return: reference to the VCC port, if found 163 + * NULL, if port not found 164 + */ 165 + static struct vcc_port *vcc_get(unsigned long index, bool excl) 166 + { 167 + struct vcc_port *port; 168 + unsigned long flags; 169 + 170 + try_again: 171 + spin_lock_irqsave(&vcc_table_lock, flags); 172 + 173 + port = vcc_table[index]; 174 + if (!port) { 175 + spin_unlock_irqrestore(&vcc_table_lock, flags); 176 + return NULL; 177 + } 178 + 179 + if (!excl) { 180 + if 
(port->excl_locked) { 181 + spin_unlock_irqrestore(&vcc_table_lock, flags); 182 + udelay(VCC_REF_DELAY); 183 + goto try_again; 184 + } 185 + port->refcnt++; 186 + spin_unlock_irqrestore(&vcc_table_lock, flags); 187 + return port; 188 + } 189 + 190 + if (port->refcnt) { 191 + spin_unlock_irqrestore(&vcc_table_lock, flags); 192 + /* Threads wanting exclusive access will wait half the time, 193 + * probably giving them higher priority in the case of 194 + * multiple waiters. 195 + */ 196 + udelay(VCC_REF_DELAY/2); 197 + goto try_again; 198 + } 199 + 200 + port->refcnt++; 201 + port->excl_locked = true; 202 + spin_unlock_irqrestore(&vcc_table_lock, flags); 203 + 204 + return port; 205 + } 206 + 207 + /** 208 + * vcc_put() - Returns a reference to VCC port 209 + * @port: pointer to VCC port 210 + * @excl: Indicates if the returned reference is an exclusive reference 211 + * 212 + * Note: It's the caller's responsibility to ensure the correct value 213 + * for the excl flag 214 + */ 215 + static void vcc_put(struct vcc_port *port, bool excl) 216 + { 217 + unsigned long flags; 218 + 219 + if (!port) 220 + return; 221 + 222 + spin_lock_irqsave(&vcc_table_lock, flags); 223 + 224 + /* check if caller attempted to put with the wrong flags */ 225 + if (WARN_ON((excl && !port->excl_locked) || 226 + (!excl && port->excl_locked))) 227 + goto done; 228 + 229 + port->refcnt--; 230 + 231 + if (excl) 232 + port->excl_locked = false; 233 + 234 + done: 235 + spin_unlock_irqrestore(&vcc_table_lock, flags); 236 + } 237 + 238 + /** 239 + * vcc_get_ne() - Get a non-exclusive reference to VCC port 240 + * @index: Index into the VCC table 241 + * 242 + * Gets a non-exclusive reference to VCC port, if it's not removed 243 + * 244 + * Return: pointer to the VCC port, if found 245 + * NULL, if port not found 246 + */ 247 + static struct vcc_port *vcc_get_ne(unsigned long index) 248 + { 249 + struct vcc_port *port; 250 + 251 + port = vcc_get(index, false); 252 + 253 + if (port && port->removed) 
{ 254 + vcc_put(port, false); 255 + return NULL; 256 + } 257 + 258 + return port; 259 + } 260 + 261 + static void vcc_kick_rx(struct vcc_port *port) 262 + { 263 + struct vio_driver_state *vio = &port->vio; 264 + 265 + assert_spin_locked(&port->lock); 266 + 267 + if (!timer_pending(&port->rx_timer) && !port->removed) { 268 + disable_irq_nosync(vio->vdev->rx_irq); 269 + port->rx_timer.expires = (jiffies + 1); 270 + add_timer(&port->rx_timer); 271 + } 272 + } 273 + 274 + static void vcc_kick_tx(struct vcc_port *port) 275 + { 276 + assert_spin_locked(&port->lock); 277 + 278 + if (!timer_pending(&port->tx_timer) && !port->removed) { 279 + port->tx_timer.expires = (jiffies + 1); 280 + add_timer(&port->tx_timer); 281 + } 282 + } 283 + 284 + static int vcc_rx_check(struct tty_struct *tty, int size) 285 + { 286 + if (WARN_ON(!tty || !tty->port)) 287 + return 1; 288 + 289 + /* tty_buffer_request_room won't sleep because it uses 290 + * GFP_ATOMIC flag to allocate buffer 291 + */ 292 + if (test_bit(TTY_THROTTLED, &tty->flags) || 293 + (tty_buffer_request_room(tty->port, VCC_BUFF_LEN) < VCC_BUFF_LEN)) 294 + return 0; 295 + 296 + return 1; 297 + } 298 + 299 + static int vcc_rx(struct tty_struct *tty, char *buf, int size) 300 + { 301 + int len = 0; 302 + 303 + if (WARN_ON(!tty || !tty->port)) 304 + return len; 305 + 306 + len = tty_insert_flip_string(tty->port, buf, size); 307 + if (len) 308 + tty_flip_buffer_push(tty->port); 309 + 310 + return len; 311 + } 312 + 313 + static int vcc_ldc_read(struct vcc_port *port) 314 + { 315 + struct vio_driver_state *vio = &port->vio; 316 + struct tty_struct *tty; 317 + struct vio_vcc pkt; 318 + int rv = 0; 319 + 320 + tty = port->tty; 321 + if (!tty) { 322 + rv = ldc_rx_reset(vio->lp); 323 + vccdbg("VCC: reset rx q: rv=%d\n", rv); 324 + goto done; 325 + } 326 + 327 + /* Read as long as LDC has incoming data. 
*/ 328 + while (1) { 329 + if (!vcc_rx_check(tty, VIO_VCC_MTU_SIZE)) { 330 + vcc_kick_rx(port); 331 + break; 332 + } 333 + 334 + vccdbgl(vio->lp); 335 + 336 + rv = ldc_read(vio->lp, &pkt, sizeof(pkt)); 337 + if (rv <= 0) 338 + break; 339 + 340 + vccdbg("VCC: ldc_read()=%d\n", rv); 341 + vccdbg("TAG [%02x:%02x:%04x:%08x]\n", 342 + pkt.tag.type, pkt.tag.stype, 343 + pkt.tag.stype_env, pkt.tag.sid); 344 + 345 + if (pkt.tag.type == VIO_TYPE_DATA) { 346 + vccdbgp(pkt); 347 + /* vcc_rx_check ensures memory availability */ 348 + vcc_rx(tty, pkt.data, pkt.tag.stype); 349 + } else { 350 + pr_err("VCC: unknown msg [%02x:%02x:%04x:%08x]\n", 351 + pkt.tag.type, pkt.tag.stype, 352 + pkt.tag.stype_env, pkt.tag.sid); 353 + rv = -ECONNRESET; 354 + break; 355 + } 356 + 357 + WARN_ON(rv != LDC_PACKET_SIZE); 358 + } 359 + 360 + done: 361 + return rv; 362 + } 363 + 364 + static void vcc_rx_timer(unsigned long index) 365 + { 366 + struct vio_driver_state *vio; 367 + struct vcc_port *port; 368 + unsigned long flags; 369 + int rv; 370 + 371 + port = vcc_get_ne(index); 372 + if (!port) 373 + return; 374 + 375 + spin_lock_irqsave(&port->lock, flags); 376 + port->rx_timer.expires = 0; 377 + 378 + vio = &port->vio; 379 + 380 + enable_irq(vio->vdev->rx_irq); 381 + 382 + if (!port->tty || port->removed) 383 + goto done; 384 + 385 + rv = vcc_ldc_read(port); 386 + if (rv == -ECONNRESET) 387 + vio_conn_reset(vio); 388 + 389 + done: 390 + spin_unlock_irqrestore(&port->lock, flags); 391 + vcc_put(port, false); 392 + } 393 + 394 + static void vcc_tx_timer(unsigned long index) 395 + { 396 + struct vcc_port *port; 397 + struct vio_vcc *pkt; 398 + unsigned long flags; 399 + int tosend = 0; 400 + int rv; 401 + 402 + port = vcc_get_ne(index); 403 + if (!port) 404 + return; 405 + 406 + spin_lock_irqsave(&port->lock, flags); 407 + port->tx_timer.expires = 0; 408 + 409 + if (!port->tty || port->removed) 410 + goto done; 411 + 412 + tosend = min(VCC_BUFF_LEN, port->chars_in_buffer); 413 + if (!tosend) 414 + 
goto done; 415 + 416 + pkt = &port->buffer; 417 + pkt->tag.type = VIO_TYPE_DATA; 418 + pkt->tag.stype = tosend; 419 + vccdbgl(port->vio.lp); 420 + 421 + rv = ldc_write(port->vio.lp, pkt, (VIO_TAG_SIZE + tosend)); 422 + WARN_ON(!rv); 423 + 424 + if (rv < 0) { 425 + vccdbg("VCC: ldc_write()=%d\n", rv); 426 + vcc_kick_tx(port); 427 + } else { 428 + struct tty_struct *tty = port->tty; 429 + 430 + port->chars_in_buffer = 0; 431 + if (tty) 432 + tty_wakeup(tty); 433 + } 434 + 435 + done: 436 + spin_unlock_irqrestore(&port->lock, flags); 437 + vcc_put(port, false); 438 + } 439 + 440 + /** 441 + * vcc_event() - LDC event processing engine 442 + * @arg: VCC private data 443 + * @event: LDC event 444 + * 445 + * Handles LDC events for VCC 446 + */ 447 + static void vcc_event(void *arg, int event) 448 + { 449 + struct vio_driver_state *vio; 450 + struct vcc_port *port; 451 + unsigned long flags; 452 + int rv; 453 + 454 + port = arg; 455 + vio = &port->vio; 456 + 457 + spin_lock_irqsave(&port->lock, flags); 458 + 459 + switch (event) { 460 + case LDC_EVENT_RESET: 461 + case LDC_EVENT_UP: 462 + vio_link_state_change(vio, event); 463 + break; 464 + 465 + case LDC_EVENT_DATA_READY: 466 + rv = vcc_ldc_read(port); 467 + if (rv == -ECONNRESET) 468 + vio_conn_reset(vio); 469 + break; 470 + 471 + default: 472 + pr_err("VCC: unexpected LDC event(%d)\n", event); 473 + } 474 + 475 + spin_unlock_irqrestore(&port->lock, flags); 476 + } 477 + 478 + static struct ldc_channel_config vcc_ldc_cfg = { 479 + .event = vcc_event, 480 + .mtu = VIO_VCC_MTU_SIZE, 481 + .mode = LDC_MODE_RAW, 482 + .debug = 0, 483 + }; 484 + 485 + /* Ordered from largest major to lowest */ 486 + static struct vio_version vcc_versions[] = { 487 + { .major = 1, .minor = 0 }, 488 + }; 489 + 490 + static struct tty_port_operations vcc_port_ops = { 0 }; 491 + 492 + static ssize_t vcc_sysfs_domain_show(struct device *dev, 493 + struct device_attribute *attr, 494 + char *buf) 495 + { 496 + struct vcc_port *port; 497 + int rv; 
498 + 499 + port = dev_get_drvdata(dev); 500 + if (!port) 501 + return -ENODEV; 502 + 503 + rv = scnprintf(buf, PAGE_SIZE, "%s\n", port->domain); 504 + 505 + return rv; 506 + } 507 + 508 + static int vcc_send_ctl(struct vcc_port *port, int ctl) 509 + { 510 + struct vio_vcc pkt; 511 + int rv; 512 + 513 + pkt.tag.type = VIO_TYPE_CTRL; 514 + pkt.tag.sid = ctl; 515 + pkt.tag.stype = 0; 516 + 517 + rv = ldc_write(port->vio.lp, &pkt, sizeof(pkt.tag)); 518 + WARN_ON(!rv); 519 + vccdbg("VCC: ldc_write(%ld)=%d\n", sizeof(pkt.tag), rv); 520 + 521 + return rv; 522 + } 523 + 524 + static ssize_t vcc_sysfs_break_store(struct device *dev, 525 + struct device_attribute *attr, 526 + const char *buf, size_t count) 527 + { 528 + struct vcc_port *port; 529 + unsigned long flags; 530 + int rv = count; 531 + int brk; 532 + 533 + port = dev_get_drvdata(dev); 534 + if (!port) 535 + return -ENODEV; 536 + 537 + spin_lock_irqsave(&port->lock, flags); 538 + 539 + if (sscanf(buf, "%ud", &brk) != 1 || brk != 1) 540 + rv = -EINVAL; 541 + else if (vcc_send_ctl(port, VCC_CTL_BREAK) < 0) 542 + vcc_kick_tx(port); 543 + 544 + spin_unlock_irqrestore(&port->lock, flags); 545 + 546 + return rv; 547 + } 548 + 549 + static DEVICE_ATTR(domain, 0400, vcc_sysfs_domain_show, NULL); 550 + static DEVICE_ATTR(break, 0200, NULL, vcc_sysfs_break_store); 551 + 552 + static struct attribute *vcc_sysfs_entries[] = { 553 + &dev_attr_domain.attr, 554 + &dev_attr_break.attr, 555 + NULL 556 + }; 557 + 558 + static struct attribute_group vcc_attribute_group = { 559 + .name = NULL, 560 + .attrs = vcc_sysfs_entries, 561 + }; 562 + 563 + /** 564 + * vcc_probe() - Initialize VCC port 565 + * @vdev: Pointer to VIO device of the new VCC port 566 + * @id: VIO device ID 567 + * 568 + * Initializes a VCC port to receive serial console data from 569 + * the guest domain. Sets up a TTY end point on the control 570 + * domain. Sets up VIO/LDC link between the guest & control 571 + * domain endpoints. 
572 + * 573 + * Return: status of the probe 574 + */ 575 + static int vcc_probe(struct vio_dev *vdev, const struct vio_device_id *id) 576 + { 577 + struct mdesc_handle *hp; 578 + struct vcc_port *port; 579 + struct device *dev; 580 + const char *domain; 581 + char *name; 582 + u64 node; 583 + int rv; 584 + 585 + vccdbg("VCC: name=%s\n", dev_name(&vdev->dev)); 586 + 587 + if (!vcc_tty_driver) { 588 + pr_err("VCC: TTY driver not registered\n"); 589 + return -ENODEV; 590 + } 591 + 592 + port = kzalloc(sizeof(struct vcc_port), GFP_KERNEL); 593 + if (!port) 594 + return -ENOMEM; 595 + 596 + name = kstrdup(dev_name(&vdev->dev), GFP_KERNEL); 597 + 598 + rv = vio_driver_init(&port->vio, vdev, VDEV_CONSOLE_CON, vcc_versions, 599 + ARRAY_SIZE(vcc_versions), NULL, name); 600 + if (rv) 601 + goto free_port; 602 + 603 + port->vio.debug = vcc_dbg_vio; 604 + vcc_ldc_cfg.debug = vcc_dbg_ldc; 605 + 606 + rv = vio_ldc_alloc(&port->vio, &vcc_ldc_cfg, port); 607 + if (rv) 608 + goto free_port; 609 + 610 + spin_lock_init(&port->lock); 611 + 612 + port->index = vcc_table_add(port); 613 + if (port->index == -1) { 614 + pr_err("VCC: no more TTY indices left for allocation\n"); 615 + goto free_ldc; 616 + } 617 + 618 + /* Register the device using VCC table index as TTY index */ 619 + dev = tty_register_device(vcc_tty_driver, port->index, &vdev->dev); 620 + if (IS_ERR(dev)) { 621 + rv = PTR_ERR(dev); 622 + goto free_table; 623 + } 624 + 625 + hp = mdesc_grab(); 626 + 627 + node = vio_vdev_node(hp, vdev); 628 + if (node == MDESC_NODE_NULL) { 629 + rv = -ENXIO; 630 + mdesc_release(hp); 631 + goto unreg_tty; 632 + } 633 + 634 + domain = mdesc_get_property(hp, node, "vcc-domain-name", NULL); 635 + if (!domain) { 636 + rv = -ENXIO; 637 + mdesc_release(hp); 638 + goto unreg_tty; 639 + } 640 + port->domain = kstrdup(domain, GFP_KERNEL); 641 + 642 + mdesc_release(hp); 643 + 644 + rv = sysfs_create_group(&vdev->dev.kobj, &vcc_attribute_group); 645 + if (rv) 646 + goto free_domain; 647 + 648 + 
init_timer(&port->rx_timer); 649 + port->rx_timer.function = vcc_rx_timer; 650 + port->rx_timer.data = port->index; 651 + 652 + init_timer(&port->tx_timer); 653 + port->tx_timer.function = vcc_tx_timer; 654 + port->tx_timer.data = port->index; 655 + 656 + dev_set_drvdata(&vdev->dev, port); 657 + 658 + /* It's possible to receive IRQs in the middle of vio_port_up. Disable 659 + * IRQs until the port is up. 660 + */ 661 + disable_irq_nosync(vdev->rx_irq); 662 + vio_port_up(&port->vio); 663 + enable_irq(vdev->rx_irq); 664 + 665 + return 0; 666 + 667 + free_domain: 668 + kfree(port->domain); 669 + unreg_tty: 670 + tty_unregister_device(vcc_tty_driver, port->index); 671 + free_table: 672 + vcc_table_remove(port->index); 673 + free_ldc: 674 + vio_ldc_free(&port->vio); 675 + free_port: 676 + kfree(name); 677 + kfree(port); 678 + 679 + return rv; 680 + } 681 + 682 + /** 683 + * vcc_remove() - Terminate a VCC port 684 + * @vdev: Pointer to VIO device of the VCC port 685 + * 686 + * Terminates a VCC port. Sets up the teardown of TTY and 687 + * VIO/LDC link between guest and primary domains. 688 + * 689 + * Return: status of removal 690 + */ 691 + static int vcc_remove(struct vio_dev *vdev) 692 + { 693 + struct vcc_port *port = dev_get_drvdata(&vdev->dev); 694 + 695 + if (!port) 696 + return -ENODEV; 697 + 698 + del_timer_sync(&port->rx_timer); 699 + del_timer_sync(&port->tx_timer); 700 + 701 + /* If there's a process with the device open, do a synchronous 702 + * hangup of the TTY. This *may* cause the process to call close 703 + * asynchronously, but it's not guaranteed. 
704 + */ 705 + if (port->tty) 706 + tty_vhangup(port->tty); 707 + 708 + /* Get exclusive reference to VCC, ensures that there are no other 709 + * clients to this port 710 + */ 711 + port = vcc_get(port->index, true); 712 + 713 + if (WARN_ON(!port)) 714 + return -ENODEV; 715 + 716 + tty_unregister_device(vcc_tty_driver, port->index); 717 + 718 + del_timer_sync(&port->vio.timer); 719 + vio_ldc_free(&port->vio); 720 + sysfs_remove_group(&vdev->dev.kobj, &vcc_attribute_group); 721 + dev_set_drvdata(&vdev->dev, NULL); 722 + if (port->tty) { 723 + port->removed = true; 724 + vcc_put(port, true); 725 + } else { 726 + vcc_table_remove(port->index); 727 + 728 + kfree(port->vio.name); 729 + kfree(port->domain); 730 + kfree(port); 731 + } 732 + 733 + return 0; 734 + } 735 + 736 + static const struct vio_device_id vcc_match[] = { 737 + { 738 + .type = "vcc-port", 739 + }, 740 + {}, 741 + }; 742 + MODULE_DEVICE_TABLE(vio, vcc_match); 743 + 744 + static struct vio_driver vcc_driver = { 745 + .id_table = vcc_match, 746 + .probe = vcc_probe, 747 + .remove = vcc_remove, 748 + .name = "vcc", 749 + }; 750 + 751 + static int vcc_open(struct tty_struct *tty, struct file *vcc_file) 752 + { 753 + struct vcc_port *port; 754 + 755 + if (unlikely(!tty)) { 756 + pr_err("VCC: open: Invalid TTY handle\n"); 757 + return -ENXIO; 758 + } 759 + 760 + if (tty->count > 1) 761 + return -EBUSY; 762 + 763 + port = vcc_get_ne(tty->index); 764 + if (unlikely(!port)) { 765 + pr_err("VCC: open: Failed to find VCC port\n"); 766 + return -ENODEV; 767 + } 768 + 769 + if (unlikely(!port->vio.lp)) { 770 + pr_err("VCC: open: LDC channel not configured\n"); 771 + vcc_put(port, false); 772 + return -EPIPE; 773 + } 774 + vccdbgl(port->vio.lp); 775 + 776 + vcc_put(port, false); 777 + 778 + if (unlikely(!tty->port)) { 779 + pr_err("VCC: open: TTY port not found\n"); 780 + return -ENXIO; 781 + } 782 + 783 + if (unlikely(!tty->port->ops)) { 784 + pr_err("VCC: open: TTY ops not defined\n"); 785 + return -ENXIO; 786 + } 
787 + 788 + return tty_port_open(tty->port, tty, vcc_file); 789 + } 790 + 791 + static void vcc_close(struct tty_struct *tty, struct file *vcc_file) 792 + { 793 + if (unlikely(!tty)) { 794 + pr_err("VCC: close: Invalid TTY handle\n"); 795 + return; 796 + } 797 + 798 + if (unlikely(tty->count > 1)) 799 + return; 800 + 801 + if (unlikely(!tty->port)) { 802 + pr_err("VCC: close: TTY port not found\n"); 803 + return; 804 + } 805 + 806 + tty_port_close(tty->port, tty, vcc_file); 807 + } 808 + 809 + static void vcc_ldc_hup(struct vcc_port *port) 810 + { 811 + unsigned long flags; 812 + 813 + spin_lock_irqsave(&port->lock, flags); 814 + 815 + if (vcc_send_ctl(port, VCC_CTL_HUP) < 0) 816 + vcc_kick_tx(port); 817 + 818 + spin_unlock_irqrestore(&port->lock, flags); 819 + } 820 + 821 + static void vcc_hangup(struct tty_struct *tty) 822 + { 823 + struct vcc_port *port; 824 + 825 + if (unlikely(!tty)) { 826 + pr_err("VCC: hangup: Invalid TTY handle\n"); 827 + return; 828 + } 829 + 830 + port = vcc_get_ne(tty->index); 831 + if (unlikely(!port)) { 832 + pr_err("VCC: hangup: Failed to find VCC port\n"); 833 + return; 834 + } 835 + 836 + if (unlikely(!tty->port)) { 837 + pr_err("VCC: hangup: TTY port not found\n"); 838 + vcc_put(port, false); 839 + return; 840 + } 841 + 842 + vcc_ldc_hup(port); 843 + 844 + vcc_put(port, false); 845 + 846 + tty_port_hangup(tty->port); 847 + } 848 + 849 + static int vcc_write(struct tty_struct *tty, const unsigned char *buf, 850 + int count) 851 + { 852 + struct vcc_port *port; 853 + struct vio_vcc *pkt; 854 + unsigned long flags; 855 + int total_sent = 0; 856 + int tosend = 0; 857 + int rv = -EINVAL; 858 + 859 + if (unlikely(!tty)) { 860 + pr_err("VCC: write: Invalid TTY handle\n"); 861 + return -ENXIO; 862 + } 863 + 864 + port = vcc_get_ne(tty->index); 865 + if (unlikely(!port)) { 866 + pr_err("VCC: write: Failed to find VCC port"); 867 + return -ENODEV; 868 + } 869 + 870 + spin_lock_irqsave(&port->lock, flags); 871 + 872 + pkt = &port->buffer; 873 
+ pkt->tag.type = VIO_TYPE_DATA; 874 + 875 + while (count > 0) { 876 + /* Minimum of data to write and space available */ 877 + tosend = min(count, (VCC_BUFF_LEN - port->chars_in_buffer)); 878 + 879 + if (!tosend) 880 + break; 881 + 882 + memcpy(&pkt->data[port->chars_in_buffer], &buf[total_sent], 883 + tosend); 884 + port->chars_in_buffer += tosend; 885 + pkt->tag.stype = tosend; 886 + 887 + vccdbg("TAG [%02x:%02x:%04x:%08x]\n", pkt->tag.type, 888 + pkt->tag.stype, pkt->tag.stype_env, pkt->tag.sid); 889 + vccdbg("DATA [%s]\n", pkt->data); 890 + vccdbgl(port->vio.lp); 891 + 892 + /* Since we know we have enough room in VCC buffer for tosend 893 + * we record that it was sent regardless of whether the 894 + * hypervisor actually took it because we have it buffered. 895 + */ 896 + rv = ldc_write(port->vio.lp, pkt, (VIO_TAG_SIZE + tosend)); 897 + vccdbg("VCC: write: ldc_write(%d)=%d\n", 898 + (VIO_TAG_SIZE + tosend), rv); 899 + 900 + total_sent += tosend; 901 + count -= tosend; 902 + if (rv < 0) { 903 + vcc_kick_tx(port); 904 + break; 905 + } 906 + 907 + port->chars_in_buffer = 0; 908 + } 909 + 910 + spin_unlock_irqrestore(&port->lock, flags); 911 + 912 + vcc_put(port, false); 913 + 914 + vccdbg("VCC: write: total=%d rv=%d", total_sent, rv); 915 + 916 + return total_sent ? 
total_sent : rv; 917 + } 918 + 919 + static int vcc_write_room(struct tty_struct *tty) 920 + { 921 + struct vcc_port *port; 922 + u64 num; 923 + 924 + if (unlikely(!tty)) { 925 + pr_err("VCC: write_room: Invalid TTY handle\n"); 926 + return -ENXIO; 927 + } 928 + 929 + port = vcc_get_ne(tty->index); 930 + if (unlikely(!port)) { 931 + pr_err("VCC: write_room: Failed to find VCC port\n"); 932 + return -ENODEV; 933 + } 934 + 935 + num = VCC_BUFF_LEN - port->chars_in_buffer; 936 + 937 + vcc_put(port, false); 938 + 939 + return num; 940 + } 941 + 942 + static int vcc_chars_in_buffer(struct tty_struct *tty) 943 + { 944 + struct vcc_port *port; 945 + u64 num; 946 + 947 + if (unlikely(!tty)) { 948 + pr_err("VCC: chars_in_buffer: Invalid TTY handle\n"); 949 + return -ENXIO; 950 + } 951 + 952 + port = vcc_get_ne(tty->index); 953 + if (unlikely(!port)) { 954 + pr_err("VCC: chars_in_buffer: Failed to find VCC port\n"); 955 + return -ENODEV; 956 + } 957 + 958 + num = port->chars_in_buffer; 959 + 960 + vcc_put(port, false); 961 + 962 + return num; 963 + } 964 + 965 + static int vcc_break_ctl(struct tty_struct *tty, int state) 966 + { 967 + struct vcc_port *port; 968 + unsigned long flags; 969 + 970 + if (unlikely(!tty)) { 971 + pr_err("VCC: break_ctl: Invalid TTY handle\n"); 972 + return -ENXIO; 973 + } 974 + 975 + port = vcc_get_ne(tty->index); 976 + if (unlikely(!port)) { 977 + pr_err("VCC: break_ctl: Failed to find VCC port\n"); 978 + return -ENODEV; 979 + } 980 + 981 + /* Turn off break */ 982 + if (state == 0) { 983 + vcc_put(port, false); 984 + return 0; 985 + } 986 + 987 + spin_lock_irqsave(&port->lock, flags); 988 + 989 + if (vcc_send_ctl(port, VCC_CTL_BREAK) < 0) 990 + vcc_kick_tx(port); 991 + 992 + spin_unlock_irqrestore(&port->lock, flags); 993 + 994 + vcc_put(port, false); 995 + 996 + return 0; 997 + } 998 + 999 + static int vcc_install(struct tty_driver *driver, struct tty_struct *tty) 1000 + { 1001 + struct vcc_port *port_vcc; 1002 + struct tty_port *port_tty; 1003 
+ int ret; 1004 + 1005 + if (unlikely(!tty)) { 1006 + pr_err("VCC: install: Invalid TTY handle\n"); 1007 + return -ENXIO; 1008 + } 1009 + 1010 + if (tty->index >= VCC_MAX_PORTS) 1011 + return -EINVAL; 1012 + 1013 + ret = tty_standard_install(driver, tty); 1014 + if (ret) 1015 + return ret; 1016 + 1017 + port_tty = kzalloc(sizeof(struct tty_port), GFP_KERNEL); 1018 + if (!port_tty) 1019 + return -ENOMEM; 1020 + 1021 + port_vcc = vcc_get(tty->index, true); 1022 + if (!port_vcc) { 1023 + pr_err("VCC: install: Failed to find VCC port\n"); 1024 + tty->port = NULL; 1025 + kfree(port_tty); 1026 + return -ENODEV; 1027 + } 1028 + 1029 + tty_port_init(port_tty); 1030 + port_tty->ops = &vcc_port_ops; 1031 + tty->port = port_tty; 1032 + 1033 + port_vcc->tty = tty; 1034 + 1035 + vcc_put(port_vcc, true); 1036 + 1037 + return 0; 1038 + } 1039 + 1040 + static void vcc_cleanup(struct tty_struct *tty) 1041 + { 1042 + struct vcc_port *port; 1043 + 1044 + if (unlikely(!tty)) { 1045 + pr_err("VCC: cleanup: Invalid TTY handle\n"); 1046 + return; 1047 + } 1048 + 1049 + port = vcc_get(tty->index, true); 1050 + if (port) { 1051 + port->tty = NULL; 1052 + 1053 + if (port->removed) { 1054 + vcc_table_remove(tty->index); 1055 + kfree(port->vio.name); 1056 + kfree(port->domain); 1057 + kfree(port); 1058 + } else { 1059 + vcc_put(port, true); 1060 + } 1061 + } 1062 + 1063 + tty_port_destroy(tty->port); 1064 + kfree(tty->port); 1065 + tty->port = NULL; 1066 + } 1067 + 1068 + static const struct tty_operations vcc_ops = { 1069 + .open = vcc_open, 1070 + .close = vcc_close, 1071 + .hangup = vcc_hangup, 1072 + .write = vcc_write, 1073 + .write_room = vcc_write_room, 1074 + .chars_in_buffer = vcc_chars_in_buffer, 1075 + .break_ctl = vcc_break_ctl, 1076 + .install = vcc_install, 1077 + .cleanup = vcc_cleanup, 1078 + }; 1079 + 1080 + #define VCC_TTY_FLAGS (TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_REAL_RAW) 1081 + 1082 + static int vcc_tty_init(void) 1083 + { 1084 + int rv; 1085 + 1086 + pr_info("VCC: 
%s\n", version); 1087 + 1088 + vcc_tty_driver = tty_alloc_driver(VCC_MAX_PORTS, VCC_TTY_FLAGS); 1089 + if (IS_ERR(vcc_tty_driver)) { 1090 + pr_err("VCC: TTY driver alloc failed\n"); 1091 + return PTR_ERR(vcc_tty_driver); 1092 + } 1093 + 1094 + vcc_tty_driver->driver_name = vcc_driver_name; 1095 + vcc_tty_driver->name = vcc_device_node; 1096 + 1097 + vcc_tty_driver->minor_start = VCC_MINOR_START; 1098 + vcc_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; 1099 + vcc_tty_driver->init_termios = vcc_tty_termios; 1100 + 1101 + tty_set_operations(vcc_tty_driver, &vcc_ops); 1102 + 1103 + rv = tty_register_driver(vcc_tty_driver); 1104 + if (rv) { 1105 + pr_err("VCC: TTY driver registration failed\n"); 1106 + put_tty_driver(vcc_tty_driver); 1107 + vcc_tty_driver = NULL; 1108 + return rv; 1109 + } 1110 + 1111 + vccdbg("VCC: TTY driver registered\n"); 1112 + 1113 + return 0; 1114 + } 1115 + 1116 + static void vcc_tty_exit(void) 1117 + { 1118 + tty_unregister_driver(vcc_tty_driver); 1119 + put_tty_driver(vcc_tty_driver); 1120 + vccdbg("VCC: TTY driver unregistered\n"); 1121 + 1122 + vcc_tty_driver = NULL; 1123 + } 1124 + 1125 + static int __init vcc_init(void) 1126 + { 1127 + int rv; 1128 + 1129 + rv = vcc_tty_init(); 1130 + if (rv) { 1131 + pr_err("VCC: TTY init failed\n"); 1132 + return rv; 1133 + } 1134 + 1135 + rv = vio_register_driver(&vcc_driver); 1136 + if (rv) { 1137 + pr_err("VCC: VIO driver registration failed\n"); 1138 + vcc_tty_exit(); 1139 + } else { 1140 + vccdbg("VCC: VIO driver registered successfully\n"); 1141 + } 1142 + 1143 + return rv; 1144 + } 1145 + 1146 + static void __exit vcc_exit(void) 1147 + { 1148 + vio_unregister_driver(&vcc_driver); 1149 + vccdbg("VCC: VIO driver unregistered\n"); 1150 + vcc_tty_exit(); 1151 + vccdbg("VCC: TTY driver unregistered\n"); 1152 + } 1153 + 1154 + module_init(vcc_init); 1155 + module_exit(vcc_exit);