Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
"The first part of the s390 updates for 4.14:

- Add machine type 0x3906 for IBM z14

- Add IBM z14 TLB flushing improvements for KVM guests

- Exploit the TOD clock epoch extension to provide a continuous TOD
clock after 2042/09/17

- Add NIAI spinlock hints for IBM z14

- Rework the vmcp driver and use CMA for the response buffer of z/VM
CP commands

- Drop some s390 specific asm headers and use the generic version

- Add block discard for DASD-FBA devices under z/VM

- Add average request times to DASD statistics

- A few of those constify patches which seem to be in vogue right now

- Cleanup and bug fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (50 commits)
s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs
s390/dasd: Add discard support for FBA devices
s390/zcrypt: make CPRBX const
s390/uaccess: avoid mvcos jump label
s390/mm: use generic mm_hooks
s390/facilities: fix typo
s390/vmcp: simplify vmcp_response_free()
s390/topology: Remove the unused parent_node() macro
s390/dasd: Change unsigned long long to unsigned long
s390/smp: convert cpuhp_setup_state() return code to zero on success
s390: fix 'novx' early parameter handling
s390/dasd: add average request times to dasd statistics
s390/scm: use common completion path
s390/pci: log changes to uid checking
s390/vmcp: simplify vmcp_ioctl()
s390/vmcp: return -ENOTTY for unknown ioctl commands
s390/vmcp: split vmcp header file and move to uapi
s390/vmcp: make use of contiguous memory allocator
s390/cpcmd,vmcp: avoid GFP_DMA allocations
s390/vmcp: fix uaccess check and avoid undefined behavior
...

+1406 -695
+4
Documentation/admin-guide/kernel-parameters.txt
··· 4395 4395 decrease the size and leave more room for directly 4396 4396 mapped kernel RAM. 4397 4397 4398 + vmcp_cma=nn[MG] [KNL,S390] 4399 + Sets the memory size reserved for contiguous memory 4400 + allocations for the vmcp device driver. 4401 + 4398 4402 vmhalt= [KNL,S390] Perform z/VM CP command after system halt. 4399 4403 Format: <command> 4400 4404
+18
arch/s390/Kconfig
··· 222 222 def_bool n 223 223 select HAVE_MARCH_ZEC12_FEATURES 224 224 225 + config HAVE_MARCH_Z14_FEATURES 226 + def_bool n 227 + select HAVE_MARCH_Z13_FEATURES 228 + 225 229 choice 226 230 prompt "Processor type" 227 231 default MARCH_Z196 ··· 286 282 2964 series). The kernel will be slightly faster but will not work on 287 283 older machines. 288 284 285 + config MARCH_Z14 286 + bool "IBM z14" 287 + select HAVE_MARCH_Z14_FEATURES 288 + help 289 + Select this to enable optimizations for IBM z14 (3906 series). 290 + The kernel will be slightly faster but will not work on older 291 + machines. 292 + 289 293 endchoice 290 294 291 295 config MARCH_Z900_TUNE ··· 316 304 317 305 config MARCH_Z13_TUNE 318 306 def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT 307 + 308 + config MARCH_Z14_TUNE 309 + def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT 319 310 320 311 choice 321 312 prompt "Tune code generation" ··· 357 342 358 343 config TUNE_Z13 359 344 bool "IBM z13" 345 + 346 + config TUNE_Z14 347 + bool "IBM z14" 360 348 361 349 endchoice 362 350
+4 -2
arch/s390/Makefile
··· 31 31 mflags-$(CONFIG_MARCH_Z10) := -march=z10 32 32 mflags-$(CONFIG_MARCH_Z196) := -march=z196 33 33 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 34 - mflags-$(CONFIG_MARCH_Z13) := -march=z13 34 + mflags-$(CONFIG_MARCH_Z13) := -march=z13 35 + mflags-$(CONFIG_MARCH_Z14) := -march=z14 35 36 36 37 export CC_FLAGS_MARCH := $(mflags-y) 37 38 ··· 45 44 cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 46 45 cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 47 46 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 48 - cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 47 + cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 48 + cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 49 49 50 50 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include 51 51
+1
arch/s390/include/asm/Kbuild
··· 16 16 generic-y += mm-arch-hooks.h 17 17 generic-y += preempt.h 18 18 generic-y += trace_clock.h 19 + generic-y += unaligned.h 19 20 generic-y += word-at-a-time.h
+3 -4
arch/s390/include/asm/cpcmd.h
··· 10 10 11 11 /* 12 12 * the lowlevel function for cpcmd 13 - * the caller of __cpcmd has to ensure that the response buffer is below 2 GB 14 13 */ 15 - extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); 14 + int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); 16 15 17 16 /* 18 17 * cpcmd is the in-kernel interface for issuing CP commands ··· 24 25 * response_code: return pointer for VM's error code 25 26 * return value: the size of the response. The caller can check if the buffer 26 27 * was large enough by comparing the return value and rlen 27 - * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep 28 + * NOTE: If the response buffer is not in real storage, cpcmd can sleep 28 29 */ 29 - extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code); 30 + int cpcmd(const char *cmd, char *response, int rlen, int *response_code); 30 31 31 32 #endif /* _ASM_S390_CPCMD_H */
+1 -3
arch/s390/include/asm/ebcdic.h
··· 9 9 #ifndef _EBCDIC_H 10 10 #define _EBCDIC_H 11 11 12 - #ifndef _S390_TYPES_H 13 - #include <types.h> 14 - #endif 12 + #include <linux/types.h> 15 13 16 14 extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */ 17 15 extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */
+1 -1
arch/s390/include/asm/elf.h
··· 191 191 } while (0) 192 192 193 193 #define CORE_DUMP_USE_REGSET 194 - #define ELF_EXEC_PAGESIZE 4096 194 + #define ELF_EXEC_PAGESIZE PAGE_SIZE 195 195 196 196 /* 197 197 * This is the base location for PIE (ET_DYN with INTERP) loads. On
+1 -1
arch/s390/include/asm/ipl.h
··· 81 81 struct ipl_block_fcp fcp; 82 82 struct ipl_block_ccw ccw; 83 83 } ipl_info; 84 - } __attribute__((packed,aligned(4096))); 84 + } __packed __aligned(PAGE_SIZE); 85 85 86 86 /* 87 87 * IPL validity flags
+24 -24
arch/s390/include/asm/lowcore.h
··· 95 95 __u64 int_clock; /* 0x0310 */ 96 96 __u64 mcck_clock; /* 0x0318 */ 97 97 __u64 clock_comparator; /* 0x0320 */ 98 + __u64 boot_clock[2]; /* 0x0328 */ 98 99 99 100 /* Current process. */ 100 - __u64 current_task; /* 0x0328 */ 101 - __u8 pad_0x318[0x320-0x318]; /* 0x0330 */ 102 - __u64 kernel_stack; /* 0x0338 */ 101 + __u64 current_task; /* 0x0338 */ 102 + __u64 kernel_stack; /* 0x0340 */ 103 103 104 104 /* Interrupt, panic and restart stack. */ 105 - __u64 async_stack; /* 0x0340 */ 106 - __u64 panic_stack; /* 0x0348 */ 107 - __u64 restart_stack; /* 0x0350 */ 105 + __u64 async_stack; /* 0x0348 */ 106 + __u64 panic_stack; /* 0x0350 */ 107 + __u64 restart_stack; /* 0x0358 */ 108 108 109 109 /* Restart function and parameter. */ 110 - __u64 restart_fn; /* 0x0358 */ 111 - __u64 restart_data; /* 0x0360 */ 112 - __u64 restart_source; /* 0x0368 */ 110 + __u64 restart_fn; /* 0x0360 */ 111 + __u64 restart_data; /* 0x0368 */ 112 + __u64 restart_source; /* 0x0370 */ 113 113 114 114 /* Address space pointer. */ 115 - __u64 kernel_asce; /* 0x0370 */ 116 - __u64 user_asce; /* 0x0378 */ 115 + __u64 kernel_asce; /* 0x0378 */ 116 + __u64 user_asce; /* 0x0380 */ 117 117 118 118 /* 119 119 * The lpp and current_pid fields form a 120 120 * 64-bit value that is set as program 121 121 * parameter with the LPP instruction. 
122 122 */ 123 - __u32 lpp; /* 0x0380 */ 124 - __u32 current_pid; /* 0x0384 */ 123 + __u32 lpp; /* 0x0388 */ 124 + __u32 current_pid; /* 0x038c */ 125 125 126 126 /* SMP info area */ 127 - __u32 cpu_nr; /* 0x0388 */ 128 - __u32 softirq_pending; /* 0x038c */ 129 - __u64 percpu_offset; /* 0x0390 */ 130 - __u64 vdso_per_cpu_data; /* 0x0398 */ 131 - __u64 machine_flags; /* 0x03a0 */ 132 - __u32 preempt_count; /* 0x03a8 */ 133 - __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */ 134 - __u64 gmap; /* 0x03b0 */ 135 - __u32 spinlock_lockval; /* 0x03b8 */ 136 - __u32 fpu_flags; /* 0x03bc */ 137 - __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ 127 + __u32 cpu_nr; /* 0x0390 */ 128 + __u32 softirq_pending; /* 0x0394 */ 129 + __u64 percpu_offset; /* 0x0398 */ 130 + __u64 vdso_per_cpu_data; /* 0x03a0 */ 131 + __u64 machine_flags; /* 0x03a8 */ 132 + __u32 preempt_count; /* 0x03b0 */ 133 + __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ 134 + __u64 gmap; /* 0x03b8 */ 135 + __u32 spinlock_lockval; /* 0x03c0 */ 136 + __u32 fpu_flags; /* 0x03c4 */ 137 + __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */ 138 138 139 139 /* Per cpu primary space access list */ 140 140 __u32 paste[16]; /* 0x0400 */
-11
arch/s390/include/asm/mman.h
··· 1 - /* 2 - * S390 version 3 - * 4 - * Derived from "include/asm-i386/mman.h" 5 - */ 6 - #ifndef __S390_MMAN_H__ 7 - #define __S390_MMAN_H__ 8 - 9 - #include <uapi/asm/mman.h> 10 - 11 - #endif /* __S390_MMAN_H__ */
+4 -29
arch/s390/include/asm/mmu_context.h
··· 12 12 #include <linux/mm_types.h> 13 13 #include <asm/tlbflush.h> 14 14 #include <asm/ctl_reg.h> 15 + #include <asm-generic/mm_hooks.h> 15 16 16 17 static inline int init_new_context(struct task_struct *tsk, 17 18 struct mm_struct *mm) ··· 34 33 mm->context.use_cmma = 0; 35 34 #endif 36 35 switch (mm->context.asce_limit) { 37 - case 1UL << 42: 36 + case _REGION2_SIZE: 38 37 /* 39 38 * forked 3-level task, fall through to set new asce with new 40 39 * mm->pgd ··· 50 49 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 51 50 _ASCE_USER_BITS | _ASCE_TYPE_REGION1; 52 51 break; 53 - case 1UL << 53: 52 + case _REGION1_SIZE: 54 53 /* forked 4-level task, set new asce with new mm->pgd */ 55 54 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 56 55 _ASCE_USER_BITS | _ASCE_TYPE_REGION2; 57 56 break; 58 - case 1UL << 31: 57 + case _REGION3_SIZE: 59 58 /* forked 2-level compat task, set new asce with new mm->pgd */ 60 59 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 61 60 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; ··· 139 138 set_user_asce(next); 140 139 } 141 140 142 - static inline void arch_dup_mmap(struct mm_struct *oldmm, 143 - struct mm_struct *mm) 144 - { 145 - } 146 - 147 - static inline void arch_exit_mmap(struct mm_struct *mm) 148 - { 149 - } 150 - 151 - static inline void arch_unmap(struct mm_struct *mm, 152 - struct vm_area_struct *vma, 153 - unsigned long start, unsigned long end) 154 - { 155 - } 156 - 157 - static inline void arch_bprm_mm_init(struct mm_struct *mm, 158 - struct vm_area_struct *vma) 159 - { 160 - } 161 - 162 - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, 163 - bool write, bool execute, bool foreign) 164 - { 165 - /* by default, allow everything */ 166 - return true; 167 - } 168 141 #endif /* __S390_MMU_CONTEXT_H */
+1 -1
arch/s390/include/asm/nmi.h
··· 72 72 u64 ar : 1; /* 33 access register validity */ 73 73 u64 da : 1; /* 34 delayed access exception */ 74 74 u64 : 1; /* 35 */ 75 - u64 gs : 1; /* 36 guarded storage registers */ 75 + u64 gs : 1; /* 36 guarded storage registers validity */ 76 76 u64 : 5; /* 37-41 */ 77 77 u64 pr : 1; /* 42 tod programmable register validity */ 78 78 u64 fc : 1; /* 43 fp control register validity */
+1
arch/s390/include/asm/page-states.h
··· 13 13 #define ESSA_SET_POT_VOLATILE 4 14 14 #define ESSA_SET_STABLE_RESIDENT 5 15 15 #define ESSA_SET_STABLE_IF_RESIDENT 6 16 + #define ESSA_SET_STABLE_NODAT 7 16 17 17 18 #define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT 18 19
+27 -10
arch/s390/include/asm/page.h
··· 10 10 #include <linux/const.h> 11 11 #include <asm/types.h> 12 12 13 + #define _PAGE_SHIFT 12 14 + #define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT) 15 + #define _PAGE_MASK (~(_PAGE_SIZE - 1)) 16 + 13 17 /* PAGE_SHIFT determines the page size */ 14 - #define PAGE_SHIFT 12 15 - #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) 16 - #define PAGE_MASK (~(PAGE_SIZE-1)) 18 + #define PAGE_SHIFT _PAGE_SHIFT 19 + #define PAGE_SIZE _PAGE_SIZE 20 + #define PAGE_MASK _PAGE_MASK 17 21 #define PAGE_DEFAULT_ACC 0 18 22 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) 19 23 ··· 137 133 struct page; 138 134 void arch_free_page(struct page *page, int order); 139 135 void arch_alloc_page(struct page *page, int order); 136 + void arch_set_page_dat(struct page *page, int order); 137 + void arch_set_page_nodat(struct page *page, int order); 138 + int arch_test_page_nodat(struct page *page); 140 139 void arch_set_page_states(int make_stable); 141 140 142 141 static inline int devmem_is_allowed(unsigned long pfn) ··· 152 145 153 146 #endif /* !__ASSEMBLY__ */ 154 147 155 - #define __PAGE_OFFSET 0x0UL 156 - #define PAGE_OFFSET 0x0UL 157 - #define __pa(x) (unsigned long)(x) 158 - #define __va(x) (void *)(unsigned long)(x) 159 - #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) 160 - #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) 161 - #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) 148 + #define __PAGE_OFFSET 0x0UL 149 + #define PAGE_OFFSET 0x0UL 150 + 151 + #define __pa(x) ((unsigned long)(x)) 152 + #define __va(x) ((void *)(unsigned long)(x)) 153 + 154 + #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) 162 155 #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) 156 + 157 + #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) 163 158 #define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) 159 + 160 + #define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT) 161 + #define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) 162 + 163 + #define 
phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr)) 164 + #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) 165 + 166 + #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) 164 167 165 168 #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ 166 169 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+12 -6
arch/s390/include/asm/pgalloc.h
··· 15 15 #include <linux/gfp.h> 16 16 #include <linux/mm.h> 17 17 18 + #define CRST_ALLOC_ORDER 2 19 + 18 20 unsigned long *crst_table_alloc(struct mm_struct *); 19 21 void crst_table_free(struct mm_struct *, unsigned long *); 20 22 ··· 44 42 45 43 static inline void crst_table_init(unsigned long *crst, unsigned long entry) 46 44 { 47 - clear_table(crst, entry, sizeof(unsigned long)*2048); 45 + clear_table(crst, entry, _CRST_TABLE_SIZE); 48 46 } 49 47 50 48 static inline unsigned long pgd_entry_type(struct mm_struct *mm) 51 49 { 52 - if (mm->context.asce_limit <= (1UL << 31)) 50 + if (mm->context.asce_limit <= _REGION3_SIZE) 53 51 return _SEGMENT_ENTRY_EMPTY; 54 - if (mm->context.asce_limit <= (1UL << 42)) 52 + if (mm->context.asce_limit <= _REGION2_SIZE) 55 53 return _REGION3_ENTRY_EMPTY; 56 - if (mm->context.asce_limit <= (1UL << 53)) 54 + if (mm->context.asce_limit <= _REGION1_SIZE) 57 55 return _REGION2_ENTRY_EMPTY; 58 56 return _REGION1_ENTRY_EMPTY; 59 57 } ··· 121 119 122 120 if (!table) 123 121 return NULL; 124 - if (mm->context.asce_limit == (1UL << 31)) { 122 + if (mm->context.asce_limit == _REGION3_SIZE) { 125 123 /* Forking a compat process with 2 page table levels */ 126 124 if (!pgtable_pmd_page_ctor(virt_to_page(table))) { 127 125 crst_table_free(mm, table); ··· 133 131 134 132 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) 135 133 { 136 - if (mm->context.asce_limit == (1UL << 31)) 134 + if (mm->context.asce_limit == _REGION3_SIZE) 137 135 pgtable_pmd_page_dtor(virt_to_page(pgd)); 138 136 crst_table_free(mm, (unsigned long *) pgd); 139 137 } ··· 159 157 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) 160 158 161 159 extern void rcu_table_freelist_finish(void); 160 + 161 + void vmem_map_init(void); 162 + void *vmem_crst_alloc(unsigned long val); 163 + pte_t *vmem_pte_alloc(void); 162 164 163 165 #endif /* _S390_PGALLOC_H */
+120 -77
arch/s390/include/asm/pgtable.h
··· 11 11 #ifndef _ASM_S390_PGTABLE_H 12 12 #define _ASM_S390_PGTABLE_H 13 13 14 - /* 15 - * The Linux memory management assumes a three-level page table setup. 16 - * For s390 64 bit we use up to four of the five levels the hardware 17 - * provides (region first tables are not used). 18 - * 19 - * The "pgd_xxx()" functions are trivial for a folded two-level 20 - * setup: the pgd is never bad, and a pmd always exists (as it's folded 21 - * into the pgd entry) 22 - * 23 - * This file contains the functions and defines necessary to modify and use 24 - * the S390 page table tree. 25 - */ 26 - #ifndef __ASSEMBLY__ 27 14 #include <linux/sched.h> 28 15 #include <linux/mm_types.h> 29 16 #include <linux/page-flags.h> ··· 21 34 22 35 extern pgd_t swapper_pg_dir[]; 23 36 extern void paging_init(void); 24 - extern void vmem_map_init(void); 25 - pmd_t *vmem_pmd_alloc(void); 26 - pte_t *vmem_pte_alloc(void); 27 37 28 38 enum { 29 39 PG_DIRECT_MAP_4K = 0, ··· 61 77 #define __HAVE_COLOR_ZERO_PAGE 62 78 63 79 /* TODO: s390 cannot support io_remap_pfn_range... */ 64 - #endif /* !__ASSEMBLY__ */ 65 - 66 - /* 67 - * PMD_SHIFT determines the size of the area a second-level page 68 - * table can map 69 - * PGDIR_SHIFT determines what a third-level page table entry can map 70 - */ 71 - #define PMD_SHIFT 20 72 - #define PUD_SHIFT 31 73 - #define P4D_SHIFT 42 74 - #define PGDIR_SHIFT 53 75 - 76 - #define PMD_SIZE (1UL << PMD_SHIFT) 77 - #define PMD_MASK (~(PMD_SIZE-1)) 78 - #define PUD_SIZE (1UL << PUD_SHIFT) 79 - #define PUD_MASK (~(PUD_SIZE-1)) 80 - #define P4D_SIZE (1UL << P4D_SHIFT) 81 - #define P4D_MASK (~(P4D_SIZE-1)) 82 - #define PGDIR_SIZE (1UL << PGDIR_SHIFT) 83 - #define PGDIR_MASK (~(PGDIR_SIZE-1)) 84 - 85 - /* 86 - * entries per page directory level: the S390 is two-level, so 87 - * we don't really have any PMD directory physically. 
88 - * for S390 segment-table entries are combined to one PGD 89 - * that leads to 1024 pte per pgd 90 - */ 91 - #define PTRS_PER_PTE 256 92 - #define PTRS_PER_PMD 2048 93 - #define PTRS_PER_PUD 2048 94 - #define PTRS_PER_P4D 2048 95 - #define PTRS_PER_PGD 2048 96 80 97 81 #define FIRST_USER_ADDRESS 0UL 98 82 ··· 75 123 #define pgd_ERROR(e) \ 76 124 printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) 77 125 78 - #ifndef __ASSEMBLY__ 79 126 /* 80 127 * The vmalloc and module area will always be on the topmost area of the 81 128 * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. ··· 220 269 */ 221 270 222 271 /* Bits in the segment/region table address-space-control-element */ 223 - #define _ASCE_ORIGIN ~0xfffUL/* segment table origin */ 272 + #define _ASCE_ORIGIN ~0xfffUL/* region/segment table origin */ 224 273 #define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ 225 274 #define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ 226 275 #define _ASCE_SPACE_SWITCH 0x40 /* space switch event */ ··· 271 320 #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL 272 321 #define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL 273 322 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ 274 - #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ 275 - #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ 276 - #define _SEGMENT_ENTRY_NOEXEC 0x100 /* region no-execute bit */ 323 + #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */ 324 + #define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */ 325 + #define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */ 277 326 #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ 278 327 279 328 #define _SEGMENT_ENTRY (0) ··· 290 339 #else 291 340 #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ 292 341 #endif 342 + 343 + #define _CRST_ENTRIES 2048 /* number of region/segment table 
entries */ 344 + #define _PAGE_ENTRIES 256 /* number of page table entries */ 345 + 346 + #define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8) 347 + #define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8) 348 + 349 + #define _REGION1_SHIFT 53 350 + #define _REGION2_SHIFT 42 351 + #define _REGION3_SHIFT 31 352 + #define _SEGMENT_SHIFT 20 353 + 354 + #define _REGION1_INDEX (0x7ffUL << _REGION1_SHIFT) 355 + #define _REGION2_INDEX (0x7ffUL << _REGION2_SHIFT) 356 + #define _REGION3_INDEX (0x7ffUL << _REGION3_SHIFT) 357 + #define _SEGMENT_INDEX (0x7ffUL << _SEGMENT_SHIFT) 358 + #define _PAGE_INDEX (0xffUL << _PAGE_SHIFT) 359 + 360 + #define _REGION1_SIZE (1UL << _REGION1_SHIFT) 361 + #define _REGION2_SIZE (1UL << _REGION2_SHIFT) 362 + #define _REGION3_SIZE (1UL << _REGION3_SHIFT) 363 + #define _SEGMENT_SIZE (1UL << _SEGMENT_SHIFT) 364 + 365 + #define _REGION1_MASK (~(_REGION1_SIZE - 1)) 366 + #define _REGION2_MASK (~(_REGION2_SIZE - 1)) 367 + #define _REGION3_MASK (~(_REGION3_SIZE - 1)) 368 + #define _SEGMENT_MASK (~(_SEGMENT_SIZE - 1)) 369 + 370 + #define PMD_SHIFT _SEGMENT_SHIFT 371 + #define PUD_SHIFT _REGION3_SHIFT 372 + #define P4D_SHIFT _REGION2_SHIFT 373 + #define PGDIR_SHIFT _REGION1_SHIFT 374 + 375 + #define PMD_SIZE _SEGMENT_SIZE 376 + #define PUD_SIZE _REGION3_SIZE 377 + #define P4D_SIZE _REGION2_SIZE 378 + #define PGDIR_SIZE _REGION1_SIZE 379 + 380 + #define PMD_MASK _SEGMENT_MASK 381 + #define PUD_MASK _REGION3_MASK 382 + #define P4D_MASK _REGION2_MASK 383 + #define PGDIR_MASK _REGION1_MASK 384 + 385 + #define PTRS_PER_PTE _PAGE_ENTRIES 386 + #define PTRS_PER_PMD _CRST_ENTRIES 387 + #define PTRS_PER_PUD _CRST_ENTRIES 388 + #define PTRS_PER_P4D _CRST_ENTRIES 389 + #define PTRS_PER_PGD _CRST_ENTRIES 293 390 294 391 /* 295 392 * Segment table and region3 table entry encoding ··· 375 376 376 377 /* Guest Page State used for virtualization */ 377 378 #define _PGSTE_GPS_ZERO 0x0000000080000000UL 379 + #define _PGSTE_GPS_NODAT 0x0000000040000000UL 378 380 #define 
_PGSTE_GPS_USAGE_MASK 0x0000000003000000UL 379 381 #define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL 380 382 #define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL ··· 505 505 * In the case that a guest uses storage keys 506 506 * faults should no longer be backed by zero pages 507 507 */ 508 - #define mm_forbids_zeropage mm_use_skey 508 + #define mm_forbids_zeropage mm_has_pgste 509 509 static inline int mm_use_skey(struct mm_struct *mm) 510 510 { 511 511 #ifdef CONFIG_PGSTE ··· 952 952 #define IPTE_GLOBAL 0 953 953 #define IPTE_LOCAL 1 954 954 955 - static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local) 955 + #define IPTE_NODAT 0x400 956 + #define IPTE_GUEST_ASCE 0x800 957 + 958 + static inline void __ptep_ipte(unsigned long address, pte_t *ptep, 959 + unsigned long opt, unsigned long asce, 960 + int local) 956 961 { 957 962 unsigned long pto = (unsigned long) ptep; 958 963 959 - /* Invalidation + TLB flush for the pte */ 964 + if (__builtin_constant_p(opt) && opt == 0) { 965 + /* Invalidation + TLB flush for the pte */ 966 + asm volatile( 967 + " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" 968 + : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), 969 + [m4] "i" (local)); 970 + return; 971 + } 972 + 973 + /* Invalidate ptes with options + TLB flush of the ptes */ 974 + opt = opt | (asce & _ASCE_ORIGIN); 960 975 asm volatile( 961 - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" 962 - : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), 963 - [m4] "i" (local)); 976 + " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" 977 + : [r2] "+a" (address), [r3] "+a" (opt) 978 + : [r1] "a" (pto), [m4] "i" (local) : "memory"); 964 979 } 965 980 966 981 static inline void __ptep_ipte_range(unsigned long address, int nr, ··· 1356 1341 #define IDTE_GLOBAL 0 1357 1342 #define IDTE_LOCAL 1 1358 1343 1359 - static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local) 1344 + #define IDTE_PTOA 0x0800 1345 + #define IDTE_NODAT 0x1000 1346 + #define 
IDTE_GUEST_ASCE 0x2000 1347 + 1348 + static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, 1349 + unsigned long opt, unsigned long asce, 1350 + int local) 1360 1351 { 1361 1352 unsigned long sto; 1362 1353 1363 - sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); 1364 - asm volatile( 1365 - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" 1366 - : "+m" (*pmdp) 1367 - : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)), 1368 - [m4] "i" (local) 1369 - : "cc" ); 1354 + sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t); 1355 + if (__builtin_constant_p(opt) && opt == 0) { 1356 + /* flush without guest asce */ 1357 + asm volatile( 1358 + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" 1359 + : "+m" (*pmdp) 1360 + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)), 1361 + [m4] "i" (local) 1362 + : "cc" ); 1363 + } else { 1364 + /* flush with guest asce */ 1365 + asm volatile( 1366 + " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" 1367 + : "+m" (*pmdp) 1368 + : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), 1369 + [r3] "a" (asce), [m4] "i" (local) 1370 + : "cc" ); 1371 + } 1370 1372 } 1371 1373 1372 - static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local) 1374 + static inline void __pudp_idte(unsigned long addr, pud_t *pudp, 1375 + unsigned long opt, unsigned long asce, 1376 + int local) 1373 1377 { 1374 1378 unsigned long r3o; 1375 1379 1376 - r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); 1380 + r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t); 1377 1381 r3o |= _ASCE_TYPE_REGION3; 1378 - asm volatile( 1379 - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" 1380 - : "+m" (*pudp) 1381 - : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)), 1382 - [m4] "i" (local) 1383 - : "cc"); 1382 + if (__builtin_constant_p(opt) && opt == 0) { 1383 + /* flush without guest asce */ 1384 + asm volatile( 1385 + " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" 1386 + : "+m" (*pudp) 1387 + : [r1] "a" (r3o), [r2] "a" 
((addr & PUD_MASK)), 1388 + [m4] "i" (local) 1389 + : "cc"); 1390 + } else { 1391 + /* flush with guest asce */ 1392 + asm volatile( 1393 + " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" 1394 + : "+m" (*pudp) 1395 + : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), 1396 + [r3] "a" (asce), [m4] "i" (local) 1397 + : "cc" ); 1398 + } 1384 1399 } 1385 1400 1386 1401 pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); ··· 1592 1547 1593 1548 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 1594 1549 #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 1595 - 1596 - #endif /* !__ASSEMBLY__ */ 1597 1550 1598 1551 #define kern_addr_valid(addr) (1) 1599 1552
+1 -1
arch/s390/include/asm/qdio.h
··· 80 80 u32 qkey : 4; 81 81 u32 : 28; 82 82 struct qdesfmt0 qdf0[126]; 83 - } __attribute__ ((packed, aligned(4096))); 83 + } __packed __aligned(PAGE_SIZE); 84 84 85 85 #define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 86 86 #define QIB_RFLAGS_ENABLE_QEBSM 0x80
+14 -3
arch/s390/include/asm/setup.h
··· 29 29 #define MACHINE_FLAG_TE _BITUL(11) 30 30 #define MACHINE_FLAG_TLB_LC _BITUL(12) 31 31 #define MACHINE_FLAG_VX _BITUL(13) 32 - #define MACHINE_FLAG_NX _BITUL(14) 33 - #define MACHINE_FLAG_GS _BITUL(15) 32 + #define MACHINE_FLAG_TLB_GUEST _BITUL(14) 33 + #define MACHINE_FLAG_NX _BITUL(15) 34 + #define MACHINE_FLAG_GS _BITUL(16) 35 + #define MACHINE_FLAG_SCC _BITUL(17) 34 36 35 37 #define LPP_MAGIC _BITUL(31) 36 38 #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) ··· 70 68 #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) 71 69 #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) 72 70 #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) 71 + #define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST) 73 72 #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) 74 73 #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) 74 + #define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) 75 75 76 76 /* 77 77 * Console mode. Override with conmode= ··· 108 104 #define pfault_fini() do { } while (0) 109 105 #endif /* CONFIG_PFAULT */ 110 106 107 + #ifdef CONFIG_VMCP 108 + void vmcp_cma_reserve(void); 109 + #else 110 + static inline void vmcp_cma_reserve(void) { } 111 + #endif 112 + 111 113 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); 112 114 113 - extern void cmma_init(void); 115 + void cmma_init(void); 116 + void cmma_init_nodat(void); 114 117 115 118 extern void (*_machine_restart)(char *command); 116 119 extern void (*_machine_halt)(void);
+5 -4
arch/s390/include/asm/spinlock.h
··· 92 92 { 93 93 typecheck(int, lp->lock); 94 94 asm volatile( 95 - "st %1,%0\n" 96 - : "+Q" (lp->lock) 97 - : "d" (0) 98 - : "cc", "memory"); 95 + #ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES 96 + " .long 0xb2fa0070\n" /* NIAI 7 */ 97 + #endif 98 + " st %1,%0\n" 99 + : "=Q" (lp->lock) : "d" (0) : "cc", "memory"); 99 100 } 100 101 101 102 /*
+35 -5
arch/s390/include/asm/timex.h
··· 15 15 /* The value of the TOD clock for 1.1.1970. */ 16 16 #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL 17 17 18 + extern u64 clock_comparator_max; 19 + 18 20 /* Inline functions for clock register access. */ 19 21 static inline int set_tod_clock(__u64 time) 20 22 { ··· 128 126 unsigned long long old; 129 127 130 128 old = S390_lowcore.clock_comparator; 131 - S390_lowcore.clock_comparator = -1ULL; 129 + S390_lowcore.clock_comparator = clock_comparator_max; 132 130 set_clock_comparator(S390_lowcore.clock_comparator); 133 131 return old; 134 132 } ··· 176 174 return (cycles_t) get_tod_clock() >> 2; 177 175 } 178 176 179 - int get_phys_clock(unsigned long long *clock); 177 + int get_phys_clock(unsigned long *clock); 180 178 void init_cpu_timer(void); 181 179 unsigned long long monotonic_clock(void); 182 180 183 - extern u64 sched_clock_base_cc; 181 + extern unsigned char tod_clock_base[16] __aligned(8); 184 182 185 183 /** 186 184 * get_clock_monotonic - returns current time in clock rate units 187 185 * 188 186 * The caller must ensure that preemption is disabled. 189 - * The clock and sched_clock_base get changed via stop_machine. 187 + * The clock and tod_clock_base get changed via stop_machine. 190 188 * Therefore preemption must be disabled when calling this 191 189 * function, otherwise the returned value is not guaranteed to 192 190 * be monotonic. 
193 191 */ 194 192 static inline unsigned long long get_tod_clock_monotonic(void) 195 193 { 196 - return get_tod_clock() - sched_clock_base_cc; 194 + return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; 197 195 } 198 196 199 197 /** ··· 218 216 static inline unsigned long long tod_to_ns(unsigned long long todval) 219 217 { 220 218 return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); 219 + } 220 + 221 + /** 222 + * tod_after - compare two 64 bit TOD values 223 + * @a: first 64 bit TOD timestamp 224 + * @b: second 64 bit TOD timestamp 225 + * 226 + * Returns: true if a is later than b 227 + */ 228 + static inline int tod_after(unsigned long long a, unsigned long long b) 229 + { 230 + if (MACHINE_HAS_SCC) 231 + return (long long) a > (long long) b; 232 + return a > b; 233 + } 234 + 235 + /** 236 + * tod_after_eq - compare two 64 bit TOD values 237 + * @a: first 64 bit TOD timestamp 238 + * @b: second 64 bit TOD timestamp 239 + * 240 + * Returns: true if a is later than b 241 + */ 242 + static inline int tod_after_eq(unsigned long long a, unsigned long long b) 243 + { 244 + if (MACHINE_HAS_SCC) 245 + return (long long) a >= (long long) b; 246 + return a >= b; 221 247 } 222 248 223 249 #endif
+3 -3
arch/s390/include/asm/tlb.h
··· 135 135 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, 136 136 unsigned long address) 137 137 { 138 - if (tlb->mm->context.asce_limit <= (1UL << 31)) 138 + if (tlb->mm->context.asce_limit <= _REGION3_SIZE) 139 139 return; 140 140 pgtable_pmd_page_dtor(virt_to_page(pmd)); 141 141 tlb_remove_table(tlb, pmd); ··· 151 151 static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, 152 152 unsigned long address) 153 153 { 154 - if (tlb->mm->context.asce_limit <= (1UL << 53)) 154 + if (tlb->mm->context.asce_limit <= _REGION1_SIZE) 155 155 return; 156 156 tlb_remove_table(tlb, p4d); 157 157 } ··· 166 166 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, 167 167 unsigned long address) 168 168 { 169 - if (tlb->mm->context.asce_limit <= (1UL << 42)) 169 + if (tlb->mm->context.asce_limit <= _REGION2_SIZE) 170 170 return; 171 171 tlb_remove_table(tlb, pud); 172 172 }
+6 -1
arch/s390/include/asm/tlbflush.h
··· 20 20 */ 21 21 static inline void __tlb_flush_idte(unsigned long asce) 22 22 { 23 + unsigned long opt; 24 + 25 + opt = IDTE_PTOA; 26 + if (MACHINE_HAS_TLB_GUEST) 27 + opt |= IDTE_GUEST_ASCE; 23 28 /* Global TLB flush for the mm */ 24 29 asm volatile( 25 30 " .insn rrf,0xb98e0000,0,%0,%1,0" 26 - : : "a" (2048), "a" (asce) : "cc"); 31 + : : "a" (opt), "a" (asce) : "cc"); 27 32 } 28 33 29 34 #ifdef CONFIG_SMP
-6
arch/s390/include/asm/topology.h
··· 77 77 return &node_to_cpumask_map[node]; 78 78 } 79 79 80 - /* 81 - * Returns the number of the node containing node 'node'. This 82 - * architecture is flat, so it is a pretty simple function! 83 - */ 84 - #define parent_node(node) (node) 85 - 86 80 #define pcibus_to_node(bus) __pcibus_to_node(bus) 87 81 88 82 #define node_distance(a, b) __node_distance(a, b)
-11
arch/s390/include/asm/types.h
··· 1 - /* 2 - * S390 version 3 - * 4 - * Derived from "include/asm-i386/types.h" 5 - */ 6 - #ifndef _S390_TYPES_H 7 - #define _S390_TYPES_H 8 - 9 - #include <uapi/asm/types.h> 10 - 11 - #endif /* _S390_TYPES_H */
-13
arch/s390/include/asm/unaligned.h
··· 1 - #ifndef _ASM_S390_UNALIGNED_H 2 - #define _ASM_S390_UNALIGNED_H 3 - 4 - /* 5 - * The S390 can do unaligned accesses itself. 6 - */ 7 - #include <linux/unaligned/access_ok.h> 8 - #include <linux/unaligned/generic.h> 9 - 10 - #define get_unaligned __get_unaligned_be 11 - #define put_unaligned __put_unaligned_be 12 - 13 - #endif /* _ASM_S390_UNALIGNED_H */
+1
arch/s390/include/uapi/asm/Kbuild
··· 9 9 generic-y += poll.h 10 10 generic-y += resource.h 11 11 generic-y += sockios.h 12 + generic-y += swab.h 12 13 generic-y += termbits.h
+5 -1
arch/s390/include/uapi/asm/dasd.h
··· 72 72 * 0x02: use diag discipline (diag) 73 73 * 0x04: set the device initially online (internal use only) 74 74 * 0x08: enable ERP related logging 75 - * 0x20: give access to raw eckd data 75 + * 0x10: allow I/O to fail on lost paths 76 + * 0x20: allow I/O to fail when a lock was stolen 77 + * 0x40: give access to raw eckd data 78 + * 0x80: enable discard support 76 79 */ 77 80 #define DASD_FEATURE_DEFAULT 0x00 78 81 #define DASD_FEATURE_READONLY 0x01 ··· 85 82 #define DASD_FEATURE_FAILFAST 0x10 86 83 #define DASD_FEATURE_FAILONSLCK 0x20 87 84 #define DASD_FEATURE_USERAW 0x40 85 + #define DASD_FEATURE_DISCARD 0x80 88 86 89 87 #define DASD_PARTN_BITS 2 90 88
-89
arch/s390/include/uapi/asm/swab.h
··· 1 - #ifndef _S390_SWAB_H 2 - #define _S390_SWAB_H 3 - 4 - /* 5 - * S390 version 6 - * Copyright IBM Corp. 1999 7 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 8 - */ 9 - 10 - #include <linux/types.h> 11 - 12 - #ifndef __s390x__ 13 - # define __SWAB_64_THRU_32__ 14 - #endif 15 - 16 - #ifdef __s390x__ 17 - static inline __u64 __arch_swab64p(const __u64 *x) 18 - { 19 - __u64 result; 20 - 21 - asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x)); 22 - return result; 23 - } 24 - #define __arch_swab64p __arch_swab64p 25 - 26 - static inline __u64 __arch_swab64(__u64 x) 27 - { 28 - __u64 result; 29 - 30 - asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x)); 31 - return result; 32 - } 33 - #define __arch_swab64 __arch_swab64 34 - 35 - static inline void __arch_swab64s(__u64 *x) 36 - { 37 - *x = __arch_swab64p(x); 38 - } 39 - #define __arch_swab64s __arch_swab64s 40 - #endif /* __s390x__ */ 41 - 42 - static inline __u32 __arch_swab32p(const __u32 *x) 43 - { 44 - __u32 result; 45 - 46 - asm volatile( 47 - #ifndef __s390x__ 48 - " icm %0,8,%O1+3(%R1)\n" 49 - " icm %0,4,%O1+2(%R1)\n" 50 - " icm %0,2,%O1+1(%R1)\n" 51 - " ic %0,%1" 52 - : "=&d" (result) : "Q" (*x) : "cc"); 53 - #else /* __s390x__ */ 54 - " lrv %0,%1" 55 - : "=d" (result) : "m" (*x)); 56 - #endif /* __s390x__ */ 57 - return result; 58 - } 59 - #define __arch_swab32p __arch_swab32p 60 - 61 - #ifdef __s390x__ 62 - static inline __u32 __arch_swab32(__u32 x) 63 - { 64 - __u32 result; 65 - 66 - asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x)); 67 - return result; 68 - } 69 - #define __arch_swab32 __arch_swab32 70 - #endif /* __s390x__ */ 71 - 72 - static inline __u16 __arch_swab16p(const __u16 *x) 73 - { 74 - __u16 result; 75 - 76 - asm volatile( 77 - #ifndef __s390x__ 78 - " icm %0,2,%O1+1(%R1)\n" 79 - " ic %0,%1\n" 80 - : "=&d" (result) : "Q" (*x) : "cc"); 81 - #else /* __s390x__ */ 82 - " lrvh %0,%1" 83 - : "=d" (result) : "m" (*x)); 84 - #endif /* __s390x__ */ 85 - return result; 86 - } 87 
- #define __arch_swab16p __arch_swab16p 88 - 89 - #endif /* _S390_SWAB_H */
+1
arch/s390/kernel/asm-offsets.c
··· 158 158 OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); 159 159 OFFSET(__LC_INT_CLOCK, lowcore, int_clock); 160 160 OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); 161 + OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); 161 162 OFFSET(__LC_CURRENT, lowcore, current_task); 162 163 OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); 163 164 OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
+4 -9
arch/s390/kernel/cpcmd.c
··· 14 14 #include <linux/spinlock.h> 15 15 #include <linux/stddef.h> 16 16 #include <linux/string.h> 17 + #include <linux/mm.h> 17 18 #include <asm/diag.h> 18 19 #include <asm/ebcdic.h> 19 20 #include <asm/cpcmd.h> ··· 29 28 register unsigned long reg3 asm ("3") = cmdlen; 30 29 31 30 asm volatile( 32 - " sam31\n" 33 31 " diag %1,%0,0x8\n" 34 - " sam64\n" 35 32 : "+d" (reg3) : "d" (reg2) : "cc"); 36 33 return reg3; 37 34 } ··· 42 43 register unsigned long reg5 asm ("5") = *rlen; 43 44 44 45 asm volatile( 45 - " sam31\n" 46 46 " diag %2,%0,0x8\n" 47 - " sam64\n" 48 47 " brc 8,1f\n" 49 48 " agr %1,%4\n" 50 49 "1:\n" ··· 54 57 55 58 /* 56 59 * __cpcmd has some restrictions over cpcmd 57 - * - the response buffer must reside below 2GB (if any) 58 60 * - __cpcmd is unlocked and therefore not SMP-safe 59 61 */ 60 62 int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) ··· 84 88 85 89 int cpcmd(const char *cmd, char *response, int rlen, int *response_code) 86 90 { 91 + unsigned long flags; 87 92 char *lowbuf; 88 93 int len; 89 - unsigned long flags; 90 94 91 - if ((virt_to_phys(response) != (unsigned long) response) || 92 - (((unsigned long)response + rlen) >> 31)) { 93 - lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); 95 + if (is_vmalloc_or_module_addr(response)) { 96 + lowbuf = kmalloc(rlen, GFP_KERNEL); 94 97 if (!lowbuf) { 95 98 pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); 96 99 return -ENOMEM;
+5 -4
arch/s390/kernel/debug.c
··· 866 866 debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, 867 867 int exception) 868 868 { 869 - active->id.stck = get_tod_clock_fast() - sched_clock_base_cc; 869 + active->id.stck = get_tod_clock_fast() - 870 + *(unsigned long long *) &tod_clock_base[1]; 870 871 active->id.fields.cpuid = smp_processor_id(); 871 872 active->caller = __builtin_return_address(0); 872 873 active->id.fields.exception = exception; ··· 1456 1455 debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, 1457 1456 int area, debug_entry_t * entry, char *out_buf) 1458 1457 { 1459 - unsigned long sec, usec; 1458 + unsigned long base, sec, usec; 1460 1459 char *except_str; 1461 1460 unsigned long caller; 1462 1461 int rc = 0; 1463 1462 unsigned int level; 1464 1463 1465 1464 level = entry->id.fields.level; 1466 - sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12); 1467 - sec = sec - (TOD_UNIX_EPOCH >> 12); 1465 + base = (*(unsigned long *) &tod_clock_base[0]) >> 4; 1466 + sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); 1468 1467 usec = do_div(sec, USEC_PER_SEC); 1469 1468 1470 1469 if (entry->id.fields.exception)
+1 -1
arch/s390/kernel/dumpstack.c
··· 76 76 frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); 77 77 #ifdef CONFIG_CHECK_STACK 78 78 sp = __dump_trace(func, data, sp, 79 - S390_lowcore.panic_stack + frame_size - 4096, 79 + S390_lowcore.panic_stack + frame_size - PAGE_SIZE, 80 80 S390_lowcore.panic_stack + frame_size); 81 81 #endif 82 82 sp = __dump_trace(func, data, sp,
+12 -5
arch/s390/kernel/early.c
··· 53 53 if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) 54 54 disabled_wait(0); 55 55 56 - sched_clock_base_cc = TOD_UNIX_EPOCH; 57 - S390_lowcore.last_update_clock = sched_clock_base_cc; 56 + memset(tod_clock_base, 0, 16); 57 + *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; 58 + S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; 58 59 } 59 60 60 61 #ifdef CONFIG_SHARED_KERNEL ··· 166 165 } 167 166 168 167 /* re-initialize cputime accounting. */ 169 - sched_clock_base_cc = get_tod_clock(); 170 - S390_lowcore.last_update_clock = sched_clock_base_cc; 168 + get_tod_clock_ext(tod_clock_base); 169 + S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1]; 171 170 S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; 172 171 S390_lowcore.user_timer = 0; 173 172 S390_lowcore.system_timer = 0; ··· 388 387 } 389 388 if (test_facility(133)) 390 389 S390_lowcore.machine_flags |= MACHINE_FLAG_GS; 390 + if (test_facility(139) && (tod_clock_base[1] & 0x80)) { 391 + /* Enabled signed clock comparator comparisons */ 392 + S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; 393 + clock_comparator_max = -1ULL >> 1; 394 + __ctl_set_bit(0, 53); 395 + } 391 396 } 392 397 393 398 static inline void save_vector_registers(void) ··· 420 413 { 421 414 S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; 422 415 __ctl_clear_bit(0, 17); 423 - return 1; 416 + return 0; 424 417 } 425 418 early_param("novx", disable_vector_extension); 426 419
+2 -1
arch/s390/kernel/head.S
··· 302 302 xc 0xe00(256),0xe00 303 303 xc 0xf00(256),0xf00 304 304 lctlg %c0,%c15,0x200(%r0) # initialize control registers 305 - stck __LC_LAST_UPDATE_CLOCK 305 + stcke __LC_BOOT_CLOCK 306 + mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 306 307 spt 6f-.LPG0(%r13) 307 308 mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) 308 309 l %r15,.Lstack-.LPG0(%r13)
+2 -2
arch/s390/kernel/head64.S
··· 21 21 xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid 22 22 mvi __LC_LPP,0x80 # and set LPP_MAGIC 23 23 .insn s,0xb2800000,__LC_LPP # load program parameter 24 - 0: larl %r1,sched_clock_base_cc 25 - mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK 24 + 0: larl %r1,tod_clock_base 25 + mvc 0(16,%r1),__LC_BOOT_CLOCK 26 26 larl %r13,.LPG1 # get base 27 27 lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers 28 28 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
+2 -1
arch/s390/kernel/irq.c
··· 105 105 106 106 old_regs = set_irq_regs(regs); 107 107 irq_enter(); 108 - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) 108 + if (tod_after_eq(S390_lowcore.int_clock, 109 + S390_lowcore.clock_comparator)) 109 110 /* Serve timer interrupts first. */ 110 111 clock_comparator_work(); 111 112 generic_handle_irq(irq);
+3 -2
arch/s390/kernel/relocate_kernel.S
··· 7 7 */ 8 8 9 9 #include <linux/linkage.h> 10 + #include <asm/page.h> 10 11 #include <asm/sigp.h> 11 12 12 13 /* ··· 56 55 .back_pgm: 57 56 lmg %r0,%r15,gprregs-.base(%r13) 58 57 .top: 59 - lghi %r7,4096 # load PAGE_SIZE in r7 60 - lghi %r9,4096 # load PAGE_SIZE in r9 58 + lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 59 + lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9 61 60 lg %r5,0(%r2) # read another word for indirection page 62 61 aghi %r2,8 # increment pointer 63 62 tml %r5,0x1 # is it a destination page?
+9 -5
arch/s390/kernel/setup.c
··· 305 305 /* 306 306 * Setup lowcore for boot cpu 307 307 */ 308 - BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096); 308 + BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE); 309 309 lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc)); 310 310 lc->restart_psw.mask = PSW_KERNEL_BITS; 311 311 lc->restart_psw.addr = (unsigned long) restart_int_handler; ··· 323 323 lc->io_new_psw.mask = PSW_KERNEL_BITS | 324 324 PSW_MASK_DAT | PSW_MASK_MCHECK; 325 325 lc->io_new_psw.addr = (unsigned long) io_int_handler; 326 - lc->clock_comparator = -1ULL; 326 + lc->clock_comparator = clock_comparator_max; 327 327 lc->kernel_stack = ((unsigned long) &init_thread_union) 328 328 + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); 329 329 lc->async_stack = (unsigned long) ··· 469 469 vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; 470 470 tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; 471 471 tmp = tmp * (sizeof(struct page) + PAGE_SIZE); 472 - if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42)) 473 - vmax = 1UL << 42; /* 3-level kernel page table */ 472 + if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) 473 + vmax = _REGION2_SIZE; /* 3-level kernel page table */ 474 474 else 475 - vmax = 1UL << 53; /* 4-level kernel page table */ 475 + vmax = _REGION1_SIZE; /* 4-level kernel page table */ 476 476 /* module area is at the end of the kernel address space. */ 477 477 MODULES_END = vmax; 478 478 MODULES_VADDR = MODULES_END - MODULES_LEN; ··· 818 818 case 0x2965: 819 819 strcpy(elf_platform, "z13"); 820 820 break; 821 + case 0x3906: 822 + strcpy(elf_platform, "z14"); 823 + break; 821 824 } 822 825 823 826 /* ··· 925 922 setup_memory_end(); 926 923 setup_memory(); 927 924 dma_contiguous_reserve(memory_end); 925 + vmcp_cma_reserve(); 928 926 929 927 check_initrd(); 930 928 reserve_crashkernel();
+1
arch/s390/kernel/smp.c
··· 1181 1181 1182 1182 rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", 1183 1183 smp_cpu_online, smp_cpu_pre_down); 1184 + rc = rc <= 0 ? rc : 0; 1184 1185 out: 1185 1186 return rc; 1186 1187 }
+18 -4
arch/s390/kernel/suspend.c
··· 98 98 */ 99 99 void page_key_read(unsigned long *pfn) 100 100 { 101 + struct page *page; 101 102 unsigned long addr; 103 + unsigned char key; 102 104 103 - addr = (unsigned long) page_address(pfn_to_page(*pfn)); 104 - *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); 105 + page = pfn_to_page(*pfn); 106 + addr = (unsigned long) page_address(page); 107 + key = (unsigned char) page_get_storage_key(addr) & 0x7f; 108 + if (arch_test_page_nodat(page)) 109 + key |= 0x80; 110 + *(unsigned char *) pfn = key; 105 111 } 106 112 107 113 /* ··· 132 126 */ 133 127 void page_key_write(void *address) 134 128 { 135 - page_set_storage_key((unsigned long) address, 136 - page_key_rp->data[page_key_rx], 0); 129 + struct page *page; 130 + unsigned char key; 131 + 132 + key = page_key_rp->data[page_key_rx]; 133 + page_set_storage_key((unsigned long) address, key & 0x7f, 0); 134 + page = virt_to_page(address); 135 + if (key & 0x80) 136 + arch_set_page_nodat(page, 0); 137 + else 138 + arch_set_page_dat(page, 0); 137 139 if (++page_key_rx >= PAGE_KEY_DATA_SIZE) 138 140 return; 139 141 page_key_rp = page_key_rp->next;
+46 -21
arch/s390/kernel/time.c
··· 51 51 #include <asm/cio.h> 52 52 #include "entry.h" 53 53 54 - u64 sched_clock_base_cc = -1; /* Force to data section. */ 55 - EXPORT_SYMBOL_GPL(sched_clock_base_cc); 54 + unsigned char tod_clock_base[16] __aligned(8) = { 55 + /* Force to data section. */ 56 + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 57 + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 58 + }; 59 + EXPORT_SYMBOL_GPL(tod_clock_base); 60 + 61 + u64 clock_comparator_max = -1ULL; 62 + EXPORT_SYMBOL_GPL(clock_comparator_max); 56 63 57 64 static DEFINE_PER_CPU(struct clock_event_device, comparators); 58 65 ··· 82 75 struct ptff_qui qui; 83 76 84 77 /* Initialize TOD steering parameters */ 85 - tod_steering_end = sched_clock_base_cc; 78 + tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; 86 79 vdso_data->ts_end = tod_steering_end; 87 80 88 81 if (!test_facility(28)) ··· 118 111 } 119 112 EXPORT_SYMBOL(monotonic_clock); 120 113 121 - static void tod_to_timeval(__u64 todval, struct timespec64 *xt) 114 + static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) 122 115 { 123 - unsigned long long sec; 116 + unsigned long long high, low, rem, sec, nsec; 124 117 125 - sec = todval >> 12; 126 - do_div(sec, 1000000); 118 + /* Split extended TOD clock to micro-seconds and sub-micro-seconds */ 119 + high = (*(unsigned long long *) clk) >> 4; 120 + low = (*(unsigned long long *)&clk[7]) << 4; 121 + /* Calculate seconds and nano-seconds */ 122 + sec = high; 123 + rem = do_div(sec, 1000000); 124 + nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32; 125 + 127 126 xt->tv_sec = sec; 128 - todval -= (sec * 1000000) << 12; 127 + xt->tv_nsec = nsec; 129 - xt->tv_nsec = ((todval * 1000) >> 12); 130 128 } 131 129 132 130 void clock_comparator_work(void) 133 131 { 134 132 struct clock_event_device *cd; 135 133 136 - S390_lowcore.clock_comparator = -1ULL; 134 + S390_lowcore.clock_comparator = clock_comparator_max; 137 135 cd = this_cpu_ptr(&comparators); 138 136 cd->event_handler(cd); 139 
137 } ··· 160 148 struct clock_event_device *cd; 161 149 int cpu; 162 150 163 - S390_lowcore.clock_comparator = -1ULL; 151 + S390_lowcore.clock_comparator = clock_comparator_max; 164 152 set_clock_comparator(S390_lowcore.clock_comparator); 165 153 166 154 cpu = smp_processor_id(); ··· 191 179 unsigned long param64) 192 180 { 193 181 inc_irq_stat(IRQEXT_CLK); 194 - if (S390_lowcore.clock_comparator == -1ULL) 182 + if (S390_lowcore.clock_comparator == clock_comparator_max) 195 183 set_clock_comparator(S390_lowcore.clock_comparator); 196 184 } 197 185 ··· 209 197 210 198 void read_persistent_clock64(struct timespec64 *ts) 211 199 { 212 - __u64 clock; 200 + unsigned char clk[STORE_CLOCK_EXT_SIZE]; 201 + __u64 delta; 213 202 214 - clock = get_tod_clock() - initial_leap_seconds; 215 - tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); 203 + delta = initial_leap_seconds + TOD_UNIX_EPOCH; 204 + get_tod_clock_ext(clk); 205 + *(__u64 *) &clk[1] -= delta; 206 + if (*(__u64 *) &clk[1] > delta) 207 + clk[0]--; 208 + ext_to_timespec64(clk, ts); 216 209 } 217 210 218 211 void read_boot_clock64(struct timespec64 *ts) 219 212 { 220 - __u64 clock; 213 + unsigned char clk[STORE_CLOCK_EXT_SIZE]; 214 + __u64 delta; 221 215 222 - clock = sched_clock_base_cc - initial_leap_seconds; 223 - tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); 216 + delta = initial_leap_seconds + TOD_UNIX_EPOCH; 217 + memcpy(clk, tod_clock_base, 16); 218 + *(__u64 *) &clk[1] -= delta; 219 + if (*(__u64 *) &clk[1] > delta) 220 + clk[0]--; 221 + ext_to_timespec64(clk, ts); 224 222 } 225 223 226 224 static u64 read_tod_clock(struct clocksource *cs) ··· 357 335 * source. If the clock mode is local it will return -EOPNOTSUPP and 358 336 * -EAGAIN if the clock is not in sync with the external reference. 
359 337 */ 360 - int get_phys_clock(unsigned long long *clock) 338 + int get_phys_clock(unsigned long *clock) 361 339 { 362 340 atomic_t *sw_ptr; 363 341 unsigned int sw0, sw1; ··· 428 406 struct ptff_qto qto; 429 407 430 408 /* Fixup the monotonic sched clock. */ 431 - sched_clock_base_cc += delta; 409 + *(unsigned long long *) &tod_clock_base[1] += delta; 410 + if (*(unsigned long long *) &tod_clock_base[1] < delta) 411 + /* Epoch overflow */ 412 + tod_clock_base[0]++; 432 413 /* Adjust TOD steering parameters. */ 433 414 vdso_data->tb_update_count++; 434 415 now = get_tod_clock(); ··· 462 437 static void clock_sync_local(unsigned long long delta) 463 438 { 464 439 /* Add the delta to the clock comparator. */ 465 - if (S390_lowcore.clock_comparator != -1ULL) { 440 + if (S390_lowcore.clock_comparator != clock_comparator_max) { 466 441 S390_lowcore.clock_comparator += delta; 467 442 set_clock_comparator(S390_lowcore.clock_comparator); 468 443 }
+2
arch/s390/kernel/vdso.c
··· 157 157 page_frame = get_zeroed_page(GFP_KERNEL); 158 158 if (!segment_table || !page_table || !page_frame) 159 159 goto out; 160 + arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); 161 + arch_set_page_dat(virt_to_page(page_table), 0); 160 162 161 163 /* Initialize per-cpu vdso data page */ 162 164 vd = (struct vdso_per_cpu_data *) page_frame;
+3 -1
arch/s390/kernel/vdso32/vdso32.lds.S
··· 2 2 * This is the infamous ld script for the 32 bits vdso 3 3 * library 4 4 */ 5 + 6 + #include <asm/page.h> 5 7 #include <asm/vdso.h> 6 8 7 9 OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") ··· 93 91 .debug_ranges 0 : { *(.debug_ranges) } 94 92 .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } 95 93 96 - . = ALIGN(4096); 94 + . = ALIGN(PAGE_SIZE); 97 95 PROVIDE(_vdso_data = .); 98 96 99 97 /DISCARD/ : {
+3 -1
arch/s390/kernel/vdso64/vdso64.lds.S
··· 2 2 * This is the infamous ld script for the 64 bits vdso 3 3 * library 4 4 */ 5 + 6 + #include <asm/page.h> 5 7 #include <asm/vdso.h> 6 8 7 9 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") ··· 93 91 .debug_ranges 0 : { *(.debug_ranges) } 94 92 .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } 95 93 96 - . = ALIGN(4096); 94 + . = ALIGN(PAGE_SIZE); 97 95 PROVIDE(_vdso_data = .); 98 96 99 97 /DISCARD/ : {
+4 -4
arch/s390/kvm/diag.c
··· 27 27 unsigned long prefix = kvm_s390_get_prefix(vcpu); 28 28 29 29 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 30 - end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; 30 + end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; 31 31 vcpu->stat.diagnose_10++; 32 32 33 33 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end ··· 51 51 */ 52 52 gmap_discard(vcpu->arch.gmap, start, prefix); 53 53 if (start <= prefix) 54 - gmap_discard(vcpu->arch.gmap, 0, 4096); 55 - if (end > prefix + 4096) 56 - gmap_discard(vcpu->arch.gmap, 4096, 8192); 54 + gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE); 55 + if (end > prefix + PAGE_SIZE) 56 + gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE); 57 57 gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); 58 58 } 59 59 return 0;
+17 -18
arch/s390/kvm/gaccess.c
··· 629 629 iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130); 630 630 if (asce.r) 631 631 goto real_address; 632 - ptr = asce.origin * 4096; 632 + ptr = asce.origin * PAGE_SIZE; 633 633 switch (asce.dt) { 634 634 case ASCE_TYPE_REGION1: 635 635 if (vaddr.rfx01 > asce.tl) ··· 674 674 return PGM_REGION_SECOND_TRANS; 675 675 if (edat1) 676 676 dat_protection |= rfte.p; 677 - ptr = rfte.rto * 4096 + vaddr.rsx * 8; 677 + ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8; 678 678 } 679 679 /* fallthrough */ 680 680 case ASCE_TYPE_REGION2: { ··· 692 692 return PGM_REGION_THIRD_TRANS; 693 693 if (edat1) 694 694 dat_protection |= rste.p; 695 - ptr = rste.rto * 4096 + vaddr.rtx * 8; 695 + ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8; 696 696 } 697 697 /* fallthrough */ 698 698 case ASCE_TYPE_REGION3: { ··· 720 720 return PGM_SEGMENT_TRANSLATION; 721 721 if (edat1) 722 722 dat_protection |= rtte.fc0.p; 723 - ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8; 723 + ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8; 724 724 } 725 725 /* fallthrough */ 726 726 case ASCE_TYPE_SEGMENT: { ··· 743 743 goto absolute_address; 744 744 } 745 745 dat_protection |= ste.fc0.p; 746 - ptr = ste.fc0.pto * 2048 + vaddr.px * 8; 746 + ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8; 747 747 } 748 748 } 749 749 if (kvm_is_error_gpa(vcpu->kvm, ptr)) ··· 993 993 parent = sg->parent; 994 994 vaddr.addr = saddr; 995 995 asce.val = sg->orig_asce; 996 - ptr = asce.origin * 4096; 996 + ptr = asce.origin * PAGE_SIZE; 997 997 if (asce.r) { 998 998 *fake = 1; 999 999 ptr = 0; ··· 1029 1029 union region1_table_entry rfte; 1030 1030 1031 1031 if (*fake) { 1032 - ptr += (unsigned long) vaddr.rfx << 53; 1032 + ptr += vaddr.rfx * _REGION1_SIZE; 1033 1033 rfte.val = ptr; 1034 1034 goto shadow_r2t; 1035 1035 } ··· 1044 1044 return PGM_REGION_SECOND_TRANS; 1045 1045 if (sg->edat_level >= 1) 1046 1046 *dat_protection |= rfte.p; 1047 - ptr = rfte.rto << 12UL; 1047 + ptr = rfte.rto * PAGE_SIZE; 1048 1048 shadow_r2t: 1049 1049 rc 
= gmap_shadow_r2t(sg, saddr, rfte.val, *fake); 1050 1050 if (rc) ··· 1055 1055 union region2_table_entry rste; 1056 1056 1057 1057 if (*fake) { 1058 - ptr += (unsigned long) vaddr.rsx << 42; 1058 + ptr += vaddr.rsx * _REGION2_SIZE; 1059 1059 rste.val = ptr; 1060 1060 goto shadow_r3t; 1061 1061 } ··· 1070 1070 return PGM_REGION_THIRD_TRANS; 1071 1071 if (sg->edat_level >= 1) 1072 1072 *dat_protection |= rste.p; 1073 - ptr = rste.rto << 12UL; 1073 + ptr = rste.rto * PAGE_SIZE; 1074 1074 shadow_r3t: 1075 1075 rste.p |= *dat_protection; 1076 1076 rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); ··· 1082 1082 union region3_table_entry rtte; 1083 1083 1084 1084 if (*fake) { 1085 - ptr += (unsigned long) vaddr.rtx << 31; 1085 + ptr += vaddr.rtx * _REGION3_SIZE; 1086 1086 rtte.val = ptr; 1087 1087 goto shadow_sgt; 1088 1088 } ··· 1098 1098 if (rtte.fc && sg->edat_level >= 2) { 1099 1099 *dat_protection |= rtte.fc0.p; 1100 1100 *fake = 1; 1101 - ptr = rtte.fc1.rfaa << 31UL; 1101 + ptr = rtte.fc1.rfaa * _REGION3_SIZE; 1102 1102 rtte.val = ptr; 1103 1103 goto shadow_sgt; 1104 1104 } ··· 1106 1106 return PGM_SEGMENT_TRANSLATION; 1107 1107 if (sg->edat_level >= 1) 1108 1108 *dat_protection |= rtte.fc0.p; 1109 - ptr = rtte.fc0.sto << 12UL; 1109 + ptr = rtte.fc0.sto * PAGE_SIZE; 1110 1110 shadow_sgt: 1111 1111 rtte.fc0.p |= *dat_protection; 1112 1112 rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); ··· 1118 1118 union segment_table_entry ste; 1119 1119 1120 1120 if (*fake) { 1121 - ptr += (unsigned long) vaddr.sx << 20; 1121 + ptr += vaddr.sx * _SEGMENT_SIZE; 1122 1122 ste.val = ptr; 1123 1123 goto shadow_pgt; 1124 1124 } ··· 1134 1134 *dat_protection |= ste.fc0.p; 1135 1135 if (ste.fc && sg->edat_level >= 1) { 1136 1136 *fake = 1; 1137 - ptr = ste.fc1.sfaa << 20UL; 1137 + ptr = ste.fc1.sfaa * _SEGMENT_SIZE; 1138 1138 ste.val = ptr; 1139 1139 goto shadow_pgt; 1140 1140 } 1141 - ptr = ste.fc0.pto << 11UL; 1141 + ptr = ste.fc0.pto * (PAGE_SIZE / 2); 1142 1142 shadow_pgt: 1143 
1143 ste.fc0.p |= *dat_protection; 1144 1144 rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); ··· 1187 1187 1188 1188 vaddr.addr = saddr; 1189 1189 if (fake) { 1190 - /* offset in 1MB guest memory block */ 1191 - pte.val = pgt + ((unsigned long) vaddr.px << 12UL); 1190 + pte.val = pgt + vaddr.px * PAGE_SIZE; 1192 1191 goto shadow_page; 1193 1192 } 1194 1193 if (!rc)
+4 -4
arch/s390/kvm/priv.c
··· 329 329 start = kvm_s390_logical_to_effective(vcpu, start); 330 330 if (m3 & SSKE_MB) { 331 331 /* start already designates an absolute address */ 332 - end = (start + (1UL << 20)) & ~((1UL << 20) - 1); 332 + end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1); 333 333 } else { 334 334 start = kvm_s390_real_to_abs(vcpu, start); 335 335 end = start + PAGE_SIZE; ··· 893 893 case 0x00000000: 894 894 /* only 4k frames specify a real address */ 895 895 start = kvm_s390_real_to_abs(vcpu, start); 896 - end = (start + (1UL << 12)) & ~((1UL << 12) - 1); 896 + end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1); 897 897 break; 898 898 case 0x00001000: 899 - end = (start + (1UL << 20)) & ~((1UL << 20) - 1); 899 + end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1); 900 900 break; 901 901 case 0x00002000: 902 902 /* only support 2G frame size if EDAT2 is available and we are ··· 904 904 if (!test_kvm_facility(vcpu->kvm, 78) || 905 905 psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT) 906 906 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 907 - end = (start + (1UL << 31)) & ~((1UL << 31) - 1); 907 + end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1); 908 908 break; 909 909 default: 910 910 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+1 -1
arch/s390/kvm/vsie.c
··· 1069 1069 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 1070 1070 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 1071 1071 1072 - BUILD_BUG_ON(sizeof(struct vsie_page) != 4096); 1072 + BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE); 1073 1073 scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL); 1074 1074 1075 1075 /* 512 byte alignment */
+1 -1
arch/s390/lib/delay.c
··· 57 57 end = get_tod_clock_fast() + (usecs << 12); 58 58 do { 59 59 clock_saved = 0; 60 - if (end < S390_lowcore.clock_comparator) { 60 + if (tod_after(S390_lowcore.clock_comparator, end)) { 61 61 clock_saved = local_tick_disable(); 62 62 set_clock_comparator(end); 63 63 }
+51 -36
arch/s390/lib/spinlock.c
··· 32 32 } 33 33 __setup("spin_retry=", spin_retry_setup); 34 34 35 + static inline int arch_load_niai4(int *lock) 36 + { 37 + int owner; 38 + 39 + asm volatile( 40 + #ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES 41 + " .long 0xb2fa0040\n" /* NIAI 4 */ 42 + #endif 43 + " l %0,%1\n" 44 + : "=d" (owner) : "Q" (*lock) : "memory"); 45 + return owner; 46 + } 47 + 48 + static inline int arch_cmpxchg_niai8(int *lock, int old, int new) 49 + { 50 + int expected = old; 51 + 52 + asm volatile( 53 + #ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES 54 + " .long 0xb2fa0080\n" /* NIAI 8 */ 55 + #endif 56 + " cs %0,%3,%1\n" 57 + : "=d" (old), "=Q" (*lock) 58 + : "0" (old), "d" (new), "Q" (*lock) 59 + : "cc", "memory"); 60 + return expected == old; 61 + } 62 + 35 63 void arch_spin_lock_wait(arch_spinlock_t *lp) 36 64 { 37 65 int cpu = SPINLOCK_LOCKVAL; 38 - int owner, count, first_diag; 66 + int owner, count; 39 67 40 - first_diag = 1; 68 + /* Pass the virtual CPU to the lock holder if it is not running */ 69 + owner = arch_load_niai4(&lp->lock); 70 + if (owner && arch_vcpu_is_preempted(~owner)) 71 + smp_yield_cpu(~owner); 72 + 73 + count = spin_retry; 41 74 while (1) { 42 - owner = ACCESS_ONCE(lp->lock); 75 + owner = arch_load_niai4(&lp->lock); 43 76 /* Try to get the lock if it is free. */ 44 77 if (!owner) { 45 - if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) 78 + if (arch_cmpxchg_niai8(&lp->lock, 0, cpu)) 46 79 return; 47 80 continue; 48 81 } 49 - /* First iteration: check if the lock owner is running. */ 50 - if (first_diag && arch_vcpu_is_preempted(~owner)) { 51 - smp_yield_cpu(~owner); 52 - first_diag = 0; 82 + if (count-- >= 0) 53 83 continue; 54 - } 55 - /* Loop for a while on the lock value. */ 56 84 count = spin_retry; 57 - do { 58 - owner = ACCESS_ONCE(lp->lock); 59 - } while (owner && count-- > 0); 60 - if (!owner) 61 - continue; 62 85 /* 63 86 * For multiple layers of hypervisors, e.g. z/VM + LPAR 64 87 * yield the CPU unconditionally. 
For LPAR rely on the 65 88 * sense running status. 66 89 */ 67 - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { 90 + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) 68 91 smp_yield_cpu(~owner); 69 - first_diag = 0; 70 - } 71 92 } 72 93 } 73 94 EXPORT_SYMBOL(arch_spin_lock_wait); ··· 96 75 void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) 97 76 { 98 77 int cpu = SPINLOCK_LOCKVAL; 99 - int owner, count, first_diag; 78 + int owner, count; 100 79 101 80 local_irq_restore(flags); 102 - first_diag = 1; 81 + 82 + /* Pass the virtual CPU to the lock holder if it is not running */ 83 + owner = arch_load_niai4(&lp->lock); 84 + if (owner && arch_vcpu_is_preempted(~owner)) 85 + smp_yield_cpu(~owner); 86 + 87 + count = spin_retry; 103 88 while (1) { 104 - owner = ACCESS_ONCE(lp->lock); 89 + owner = arch_load_niai4(&lp->lock); 105 90 /* Try to get the lock if it is free. */ 106 91 if (!owner) { 107 92 local_irq_disable(); 108 - if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) 93 + if (arch_cmpxchg_niai8(&lp->lock, 0, cpu)) 109 94 return; 110 95 local_irq_restore(flags); 111 96 continue; 112 97 } 113 - /* Check if the lock owner is running. */ 114 - if (first_diag && arch_vcpu_is_preempted(~owner)) { 115 - smp_yield_cpu(~owner); 116 - first_diag = 0; 98 + if (count-- >= 0) 117 99 continue; 118 - } 119 - /* Loop for a while on the lock value. */ 120 100 count = spin_retry; 121 - do { 122 - owner = ACCESS_ONCE(lp->lock); 123 - } while (owner && count-- > 0); 124 - if (!owner) 125 - continue; 126 101 /* 127 102 * For multiple layers of hypervisors, e.g. z/VM + LPAR 128 103 * yield the CPU unconditionally. For LPAR rely on the 129 104 * sense running status. 130 105 */ 131 - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { 106 + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) 132 107 smp_yield_cpu(~owner); 133 - first_diag = 0; 134 - } 135 108 } 136 109 } 137 110 EXPORT_SYMBOL(arch_spin_lock_wait_flags);
+26 -12
arch/s390/lib/uaccess.c
··· 15 15 #include <asm/mmu_context.h> 16 16 #include <asm/facility.h> 17 17 18 + #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES 18 19 static DEFINE_STATIC_KEY_FALSE(have_mvcos); 20 + 21 + static int __init uaccess_init(void) 22 + { 23 + if (test_facility(27)) 24 + static_branch_enable(&have_mvcos); 25 + return 0; 26 + } 27 + early_initcall(uaccess_init); 28 + 29 + static inline int copy_with_mvcos(void) 30 + { 31 + if (static_branch_likely(&have_mvcos)) 32 + return 1; 33 + return 0; 34 + } 35 + #else 36 + static inline int copy_with_mvcos(void) 37 + { 38 + return 1; 39 + } 40 + #endif 19 41 20 42 static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, 21 43 unsigned long size) ··· 106 84 107 85 unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) 108 86 { 109 - if (static_branch_likely(&have_mvcos)) 87 + if (copy_with_mvcos()) 110 88 return copy_from_user_mvcos(to, from, n); 111 89 return copy_from_user_mvcp(to, from, n); 112 90 } ··· 179 157 180 158 unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) 181 159 { 182 - if (static_branch_likely(&have_mvcos)) 160 + if (copy_with_mvcos()) 183 161 return copy_to_user_mvcos(to, from, n); 184 162 return copy_to_user_mvcs(to, from, n); 185 163 } ··· 242 220 243 221 unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) 244 222 { 245 - if (static_branch_likely(&have_mvcos)) 223 + if (copy_with_mvcos()) 246 224 return copy_in_user_mvcos(to, from, n); 247 225 return copy_in_user_mvc(to, from, n); 248 226 } ··· 314 292 315 293 unsigned long __clear_user(void __user *to, unsigned long size) 316 294 { 317 - if (static_branch_likely(&have_mvcos)) 295 + if (copy_with_mvcos()) 318 296 return clear_user_mvcos(to, size); 319 297 return clear_user_xc(to, size); 320 298 } ··· 371 349 return done; 372 350 } 373 351 EXPORT_SYMBOL(__strncpy_from_user); 374 - 375 - static int __init uaccess_init(void) 376 - { 377 - if 
(test_facility(27)) 378 - static_branch_enable(&have_mvcos); 379 - return 0; 380 - } 381 - early_initcall(uaccess_init);
+5 -5
arch/s390/mm/fault.c
··· 135 135 pr_alert("AS:%016lx ", asce); 136 136 switch (asce & _ASCE_TYPE_MASK) { 137 137 case _ASCE_TYPE_REGION1: 138 - table = table + ((address >> 53) & 0x7ff); 138 + table += (address & _REGION1_INDEX) >> _REGION1_SHIFT; 139 139 if (bad_address(table)) 140 140 goto bad; 141 141 pr_cont("R1:%016lx ", *table); ··· 144 144 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 145 145 /* fallthrough */ 146 146 case _ASCE_TYPE_REGION2: 147 - table = table + ((address >> 42) & 0x7ff); 147 + table += (address & _REGION2_INDEX) >> _REGION2_SHIFT; 148 148 if (bad_address(table)) 149 149 goto bad; 150 150 pr_cont("R2:%016lx ", *table); ··· 153 153 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 154 154 /* fallthrough */ 155 155 case _ASCE_TYPE_REGION3: 156 - table = table + ((address >> 31) & 0x7ff); 156 + table += (address & _REGION3_INDEX) >> _REGION3_SHIFT; 157 157 if (bad_address(table)) 158 158 goto bad; 159 159 pr_cont("R3:%016lx ", *table); ··· 162 162 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 163 163 /* fallthrough */ 164 164 case _ASCE_TYPE_SEGMENT: 165 - table = table + ((address >> 20) & 0x7ff); 165 + table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 166 166 if (bad_address(table)) 167 167 goto bad; 168 168 pr_cont("S:%016lx ", *table); ··· 170 170 goto out; 171 171 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); 172 172 } 173 - table = table + ((address >> 12) & 0xff); 173 + table += (address & _PAGE_INDEX) >> _PAGE_SHIFT; 174 174 if (bad_address(table)) 175 175 goto bad; 176 176 pr_cont("P:%016lx ", *table);
+94 -69
arch/s390/mm/gmap.c
··· 36 36 unsigned long *table; 37 37 unsigned long etype, atype; 38 38 39 - if (limit < (1UL << 31)) { 40 - limit = (1UL << 31) - 1; 39 + if (limit < _REGION3_SIZE) { 40 + limit = _REGION3_SIZE - 1; 41 41 atype = _ASCE_TYPE_SEGMENT; 42 42 etype = _SEGMENT_ENTRY_EMPTY; 43 - } else if (limit < (1UL << 42)) { 44 - limit = (1UL << 42) - 1; 43 + } else if (limit < _REGION2_SIZE) { 44 + limit = _REGION2_SIZE - 1; 45 45 atype = _ASCE_TYPE_REGION3; 46 46 etype = _REGION3_ENTRY_EMPTY; 47 - } else if (limit < (1UL << 53)) { 48 - limit = (1UL << 53) - 1; 47 + } else if (limit < _REGION1_SIZE) { 48 + limit = _REGION1_SIZE - 1; 49 49 atype = _ASCE_TYPE_REGION2; 50 50 etype = _REGION2_ENTRY_EMPTY; 51 51 } else { ··· 65 65 spin_lock_init(&gmap->guest_table_lock); 66 66 spin_lock_init(&gmap->shadow_lock); 67 67 atomic_set(&gmap->ref_count, 1); 68 - page = alloc_pages(GFP_KERNEL, 2); 68 + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); 69 69 if (!page) 70 70 goto out_free; 71 71 page->index = 0; ··· 186 186 gmap_flush_tlb(gmap); 187 187 /* Free all segment & region tables. 
*/ 188 188 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) 189 - __free_pages(page, 2); 189 + __free_pages(page, CRST_ALLOC_ORDER); 190 190 gmap_radix_tree_free(&gmap->guest_to_host); 191 191 gmap_radix_tree_free(&gmap->host_to_guest); 192 192 ··· 306 306 unsigned long *new; 307 307 308 308 /* since we dont free the gmap table until gmap_free we can unlock */ 309 - page = alloc_pages(GFP_KERNEL, 2); 309 + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); 310 310 if (!page) 311 311 return -ENOMEM; 312 312 new = (unsigned long *) page_to_phys(page); ··· 321 321 } 322 322 spin_unlock(&gmap->guest_table_lock); 323 323 if (page) 324 - __free_pages(page, 2); 324 + __free_pages(page, CRST_ALLOC_ORDER); 325 325 return 0; 326 326 } 327 327 ··· 546 546 /* Create higher level tables in the gmap page table */ 547 547 table = gmap->table; 548 548 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { 549 - table += (gaddr >> 53) & 0x7ff; 549 + table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; 550 550 if ((*table & _REGION_ENTRY_INVALID) && 551 551 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, 552 - gaddr & 0xffe0000000000000UL)) 552 + gaddr & _REGION1_MASK)) 553 553 return -ENOMEM; 554 554 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 555 555 } 556 556 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { 557 - table += (gaddr >> 42) & 0x7ff; 557 + table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; 558 558 if ((*table & _REGION_ENTRY_INVALID) && 559 559 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, 560 - gaddr & 0xfffffc0000000000UL)) 560 + gaddr & _REGION2_MASK)) 561 561 return -ENOMEM; 562 562 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 563 563 } 564 564 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { 565 - table += (gaddr >> 31) & 0x7ff; 565 + table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; 566 566 if ((*table & _REGION_ENTRY_INVALID) && 567 567 gmap_alloc_table(gmap, table, 
_SEGMENT_ENTRY_EMPTY, 568 - gaddr & 0xffffffff80000000UL)) 568 + gaddr & _REGION3_MASK)) 569 569 return -ENOMEM; 570 570 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 571 571 } 572 - table += (gaddr >> 20) & 0x7ff; 572 + table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 573 573 /* Walk the parent mm page table */ 574 574 mm = gmap->mm; 575 575 pgd = pgd_offset(mm, vmaddr); ··· 771 771 table = gmap->table; 772 772 switch (gmap->asce & _ASCE_TYPE_MASK) { 773 773 case _ASCE_TYPE_REGION1: 774 - table += (gaddr >> 53) & 0x7ff; 774 + table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; 775 775 if (level == 4) 776 776 break; 777 777 if (*table & _REGION_ENTRY_INVALID) ··· 779 779 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 780 780 /* Fallthrough */ 781 781 case _ASCE_TYPE_REGION2: 782 - table += (gaddr >> 42) & 0x7ff; 782 + table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; 783 783 if (level == 3) 784 784 break; 785 785 if (*table & _REGION_ENTRY_INVALID) ··· 787 787 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 788 788 /* Fallthrough */ 789 789 case _ASCE_TYPE_REGION3: 790 - table += (gaddr >> 31) & 0x7ff; 790 + table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; 791 791 if (level == 2) 792 792 break; 793 793 if (*table & _REGION_ENTRY_INVALID) ··· 795 795 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 796 796 /* Fallthrough */ 797 797 case _ASCE_TYPE_SEGMENT: 798 - table += (gaddr >> 20) & 0x7ff; 798 + table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 799 799 if (level == 1) 800 800 break; 801 801 if (*table & _REGION_ENTRY_INVALID) 802 802 return NULL; 803 803 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); 804 - table += (gaddr >> 12) & 0xff; 804 + table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT; 805 805 } 806 806 return table; 807 807 } ··· 1126 1126 table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ 1127 1127 if (!table || *table & _PAGE_INVALID) 1128 1128 return; 1129 - 
gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1); 1129 + gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1); 1130 1130 ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); 1131 1131 } 1132 1132 ··· 1144 1144 int i; 1145 1145 1146 1146 BUG_ON(!gmap_is_shadow(sg)); 1147 - for (i = 0; i < 256; i++, raddr += 1UL << 12) 1147 + for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE) 1148 1148 pgt[i] = _PAGE_INVALID; 1149 1149 } 1150 1150 ··· 1164 1164 ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ 1165 1165 if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) 1166 1166 return; 1167 - gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1); 1168 - sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff)); 1167 + gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); 1168 + sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); 1169 1169 gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); 1170 1170 pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); 1171 1171 *ste = _SEGMENT_ENTRY_EMPTY; ··· 1193 1193 1194 1194 BUG_ON(!gmap_is_shadow(sg)); 1195 1195 asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; 1196 - for (i = 0; i < 2048; i++, raddr += 1UL << 20) { 1196 + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { 1197 1197 if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) 1198 1198 continue; 1199 1199 pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); ··· 1222 1222 r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ 1223 1223 if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) 1224 1224 return; 1225 - gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1); 1226 - r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff)); 1225 + gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); 1226 + r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); 1227 1227 gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); 1228 1228 sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); 1229 1229 *r3e = _REGION3_ENTRY_EMPTY; ··· 1231 1231 
/* Free segment table */ 1232 1232 page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); 1233 1233 list_del(&page->lru); 1234 - __free_pages(page, 2); 1234 + __free_pages(page, CRST_ALLOC_ORDER); 1235 1235 } 1236 1236 1237 1237 /** ··· 1251 1251 1252 1252 BUG_ON(!gmap_is_shadow(sg)); 1253 1253 asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; 1254 - for (i = 0; i < 2048; i++, raddr += 1UL << 31) { 1254 + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { 1255 1255 if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) 1256 1256 continue; 1257 1257 sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); ··· 1260 1260 /* Free segment table */ 1261 1261 page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); 1262 1262 list_del(&page->lru); 1263 - __free_pages(page, 2); 1263 + __free_pages(page, CRST_ALLOC_ORDER); 1264 1264 } 1265 1265 } 1266 1266 ··· 1280 1280 r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ 1281 1281 if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) 1282 1282 return; 1283 - gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1); 1284 - r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff)); 1283 + gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); 1284 + r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); 1285 1285 gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); 1286 1286 r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); 1287 1287 *r2e = _REGION2_ENTRY_EMPTY; ··· 1289 1289 /* Free region 3 table */ 1290 1290 page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); 1291 1291 list_del(&page->lru); 1292 - __free_pages(page, 2); 1292 + __free_pages(page, CRST_ALLOC_ORDER); 1293 1293 } 1294 1294 1295 1295 /** ··· 1309 1309 1310 1310 BUG_ON(!gmap_is_shadow(sg)); 1311 1311 asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; 1312 - for (i = 0; i < 2048; i++, raddr += 1UL << 42) { 1312 + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { 1313 1313 if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) 1314 1314 continue; 1315 1315 r3t = (unsigned long *)(r2t[i] & 
_REGION_ENTRY_ORIGIN); ··· 1318 1318 /* Free region 3 table */ 1319 1319 page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); 1320 1320 list_del(&page->lru); 1321 - __free_pages(page, 2); 1321 + __free_pages(page, CRST_ALLOC_ORDER); 1322 1322 } 1323 1323 } 1324 1324 ··· 1338 1338 r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ 1339 1339 if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) 1340 1340 return; 1341 - gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1); 1342 - r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff)); 1341 + gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); 1342 + r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); 1343 1343 gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); 1344 1344 r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); 1345 1345 *r1e = _REGION1_ENTRY_EMPTY; ··· 1347 1347 /* Free region 2 table */ 1348 1348 page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); 1349 1349 list_del(&page->lru); 1350 - __free_pages(page, 2); 1350 + __free_pages(page, CRST_ALLOC_ORDER); 1351 1351 } 1352 1352 1353 1353 /** ··· 1367 1367 1368 1368 BUG_ON(!gmap_is_shadow(sg)); 1369 1369 asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; 1370 - for (i = 0; i < 2048; i++, raddr += 1UL << 53) { 1370 + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { 1371 1371 if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) 1372 1372 continue; 1373 1373 r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); ··· 1378 1378 /* Free region 2 table */ 1379 1379 page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); 1380 1380 list_del(&page->lru); 1381 - __free_pages(page, 2); 1381 + __free_pages(page, CRST_ALLOC_ORDER); 1382 1382 } 1383 1383 } 1384 1384 ··· 1535 1535 /* protect after insertion, so it will get properly invalidated */ 1536 1536 down_read(&parent->mm->mmap_sem); 1537 1537 rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, 1538 - ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096, 1538 + ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, 1539 1539 PROT_READ, 
PGSTE_VSIE_BIT); 1540 1540 up_read(&parent->mm->mmap_sem); 1541 1541 spin_lock(&parent->shadow_lock); ··· 1578 1578 1579 1579 BUG_ON(!gmap_is_shadow(sg)); 1580 1580 /* Allocate a shadow region second table */ 1581 - page = alloc_pages(GFP_KERNEL, 2); 1581 + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); 1582 1582 if (!page) 1583 1583 return -ENOMEM; 1584 1584 page->index = r2t & _REGION_ENTRY_ORIGIN; ··· 1614 1614 } 1615 1615 spin_unlock(&sg->guest_table_lock); 1616 1616 /* Make r2t read-only in parent gmap page table */ 1617 - raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1; 1617 + raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1; 1618 1618 origin = r2t & _REGION_ENTRY_ORIGIN; 1619 - offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096; 1620 - len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; 1619 + offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1620 + len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1621 1621 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); 1622 1622 spin_lock(&sg->guest_table_lock); 1623 1623 if (!rc) { ··· 1634 1634 return rc; 1635 1635 out_free: 1636 1636 spin_unlock(&sg->guest_table_lock); 1637 - __free_pages(page, 2); 1637 + __free_pages(page, CRST_ALLOC_ORDER); 1638 1638 return rc; 1639 1639 } 1640 1640 EXPORT_SYMBOL_GPL(gmap_shadow_r2t); ··· 1662 1662 1663 1663 BUG_ON(!gmap_is_shadow(sg)); 1664 1664 /* Allocate a shadow region second table */ 1665 - page = alloc_pages(GFP_KERNEL, 2); 1665 + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); 1666 1666 if (!page) 1667 1667 return -ENOMEM; 1668 1668 page->index = r3t & _REGION_ENTRY_ORIGIN; ··· 1697 1697 } 1698 1698 spin_unlock(&sg->guest_table_lock); 1699 1699 /* Make r3t read-only in parent gmap page table */ 1700 - raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2; 1700 + raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2; 1701 1701 origin = r3t & _REGION_ENTRY_ORIGIN; 1702 - offset = ((r3t & 
_REGION_ENTRY_OFFSET) >> 6) * 4096; 1703 - len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; 1702 + offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1703 + len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1704 1704 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); 1705 1705 spin_lock(&sg->guest_table_lock); 1706 1706 if (!rc) { ··· 1717 1717 return rc; 1718 1718 out_free: 1719 1719 spin_unlock(&sg->guest_table_lock); 1720 - __free_pages(page, 2); 1720 + __free_pages(page, CRST_ALLOC_ORDER); 1721 1721 return rc; 1722 1722 } 1723 1723 EXPORT_SYMBOL_GPL(gmap_shadow_r3t); ··· 1745 1745 1746 1746 BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); 1747 1747 /* Allocate a shadow segment table */ 1748 - page = alloc_pages(GFP_KERNEL, 2); 1748 + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); 1749 1749 if (!page) 1750 1750 return -ENOMEM; 1751 1751 page->index = sgt & _REGION_ENTRY_ORIGIN; ··· 1781 1781 } 1782 1782 spin_unlock(&sg->guest_table_lock); 1783 1783 /* Make sgt read-only in parent gmap page table */ 1784 - raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3; 1784 + raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3; 1785 1785 origin = sgt & _REGION_ENTRY_ORIGIN; 1786 - offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096; 1787 - len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; 1786 + offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1787 + len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1788 1788 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); 1789 1789 spin_lock(&sg->guest_table_lock); 1790 1790 if (!rc) { ··· 1801 1801 return rc; 1802 1802 out_free: 1803 1803 spin_unlock(&sg->guest_table_lock); 1804 - __free_pages(page, 2); 1804 + __free_pages(page, CRST_ALLOC_ORDER); 1805 1805 return rc; 1806 1806 } 1807 1807 EXPORT_SYMBOL_GPL(gmap_shadow_sgt); ··· 1902 1902 } 1903 1903 spin_unlock(&sg->guest_table_lock); 1904 1904 /* Make pgt read-only 
in parent gmap page table (not the pgste) */ 1905 - raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT; 1905 + raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT; 1906 1906 origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; 1907 1907 rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); 1908 1908 spin_lock(&sg->guest_table_lock); ··· 2021 2021 } 2022 2022 /* Check for top level table */ 2023 2023 start = sg->orig_asce & _ASCE_ORIGIN; 2024 - end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096; 2024 + end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE; 2025 2025 if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start && 2026 2026 gaddr < end) { 2027 2027 /* The complete shadow table has to go */ ··· 2032 2032 return; 2033 2033 } 2034 2034 /* Remove the page table tree from on specific entry */ 2035 - head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12); 2035 + head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); 2036 2036 gmap_for_each_rmap_safe(rmap, rnext, head) { 2037 2037 bits = rmap->raddr & _SHADOW_RMAP_MASK; 2038 2038 raddr = rmap->raddr ^ bits; ··· 2076 2076 struct gmap *gmap, *sg, *next; 2077 2077 2078 2078 offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); 2079 - offset = offset * (4096 / sizeof(pte_t)); 2079 + offset = offset * (PAGE_SIZE / sizeof(pte_t)); 2080 2080 rcu_read_lock(); 2081 2081 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { 2082 2082 spin_lock(&gmap->guest_table_lock); ··· 2121 2121 } 2122 2122 2123 2123 /* 2124 + * Remove all empty zero pages from the mapping for lazy refaulting 2125 + * - This must be called after mm->context.has_pgste is set, to avoid 2126 + * future creation of zero pages 2127 + * - This must be called after THP was enabled 2128 + */ 2129 + static int __zap_zero_pages(pmd_t *pmd, unsigned long start, 2130 + unsigned long end, struct mm_walk *walk) 2131 + { 2132 + unsigned long addr; 2133 + 2134 + for (addr = start; addr != 
end; addr += PAGE_SIZE) { 2135 + pte_t *ptep; 2136 + spinlock_t *ptl; 2137 + 2138 + ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 2139 + if (is_zero_pfn(pte_pfn(*ptep))) 2140 + ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID)); 2141 + pte_unmap_unlock(ptep, ptl); 2142 + } 2143 + return 0; 2144 + } 2145 + 2146 + static inline void zap_zero_pages(struct mm_struct *mm) 2147 + { 2148 + struct mm_walk walk = { .pmd_entry = __zap_zero_pages }; 2149 + 2150 + walk.mm = mm; 2151 + walk_page_range(0, TASK_SIZE, &walk); 2152 + } 2153 + 2154 + /* 2124 2155 * switch on pgstes for its userspace process (for kvm) 2125 2156 */ 2126 2157 int s390_enable_sie(void) ··· 2168 2137 mm->context.has_pgste = 1; 2169 2138 /* split thp mappings and disable thp for future mappings */ 2170 2139 thp_split_mm(mm); 2140 + zap_zero_pages(mm); 2171 2141 up_write(&mm->mmap_sem); 2172 2142 return 0; 2173 2143 } ··· 2181 2149 static int __s390_enable_skey(pte_t *pte, unsigned long addr, 2182 2150 unsigned long next, struct mm_walk *walk) 2183 2151 { 2184 - /* 2185 - * Remove all zero page mappings, 2186 - * after establishing a policy to forbid zero page mappings 2187 - * following faults for that page will get fresh anonymous pages 2188 - */ 2189 - if (is_zero_pfn(pte_pfn(*pte))) 2190 - ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); 2191 2152 /* Clear storage key */ 2192 2153 ptep_zap_key(walk->mm, addr, pte); 2193 2154 return 0;
+57 -3
arch/s390/mm/init.c
··· 26 26 #include <linux/poison.h> 27 27 #include <linux/initrd.h> 28 28 #include <linux/export.h> 29 + #include <linux/cma.h> 29 30 #include <linux/gfp.h> 30 31 #include <linux/memblock.h> 31 32 #include <asm/processor.h> ··· 85 84 psw_t psw; 86 85 87 86 init_mm.pgd = swapper_pg_dir; 88 - if (VMALLOC_END > (1UL << 42)) { 87 + if (VMALLOC_END > _REGION2_SIZE) { 89 88 asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; 90 89 pgd_type = _REGION2_ENTRY_EMPTY; 91 90 } else { ··· 94 93 } 95 94 init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; 96 95 S390_lowcore.kernel_asce = init_mm.context.asce; 97 - clear_table((unsigned long *) init_mm.pgd, pgd_type, 98 - sizeof(unsigned long)*2048); 96 + crst_table_init((unsigned long *) init_mm.pgd, pgd_type); 99 97 vmem_map_init(); 100 98 101 99 /* enable virtual mapping in kernel mode */ ··· 137 137 free_all_bootmem(); 138 138 setup_zero_pages(); /* Setup zeroed pages. */ 139 139 140 + cmma_init_nodat(); 141 + 140 142 mem_init_print_info(NULL); 141 143 } 142 144 ··· 168 166 } 169 167 170 168 #ifdef CONFIG_MEMORY_HOTPLUG 169 + 170 + #ifdef CONFIG_CMA 171 + 172 + /* Prevent memory blocks which contain cma regions from going offline */ 173 + 174 + struct s390_cma_mem_data { 175 + unsigned long start; 176 + unsigned long end; 177 + }; 178 + 179 + static int s390_cma_check_range(struct cma *cma, void *data) 180 + { 181 + struct s390_cma_mem_data *mem_data; 182 + unsigned long start, end; 183 + 184 + mem_data = data; 185 + start = cma_get_base(cma); 186 + end = start + cma_get_size(cma); 187 + if (end < mem_data->start) 188 + return 0; 189 + if (start >= mem_data->end) 190 + return 0; 191 + return -EBUSY; 192 + } 193 + 194 + static int s390_cma_mem_notifier(struct notifier_block *nb, 195 + unsigned long action, void *data) 196 + { 197 + struct s390_cma_mem_data mem_data; 198 + struct memory_notify *arg; 199 + int rc = 0; 200 + 201 + arg = data; 202 + mem_data.start = arg->start_pfn << PAGE_SHIFT; 203 + mem_data.end 
= mem_data.start + (arg->nr_pages << PAGE_SHIFT); 204 + if (action == MEM_GOING_OFFLINE) 205 + rc = cma_for_each_area(s390_cma_check_range, &mem_data); 206 + return notifier_from_errno(rc); 207 + } 208 + 209 + static struct notifier_block s390_cma_mem_nb = { 210 + .notifier_call = s390_cma_mem_notifier, 211 + }; 212 + 213 + static int __init s390_cma_mem_init(void) 214 + { 215 + return register_memory_notifier(&s390_cma_mem_nb); 216 + } 217 + device_initcall(s390_cma_mem_init); 218 + 219 + #endif /* CONFIG_CMA */ 220 + 171 221 int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 172 222 { 173 223 unsigned long start_pfn = PFN_DOWN(start);
+179 -17
arch/s390/mm/page-states.c
··· 10 10 #include <linux/errno.h> 11 11 #include <linux/types.h> 12 12 #include <linux/mm.h> 13 + #include <linux/memblock.h> 13 14 #include <linux/gfp.h> 14 15 #include <linux/init.h> 15 - 16 + #include <asm/facility.h> 16 17 #include <asm/page-states.h> 17 18 18 19 static int cmma_flag = 1; ··· 37 36 static inline int cmma_test_essa(void) 38 37 { 39 38 register unsigned long tmp asm("0") = 0; 40 - register int rc asm("1") = -EOPNOTSUPP; 39 + register int rc asm("1"); 41 40 41 + /* test ESSA_GET_STATE */ 42 42 asm volatile( 43 - " .insn rrf,0xb9ab0000,%1,%1,0,0\n" 43 + " .insn rrf,0xb9ab0000,%1,%1,%2,0\n" 44 44 "0: la %0,0\n" 45 45 "1:\n" 46 46 EX_TABLE(0b,1b) 47 - : "+&d" (rc), "+&d" (tmp)); 47 + : "=&d" (rc), "+&d" (tmp) 48 + : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP)); 48 49 return rc; 49 50 } 50 51 ··· 54 51 { 55 52 if (!cmma_flag) 56 53 return; 57 - if (cmma_test_essa()) 54 + if (cmma_test_essa()) { 58 55 cmma_flag = 0; 56 + return; 57 + } 58 + if (test_facility(147)) 59 + cmma_flag = 2; 59 60 } 60 61 61 - static inline void set_page_unstable(struct page *page, int order) 62 + static inline unsigned char get_page_state(struct page *page) 63 + { 64 + unsigned char state; 65 + 66 + asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0" 67 + : "=&d" (state) 68 + : "a" (page_to_phys(page)), 69 + "i" (ESSA_GET_STATE)); 70 + return state & 0x3f; 71 + } 72 + 73 + static inline void set_page_unused(struct page *page, int order) 62 74 { 63 75 int i, rc; 64 76 ··· 84 66 "i" (ESSA_SET_UNUSED)); 85 67 } 86 68 87 - void arch_free_page(struct page *page, int order) 88 - { 89 - if (!cmma_flag) 90 - return; 91 - set_page_unstable(page, order); 92 - } 93 - 94 - static inline void set_page_stable(struct page *page, int order) 69 + static inline void set_page_stable_dat(struct page *page, int order) 95 70 { 96 71 int i, rc; 97 72 ··· 95 84 "i" (ESSA_SET_STABLE)); 96 85 } 97 86 87 + static inline void set_page_stable_nodat(struct page *page, int order) 88 + { 89 + int i, rc; 90 + 91 + 
for (i = 0; i < (1 << order); i++) 92 + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" 93 + : "=&d" (rc) 94 + : "a" (page_to_phys(page + i)), 95 + "i" (ESSA_SET_STABLE_NODAT)); 96 + } 97 + 98 + static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) 99 + { 100 + unsigned long next; 101 + struct page *page; 102 + pmd_t *pmd; 103 + 104 + pmd = pmd_offset(pud, addr); 105 + do { 106 + next = pmd_addr_end(addr, end); 107 + if (pmd_none(*pmd) || pmd_large(*pmd)) 108 + continue; 109 + page = virt_to_page(pmd_val(*pmd)); 110 + set_bit(PG_arch_1, &page->flags); 111 + } while (pmd++, addr = next, addr != end); 112 + } 113 + 114 + static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) 115 + { 116 + unsigned long next; 117 + struct page *page; 118 + pud_t *pud; 119 + int i; 120 + 121 + pud = pud_offset(p4d, addr); 122 + do { 123 + next = pud_addr_end(addr, end); 124 + if (pud_none(*pud) || pud_large(*pud)) 125 + continue; 126 + if (!pud_folded(*pud)) { 127 + page = virt_to_page(pud_val(*pud)); 128 + for (i = 0; i < 3; i++) 129 + set_bit(PG_arch_1, &page[i].flags); 130 + } 131 + mark_kernel_pmd(pud, addr, next); 132 + } while (pud++, addr = next, addr != end); 133 + } 134 + 135 + static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) 136 + { 137 + unsigned long next; 138 + struct page *page; 139 + p4d_t *p4d; 140 + int i; 141 + 142 + p4d = p4d_offset(pgd, addr); 143 + do { 144 + next = p4d_addr_end(addr, end); 145 + if (p4d_none(*p4d)) 146 + continue; 147 + if (!p4d_folded(*p4d)) { 148 + page = virt_to_page(p4d_val(*p4d)); 149 + for (i = 0; i < 3; i++) 150 + set_bit(PG_arch_1, &page[i].flags); 151 + } 152 + mark_kernel_pud(p4d, addr, next); 153 + } while (p4d++, addr = next, addr != end); 154 + } 155 + 156 + static void mark_kernel_pgd(void) 157 + { 158 + unsigned long addr, next; 159 + struct page *page; 160 + pgd_t *pgd; 161 + int i; 162 + 163 + addr = 0; 164 + pgd = pgd_offset_k(addr); 165 + do { 166 + 
next = pgd_addr_end(addr, MODULES_END); 167 + if (pgd_none(*pgd)) 168 + continue; 169 + if (!pgd_folded(*pgd)) { 170 + page = virt_to_page(pgd_val(*pgd)); 171 + for (i = 0; i < 3; i++) 172 + set_bit(PG_arch_1, &page[i].flags); 173 + } 174 + mark_kernel_p4d(pgd, addr, next); 175 + } while (pgd++, addr = next, addr != MODULES_END); 176 + } 177 + 178 + void __init cmma_init_nodat(void) 179 + { 180 + struct memblock_region *reg; 181 + struct page *page; 182 + unsigned long start, end, ix; 183 + 184 + if (cmma_flag < 2) 185 + return; 186 + /* Mark pages used in kernel page tables */ 187 + mark_kernel_pgd(); 188 + 189 + /* Set all kernel pages not used for page tables to stable/no-dat */ 190 + for_each_memblock(memory, reg) { 191 + start = memblock_region_memory_base_pfn(reg); 192 + end = memblock_region_memory_end_pfn(reg); 193 + page = pfn_to_page(start); 194 + for (ix = start; ix < end; ix++, page++) { 195 + if (__test_and_clear_bit(PG_arch_1, &page->flags)) 196 + continue; /* skip page table pages */ 197 + if (!list_empty(&page->lru)) 198 + continue; /* skip free pages */ 199 + set_page_stable_nodat(page, 0); 200 + } 201 + } 202 + } 203 + 204 + void arch_free_page(struct page *page, int order) 205 + { 206 + if (!cmma_flag) 207 + return; 208 + set_page_unused(page, order); 209 + } 210 + 98 211 void arch_alloc_page(struct page *page, int order) 99 212 { 100 213 if (!cmma_flag) 101 214 return; 102 - set_page_stable(page, order); 215 + if (cmma_flag < 2) 216 + set_page_stable_dat(page, order); 217 + else 218 + set_page_stable_nodat(page, order); 219 + } 220 + 221 + void arch_set_page_dat(struct page *page, int order) 222 + { 223 + if (!cmma_flag) 224 + return; 225 + set_page_stable_dat(page, order); 226 + } 227 + 228 + void arch_set_page_nodat(struct page *page, int order) 229 + { 230 + if (cmma_flag < 2) 231 + return; 232 + set_page_stable_nodat(page, order); 233 + } 234 + 235 + int arch_test_page_nodat(struct page *page) 236 + { 237 + unsigned char state; 238 + 239 + 
if (cmma_flag < 2) 240 + return 0; 241 + state = get_page_state(page); 242 + return !!(state & 0x20); 103 243 } 104 244 105 245 void arch_set_page_states(int make_stable) ··· 270 108 list_for_each(l, &zone->free_area[order].free_list[t]) { 271 109 page = list_entry(l, struct page, lru); 272 110 if (make_stable) 273 - set_page_stable(page, order); 111 + set_page_stable_dat(page, 0); 274 112 else 275 - set_page_unstable(page, order); 113 + set_page_unused(page, order); 276 114 } 277 115 } 278 116 spin_unlock_irqrestore(&zone->lock, flags);
+3 -2
arch/s390/mm/pageattr.c
··· 7 7 #include <asm/cacheflush.h> 8 8 #include <asm/facility.h> 9 9 #include <asm/pgtable.h> 10 + #include <asm/pgalloc.h> 10 11 #include <asm/page.h> 11 12 #include <asm/set_memory.h> 12 13 ··· 192 191 pud_t new; 193 192 int i, ro, nx; 194 193 195 - pm_dir = vmem_pmd_alloc(); 194 + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 196 195 if (!pm_dir) 197 196 return -ENOMEM; 198 197 pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; ··· 329 328 return; 330 329 } 331 330 for (i = 0; i < nr; i++) { 332 - __ptep_ipte(address, pte, IPTE_GLOBAL); 331 + __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); 333 332 address += PAGE_SIZE; 334 333 pte++; 335 334 }
+7 -5
arch/s390/mm/pgalloc.c
··· 57 57 58 58 if (!page) 59 59 return NULL; 60 + arch_set_page_dat(page, 2); 60 61 return (unsigned long *) page_to_phys(page); 61 62 } 62 63 ··· 83 82 int rc, notify; 84 83 85 84 /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ 86 - BUG_ON(mm->context.asce_limit < (1UL << 42)); 85 + BUG_ON(mm->context.asce_limit < _REGION2_SIZE); 87 86 if (end >= TASK_SIZE_MAX) 88 87 return -ENOMEM; 89 88 rc = 0; ··· 96 95 } 97 96 spin_lock_bh(&mm->page_table_lock); 98 97 pgd = (unsigned long *) mm->pgd; 99 - if (mm->context.asce_limit == (1UL << 42)) { 98 + if (mm->context.asce_limit == _REGION2_SIZE) { 100 99 crst_table_init(table, _REGION2_ENTRY_EMPTY); 101 100 p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); 102 101 mm->pgd = (pgd_t *) table; 103 - mm->context.asce_limit = 1UL << 53; 102 + mm->context.asce_limit = _REGION1_SIZE; 104 103 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 105 104 _ASCE_USER_BITS | _ASCE_TYPE_REGION2; 106 105 } else { ··· 124 123 pgd_t *pgd; 125 124 126 125 /* downgrade should only happen from 3 to 2 levels (compat only) */ 127 - BUG_ON(mm->context.asce_limit != (1UL << 42)); 126 + BUG_ON(mm->context.asce_limit != _REGION2_SIZE); 128 127 129 128 if (current->active_mm == mm) { 130 129 clear_user_asce(); ··· 133 132 134 133 pgd = mm->pgd; 135 134 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); 136 - mm->context.asce_limit = 1UL << 31; 135 + mm->context.asce_limit = _REGION3_SIZE; 137 136 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 138 137 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; 139 138 crst_table_free(mm, (unsigned long *) pgd); ··· 215 214 __free_page(page); 216 215 return NULL; 217 216 } 217 + arch_set_page_dat(page, 0); 218 218 /* Initialize page table */ 219 219 table = (unsigned long *) page_to_phys(page); 220 220 if (mm_alloc_pgste(mm)) {
+121 -33
arch/s390/mm/pgtable.c
··· 25 25 #include <asm/mmu_context.h> 26 26 #include <asm/page-states.h> 27 27 28 + static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, 29 + pte_t *ptep, int nodat) 30 + { 31 + unsigned long opt, asce; 32 + 33 + if (MACHINE_HAS_TLB_GUEST) { 34 + opt = 0; 35 + asce = READ_ONCE(mm->context.gmap_asce); 36 + if (asce == 0UL || nodat) 37 + opt |= IPTE_NODAT; 38 + if (asce != -1UL) { 39 + asce = asce ? : mm->context.asce; 40 + opt |= IPTE_GUEST_ASCE; 41 + } 42 + __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL); 43 + } else { 44 + __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL); 45 + } 46 + } 47 + 48 + static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, 49 + pte_t *ptep, int nodat) 50 + { 51 + unsigned long opt, asce; 52 + 53 + if (MACHINE_HAS_TLB_GUEST) { 54 + opt = 0; 55 + asce = READ_ONCE(mm->context.gmap_asce); 56 + if (asce == 0UL || nodat) 57 + opt |= IPTE_NODAT; 58 + if (asce != -1UL) { 59 + asce = asce ? : mm->context.asce; 60 + opt |= IPTE_GUEST_ASCE; 61 + } 62 + __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL); 63 + } else { 64 + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 65 + } 66 + } 67 + 28 68 static inline pte_t ptep_flush_direct(struct mm_struct *mm, 29 - unsigned long addr, pte_t *ptep) 69 + unsigned long addr, pte_t *ptep, 70 + int nodat) 30 71 { 31 72 pte_t old; 32 73 ··· 77 36 atomic_inc(&mm->context.flush_count); 78 37 if (MACHINE_HAS_TLB_LC && 79 38 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 80 - __ptep_ipte(addr, ptep, IPTE_LOCAL); 39 + ptep_ipte_local(mm, addr, ptep, nodat); 81 40 else 82 - __ptep_ipte(addr, ptep, IPTE_GLOBAL); 41 + ptep_ipte_global(mm, addr, ptep, nodat); 83 42 atomic_dec(&mm->context.flush_count); 84 43 return old; 85 44 } 86 45 87 46 static inline pte_t ptep_flush_lazy(struct mm_struct *mm, 88 - unsigned long addr, pte_t *ptep) 47 + unsigned long addr, pte_t *ptep, 48 + int nodat) 89 49 { 90 50 pte_t old; 91 51 ··· 99 57 pte_val(*ptep) |= _PAGE_INVALID; 100 58 
mm->context.flush_mm = 1; 101 59 } else 102 - __ptep_ipte(addr, ptep, IPTE_GLOBAL); 60 + ptep_ipte_global(mm, addr, ptep, nodat); 103 61 atomic_dec(&mm->context.flush_count); 104 62 return old; 105 63 } ··· 271 229 { 272 230 pgste_t pgste; 273 231 pte_t old; 232 + int nodat; 274 233 275 234 preempt_disable(); 276 235 pgste = ptep_xchg_start(mm, addr, ptep); 277 - old = ptep_flush_direct(mm, addr, ptep); 236 + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 237 + old = ptep_flush_direct(mm, addr, ptep, nodat); 278 238 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 279 239 preempt_enable(); 280 240 return old; ··· 288 244 { 289 245 pgste_t pgste; 290 246 pte_t old; 247 + int nodat; 291 248 292 249 preempt_disable(); 293 250 pgste = ptep_xchg_start(mm, addr, ptep); 294 - old = ptep_flush_lazy(mm, addr, ptep); 251 + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 252 + old = ptep_flush_lazy(mm, addr, ptep, nodat); 295 253 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 296 254 preempt_enable(); 297 255 return old; ··· 305 259 { 306 260 pgste_t pgste; 307 261 pte_t old; 262 + int nodat; 308 263 309 264 preempt_disable(); 310 265 pgste = ptep_xchg_start(mm, addr, ptep); 311 - old = ptep_flush_lazy(mm, addr, ptep); 266 + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 267 + old = ptep_flush_lazy(mm, addr, ptep, nodat); 312 268 if (mm_has_pgste(mm)) { 313 269 pgste = pgste_update_all(old, pgste, mm); 314 270 pgste_set(ptep, pgste); ··· 338 290 } 339 291 EXPORT_SYMBOL(ptep_modify_prot_commit); 340 292 293 + static inline void pmdp_idte_local(struct mm_struct *mm, 294 + unsigned long addr, pmd_t *pmdp) 295 + { 296 + if (MACHINE_HAS_TLB_GUEST) 297 + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, 298 + mm->context.asce, IDTE_LOCAL); 299 + else 300 + __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); 301 + } 302 + 303 + static inline void pmdp_idte_global(struct mm_struct *mm, 304 + unsigned long addr, pmd_t *pmdp) 305 + { 306 + if (MACHINE_HAS_TLB_GUEST) 
307 + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, 308 + mm->context.asce, IDTE_GLOBAL); 309 + else if (MACHINE_HAS_IDTE) 310 + __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); 311 + else 312 + __pmdp_csp(pmdp); 313 + } 314 + 341 315 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, 342 316 unsigned long addr, pmd_t *pmdp) 343 317 { ··· 368 298 old = *pmdp; 369 299 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 370 300 return old; 371 - if (!MACHINE_HAS_IDTE) { 372 - __pmdp_csp(pmdp); 373 - return old; 374 - } 375 301 atomic_inc(&mm->context.flush_count); 376 302 if (MACHINE_HAS_TLB_LC && 377 303 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 378 - __pmdp_idte(addr, pmdp, IDTE_LOCAL); 304 + pmdp_idte_local(mm, addr, pmdp); 379 305 else 380 - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 306 + pmdp_idte_global(mm, addr, pmdp); 381 307 atomic_dec(&mm->context.flush_count); 382 308 return old; 383 309 } ··· 391 325 cpumask_of(smp_processor_id()))) { 392 326 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; 393 327 mm->context.flush_mm = 1; 394 - } else if (MACHINE_HAS_IDTE) 395 - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 396 - else 397 - __pmdp_csp(pmdp); 328 + } else { 329 + pmdp_idte_global(mm, addr, pmdp); 330 + } 398 331 atomic_dec(&mm->context.flush_count); 399 332 return old; 400 333 } ··· 424 359 } 425 360 EXPORT_SYMBOL(pmdp_xchg_lazy); 426 361 362 + static inline void pudp_idte_local(struct mm_struct *mm, 363 + unsigned long addr, pud_t *pudp) 364 + { 365 + if (MACHINE_HAS_TLB_GUEST) 366 + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, 367 + mm->context.asce, IDTE_LOCAL); 368 + else 369 + __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL); 370 + } 371 + 372 + static inline void pudp_idte_global(struct mm_struct *mm, 373 + unsigned long addr, pud_t *pudp) 374 + { 375 + if (MACHINE_HAS_TLB_GUEST) 376 + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, 377 + mm->context.asce, IDTE_GLOBAL); 378 + else if (MACHINE_HAS_IDTE) 379 + __pudp_idte(addr, pudp, 0, 
0, IDTE_GLOBAL); 380 + else 381 + /* 382 + * Invalid bit position is the same for pmd and pud, so we can 383 + * re-use _pmd_csp() here 384 + */ 385 + __pmdp_csp((pmd_t *) pudp); 386 + } 387 + 427 388 static inline pud_t pudp_flush_direct(struct mm_struct *mm, 428 389 unsigned long addr, pud_t *pudp) 429 390 { ··· 458 367 old = *pudp; 459 368 if (pud_val(old) & _REGION_ENTRY_INVALID) 460 369 return old; 461 - if (!MACHINE_HAS_IDTE) { 462 - /* 463 - * Invalid bit position is the same for pmd and pud, so we can 464 - * re-use _pmd_csp() here 465 - */ 466 - __pmdp_csp((pmd_t *) pudp); 467 - return old; 468 - } 469 370 atomic_inc(&mm->context.flush_count); 470 371 if (MACHINE_HAS_TLB_LC && 471 372 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 472 - __pudp_idte(addr, pudp, IDTE_LOCAL); 373 + pudp_idte_local(mm, addr, pudp); 473 374 else 474 - __pudp_idte(addr, pudp, IDTE_GLOBAL); 375 + pudp_idte_global(mm, addr, pudp); 475 376 atomic_dec(&mm->context.flush_count); 476 377 return old; 477 378 } ··· 565 482 { 566 483 pte_t entry; 567 484 pgste_t pgste; 568 - int pte_i, pte_p; 485 + int pte_i, pte_p, nodat; 569 486 570 487 pgste = pgste_get_lock(ptep); 571 488 entry = *ptep; ··· 578 495 return -EAGAIN; 579 496 } 580 497 /* Change access rights and set pgste bit */ 498 + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 581 499 if (prot == PROT_NONE && !pte_i) { 582 - ptep_flush_direct(mm, addr, ptep); 500 + ptep_flush_direct(mm, addr, ptep, nodat); 583 501 pgste = pgste_update_all(entry, pgste, mm); 584 502 pte_val(entry) |= _PAGE_INVALID; 585 503 } 586 504 if (prot == PROT_READ && !pte_p) { 587 - ptep_flush_direct(mm, addr, ptep); 505 + ptep_flush_direct(mm, addr, ptep, nodat); 588 506 pte_val(entry) &= ~_PAGE_INVALID; 589 507 pte_val(entry) |= _PAGE_PROTECT; 590 508 } ··· 625 541 void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) 626 542 { 627 543 pgste_t pgste; 544 + int nodat; 628 545 629 546 pgste = pgste_get_lock(ptep); 630 
547 /* notifier is called by the caller */ 631 - ptep_flush_direct(mm, saddr, ptep); 548 + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 549 + ptep_flush_direct(mm, saddr, ptep, nodat); 632 550 /* don't touch the storage key - it belongs to parent pgste */ 633 551 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); 634 552 pgste_set_unlock(ptep, pgste); ··· 703 617 pte_t *ptep; 704 618 pte_t pte; 705 619 bool dirty; 620 + int nodat; 706 621 707 622 pgd = pgd_offset(mm, addr); 708 623 p4d = p4d_alloc(mm, pgd, addr); ··· 732 645 pte = *ptep; 733 646 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 734 647 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 735 - __ptep_ipte(addr, ptep, IPTE_GLOBAL); 648 + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 649 + ptep_ipte_global(mm, addr, ptep, nodat); 736 650 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 737 651 pte_val(pte) |= _PAGE_PROTECT; 738 652 else
+12 -35
arch/s390/mm/vmem.c
··· 38 38 return (void *) memblock_alloc(size, size); 39 39 } 40 40 41 - static inline p4d_t *vmem_p4d_alloc(void) 41 + void *vmem_crst_alloc(unsigned long val) 42 42 { 43 - p4d_t *p4d = NULL; 43 + unsigned long *table; 44 44 45 - p4d = vmem_alloc_pages(2); 46 - if (!p4d) 47 - return NULL; 48 - clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); 49 - return p4d; 50 - } 51 - 52 - static inline pud_t *vmem_pud_alloc(void) 53 - { 54 - pud_t *pud = NULL; 55 - 56 - pud = vmem_alloc_pages(2); 57 - if (!pud) 58 - return NULL; 59 - clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4); 60 - return pud; 61 - } 62 - 63 - pmd_t *vmem_pmd_alloc(void) 64 - { 65 - pmd_t *pmd = NULL; 66 - 67 - pmd = vmem_alloc_pages(2); 68 - if (!pmd) 69 - return NULL; 70 - clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4); 71 - return pmd; 45 + table = vmem_alloc_pages(CRST_ALLOC_ORDER); 46 + if (table) 47 + crst_table_init(table, val); 48 + return table; 72 49 } 73 50 74 51 pte_t __ref *vmem_pte_alloc(void) ··· 91 114 while (address < end) { 92 115 pg_dir = pgd_offset_k(address); 93 116 if (pgd_none(*pg_dir)) { 94 - p4_dir = vmem_p4d_alloc(); 117 + p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); 95 118 if (!p4_dir) 96 119 goto out; 97 120 pgd_populate(&init_mm, pg_dir, p4_dir); 98 121 } 99 122 p4_dir = p4d_offset(pg_dir, address); 100 123 if (p4d_none(*p4_dir)) { 101 - pu_dir = vmem_pud_alloc(); 124 + pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); 102 125 if (!pu_dir) 103 126 goto out; 104 127 p4d_populate(&init_mm, p4_dir, pu_dir); ··· 113 136 continue; 114 137 } 115 138 if (pud_none(*pu_dir)) { 116 - pm_dir = vmem_pmd_alloc(); 139 + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 117 140 if (!pm_dir) 118 141 goto out; 119 142 pud_populate(&init_mm, pu_dir, pm_dir); ··· 230 253 for (address = start; address < end;) { 231 254 pg_dir = pgd_offset_k(address); 232 255 if (pgd_none(*pg_dir)) { 233 - p4_dir = vmem_p4d_alloc(); 256 + 
p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); 234 257 if (!p4_dir) 235 258 goto out; 236 259 pgd_populate(&init_mm, pg_dir, p4_dir); ··· 238 261 239 262 p4_dir = p4d_offset(pg_dir, address); 240 263 if (p4d_none(*p4_dir)) { 241 - pu_dir = vmem_pud_alloc(); 264 + pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); 242 265 if (!pu_dir) 243 266 goto out; 244 267 p4d_populate(&init_mm, p4_dir, pu_dir); ··· 246 269 247 270 pu_dir = pud_offset(p4_dir, address); 248 271 if (pud_none(*pu_dir)) { 249 - pm_dir = vmem_pmd_alloc(); 272 + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 250 273 if (!pm_dir) 251 274 goto out; 252 275 pud_populate(&init_mm, pu_dir, pm_dir);
+9 -1
arch/s390/pci/pci_clp.c
··· 24 24 25 25 bool zpci_unique_uid; 26 26 27 + static void update_uid_checking(bool new) 28 + { 29 + if (zpci_unique_uid != new) 30 + zpci_dbg(1, "uid checking:%d\n", new); 31 + 32 + zpci_unique_uid = new; 33 + } 34 + 27 35 static inline void zpci_err_clp(unsigned int rsp, int rc) 28 36 { 29 37 struct { ··· 327 319 goto out; 328 320 } 329 321 330 - zpci_unique_uid = rrb->response.uid_checking; 322 + update_uid_checking(rrb->response.uid_checking); 331 323 WARN_ON_ONCE(rrb->response.entry_size != 332 324 sizeof(struct clp_fh_list_entry)); 333 325
+4 -1
arch/s390/tools/gen_facilities.c
··· 41 41 27, /* mvcos */ 42 42 32, /* compare and swap and store */ 43 43 33, /* compare and swap and store 2 */ 44 - 34, /* general extension facility */ 44 + 34, /* general instructions extension */ 45 45 35, /* execute extensions */ 46 46 #endif 47 47 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES ··· 53 53 #endif 54 54 #ifdef CONFIG_HAVE_MARCH_Z13_FEATURES 55 55 53, /* load-and-zero-rightmost-byte, etc. */ 56 + #endif 57 + #ifdef CONFIG_HAVE_MARCH_Z14_FEATURES 58 + 58, /* miscellaneous-instruction-extension 2 */ 56 59 #endif 57 60 -1 /* END */ 58 61 }
+49 -6
drivers/s390/block/dasd.c
··· 801 801 struct dasd_ccw_req *cqr, 802 802 struct request *req) 803 803 { 804 - long strtime, irqtime, endtime, tottime; /* in microseconds */ 805 - long tottimeps, sectors; 804 + unsigned long strtime, irqtime, endtime, tottime; 805 + unsigned long tottimeps, sectors; 806 806 struct dasd_device *device; 807 807 int sectors_ind, tottime_ind, tottimeps_ind, strtime_ind; 808 808 int irqtime_ind, irqtimeps_ind, endtime_ind; 809 + struct dasd_profile_info *data; 809 810 810 811 device = cqr->startdev; 811 812 if (!(dasd_global_profile_level || ··· 836 835 837 836 spin_lock(&dasd_global_profile.lock); 838 837 if (dasd_global_profile.data) { 838 + data = dasd_global_profile.data; 839 + data->dasd_sum_times += tottime; 840 + data->dasd_sum_time_str += strtime; 841 + data->dasd_sum_time_irq += irqtime; 842 + data->dasd_sum_time_end += endtime; 839 843 dasd_profile_end_add_data(dasd_global_profile.data, 840 844 cqr->startdev != block->base, 841 845 cqr->cpmode == 1, ··· 853 847 spin_unlock(&dasd_global_profile.lock); 854 848 855 849 spin_lock(&block->profile.lock); 856 - if (block->profile.data) 850 + if (block->profile.data) { 851 + data = block->profile.data; 852 + data->dasd_sum_times += tottime; 853 + data->dasd_sum_time_str += strtime; 854 + data->dasd_sum_time_irq += irqtime; 855 + data->dasd_sum_time_end += endtime; 857 856 dasd_profile_end_add_data(block->profile.data, 858 857 cqr->startdev != block->base, 859 858 cqr->cpmode == 1, ··· 867 856 tottimeps_ind, strtime_ind, 868 857 irqtime_ind, irqtimeps_ind, 869 858 endtime_ind); 859 + } 870 860 spin_unlock(&block->profile.lock); 871 861 872 862 spin_lock(&device->profile.lock); 873 - if (device->profile.data) 863 + if (device->profile.data) { 864 + data = device->profile.data; 865 + data->dasd_sum_times += tottime; 866 + data->dasd_sum_time_str += strtime; 867 + data->dasd_sum_time_irq += irqtime; 868 + data->dasd_sum_time_end += endtime; 874 869 dasd_profile_end_add_data(device->profile.data, 875 870 
cqr->startdev != block->base, 876 871 cqr->cpmode == 1, ··· 885 868 tottimeps_ind, strtime_ind, 886 869 irqtime_ind, irqtimeps_ind, 887 870 endtime_ind); 871 + } 888 872 spin_unlock(&device->profile.lock); 889 873 } 890 874 ··· 1007 989 seq_printf(m, "total_sectors %u\n", data->dasd_io_sects); 1008 990 seq_printf(m, "total_pav %u\n", data->dasd_io_alias); 1009 991 seq_printf(m, "total_hpf %u\n", data->dasd_io_tpm); 992 + seq_printf(m, "avg_total %lu\n", data->dasd_io_reqs ? 993 + data->dasd_sum_times / data->dasd_io_reqs : 0UL); 994 + seq_printf(m, "avg_build_to_ssch %lu\n", data->dasd_io_reqs ? 995 + data->dasd_sum_time_str / data->dasd_io_reqs : 0UL); 996 + seq_printf(m, "avg_ssch_to_irq %lu\n", data->dasd_io_reqs ? 997 + data->dasd_sum_time_irq / data->dasd_io_reqs : 0UL); 998 + seq_printf(m, "avg_irq_to_end %lu\n", data->dasd_io_reqs ? 999 + data->dasd_sum_time_end / data->dasd_io_reqs : 0UL); 1010 1000 seq_puts(m, "histogram_sectors "); 1011 1001 dasd_stats_array(m, data->dasd_io_secs); 1012 1002 seq_puts(m, "histogram_io_times "); ··· 1665 1639 { 1666 1640 struct dasd_ccw_req *cqr, *next; 1667 1641 struct dasd_device *device; 1668 - unsigned long long now; 1642 + unsigned long now; 1669 1643 int nrf_suppressed = 0; 1670 1644 int fp_suppressed = 0; 1671 1645 u8 *sense = NULL; ··· 3178 3152 */ 3179 3153 static void dasd_setup_queue(struct dasd_block *block) 3180 3154 { 3155 + unsigned int logical_block_size = block->bp_block; 3181 3156 struct request_queue *q = block->request_queue; 3157 + unsigned int max_bytes, max_discard_sectors; 3182 3158 int max; 3183 3159 3184 3160 if (block->base->features & DASD_FEATURE_USERAW) { ··· 3197 3169 } 3198 3170 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 3199 3171 q->limits.max_dev_sectors = max; 3200 - blk_queue_logical_block_size(q, block->bp_block); 3172 + blk_queue_logical_block_size(q, logical_block_size); 3201 3173 blk_queue_max_hw_sectors(q, max); 3202 3174 blk_queue_max_segments(q, USHRT_MAX); 3203 3175 /* with 
page sized segments we can translate each segement into ··· 3205 3177 */ 3206 3178 blk_queue_max_segment_size(q, PAGE_SIZE); 3207 3179 blk_queue_segment_boundary(q, PAGE_SIZE - 1); 3180 + 3181 + /* Only activate blocklayer discard support for devices that support it */ 3182 + if (block->base->features & DASD_FEATURE_DISCARD) { 3183 + q->limits.discard_granularity = logical_block_size; 3184 + q->limits.discard_alignment = PAGE_SIZE; 3185 + 3186 + /* Calculate max_discard_sectors and make it PAGE aligned */ 3187 + max_bytes = USHRT_MAX * logical_block_size; 3188 + max_bytes = ALIGN(max_bytes, PAGE_SIZE) - PAGE_SIZE; 3189 + max_discard_sectors = max_bytes / logical_block_size; 3190 + 3191 + blk_queue_max_discard_sectors(q, max_discard_sectors); 3192 + blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); 3193 + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 3194 + } 3208 3195 } 3209 3196 3210 3197 /*
+1 -1
drivers/s390/block/dasd_3990_erp.c
··· 2231 2231 struct dasd_device *device = erp->startdev; 2232 2232 __u8 lpum = erp->refers->irb.esw.esw1.lpum; 2233 2233 int pos = pathmask_to_pos(lpum); 2234 - unsigned long long clk; 2234 + unsigned long clk; 2235 2235 2236 2236 if (!device->path_thrhld) 2237 2237 return;
+2 -1
drivers/s390/block/dasd_devmap.c
··· 1634 1634 NULL, 1635 1635 }; 1636 1636 1637 - static struct attribute_group dasd_attr_group = { 1637 + static const struct attribute_group dasd_attr_group = { 1638 1638 .attrs = dasd_attrs, 1639 1639 }; 1640 1640 ··· 1676 1676 spin_unlock(&dasd_devmap_lock); 1677 1677 return 0; 1678 1678 } 1679 + EXPORT_SYMBOL(dasd_set_feature); 1679 1680 1680 1681 1681 1682 int
+1 -1
drivers/s390/block/dasd_diag.c
··· 235 235 { 236 236 struct dasd_ccw_req *cqr, *next; 237 237 struct dasd_device *device; 238 - unsigned long long expires; 238 + unsigned long expires; 239 239 unsigned long flags; 240 240 addr_t ip; 241 241 int rc;
+2 -6
drivers/s390/block/dasd_eckd.c
··· 3254 3254 /* 1x prefix + one read/write ccw per track */ 3255 3255 cplength = 1 + trkcount; 3256 3256 3257 - /* on 31-bit we need space for two 32 bit addresses per page 3258 - * on 64-bit one 64 bit address 3259 - */ 3260 - datasize = sizeof(struct PFX_eckd_data) + 3261 - cidaw * sizeof(unsigned long long); 3257 + datasize = sizeof(struct PFX_eckd_data) + cidaw * sizeof(unsigned long); 3262 3258 3263 3259 /* Allocate the ccw request. */ 3264 3260 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, ··· 3852 3856 } 3853 3857 size = ALIGN(size, 8); 3854 3858 3855 - datasize = size + cidaw * sizeof(unsigned long long); 3859 + datasize = size + cidaw * sizeof(unsigned long); 3856 3860 3857 3861 /* Allocate the ccw request. */ 3858 3862 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength,
+1 -1
drivers/s390/block/dasd_eckd.h
··· 165 165 __u8 ga_extended; /* Global Attributes Extended */ 166 166 struct ch_t beg_ext; 167 167 struct ch_t end_ext; 168 - unsigned long long ep_sys_time; /* Ext Parameter - System Time Stamp */ 168 + unsigned long ep_sys_time; /* Ext Parameter - System Time Stamp */ 169 169 __u8 ep_format; /* Extended Parameter format byte */ 170 170 __u8 ep_prio; /* Extended Parameter priority I/O byte */ 171 171 __u8 ep_reserved1; /* Extended Parameter Reserved */
+1 -1
drivers/s390/block/dasd_erp.c
··· 124 124 struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr) 125 125 { 126 126 int success; 127 - unsigned long long startclk, stopclk; 127 + unsigned long startclk, stopclk; 128 128 struct dasd_device *startdev; 129 129 130 130 BUG_ON(cqr->refers == NULL || cqr->function == NULL);
+199 -3
drivers/s390/block/dasd_fba.c
··· 174 174 if (readonly) 175 175 set_bit(DASD_FLAG_DEVICE_RO, &device->flags); 176 176 177 + /* FBA supports discard, set the according feature bit */ 178 + dasd_set_feature(cdev, DASD_FEATURE_DISCARD, 1); 179 + 177 180 dev_info(&device->cdev->dev, 178 181 "New FBA DASD %04X/%02X (CU %04X/%02X) with %d MB " 179 182 "and %d B/blk%s\n", ··· 250 247 dasd_generic_handle_state_change(device); 251 248 }; 252 249 253 - static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, 254 - struct dasd_block *block, 255 - struct request *req) 250 + 251 + /* 252 + * Builds a CCW with no data payload 253 + */ 254 + static void ccw_write_no_data(struct ccw1 *ccw) 255 + { 256 + ccw->cmd_code = DASD_FBA_CCW_WRITE; 257 + ccw->flags |= CCW_FLAG_SLI; 258 + ccw->count = 0; 259 + } 260 + 261 + /* 262 + * Builds a CCW that writes only zeroes. 263 + */ 264 + static void ccw_write_zero(struct ccw1 *ccw, int count) 265 + { 266 + ccw->cmd_code = DASD_FBA_CCW_WRITE; 267 + ccw->flags |= CCW_FLAG_SLI; 268 + ccw->count = count; 269 + ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0)); 270 + } 271 + 272 + /* 273 + * Helper function to count the amount of necessary CCWs within a given range 274 + * with 4k alignment and command chaining in mind. 
275 + */ 276 + static int count_ccws(sector_t first_rec, sector_t last_rec, 277 + unsigned int blocks_per_page) 278 + { 279 + sector_t wz_stop = 0, d_stop = 0; 280 + int cur_pos = 0; 281 + int count = 0; 282 + 283 + if (first_rec % blocks_per_page != 0) { 284 + wz_stop = first_rec + blocks_per_page - 285 + (first_rec % blocks_per_page) - 1; 286 + if (wz_stop > last_rec) 287 + wz_stop = last_rec; 288 + cur_pos = wz_stop - first_rec + 1; 289 + count++; 290 + } 291 + 292 + if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) { 293 + if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0) 294 + d_stop = last_rec - ((last_rec - blocks_per_page + 1) % 295 + blocks_per_page); 296 + else 297 + d_stop = last_rec; 298 + 299 + cur_pos += d_stop - (first_rec + cur_pos) + 1; 300 + count++; 301 + } 302 + 303 + if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec) 304 + count++; 305 + 306 + return count; 307 + } 308 + 309 + /* 310 + * This function builds a CCW request for block layer discard requests. 311 + * Each page in the z/VM hypervisor that represents certain records of an FBA 312 + * device will be padded with zeros. This is a special behaviour of the WRITE 313 + * command which is triggered when no data payload is added to the CCW. 314 + * 315 + * Note: Due to issues in some z/VM versions, we can't fully utilise this 316 + * special behaviour. We have to keep a 4k (or 8 block) alignment in mind to 317 + * work around those issues and write actual zeroes to the unaligned parts in 318 + * the request. This workaround might be removed in the future. 
319 + */ 320 + static struct dasd_ccw_req *dasd_fba_build_cp_discard( 321 + struct dasd_device *memdev, 322 + struct dasd_block *block, 323 + struct request *req) 324 + { 325 + struct LO_fba_data *LO_data; 326 + struct dasd_ccw_req *cqr; 327 + struct ccw1 *ccw; 328 + 329 + sector_t wz_stop = 0, d_stop = 0; 330 + sector_t first_rec, last_rec; 331 + 332 + unsigned int blksize = block->bp_block; 333 + unsigned int blocks_per_page; 334 + int wz_count = 0; 335 + int d_count = 0; 336 + int cur_pos = 0; /* Current position within the extent */ 337 + int count = 0; 338 + int cplength; 339 + int datasize; 340 + int nr_ccws; 341 + 342 + first_rec = blk_rq_pos(req) >> block->s2b_shift; 343 + last_rec = 344 + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; 345 + count = last_rec - first_rec + 1; 346 + 347 + blocks_per_page = BLOCKS_PER_PAGE(blksize); 348 + nr_ccws = count_ccws(first_rec, last_rec, blocks_per_page); 349 + 350 + /* define extent + nr_ccws * locate record + nr_ccws * single CCW */ 351 + cplength = 1 + 2 * nr_ccws; 352 + datasize = sizeof(struct DE_fba_data) + 353 + nr_ccws * (sizeof(struct LO_fba_data) + sizeof(struct ccw1)); 354 + 355 + cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev); 356 + if (IS_ERR(cqr)) 357 + return cqr; 358 + 359 + ccw = cqr->cpaddr; 360 + 361 + define_extent(ccw++, cqr->data, WRITE, blksize, first_rec, count); 362 + LO_data = cqr->data + sizeof(struct DE_fba_data); 363 + 364 + /* First part is not aligned. Calculate range to write zeroes. 
*/ 365 + if (first_rec % blocks_per_page != 0) { 366 + wz_stop = first_rec + blocks_per_page - 367 + (first_rec % blocks_per_page) - 1; 368 + if (wz_stop > last_rec) 369 + wz_stop = last_rec; 370 + wz_count = wz_stop - first_rec + 1; 371 + 372 + ccw[-1].flags |= CCW_FLAG_CC; 373 + locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count); 374 + 375 + ccw[-1].flags |= CCW_FLAG_CC; 376 + ccw_write_zero(ccw++, wz_count * blksize); 377 + 378 + cur_pos = wz_count; 379 + } 380 + 381 + /* We can do proper discard when we've got at least blocks_per_page blocks. */ 382 + if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) { 383 + /* is last record at page boundary? */ 384 + if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0) 385 + d_stop = last_rec - ((last_rec - blocks_per_page + 1) % 386 + blocks_per_page); 387 + else 388 + d_stop = last_rec; 389 + 390 + d_count = d_stop - (first_rec + cur_pos) + 1; 391 + 392 + ccw[-1].flags |= CCW_FLAG_CC; 393 + locate_record(ccw++, LO_data++, WRITE, cur_pos, d_count); 394 + 395 + ccw[-1].flags |= CCW_FLAG_CC; 396 + ccw_write_no_data(ccw++); 397 + 398 + cur_pos += d_count; 399 + } 400 + 401 + /* We might still have some bits left which need to be zeroed. 
*/ 402 + if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec) { 403 + if (d_stop != 0) 404 + wz_count = last_rec - d_stop; 405 + else if (wz_stop != 0) 406 + wz_count = last_rec - wz_stop; 407 + else 408 + wz_count = count; 409 + 410 + ccw[-1].flags |= CCW_FLAG_CC; 411 + locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count); 412 + 413 + ccw[-1].flags |= CCW_FLAG_CC; 414 + ccw_write_zero(ccw++, wz_count * blksize); 415 + } 416 + 417 + if (blk_noretry_request(req) || 418 + block->base->features & DASD_FEATURE_FAILFAST) 419 + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 420 + 421 + cqr->startdev = memdev; 422 + cqr->memdev = memdev; 423 + cqr->block = block; 424 + cqr->expires = memdev->default_expires * HZ; /* default 5 minutes */ 425 + cqr->retries = memdev->default_retries; 426 + cqr->buildclk = get_tod_clock(); 427 + cqr->status = DASD_CQR_FILLED; 428 + 429 + return cqr; 430 + } 431 + 432 + static struct dasd_ccw_req *dasd_fba_build_cp_regular( 433 + struct dasd_device *memdev, 434 + struct dasd_block *block, 435 + struct request *req) 256 436 { 257 437 struct dasd_fba_private *private = block->base->private; 258 438 unsigned long *idaws; ··· 556 370 cqr->buildclk = get_tod_clock(); 557 371 cqr->status = DASD_CQR_FILLED; 558 372 return cqr; 373 + } 374 + 375 + static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device *memdev, 376 + struct dasd_block *block, 377 + struct request *req) 378 + { 379 + if (req_op(req) == REQ_OP_DISCARD || req_op(req) == REQ_OP_WRITE_ZEROES) 380 + return dasd_fba_build_cp_discard(memdev, block, req); 381 + else 382 + return dasd_fba_build_cp_regular(memdev, block, req); 559 383 } 560 384 561 385 static int
+13 -6
drivers/s390/block/dasd_int.h
··· 167 167 printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \ 168 168 } while(0) 169 169 170 + /* Macro to calculate number of blocks per page */ 171 + #define BLOCKS_PER_PAGE(blksize) (PAGE_SIZE / blksize) 172 + 170 173 struct dasd_ccw_req { 171 174 unsigned int magic; /* Eye catcher */ 172 175 struct list_head devlist; /* for dasd_device request queue */ ··· 199 196 void *function; /* originating ERP action */ 200 197 201 198 /* these are for statistics only */ 202 - unsigned long long buildclk; /* TOD-clock of request generation */ 203 - unsigned long long startclk; /* TOD-clock of request start */ 204 - unsigned long long stopclk; /* TOD-clock of request interrupt */ 205 - unsigned long long endclk; /* TOD-clock of request termination */ 199 + unsigned long buildclk; /* TOD-clock of request generation */ 200 + unsigned long startclk; /* TOD-clock of request start */ 201 + unsigned long stopclk; /* TOD-clock of request interrupt */ 202 + unsigned long endclk; /* TOD-clock of request termination */ 206 203 207 204 /* Callback that is called after reaching final status. */ 208 205 void (*callback)(struct dasd_ccw_req *, void *data); ··· 426 423 u8 chpid; 427 424 struct dasd_conf_data *conf_data; 428 425 atomic_t error_count; 429 - unsigned long long errorclk; 426 + unsigned long errorclk; 430 427 }; 431 428 432 429 ··· 457 454 unsigned int dasd_read_time2[32]; /* hist. of time from start to irq */ 458 455 unsigned int dasd_read_time3[32]; /* hist. of time from irq to end */ 459 456 unsigned int dasd_read_nr_req[32]; /* hist. 
of # of requests in chanq */ 457 + unsigned long dasd_sum_times; /* sum of request times */ 458 + unsigned long dasd_sum_time_str; /* sum of time from build to start */ 459 + unsigned long dasd_sum_time_irq; /* sum of time from start to irq */ 460 + unsigned long dasd_sum_time_end; /* sum of time from irq to end */ 460 461 }; 461 462 462 463 struct dasd_profile { ··· 542 535 struct block_device *bdev; 543 536 atomic_t open_count; 544 537 545 - unsigned long long blocks; /* size of volume in blocks */ 538 + unsigned long blocks; /* size of volume in blocks */ 546 539 unsigned int bp_block; /* bytes per block */ 547 540 unsigned int s2b_shift; /* log2 (bp_block/512) */ 548 541
+1 -1
drivers/s390/block/dasd_proc.c
··· 90 90 seq_printf(m, "n/f "); 91 91 else 92 92 seq_printf(m, 93 - "at blocksize: %d, %lld blocks, %lld MB", 93 + "at blocksize: %u, %lu blocks, %lu MB", 94 94 block->bp_block, block->blocks, 95 95 ((block->bp_block >> 9) * 96 96 block->blocks) >> 11);
+8 -5
drivers/s390/block/scm_blk.c
··· 249 249 static void scm_request_finish(struct scm_request *scmrq) 250 250 { 251 251 struct scm_blk_dev *bdev = scmrq->bdev; 252 + int *error; 252 253 int i; 253 254 254 255 for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) { 255 - if (scmrq->error) 256 - blk_mq_end_request(scmrq->request[i], scmrq->error); 257 - else 258 - blk_mq_complete_request(scmrq->request[i]); 256 + error = blk_mq_rq_to_pdu(scmrq->request[i]); 257 + *error = scmrq->error; 258 + blk_mq_complete_request(scmrq->request[i]); 259 259 } 260 260 261 261 atomic_dec(&bdev->queued_reqs); ··· 415 415 416 416 static void scm_blk_request_done(struct request *req) 417 417 { 418 - blk_mq_end_request(req, 0); 418 + int *error = blk_mq_rq_to_pdu(req); 419 + 420 + blk_mq_end_request(req, *error); 419 421 } 420 422 421 423 static const struct block_device_operations scm_blk_devops = { ··· 450 448 atomic_set(&bdev->queued_reqs, 0); 451 449 452 450 bdev->tag_set.ops = &scm_mq_ops; 451 + bdev->tag_set.cmd_size = sizeof(int); 453 452 bdev->tag_set.nr_hw_queues = nr_requests; 454 453 bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; 455 454 bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+11
drivers/s390/char/Kconfig
··· 169 169 def_bool y 170 170 prompt "Support for the z/VM CP interface" 171 171 depends on S390 172 + select CMA 172 173 help 173 174 Select this option if you want to be able to interact with the control 174 175 program on z/VM 176 + 177 + config VMCP_CMA_SIZE 178 + int "Memory in MiB reserved for z/VM CP interface" 179 + default "4" 180 + depends on VMCP 181 + help 182 + Specify the default amount of memory in MiB reserved for the z/VM CP 183 + interface. If needed this memory is used for large contiguous memory 184 + allocations. The default can be changed with the kernel command line 185 + parameter "vmcp_cma". 175 186 176 187 config MONREADER 177 188 def_tristate m
+1 -1
drivers/s390/char/raw3270.c
··· 1082 1082 NULL, 1083 1083 }; 1084 1084 1085 - static struct attribute_group raw3270_attr_group = { 1085 + static const struct attribute_group raw3270_attr_group = { 1086 1086 .attrs = raw3270_attrs, 1087 1087 }; 1088 1088
+1
drivers/s390/char/sclp_cmd.c
··· 252 252 if (!sccb) 253 253 return -ENOMEM; 254 254 sccb->header.length = PAGE_SIZE; 255 + sccb->header.function_code = 0x40; 255 256 rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb, 256 257 SCLP_QUEUE_INTERVAL); 257 258 if (rc)
+1 -1
drivers/s390/char/sclp_config.c
··· 135 135 return rc ?: count; 136 136 } 137 137 138 - static struct bin_attribute ofb_bin_attr = { 138 + static const struct bin_attribute ofb_bin_attr = { 139 139 .attr = { 140 140 .name = "event_data", 141 141 .mode = S_IWUSR,
+4 -2
drivers/s390/char/sclp_early.c
··· 39 39 u8 fac84; /* 84 */ 40 40 u8 fac85; /* 85 */ 41 41 u8 _pad_86[91 - 86]; /* 86-90 */ 42 - u8 flags; /* 91 */ 42 + u8 fac91; /* 91 */ 43 43 u8 _pad_92[98 - 92]; /* 92-97 */ 44 44 u8 fac98; /* 98 */ 45 45 u8 hamaxpow; /* 99 */ ··· 103 103 sclp.has_kss = !!(sccb->fac98 & 0x01); 104 104 if (sccb->fac85 & 0x02) 105 105 S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; 106 + if (sccb->fac91 & 0x40) 107 + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; 106 108 sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; 107 109 sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; 108 110 sclp.rzm <<= 20; ··· 141 139 142 140 /* Save IPL information */ 143 141 sclp_ipl_info.is_valid = 1; 144 - if (sccb->flags & 0x2) 142 + if (sccb->fac91 & 0x2) 145 143 sclp_ipl_info.has_dump = 1; 146 144 memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); 147 145
+1 -1
drivers/s390/char/sclp_ocf.c
··· 126 126 NULL, 127 127 }; 128 128 129 - static struct attribute_group ocf_attr_group = { 129 + static const struct attribute_group ocf_attr_group = { 130 130 .attrs = ocf_attrs, 131 131 }; 132 132
+1 -1
drivers/s390/char/tape_core.c
··· 175 175 NULL 176 176 }; 177 177 178 - static struct attribute_group tape_attr_group = { 178 + static const struct attribute_group tape_attr_group = { 179 179 .attrs = tape_attrs, 180 180 }; 181 181
+89 -23
drivers/s390/char/vmcp.c
··· 17 17 #include <linux/kernel.h> 18 18 #include <linux/miscdevice.h> 19 19 #include <linux/slab.h> 20 + #include <linux/uaccess.h> 20 21 #include <linux/export.h> 22 + #include <linux/mutex.h> 23 + #include <linux/cma.h> 24 + #include <linux/mm.h> 21 25 #include <asm/compat.h> 22 26 #include <asm/cpcmd.h> 23 27 #include <asm/debug.h> 24 - #include <linux/uaccess.h> 25 - #include "vmcp.h" 28 + #include <asm/vmcp.h> 29 + 30 + struct vmcp_session { 31 + char *response; 32 + unsigned int bufsize; 33 + unsigned int cma_alloc : 1; 34 + int resp_size; 35 + int resp_code; 36 + struct mutex mutex; 37 + }; 26 38 27 39 static debug_info_t *vmcp_debug; 40 + 41 + static unsigned long vmcp_cma_size __initdata = CONFIG_VMCP_CMA_SIZE * 1024 * 1024; 42 + static struct cma *vmcp_cma; 43 + 44 + static int __init early_parse_vmcp_cma(char *p) 45 + { 46 + vmcp_cma_size = ALIGN(memparse(p, NULL), PAGE_SIZE); 47 + return 0; 48 + } 49 + early_param("vmcp_cma", early_parse_vmcp_cma); 50 + 51 + void __init vmcp_cma_reserve(void) 52 + { 53 + if (!MACHINE_IS_VM) 54 + return; 55 + cma_declare_contiguous(0, vmcp_cma_size, 0, 0, 0, false, "vmcp", &vmcp_cma); 56 + } 57 + 58 + static void vmcp_response_alloc(struct vmcp_session *session) 59 + { 60 + struct page *page = NULL; 61 + int nr_pages, order; 62 + 63 + order = get_order(session->bufsize); 64 + nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; 65 + /* 66 + * For anything below order 3 allocations rely on the buddy 67 + * allocator. If such low-order allocations can't be handled 68 + * anymore the system won't work anyway. 
69 + */ 70 + if (order > 2) 71 + page = cma_alloc(vmcp_cma, nr_pages, 0, GFP_KERNEL); 72 + if (page) { 73 + session->response = (char *)page_to_phys(page); 74 + session->cma_alloc = 1; 75 + return; 76 + } 77 + session->response = (char *)__get_free_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL, order); 78 + } 79 + 80 + static void vmcp_response_free(struct vmcp_session *session) 81 + { 82 + int nr_pages, order; 83 + struct page *page; 84 + 85 + if (!session->response) 86 + return; 87 + order = get_order(session->bufsize); 88 + nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; 89 + if (session->cma_alloc) { 90 + page = phys_to_page((unsigned long)session->response); 91 + cma_release(vmcp_cma, page, nr_pages); 92 + session->cma_alloc = 0; 93 + } else { 94 + free_pages((unsigned long)session->response, order); 95 + } 96 + session->response = NULL; 97 + } 28 98 29 99 static int vmcp_open(struct inode *inode, struct file *file) 30 100 { ··· 121 51 122 52 session = file->private_data; 123 53 file->private_data = NULL; 124 - free_pages((unsigned long)session->response, get_order(session->bufsize)); 54 + vmcp_response_free(session); 125 55 kfree(session); 126 56 return 0; 127 57 } ··· 167 97 return -ERESTARTSYS; 168 98 } 169 99 if (!session->response) 170 - session->response = (char *)__get_free_pages(GFP_KERNEL 171 - | __GFP_RETRY_MAYFAIL | GFP_DMA, 172 - get_order(session->bufsize)); 100 + vmcp_response_alloc(session); 173 101 if (!session->response) { 174 102 mutex_unlock(&session->mutex); 175 103 kfree(cmd); ··· 198 130 static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 199 131 { 200 132 struct vmcp_session *session; 133 + int ret = -ENOTTY; 201 134 int __user *argp; 202 - int temp; 203 135 204 136 session = file->private_data; 205 137 if (is_compat_task()) ··· 210 142 return -ERESTARTSYS; 211 143 switch (cmd) { 212 144 case VMCP_GETCODE: 213 - temp = session->resp_code; 214 - mutex_unlock(&session->mutex); 215 - return put_user(temp, 
argp); 145 + ret = put_user(session->resp_code, argp); 146 + break; 216 147 case VMCP_SETBUF: 217 - free_pages((unsigned long)session->response, 218 - get_order(session->bufsize)); 219 - session->response=NULL; 220 - temp = get_user(session->bufsize, argp); 221 - if (get_order(session->bufsize) > 8) { 148 + vmcp_response_free(session); 149 + ret = get_user(session->bufsize, argp); 150 + if (ret) 222 151 session->bufsize = PAGE_SIZE; 223 - temp = -EINVAL; 152 + if (!session->bufsize || get_order(session->bufsize) > 8) { 153 + session->bufsize = PAGE_SIZE; 154 + ret = -EINVAL; 224 155 } 225 - mutex_unlock(&session->mutex); 226 - return temp; 156 + break; 227 157 case VMCP_GETSIZE: 228 - temp = session->resp_size; 229 - mutex_unlock(&session->mutex); 230 - return put_user(temp, argp); 158 + ret = put_user(session->resp_size, argp); 159 + break; 231 160 default: 232 - mutex_unlock(&session->mutex); 233 - return -ENOIOCTLCMD; 161 + break; 234 162 } 163 + mutex_unlock(&session->mutex); 164 + return ret; 235 165 } 236 166 237 167 static const struct file_operations vmcp_fops = {
+7 -13
drivers/s390/char/vmcp.h arch/s390/include/uapi/asm/vmcp.h
··· 12 12 * The idea of this driver is based on cpint from Neale Ferguson 13 13 */ 14 14 15 + #ifndef _UAPI_ASM_VMCP_H 16 + #define _UAPI_ASM_VMCP_H 17 + 15 18 #include <linux/ioctl.h> 16 - #include <linux/mutex.h> 17 19 18 - #define VMCP_GETCODE _IOR(0x10, 1, int) 19 - #define VMCP_SETBUF _IOW(0x10, 2, int) 20 - #define VMCP_GETSIZE _IOR(0x10, 3, int) 20 + #define VMCP_GETCODE _IOR(0x10, 1, int) 21 + #define VMCP_SETBUF _IOW(0x10, 2, int) 22 + #define VMCP_GETSIZE _IOR(0x10, 3, int) 21 23 22 - struct vmcp_session { 23 - unsigned int bufsize; 24 - char *response; 25 - int resp_size; 26 - int resp_code; 27 - /* As we use copy_from/to_user, which might * 28 - * sleep and cannot use a spinlock */ 29 - struct mutex mutex; 30 - }; 24 + #endif /* _UAPI_ASM_VMCP_H */
+2 -2
drivers/s390/cio/chp.c
··· 143 143 sizeof(chp->cmg_chars)); 144 144 } 145 145 146 - static struct bin_attribute chp_measurement_chars_attr = { 146 + static const struct bin_attribute chp_measurement_chars_attr = { 147 147 .attr = { 148 148 .name = "measurement_chars", 149 149 .mode = S_IRUSR, ··· 197 197 return count; 198 198 } 199 199 200 - static struct bin_attribute chp_measurement_attr = { 200 + static const struct bin_attribute chp_measurement_attr = { 201 201 .attr = { 202 202 .name = "measurement", 203 203 .mode = S_IRUSR,
+2 -2
drivers/s390/cio/device.c
··· 612 612 NULL, 613 613 }; 614 614 615 - static struct attribute_group io_subchannel_attr_group = { 615 + static const struct attribute_group io_subchannel_attr_group = { 616 616 .attrs = io_subchannel_attrs, 617 617 }; 618 618 ··· 626 626 NULL, 627 627 }; 628 628 629 - static struct attribute_group ccwdev_attr_group = { 629 + static const struct attribute_group ccwdev_attr_group = { 630 630 .attrs = ccwdev_attrs, 631 631 }; 632 632
+1 -1
drivers/s390/crypto/zcrypt_card.c
··· 98 98 NULL, 99 99 }; 100 100 101 - static struct attribute_group zcrypt_card_attr_group = { 101 + static const struct attribute_group zcrypt_card_attr_group = { 102 102 .attrs = zcrypt_card_attrs, 103 103 }; 104 104
+1 -1
drivers/s390/crypto/zcrypt_msgtype6.c
··· 140 140 * + 0x000A 'MRP ' (MCL3 'PK' or CEX2C 'PK') 141 141 * - VUD block 142 142 */ 143 - static struct CPRBX static_cprbx = { 143 + static const struct CPRBX static_cprbx = { 144 144 .cprb_len = 0x00DC, 145 145 .cprb_ver_id = 0x02, 146 146 .func_id = {0x54, 0x32},
+1 -1
drivers/s390/crypto/zcrypt_queue.c
··· 89 89 NULL, 90 90 }; 91 91 92 - static struct attribute_group zcrypt_queue_attr_group = { 92 + static const struct attribute_group zcrypt_queue_attr_group = { 93 93 .attrs = zcrypt_queue_attrs, 94 94 }; 95 95
+4 -4
drivers/s390/net/qeth_l3_sys.c
··· 350 350 NULL, 351 351 }; 352 352 353 - static struct attribute_group qeth_l3_device_attr_group = { 353 + static const struct attribute_group qeth_l3_device_attr_group = { 354 354 .attrs = qeth_l3_device_attrs, 355 355 }; 356 356 ··· 680 680 NULL, 681 681 }; 682 682 683 - static struct attribute_group qeth_device_ipato_group = { 683 + static const struct attribute_group qeth_device_ipato_group = { 684 684 .name = "ipa_takeover", 685 685 .attrs = qeth_ipato_device_attrs, 686 686 }; ··· 843 843 NULL, 844 844 }; 845 845 846 - static struct attribute_group qeth_device_vipa_group = { 846 + static const struct attribute_group qeth_device_vipa_group = { 847 847 .name = "vipa", 848 848 .attrs = qeth_vipa_device_attrs, 849 849 }; ··· 1006 1006 NULL, 1007 1007 }; 1008 1008 1009 - static struct attribute_group qeth_device_rxip_group = { 1009 + static const struct attribute_group qeth_device_rxip_group = { 1010 1010 .name = "rxip", 1011 1011 .attrs = qeth_rxip_device_attrs, 1012 1012 };