Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

+1

.mailmap

··· 384 384 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org> 385 385 Lior David <quic_liord@quicinc.com> <liord@codeaurora.org> 386 386 Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com> 387 + Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com> 387 388 Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net> 388 389 Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com> 389 390 Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>

+3 -1

CREDITS

··· 3150 3150 S: 13353 Berlin 3151 3151 S: Germany 3152 3152 3153 - N: Gustavo Pimental 3153 + N: Gustavo Pimentel 3154 3154 E: gustavo.pimentel@synopsys.com 3155 3155 D: PCI driver for Synopsys DesignWare 3156 + D: Synopsys DesignWare eDMA driver 3157 + D: Synopsys DesignWare xData traffic generator 3156 3158 3157 3159 N: Emanuel Pirker 3158 3160 E: epirker@edu.uni-klu.ac.at

+2 -2

Documentation/arch/riscv/cmodx.rst

··· 62 62 printf("Value before cmodx: %d\n", value); 63 63 64 64 // Call prctl before first fence.i is called inside modify_instruction 65 - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); 65 + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS); 66 66 modify_instruction(); 67 67 // Call prctl after final fence.i is called in process 68 - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); 68 + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS); 69 69 70 70 value = get_value(); 71 71 printf("Value after cmodx: %d\n", value);

+1 -1

Documentation/networking/devlink/devlink-region.rst

··· 49 49 $ devlink region show [ DEV/REGION ] 50 50 $ devlink region del DEV/REGION snapshot SNAPSHOT_ID 51 51 $ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ] 52 - $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length 52 + $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH 53 53 54 54 # Show all of the exposed regions with region sizes: 55 55 $ devlink region show

+2 -3

MAINTAINERS

··· 6239 6239 F: drivers/usb/dwc3/ 6240 6240 6241 6241 DESIGNWARE XDATA IP DRIVER 6242 - M: Gustavo Pimentel <gustavo.pimentel@synopsys.com> 6243 6242 L: linux-pci@vger.kernel.org 6244 - S: Maintained 6243 + S: Orphan 6245 6244 F: Documentation/misc-devices/dw-xdata-pcie.rst 6246 6245 F: drivers/misc/dw-xdata-pcie.c 6247 6246 ··· 14474 14475 M: Andrew Morton <akpm@linux-foundation.org> 14475 14476 R: Liam R. Howlett <Liam.Howlett@oracle.com> 14476 14477 R: Vlastimil Babka <vbabka@suse.cz> 14477 - R: Lorenzo Stoakes <lstoakes@gmail.com> 14478 + R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> 14478 14479 L: linux-mm@kvack.org 14479 14480 S: Maintained 14480 14481 W: http://www.linux-mm.org

+1 -1

Makefile

··· 2 2 VERSION = 6 3 3 PATCHLEVEL = 10 4 4 SUBLEVEL = 0 5 - EXTRAVERSION = -rc6 5 + EXTRAVERSION = -rc7 6 6 NAME = Baby Opossum Posse 7 7 8 8 # *DOCUMENTATION*

+5 -2

arch/powerpc/kernel/eeh_pe.c

··· 849 849 { 850 850 struct eeh_dev *edev; 851 851 struct pci_dev *pdev; 852 + struct pci_bus *bus = NULL; 852 853 853 854 if (pe->type & EEH_PE_PHB) 854 855 return pe->phb->bus; ··· 860 859 861 860 /* Retrieve the parent PCI bus of first (top) PCI device */ 862 861 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); 862 + pci_lock_rescan_remove(); 863 863 pdev = eeh_dev_to_pci_dev(edev); 864 864 if (pdev) 865 - return pdev->bus; 865 + bus = pdev->bus; 866 + pci_unlock_rescan_remove(); 866 867 867 - return NULL; 868 + return bus; 868 869 }

+3 -2

arch/powerpc/kernel/head_64.S

··· 647 647 * Note: This process overwrites the OF exception vectors. 648 648 */ 649 649 LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) 650 - mr. r4,r26 /* In some cases the loader may */ 651 - beq 9f /* have already put us at zero */ 650 + mr r4,r26 /* Load the virtual source address into r4 */ 651 + cmpld r3,r4 /* Check if source == dest */ 652 + beq 9f /* If so skip the copy */ 652 653 li r6,0x100 /* Start offset, the first 0x100 */ 653 654 /* bytes were copied earlier. */ 654 655

+11

arch/powerpc/kexec/core_64.c

··· 27 27 #include <asm/paca.h> 28 28 #include <asm/mmu.h> 29 29 #include <asm/sections.h> /* _end */ 30 + #include <asm/setup.h> 30 31 #include <asm/smp.h> 31 32 #include <asm/hw_breakpoint.h> 32 33 #include <asm/svm.h> ··· 317 316 318 317 if (!kdump_in_progress()) 319 318 kexec_prepare_cpus(); 319 + 320 + #ifdef CONFIG_PPC_PSERIES 321 + /* 322 + * This must be done after other CPUs have shut down, otherwise they 323 + * could execute the 'scv' instruction, which is not supported with 324 + * reloc disabled (see configure_exceptions()). 325 + */ 326 + if (firmware_has_feature(FW_FEATURE_SET_MODE)) 327 + pseries_disable_reloc_on_exc(); 328 + #endif 320 329 321 330 printk("kexec: Starting switchover sequence.\n"); 322 331

-8

arch/powerpc/platforms/pseries/kexec.c

··· 61 61 } else 62 62 xics_kexec_teardown_cpu(secondary); 63 63 } 64 - 65 - void pseries_machine_kexec(struct kimage *image) 66 - { 67 - if (firmware_has_feature(FW_FEATURE_SET_MODE)) 68 - pseries_disable_reloc_on_exc(); 69 - 70 - default_machine_kexec(image); 71 - }

-1

arch/powerpc/platforms/pseries/pseries.h

··· 38 38 #endif 39 39 40 40 extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); 41 - void pseries_machine_kexec(struct kimage *image); 42 41 43 42 extern void pSeries_final_fixup(void); 44 43

+2 -3

arch/powerpc/platforms/pseries/setup.c

··· 343 343 { 344 344 void (*ctor)(void *) = get_dtl_cache_ctor(); 345 345 346 - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, 347 - DISPATCH_LOG_BYTES, 0, ctor); 346 + dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES, 347 + DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor); 348 348 if (!dtl_cache) { 349 349 pr_warn("Failed to create dispatch trace log buffer cache\n"); 350 350 pr_warn("Stolen time statistics will be unreliable\n"); ··· 1159 1159 .machine_check_exception = pSeries_machine_check_exception, 1160 1160 .machine_check_log_err = pSeries_machine_check_log_err, 1161 1161 #ifdef CONFIG_KEXEC_CORE 1162 - .machine_kexec = pseries_machine_kexec, 1163 1162 .kexec_cpu_down = pseries_kexec_cpu_down, 1164 1163 #endif 1165 1164 #ifdef CONFIG_MEMORY_HOTPLUG

+1 -9

arch/riscv/kernel/machine_kexec.c

··· 121 121 122 122 for_each_irq_desc(i, desc) { 123 123 struct irq_chip *chip; 124 - int ret; 125 124 126 125 chip = irq_desc_get_chip(desc); 127 126 if (!chip) 128 127 continue; 129 128 130 - /* 131 - * First try to remove the active state. If this 132 - * fails, try to EOI the interrupt. 133 - */ 134 - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); 135 - 136 - if (ret && irqd_irq_inprogress(&desc->irq_data) && 137 - chip->irq_eoi) 129 + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) 138 130 chip->irq_eoi(&desc->irq_data); 139 131 140 132 if (chip->irq_mask)

+2 -1

arch/riscv/kernel/stacktrace.c

··· 32 32 bool (*fn)(void *, unsigned long), void *arg) 33 33 { 34 34 unsigned long fp, sp, pc; 35 + int graph_idx = 0; 35 36 int level = 0; 36 37 37 38 if (regs) { ··· 69 68 pc = regs->ra; 70 69 } else { 71 70 fp = frame->fp; 72 - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, 71 + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, 73 72 &frame->ra); 74 73 if (pc == (unsigned long)ret_from_exception) { 75 74 if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))

+1 -1

arch/riscv/kvm/vcpu_pmu.c

··· 327 327 328 328 event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); 329 329 if (IS_ERR(event)) { 330 - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); 330 + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); 331 331 return PTR_ERR(event); 332 332 } 333 333

+1

arch/s390/include/asm/kvm_host.h

··· 427 427 u64 instruction_io_other; 428 428 u64 instruction_lpsw; 429 429 u64 instruction_lpswe; 430 + u64 instruction_lpswey; 430 431 u64 instruction_pfmf; 431 432 u64 instruction_ptff; 432 433 u64 instruction_sck;

+1

arch/s390/kvm/kvm-s390.c

··· 132 132 STATS_DESC_COUNTER(VCPU, instruction_io_other), 133 133 STATS_DESC_COUNTER(VCPU, instruction_lpsw), 134 134 STATS_DESC_COUNTER(VCPU, instruction_lpswe), 135 + STATS_DESC_COUNTER(VCPU, instruction_lpswey), 135 136 STATS_DESC_COUNTER(VCPU, instruction_pfmf), 136 137 STATS_DESC_COUNTER(VCPU, instruction_ptff), 137 138 STATS_DESC_COUNTER(VCPU, instruction_sck),

+15

arch/s390/kvm/kvm-s390.h

··· 138 138 return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; 139 139 } 140 140 141 + static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar) 142 + { 143 + u32 base1 = vcpu->arch.sie_block->ipb >> 28; 144 + s64 disp1; 145 + 146 + /* The displacement is a 20bit _SIGNED_ value */ 147 + disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + 148 + ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19); 149 + 150 + if (ar) 151 + *ar = base1; 152 + 153 + return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; 154 + } 155 + 141 156 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, 142 157 u64 *address1, u64 *address2, 143 158 u8 *ar_b1, u8 *ar_b2)

+32

arch/s390/kvm/priv.c

··· 797 797 return 0; 798 798 } 799 799 800 + static int handle_lpswey(struct kvm_vcpu *vcpu) 801 + { 802 + psw_t new_psw; 803 + u64 addr; 804 + int rc; 805 + u8 ar; 806 + 807 + vcpu->stat.instruction_lpswey++; 808 + 809 + if (!test_kvm_facility(vcpu->kvm, 193)) 810 + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); 811 + 812 + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 813 + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 814 + 815 + addr = kvm_s390_get_base_disp_siy(vcpu, &ar); 816 + if (addr & 7) 817 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 818 + 819 + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); 820 + if (rc) 821 + return kvm_s390_inject_prog_cond(vcpu, rc); 822 + 823 + vcpu->arch.sie_block->gpsw = new_psw; 824 + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) 825 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 826 + 827 + return 0; 828 + } 829 + 800 830 static int handle_stidp(struct kvm_vcpu *vcpu) 801 831 { 802 832 u64 stidp_data = vcpu->kvm->arch.model.cpuid; ··· 1492 1462 case 0x61: 1493 1463 case 0x62: 1494 1464 return handle_ri(vcpu); 1465 + case 0x71: 1466 + return handle_lpswey(vcpu); 1495 1467 default: 1496 1468 return -EOPNOTSUPP; 1497 1469 }

+4

arch/s390/mm/pgalloc.c

··· 55 55 56 56 void crst_table_free(struct mm_struct *mm, unsigned long *table) 57 57 { 58 + if (!table) 59 + return; 58 60 pagetable_free(virt_to_ptdesc(table)); 59 61 } 60 62 ··· 264 262 265 263 static void base_crst_free(unsigned long *table) 266 264 { 265 + if (!table) 266 + return; 267 267 pagetable_free(virt_to_ptdesc(table)); 268 268 } 269 269

+1 -1

arch/xtensa/include/asm/current.h

··· 19 19 20 20 struct task_struct; 21 21 22 - static inline struct task_struct *get_current(void) 22 + static __always_inline struct task_struct *get_current(void) 23 23 { 24 24 return current_thread_info()->task; 25 25 }

+1 -1

arch/xtensa/include/asm/thread_info.h

··· 91 91 } 92 92 93 93 /* how to get the thread information struct from C */ 94 - static inline struct thread_info *current_thread_info(void) 94 + static __always_inline struct thread_info *current_thread_info(void) 95 95 { 96 96 struct thread_info *ti; 97 97 __asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"

+16 -21

drivers/acpi/processor_idle.c

··· 16 16 #include <linux/acpi.h> 17 17 #include <linux/dmi.h> 18 18 #include <linux/sched.h> /* need_resched() */ 19 - #include <linux/sort.h> 20 19 #include <linux/tick.h> 21 20 #include <linux/cpuidle.h> 22 21 #include <linux/cpu.h> ··· 385 386 acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1); 386 387 } 387 388 388 - static int acpi_cst_latency_cmp(const void *a, const void *b) 389 + static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length) 389 390 { 390 - const struct acpi_processor_cx *x = a, *y = b; 391 + int i, j, k; 391 392 392 - if (!(x->valid && y->valid)) 393 - return 0; 394 - if (x->latency > y->latency) 395 - return 1; 396 - if (x->latency < y->latency) 397 - return -1; 398 - return 0; 399 - } 400 - static void acpi_cst_latency_swap(void *a, void *b, int n) 401 - { 402 - struct acpi_processor_cx *x = a, *y = b; 393 + for (i = 1; i < length; i++) { 394 + if (!states[i].valid) 395 + continue; 403 396 404 - if (!(x->valid && y->valid)) 405 - return; 406 - swap(x->latency, y->latency); 397 + for (j = i - 1, k = i; j >= 0; j--) { 398 + if (!states[j].valid) 399 + continue; 400 + 401 + if (states[j].latency > states[k].latency) 402 + swap(states[j].latency, states[k].latency); 403 + 404 + k = j; 405 + } 406 + } 407 407 } 408 408 409 409 static int acpi_processor_power_verify(struct acpi_processor *pr) ··· 447 449 448 450 if (buggy_latency) { 449 451 pr_notice("FW issue: working around C-state latencies out of order\n"); 450 - sort(&pr->power.states[1], max_cstate, 451 - sizeof(struct acpi_processor_cx), 452 - acpi_cst_latency_cmp, 453 - acpi_cst_latency_swap); 452 + acpi_cst_latency_sort(&pr->power.states[1], max_cstate); 454 453 } 455 454 456 455 lapic_timer_propagate_broadcast(pr);

+1 -1

drivers/char/tpm/Makefile

··· 16 16 tpm-y += eventlog/tpm1.o 17 17 tpm-y += eventlog/tpm2.o 18 18 tpm-y += tpm-buf.o 19 + tpm-y += tpm2-sessions.o 19 20 20 - tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o 21 21 tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o 22 22 tpm-$(CONFIG_EFI) += eventlog/efi.o 23 23 tpm-$(CONFIG_OF) += eventlog/of.o

+240 -179

drivers/char/tpm/tpm2-sessions.c

··· 83 83 #define AES_KEY_BYTES AES_KEYSIZE_128 84 84 #define AES_KEY_BITS (AES_KEY_BYTES*8) 85 85 86 - static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, 87 - u32 *handle, u8 *name); 88 - 89 86 /* 90 87 * This is the structure that carries all the auth information (like 91 88 * session handle, nonces, session key and auth) from use to use it is ··· 145 148 u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE]; 146 149 }; 147 150 151 + #ifdef CONFIG_TCG_TPM2_HMAC 148 152 /* 149 153 * Name Size based on TPM algorithm (assumes no hash bigger than 255) 150 154 */ ··· 160 162 u16 alg = get_unaligned_be16(name); 161 163 return size_map[alg] + 2; 162 164 } 165 + 166 + static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) 167 + { 168 + struct tpm_header *head = (struct tpm_header *)buf->data; 169 + off_t offset = TPM_HEADER_SIZE; 170 + u32 tot_len = be32_to_cpu(head->length); 171 + u32 val; 172 + 173 + /* we're starting after the header so adjust the length */ 174 + tot_len -= TPM_HEADER_SIZE; 175 + 176 + /* skip public */ 177 + val = tpm_buf_read_u16(buf, &offset); 178 + if (val > tot_len) 179 + return -EINVAL; 180 + offset += val; 181 + /* name */ 182 + val = tpm_buf_read_u16(buf, &offset); 183 + if (val != name_size(&buf->data[offset])) 184 + return -EINVAL; 185 + memcpy(name, &buf->data[offset], val); 186 + /* forget the rest */ 187 + return 0; 188 + } 189 + 190 + static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) 191 + { 192 + struct tpm_buf buf; 193 + int rc; 194 + 195 + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); 196 + if (rc) 197 + return rc; 198 + 199 + tpm_buf_append_u32(&buf, handle); 200 + rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); 201 + if (rc == TPM2_RC_SUCCESS) 202 + rc = tpm2_parse_read_public(name, &buf); 203 + 204 + tpm_buf_destroy(&buf); 205 + 206 + return rc; 207 + } 208 + #endif /* CONFIG_TCG_TPM2_HMAC */ 209 + 210 + /** 211 + * tpm_buf_append_name() - add a handle area 
to the buffer 212 + * @chip: the TPM chip structure 213 + * @buf: The buffer to be appended 214 + * @handle: The handle to be appended 215 + * @name: The name of the handle (may be NULL) 216 + * 217 + * In order to compute session HMACs, we need to know the names of the 218 + * objects pointed to by the handles. For most objects, this is simply 219 + * the actual 4 byte handle or an empty buf (in these cases @name 220 + * should be NULL) but for volatile objects, permanent objects and NV 221 + * areas, the name is defined as the hash (according to the name 222 + * algorithm which should be set to sha256) of the public area to 223 + * which the two byte algorithm id has been appended. For these 224 + * objects, the @name pointer should point to this. If a name is 225 + * required but @name is NULL, then TPM2_ReadPublic() will be called 226 + * on the handle to obtain the name. 227 + * 228 + * As with most tpm_buf operations, success is assumed because failure 229 + * will be caused by an incorrect programming model and indicated by a 230 + * kernel message. 
231 + */ 232 + void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, 233 + u32 handle, u8 *name) 234 + { 235 + #ifdef CONFIG_TCG_TPM2_HMAC 236 + enum tpm2_mso_type mso = tpm2_handle_mso(handle); 237 + struct tpm2_auth *auth; 238 + int slot; 239 + #endif 240 + 241 + if (!tpm2_chip_auth(chip)) { 242 + tpm_buf_append_u32(buf, handle); 243 + /* count the number of handles in the upper bits of flags */ 244 + buf->handles++; 245 + return; 246 + } 247 + 248 + #ifdef CONFIG_TCG_TPM2_HMAC 249 + slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4; 250 + if (slot >= AUTH_MAX_NAMES) { 251 + dev_err(&chip->dev, "TPM: too many handles\n"); 252 + return; 253 + } 254 + auth = chip->auth; 255 + WARN(auth->session != tpm_buf_length(buf), 256 + "name added in wrong place\n"); 257 + tpm_buf_append_u32(buf, handle); 258 + auth->session += 4; 259 + 260 + if (mso == TPM2_MSO_PERSISTENT || 261 + mso == TPM2_MSO_VOLATILE || 262 + mso == TPM2_MSO_NVRAM) { 263 + if (!name) 264 + tpm2_read_public(chip, handle, auth->name[slot]); 265 + } else { 266 + if (name) 267 + dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); 268 + } 269 + 270 + auth->name_h[slot] = handle; 271 + if (name) 272 + memcpy(auth->name[slot], name, name_size(name)); 273 + #endif 274 + } 275 + EXPORT_SYMBOL_GPL(tpm_buf_append_name); 276 + 277 + /** 278 + * tpm_buf_append_hmac_session() - Append a TPM session element 279 + * @chip: the TPM chip structure 280 + * @buf: The buffer to be appended 281 + * @attributes: The session attributes 282 + * @passphrase: The session authority (NULL if none) 283 + * @passphrase_len: The length of the session authority (0 if none) 284 + * 285 + * This fills in a session structure in the TPM command buffer, except 286 + * for the HMAC which cannot be computed until the command buffer is 287 + * complete. 
The type of session is controlled by the @attributes, 288 + * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the 289 + * session won't terminate after tpm_buf_check_hmac_response(), 290 + * TPM2_SA_DECRYPT which means this buffers first parameter should be 291 + * encrypted with a session key and TPM2_SA_ENCRYPT, which means the 292 + * response buffer's first parameter needs to be decrypted (confusing, 293 + * but the defines are written from the point of view of the TPM). 294 + * 295 + * Any session appended by this command must be finalized by calling 296 + * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect 297 + * and the TPM will reject the command. 298 + * 299 + * As with most tpm_buf operations, success is assumed because failure 300 + * will be caused by an incorrect programming model and indicated by a 301 + * kernel message. 302 + */ 303 + void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, 304 + u8 attributes, u8 *passphrase, 305 + int passphrase_len) 306 + { 307 + #ifdef CONFIG_TCG_TPM2_HMAC 308 + u8 nonce[SHA256_DIGEST_SIZE]; 309 + struct tpm2_auth *auth; 310 + u32 len; 311 + #endif 312 + 313 + if (!tpm2_chip_auth(chip)) { 314 + /* offset tells us where the sessions area begins */ 315 + int offset = buf->handles * 4 + TPM_HEADER_SIZE; 316 + u32 len = 9 + passphrase_len; 317 + 318 + if (tpm_buf_length(buf) != offset) { 319 + /* not the first session so update the existing length */ 320 + len += get_unaligned_be32(&buf->data[offset]); 321 + put_unaligned_be32(len, &buf->data[offset]); 322 + } else { 323 + tpm_buf_append_u32(buf, len); 324 + } 325 + /* auth handle */ 326 + tpm_buf_append_u32(buf, TPM2_RS_PW); 327 + /* nonce */ 328 + tpm_buf_append_u16(buf, 0); 329 + /* attributes */ 330 + tpm_buf_append_u8(buf, 0); 331 + /* passphrase */ 332 + tpm_buf_append_u16(buf, passphrase_len); 333 + tpm_buf_append(buf, passphrase, passphrase_len); 334 + return; 335 + } 336 + 337 + #ifdef CONFIG_TCG_TPM2_HMAC 
338 + /* 339 + * The Architecture Guide requires us to strip trailing zeros 340 + * before computing the HMAC 341 + */ 342 + while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0') 343 + passphrase_len--; 344 + 345 + auth = chip->auth; 346 + auth->attrs = attributes; 347 + auth->passphrase_len = passphrase_len; 348 + if (passphrase_len) 349 + memcpy(auth->passphrase, passphrase, passphrase_len); 350 + 351 + if (auth->session != tpm_buf_length(buf)) { 352 + /* we're not the first session */ 353 + len = get_unaligned_be32(&buf->data[auth->session]); 354 + if (4 + len + auth->session != tpm_buf_length(buf)) { 355 + WARN(1, "session length mismatch, cannot append"); 356 + return; 357 + } 358 + 359 + /* add our new session */ 360 + len += 9 + 2 * SHA256_DIGEST_SIZE; 361 + put_unaligned_be32(len, &buf->data[auth->session]); 362 + } else { 363 + tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); 364 + } 365 + 366 + /* random number for our nonce */ 367 + get_random_bytes(nonce, sizeof(nonce)); 368 + memcpy(auth->our_nonce, nonce, sizeof(nonce)); 369 + tpm_buf_append_u32(buf, auth->handle); 370 + /* our new nonce */ 371 + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); 372 + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); 373 + tpm_buf_append_u8(buf, auth->attrs); 374 + /* and put a placeholder for the hmac */ 375 + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); 376 + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); 377 + #endif 378 + } 379 + EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session); 380 + 381 + #ifdef CONFIG_TCG_TPM2_HMAC 382 + 383 + static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, 384 + u32 *handle, u8 *name); 163 385 164 386 /* 165 387 * It turns out the crypto hmac(sha256) is hard for us to consume ··· 562 344 } 563 345 564 346 /** 565 - * tpm_buf_append_hmac_session() - Append a TPM session element 566 - * @chip: the TPM chip structure 567 - * @buf: The buffer to be appended 568 - * @attributes: The session attributes 
569 - * @passphrase: The session authority (NULL if none) 570 - * @passphrase_len: The length of the session authority (0 if none) 571 - * 572 - * This fills in a session structure in the TPM command buffer, except 573 - * for the HMAC which cannot be computed until the command buffer is 574 - * complete. The type of session is controlled by the @attributes, 575 - * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the 576 - * session won't terminate after tpm_buf_check_hmac_response(), 577 - * TPM2_SA_DECRYPT which means this buffers first parameter should be 578 - * encrypted with a session key and TPM2_SA_ENCRYPT, which means the 579 - * response buffer's first parameter needs to be decrypted (confusing, 580 - * but the defines are written from the point of view of the TPM). 581 - * 582 - * Any session appended by this command must be finalized by calling 583 - * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect 584 - * and the TPM will reject the command. 585 - * 586 - * As with most tpm_buf operations, success is assumed because failure 587 - * will be caused by an incorrect programming model and indicated by a 588 - * kernel message. 
589 - */ 590 - void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, 591 - u8 attributes, u8 *passphrase, 592 - int passphrase_len) 593 - { 594 - u8 nonce[SHA256_DIGEST_SIZE]; 595 - u32 len; 596 - struct tpm2_auth *auth = chip->auth; 597 - 598 - /* 599 - * The Architecture Guide requires us to strip trailing zeros 600 - * before computing the HMAC 601 - */ 602 - while (passphrase && passphrase_len > 0 603 - && passphrase[passphrase_len - 1] == '\0') 604 - passphrase_len--; 605 - 606 - auth->attrs = attributes; 607 - auth->passphrase_len = passphrase_len; 608 - if (passphrase_len) 609 - memcpy(auth->passphrase, passphrase, passphrase_len); 610 - 611 - if (auth->session != tpm_buf_length(buf)) { 612 - /* we're not the first session */ 613 - len = get_unaligned_be32(&buf->data[auth->session]); 614 - if (4 + len + auth->session != tpm_buf_length(buf)) { 615 - WARN(1, "session length mismatch, cannot append"); 616 - return; 617 - } 618 - 619 - /* add our new session */ 620 - len += 9 + 2 * SHA256_DIGEST_SIZE; 621 - put_unaligned_be32(len, &buf->data[auth->session]); 622 - } else { 623 - tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); 624 - } 625 - 626 - /* random number for our nonce */ 627 - get_random_bytes(nonce, sizeof(nonce)); 628 - memcpy(auth->our_nonce, nonce, sizeof(nonce)); 629 - tpm_buf_append_u32(buf, auth->handle); 630 - /* our new nonce */ 631 - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); 632 - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); 633 - tpm_buf_append_u8(buf, auth->attrs); 634 - /* and put a placeholder for the hmac */ 635 - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); 636 - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); 637 - } 638 - EXPORT_SYMBOL(tpm_buf_append_hmac_session); 639 - 640 - /** 641 347 * tpm_buf_fill_hmac_session() - finalize the session HMAC 642 348 * @chip: the TPM chip structure 643 349 * @buf: The buffer to be appended ··· 590 448 u32 attrs; 591 449 u8 cphash[SHA256_DIGEST_SIZE]; 592 450 struct 
sha256_state sctx; 451 + 452 + if (!auth) 453 + return; 593 454 594 455 /* save the command code in BE format */ 595 456 auth->ordinal = head->ordinal; ··· 712 567 } 713 568 EXPORT_SYMBOL(tpm_buf_fill_hmac_session); 714 569 715 - static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) 716 - { 717 - struct tpm_header *head = (struct tpm_header *)buf->data; 718 - off_t offset = TPM_HEADER_SIZE; 719 - u32 tot_len = be32_to_cpu(head->length); 720 - u32 val; 721 - 722 - /* we're starting after the header so adjust the length */ 723 - tot_len -= TPM_HEADER_SIZE; 724 - 725 - /* skip public */ 726 - val = tpm_buf_read_u16(buf, &offset); 727 - if (val > tot_len) 728 - return -EINVAL; 729 - offset += val; 730 - /* name */ 731 - val = tpm_buf_read_u16(buf, &offset); 732 - if (val != name_size(&buf->data[offset])) 733 - return -EINVAL; 734 - memcpy(name, &buf->data[offset], val); 735 - /* forget the rest */ 736 - return 0; 737 - } 738 - 739 - static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) 740 - { 741 - struct tpm_buf buf; 742 - int rc; 743 - 744 - rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); 745 - if (rc) 746 - return rc; 747 - 748 - tpm_buf_append_u32(&buf, handle); 749 - rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); 750 - if (rc == TPM2_RC_SUCCESS) 751 - rc = tpm2_parse_read_public(name, &buf); 752 - 753 - tpm_buf_destroy(&buf); 754 - 755 - return rc; 756 - } 757 - 758 - /** 759 - * tpm_buf_append_name() - add a handle area to the buffer 760 - * @chip: the TPM chip structure 761 - * @buf: The buffer to be appended 762 - * @handle: The handle to be appended 763 - * @name: The name of the handle (may be NULL) 764 - * 765 - * In order to compute session HMACs, we need to know the names of the 766 - * objects pointed to by the handles. 
For most objects, this is simply 767 - * the actual 4 byte handle or an empty buf (in these cases @name 768 - * should be NULL) but for volatile objects, permanent objects and NV 769 - * areas, the name is defined as the hash (according to the name 770 - * algorithm which should be set to sha256) of the public area to 771 - * which the two byte algorithm id has been appended. For these 772 - * objects, the @name pointer should point to this. If a name is 773 - * required but @name is NULL, then TPM2_ReadPublic() will be called 774 - * on the handle to obtain the name. 775 - * 776 - * As with most tpm_buf operations, success is assumed because failure 777 - * will be caused by an incorrect programming model and indicated by a 778 - * kernel message. 779 - */ 780 - void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, 781 - u32 handle, u8 *name) 782 - { 783 - enum tpm2_mso_type mso = tpm2_handle_mso(handle); 784 - struct tpm2_auth *auth = chip->auth; 785 - int slot; 786 - 787 - slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4; 788 - if (slot >= AUTH_MAX_NAMES) { 789 - dev_err(&chip->dev, "TPM: too many handles\n"); 790 - return; 791 - } 792 - WARN(auth->session != tpm_buf_length(buf), 793 - "name added in wrong place\n"); 794 - tpm_buf_append_u32(buf, handle); 795 - auth->session += 4; 796 - 797 - if (mso == TPM2_MSO_PERSISTENT || 798 - mso == TPM2_MSO_VOLATILE || 799 - mso == TPM2_MSO_NVRAM) { 800 - if (!name) 801 - tpm2_read_public(chip, handle, auth->name[slot]); 802 - } else { 803 - if (name) 804 - dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); 805 - } 806 - 807 - auth->name_h[slot] = handle; 808 - if (name) 809 - memcpy(auth->name[slot], name, name_size(name)); 810 - } 811 - EXPORT_SYMBOL(tpm_buf_append_name); 812 - 813 570 /** 814 571 * tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness 815 572 * @chip: the TPM chip structure ··· 751 704 u16 tag = be16_to_cpu(head->tag); 752 705 u32 cc = 
be32_to_cpu(auth->ordinal); 753 706 int parm_len, len, i, handles; 707 + 708 + if (!auth) 709 + return rc; 754 710 755 711 if (auth->session >= TPM_HEADER_SIZE) { 756 712 WARN(1, "tpm session not filled correctly\n"); ··· 874 824 */ 875 825 void tpm2_end_auth_session(struct tpm_chip *chip) 876 826 { 877 - tpm2_flush_context(chip, chip->auth->handle); 878 - memzero_explicit(chip->auth, sizeof(*chip->auth)); 827 + struct tpm2_auth *auth = chip->auth; 828 + 829 + if (!auth) 830 + return; 831 + 832 + tpm2_flush_context(chip, auth->handle); 833 + memzero_explicit(auth, sizeof(*auth)); 879 834 } 880 835 EXPORT_SYMBOL(tpm2_end_auth_session); 881 836 ··· 961 906 struct tpm2_auth *auth = chip->auth; 962 907 int rc; 963 908 u32 null_key; 909 + 910 + if (!auth) { 911 + dev_warn_once(&chip->dev, "auth session is not active\n"); 912 + return 0; 913 + } 964 914 965 915 rc = tpm2_load_null(chip, &null_key); 966 916 if (rc) ··· 1361 1301 1362 1302 return rc; 1363 1303 } 1304 + #endif /* CONFIG_TCG_TPM2_HMAC */

+1

drivers/clk/mediatek/clk-mt8183-mfgcfg.c

··· 29 29 static const struct mtk_clk_desc mfg_desc = { 30 30 .clks = mfg_clks, 31 31 .num_clks = ARRAY_SIZE(mfg_clks), 32 + .need_runtime_pm = true, 32 33 }; 33 34 34 35 static const struct of_device_id of_match_clk_mt8183_mfg[] = {

+14 -10

drivers/clk/mediatek/clk-mtk.c

··· 496 496 } 497 497 498 498 499 - devm_pm_runtime_enable(&pdev->dev); 500 - /* 501 - * Do a pm_runtime_resume_and_get() to workaround a possible 502 - * deadlock between clk_register() and the genpd framework. 503 - */ 504 - r = pm_runtime_resume_and_get(&pdev->dev); 505 - if (r) 506 - return r; 499 + if (mcd->need_runtime_pm) { 500 + devm_pm_runtime_enable(&pdev->dev); 501 + /* 502 + * Do a pm_runtime_resume_and_get() to workaround a possible 503 + * deadlock between clk_register() and the genpd framework. 504 + */ 505 + r = pm_runtime_resume_and_get(&pdev->dev); 506 + if (r) 507 + return r; 508 + } 507 509 508 510 /* Calculate how many clk_hw_onecell_data entries to allocate */ 509 511 num_clks = mcd->num_clks + mcd->num_composite_clks; ··· 587 585 goto unregister_clks; 588 586 } 589 587 590 - pm_runtime_put(&pdev->dev); 588 + if (mcd->need_runtime_pm) 589 + pm_runtime_put(&pdev->dev); 591 590 592 591 return r; 593 592 ··· 621 618 if (mcd->shared_io && base) 622 619 iounmap(base); 623 620 624 - pm_runtime_put(&pdev->dev); 621 + if (mcd->need_runtime_pm) 622 + pm_runtime_put(&pdev->dev); 625 623 return r; 626 624 } 627 625

+2

drivers/clk/mediatek/clk-mtk.h

··· 237 237 238 238 int (*clk_notifier_func)(struct device *dev, struct clk *clk); 239 239 unsigned int mfg_clk_idx; 240 + 241 + bool need_runtime_pm; 240 242 }; 241 243 242 244 int mtk_clk_pdev_probe(struct platform_device *pdev);

-2

drivers/clk/qcom/apss-ipq-pll.c

··· 70 70 static const struct alpha_pll_config ipq5018_pll_config = { 71 71 .l = 0x2a, 72 72 .config_ctl_val = 0x4001075b, 73 - .config_ctl_hi_val = 0x304, 74 73 .main_output_mask = BIT(0), 75 74 .aux_output_mask = BIT(1), 76 75 .early_output_mask = BIT(3), ··· 83 84 static const struct alpha_pll_config ipq5332_pll_config = { 84 85 .l = 0x2d, 85 86 .config_ctl_val = 0x4001075b, 86 - .config_ctl_hi_val = 0x304, 87 87 .main_output_mask = BIT(0), 88 88 .aux_output_mask = BIT(1), 89 89 .early_output_mask = BIT(3),

+3

drivers/clk/qcom/clk-alpha-pll.c

··· 2574 2574 regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll), 2575 2575 a >> ALPHA_BITWIDTH); 2576 2576 2577 + regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll), 2578 + PLL_ALPHA_EN, PLL_ALPHA_EN); 2579 + 2577 2580 regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL); 2578 2581 2579 2582 /* Wait five micro seconds or more */

+6 -4

drivers/clk/qcom/gcc-ipq9574.c

··· 2140 2140 2141 2141 static struct clk_branch gcc_crypto_axi_clk = { 2142 2142 .halt_reg = 0x16010, 2143 + .halt_check = BRANCH_HALT_VOTED, 2143 2144 .clkr = { 2144 - .enable_reg = 0x16010, 2145 - .enable_mask = BIT(0), 2145 + .enable_reg = 0xb004, 2146 + .enable_mask = BIT(15), 2146 2147 .hw.init = &(const struct clk_init_data) { 2147 2148 .name = "gcc_crypto_axi_clk", 2148 2149 .parent_hws = (const struct clk_hw *[]) { ··· 2157 2156 2158 2157 static struct clk_branch gcc_crypto_ahb_clk = { 2159 2158 .halt_reg = 0x16014, 2159 + .halt_check = BRANCH_HALT_VOTED, 2160 2160 .clkr = { 2161 - .enable_reg = 0x16014, 2162 - .enable_mask = BIT(0), 2161 + .enable_reg = 0xb004, 2162 + .enable_mask = BIT(16), 2163 2163 .hw.init = &(const struct clk_init_data) { 2164 2164 .name = "gcc_crypto_ahb_clk", 2165 2165 .parent_hws = (const struct clk_hw *[]) {

+5 -5

drivers/clk/qcom/gcc-sm6350.c

··· 100 100 .enable_mask = BIT(6), 101 101 .hw.init = &(struct clk_init_data){ 102 102 .name = "gpll6", 103 - .parent_hws = (const struct clk_hw*[]){ 104 - &gpll0.clkr.hw, 103 + .parent_data = &(const struct clk_parent_data){ 104 + .fw_name = "bi_tcxo", 105 105 }, 106 106 .num_parents = 1, 107 107 .ops = &clk_alpha_pll_fixed_fabia_ops, ··· 124 124 .clkr.hw.init = &(struct clk_init_data){ 125 125 .name = "gpll6_out_even", 126 126 .parent_hws = (const struct clk_hw*[]){ 127 - &gpll0.clkr.hw, 127 + &gpll6.clkr.hw, 128 128 }, 129 129 .num_parents = 1, 130 130 .ops = &clk_alpha_pll_postdiv_fabia_ops, ··· 139 139 .enable_mask = BIT(7), 140 140 .hw.init = &(struct clk_init_data){ 141 141 .name = "gpll7", 142 - .parent_hws = (const struct clk_hw*[]){ 143 - &gpll0.clkr.hw, 142 + .parent_data = &(const struct clk_parent_data){ 143 + .fw_name = "bi_tcxo", 144 144 }, 145 145 .num_parents = 1, 146 146 .ops = &clk_alpha_pll_fixed_fabia_ops,

+12 -6

drivers/clk/sunxi-ng/ccu_common.c

··· 132 132 133 133 for (i = 0; i < desc->hw_clks->num ; i++) { 134 134 struct clk_hw *hw = desc->hw_clks->hws[i]; 135 - struct ccu_common *common = hw_to_ccu_common(hw); 136 135 const char *name; 137 136 138 137 if (!hw) ··· 146 147 pr_err("Couldn't register clock %d - %s\n", i, name); 147 148 goto err_clk_unreg; 148 149 } 150 + } 149 151 150 - if (common->max_rate) 151 - clk_hw_set_rate_range(hw, common->min_rate, 152 - common->max_rate); 152 + for (i = 0; i < desc->num_ccu_clks; i++) { 153 + struct ccu_common *cclk = desc->ccu_clks[i]; 154 + 155 + if (!cclk) 156 + continue; 157 + 158 + if (cclk->max_rate) 159 + clk_hw_set_rate_range(&cclk->hw, cclk->min_rate, 160 + cclk->max_rate); 153 161 else 154 - WARN(common->min_rate, 162 + WARN(cclk->min_rate, 155 163 "No max_rate, ignoring min_rate of clock %d - %s\n", 156 - i, name); 164 + i, clk_hw_get_name(&cclk->hw)); 157 165 } 158 166 159 167 ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get,

+3 -1

drivers/cpufreq/acpi-cpufreq.c

··· 890 890 if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) 891 891 pr_warn(FW_WARN "P-state 0 is not max freq\n"); 892 892 893 - if (acpi_cpufreq_driver.set_boost) 893 + if (acpi_cpufreq_driver.set_boost) { 894 894 set_boost(policy, acpi_cpufreq_driver.boost_enabled); 895 + policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; 896 + } 895 897 896 898 return result; 897 899

+2 -1

drivers/cpufreq/cpufreq.c

··· 1431 1431 } 1432 1432 1433 1433 /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ 1434 - policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); 1434 + if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) 1435 + policy->boost_enabled = true; 1435 1436 1436 1437 /* 1437 1438 * The initialization has succeeded and the policy is online.

+8 -4

drivers/firmware/sysfb.c

··· 101 101 if (IS_ERR(pdev)) { 102 102 return ERR_CAST(pdev); 103 103 } else if (pdev) { 104 - if (!sysfb_pci_dev_is_enabled(pdev)) 104 + if (!sysfb_pci_dev_is_enabled(pdev)) { 105 + pci_dev_put(pdev); 105 106 return ERR_PTR(-ENODEV); 107 + } 106 108 return &pdev->dev; 107 109 } 108 110 ··· 139 137 if (compatible) { 140 138 pd = sysfb_create_simplefb(si, &mode, parent); 141 139 if (!IS_ERR(pd)) 142 - goto unlock_mutex; 140 + goto put_device; 143 141 } 144 142 145 143 /* if the FB is incompatible, create a legacy framebuffer device */ ··· 157 155 pd = platform_device_alloc(name, 0); 158 156 if (!pd) { 159 157 ret = -ENOMEM; 160 - goto unlock_mutex; 158 + goto put_device; 161 159 } 162 160 163 161 pd->dev.parent = parent; ··· 172 170 if (ret) 173 171 goto err; 174 172 175 - goto unlock_mutex; 173 + goto put_device; 176 174 err: 177 175 platform_device_put(pd); 176 + put_device: 177 + put_device(parent); 178 178 unlock_mutex: 179 179 mutex_unlock(&disable_lock); 180 180 return ret;

-2

drivers/gpio/gpio-mmio.c

··· 619 619 ret = gpiochip_get_ngpios(gc, dev); 620 620 if (ret) 621 621 gc->ngpio = gc->bgpio_bits; 622 - else 623 - gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8)); 624 622 625 623 ret = bgpio_setup_io(gc, dat, set, clr, flags); 626 624 if (ret)

+20 -2

drivers/gpio/gpiolib-of.c

··· 203 203 */ 204 204 { "qi,lb60", "rb-gpios", true }, 205 205 #endif 206 + #if IS_ENABLED(CONFIG_PCI_LANTIQ) 207 + /* 208 + * According to the PCI specification, the RST# pin is an 209 + * active-low signal. However, most of the device trees that 210 + * have been widely used for a long time incorrectly describe 211 + * reset GPIO as active-high, and were also using wrong name 212 + * for the property. 213 + */ 214 + { "lantiq,pci-xway", "gpio-reset", false }, 215 + #endif 216 + #if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005) 217 + /* 218 + * DTS for Nokia N900 incorrectly specified "active high" 219 + * polarity for the reset line, while the chip actually 220 + * treats it as "active low". 221 + */ 222 + { "ti,tsc2005", "reset-gpios", false }, 223 + #endif 206 224 }; 207 225 unsigned int i; 208 226 ··· 522 504 { "reset", "reset-n-io", "marvell,nfc-uart" }, 523 505 { "reset", "reset-n-io", "mrvl,nfc-uart" }, 524 506 #endif 525 - #if !IS_ENABLED(CONFIG_PCI_LANTIQ) 507 + #if IS_ENABLED(CONFIG_PCI_LANTIQ) 526 508 /* MIPS Lantiq PCI */ 527 - { "reset", "gpios-reset", "lantiq,pci-xway" }, 509 + { "reset", "gpio-reset", "lantiq,pci-xway" }, 528 510 #endif 529 511 530 512 /*

+52 -1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 10048 10048 } 10049 10049 10050 10050 /* Update Freesync settings. */ 10051 + reset_freesync_config_for_crtc(dm_new_crtc_state); 10051 10052 get_freesync_config_for_crtc(dm_new_crtc_state, 10052 10053 dm_new_conn_state); 10053 10054 ··· 11182 11181 return ret; 11183 11182 } 11184 11183 11184 + static void parse_edid_displayid_vrr(struct drm_connector *connector, 11185 + struct edid *edid) 11186 + { 11187 + u8 *edid_ext = NULL; 11188 + int i; 11189 + int j = 0; 11190 + u16 min_vfreq; 11191 + u16 max_vfreq; 11192 + 11193 + if (edid == NULL || edid->extensions == 0) 11194 + return; 11195 + 11196 + /* Find DisplayID extension */ 11197 + for (i = 0; i < edid->extensions; i++) { 11198 + edid_ext = (void *)(edid + (i + 1)); 11199 + if (edid_ext[0] == DISPLAYID_EXT) 11200 + break; 11201 + } 11202 + 11203 + if (edid_ext == NULL) 11204 + return; 11205 + 11206 + while (j < EDID_LENGTH) { 11207 + /* Get dynamic video timing range from DisplayID if available */ 11208 + if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 && 11209 + (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) { 11210 + min_vfreq = edid_ext[j+9]; 11211 + if (edid_ext[j+1] & 7) 11212 + max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8); 11213 + else 11214 + max_vfreq = edid_ext[j+10]; 11215 + 11216 + if (max_vfreq && min_vfreq) { 11217 + connector->display_info.monitor_range.max_vfreq = max_vfreq; 11218 + connector->display_info.monitor_range.min_vfreq = min_vfreq; 11219 + 11220 + return; 11221 + } 11222 + } 11223 + j++; 11224 + } 11225 + } 11226 + 11185 11227 static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, 11186 11228 struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) 11187 11229 { ··· 11346 11302 if (!adev->dm.freesync_module) 11347 11303 goto update; 11348 11304 11305 + /* Some eDP panels only have the refresh rate range info in DisplayID */ 11306 + if ((connector->display_info.monitor_range.min_vfreq == 0 || 11307 + connector->display_info.monitor_range.max_vfreq == 0)) 
11308 + parse_edid_displayid_vrr(connector, edid); 11309 + 11349 11310 if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || 11350 11311 sink->sink_signal == SIGNAL_TYPE_EDP)) { 11351 11312 bool edid_check_required = false; ··· 11358 11309 if (is_dp_capable_without_timing_msa(adev->dm.dc, 11359 11310 amdgpu_dm_connector)) { 11360 11311 if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { 11361 - freesync_capable = true; 11362 11312 amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; 11363 11313 amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; 11314 + if (amdgpu_dm_connector->max_vfreq - 11315 + amdgpu_dm_connector->min_vfreq > 10) 11316 + freesync_capable = true; 11364 11317 } else { 11365 11318 edid_check_required = edid->version > 1 || 11366 11319 (edid->version == 1 &&

+3

drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c

··· 3364 3364 &mode_lib->vba.UrgentBurstFactorLumaPre[k], 3365 3365 &mode_lib->vba.UrgentBurstFactorChromaPre[k], 3366 3366 &mode_lib->vba.NotUrgentLatencyHidingPre[k]); 3367 + 3368 + v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 3369 + 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k]; 3367 3370 } 3368 3371 3369 3372 {

+1

drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c

··· 234 234 out->round_trip_ping_latency_dcfclk_cycles = 106; 235 235 out->smn_latency_us = 2; 236 236 out->dispclk_dppclk_vco_speed_mhz = 3600; 237 + out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0; 237 238 break; 238 239 239 240 }

+1 -1

drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c

··· 294 294 context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000; 295 295 context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; 296 296 297 - if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported) 297 + if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported) 298 298 context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; 299 299 else 300 300 context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;

+1 -1

drivers/gpu/drm/amd/include/atomfirmware.h

··· 734 734 { 735 735 struct atom_common_table_header table_header; 736 736 /*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ 737 - struct atom_gpio_pin_assignment gpio_pin[8]; 737 + struct atom_gpio_pin_assignment gpio_pin[]; 738 738 }; 739 739 740 740

+2 -1

drivers/gpu/drm/drm_fbdev_generic.c

··· 84 84 sizes->surface_width, sizes->surface_height, 85 85 sizes->surface_bpp); 86 86 87 - format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); 87 + format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, 88 + sizes->surface_depth); 88 89 buffer = drm_client_framebuffer_create(client, sizes->surface_width, 89 90 sizes->surface_height, format); 90 91 if (IS_ERR(buffer))

+8 -1

drivers/gpu/drm/drm_panel_orientation_quirks.c

··· 420 420 DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"), 421 421 }, 422 422 .driver_data = (void *)&lcd1280x1920_rightside_up, 423 - }, { /* Valve Steam Deck */ 423 + }, { /* Valve Steam Deck (Jupiter) */ 424 424 .matches = { 425 425 DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), 426 426 DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"), 427 + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), 428 + }, 429 + .driver_data = (void *)&lcd800x1280_rightside_up, 430 + }, { /* Valve Steam Deck (Galileo) */ 431 + .matches = { 432 + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), 433 + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"), 427 434 DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), 428 435 }, 429 436 .driver_data = (void *)&lcd800x1280_rightside_up,

+3

drivers/gpu/drm/i915/display/intel_ddi.c

··· 2088 2088 u32 ln0, ln1, pin_assignment; 2089 2089 u8 width; 2090 2090 2091 + if (DISPLAY_VER(dev_priv) >= 14) 2092 + return; 2093 + 2091 2094 if (!intel_encoder_is_tc(&dig_port->base) || 2092 2095 intel_tc_port_in_tbt_alt_mode(dig_port)) 2093 2096 return;

+3

drivers/gpu/drm/nouveau/nouveau_connector.c

··· 1001 1001 struct drm_display_mode *mode; 1002 1002 1003 1003 mode = drm_mode_duplicate(dev, nv_connector->native_mode); 1004 + if (!mode) 1005 + return 0; 1006 + 1004 1007 drm_mode_probed_add(connector, mode); 1005 1008 ret = 1; 1006 1009 }

+3 -3

drivers/gpu/drm/panthor/panthor_drv.c

··· 86 86 int ret = 0; 87 87 void *out_alloc; 88 88 89 + if (!in->count) 90 + return NULL; 91 + 89 92 /* User stride must be at least the minimum object size, otherwise it might 90 93 * lack useful information. 91 94 */ 92 95 if (in->stride < min_stride) 93 96 return ERR_PTR(-EINVAL); 94 - 95 - if (!in->count) 96 - return NULL; 97 97 98 98 out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); 99 99 if (!out_alloc)

+33 -11

drivers/gpu/drm/panthor/panthor_sched.c

··· 459 459 atomic64_t seqno; 460 460 461 461 /** 462 + * @last_fence: Fence of the last submitted job. 463 + * 464 + * We return this fence when we get an empty command stream. 465 + * This way, we are guaranteed that all earlier jobs have completed 466 + * when drm_sched_job::s_fence::finished without having to feed 467 + * the CS ring buffer with a dummy job that only signals the fence. 468 + */ 469 + struct dma_fence *last_fence; 470 + 471 + /** 462 472 * @in_flight_jobs: List containing all in-flight jobs. 463 473 * 464 474 * Used to keep track and signal panthor_job::done_fence when the ··· 838 828 839 829 panthor_kernel_bo_destroy(queue->ringbuf); 840 830 panthor_kernel_bo_destroy(queue->iface.mem); 831 + 832 + /* Release the last_fence we were holding, if any. */ 833 + dma_fence_put(queue->fence_ctx.last_fence); 841 834 842 835 kfree(queue); 843 836 } ··· 2797 2784 2798 2785 spin_lock(&queue->fence_ctx.lock); 2799 2786 list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { 2800 - if (!job->call_info.size) 2801 - continue; 2802 - 2803 2787 if (syncobj->seqno < job->done_fence->seqno) 2804 2788 break; 2805 2789 ··· 2875 2865 static_assert(sizeof(call_instrs) % 64 == 0, 2876 2866 "call_instrs is not aligned on a cacheline"); 2877 2867 2878 - /* Stream size is zero, nothing to do => return a NULL fence and let 2879 - * drm_sched signal the parent. 2868 + /* Stream size is zero, nothing to do except making sure all previously 2869 + * submitted jobs are done before we signal the 2870 + * drm_sched_job::s_fence::finished fence. 
2880 2871 */ 2881 - if (!job->call_info.size) 2882 - return NULL; 2872 + if (!job->call_info.size) { 2873 + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); 2874 + return dma_fence_get(job->done_fence); 2875 + } 2883 2876 2884 2877 ret = pm_runtime_resume_and_get(ptdev->base.dev); 2885 2878 if (drm_WARN_ON(&ptdev->base, ret)) ··· 2940 2927 sched->pm.has_ref = true; 2941 2928 } 2942 2929 } 2930 + 2931 + /* Update the last fence. */ 2932 + dma_fence_put(queue->fence_ctx.last_fence); 2933 + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); 2943 2934 2944 2935 done_fence = dma_fence_get(job->done_fence); 2945 2936 ··· 3395 3378 goto err_put_job; 3396 3379 } 3397 3380 3398 - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); 3399 - if (!job->done_fence) { 3400 - ret = -ENOMEM; 3401 - goto err_put_job; 3381 + /* Empty command streams don't need a fence, they'll pick the one from 3382 + * the previously submitted job. 3383 + */ 3384 + if (job->call_info.size) { 3385 + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); 3386 + if (!job->done_fence) { 3387 + ret = -ENOMEM; 3388 + goto err_put_job; 3389 + } 3402 3390 } 3403 3391 3404 3392 ret = drm_sched_job_init(&job->base,

+1 -1

drivers/gpu/drm/radeon/radeon_gem.c

··· 642 642 if (r) 643 643 goto error_unlock; 644 644 645 - if (bo_va->it.start) 645 + if (bo_va->it.start && bo_va->bo) 646 646 r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource); 647 647 648 648 error_unlock:

+1

drivers/gpu/drm/ttm/ttm_bo.c

··· 346 346 if (!dma_resv_test_signaled(bo->base.resv, 347 347 DMA_RESV_USAGE_BOOKKEEP) || 348 348 (want_init_on_free() && (bo->ttm != NULL)) || 349 + bo->type == ttm_bo_type_sg || 349 350 !dma_resv_trylock(bo->base.resv)) { 350 351 /* The BO is not idle, resurrect it for delayed destroy */ 351 352 ttm_bo_flush_all_fences(bo);

+3 -3

drivers/gpu/drm/xe/xe_gt_mcr.c

··· 342 342 else 343 343 gt->steering[OADDRM].group_target = 1; 344 344 345 - gt->steering[DSS].instance_target = 0; /* unused */ 345 + gt->steering[OADDRM].instance_target = 0; /* unused */ 346 346 } 347 347 348 348 static void init_steering_sqidi_psmi(struct xe_gt *gt) ··· 357 357 358 358 static void init_steering_inst0(struct xe_gt *gt) 359 359 { 360 - gt->steering[DSS].group_target = 0; /* unused */ 361 - gt->steering[DSS].instance_target = 0; /* unused */ 360 + gt->steering[INSTANCE0].group_target = 0; /* unused */ 361 + gt->steering[INSTANCE0].instance_target = 0; /* unused */ 362 362 } 363 363 364 364 static const struct {

+4 -4

drivers/gpu/drm/xe/xe_migrate.c

··· 1334 1334 GFP_KERNEL, true, 0); 1335 1335 if (IS_ERR(sa_bo)) { 1336 1336 err = PTR_ERR(sa_bo); 1337 - goto err; 1337 + goto err_bb; 1338 1338 } 1339 1339 1340 1340 ppgtt_ofs = NUM_KERNEL_PDE + ··· 1385 1385 update_idx); 1386 1386 if (IS_ERR(job)) { 1387 1387 err = PTR_ERR(job); 1388 - goto err_bb; 1388 + goto err_sa; 1389 1389 } 1390 1390 1391 1391 /* Wait on BO move */ ··· 1434 1434 1435 1435 err_job: 1436 1436 xe_sched_job_put(job); 1437 + err_sa: 1438 + drm_suballoc_free(sa_bo, NULL); 1437 1439 err_bb: 1438 1440 if (!q) 1439 1441 mutex_unlock(&m->job_mutex); 1440 1442 xe_bb_free(bb, NULL); 1441 - err: 1442 - drm_suballoc_free(sa_bo, NULL); 1443 1443 return ERR_PTR(err); 1444 1444 } 1445 1445

+10 -38

drivers/i2c/busses/i2c-pnx.c

··· 15 15 #include <linux/ioport.h> 16 16 #include <linux/delay.h> 17 17 #include <linux/i2c.h> 18 - #include <linux/timer.h> 19 18 #include <linux/completion.h> 20 19 #include <linux/platform_device.h> 21 20 #include <linux/io.h> ··· 31 32 int ret; /* Return value */ 32 33 int mode; /* Interface mode */ 33 34 struct completion complete; /* I/O completion */ 34 - struct timer_list timer; /* Timeout */ 35 35 u8 * buf; /* Data buffer */ 36 36 int len; /* Length of data buffer */ 37 37 int order; /* RX Bytes to order via TX */ ··· 113 115 timeout--; 114 116 } 115 117 return (timeout <= 0); 116 - } 117 - 118 - static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) 119 - { 120 - struct timer_list *timer = &alg_data->mif.timer; 121 - unsigned long expires = msecs_to_jiffies(alg_data->timeout); 122 - 123 - if (expires <= 1) 124 - expires = 2; 125 - 126 - del_timer_sync(timer); 127 - 128 - dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n", 129 - jiffies, expires); 130 - 131 - timer->expires = jiffies + expires; 132 - 133 - add_timer(timer); 134 118 } 135 119 136 120 /** ··· 239 259 ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), 240 260 I2C_REG_CTL(alg_data)); 241 261 242 - del_timer_sync(&alg_data->mif.timer); 243 - 244 262 dev_dbg(&alg_data->adapter.dev, 245 263 "%s(): Waking up xfer routine.\n", 246 264 __func__); ··· 254 276 ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), 255 277 I2C_REG_CTL(alg_data)); 256 278 257 - /* Stop timer. */ 258 - del_timer_sync(&alg_data->mif.timer); 259 279 dev_dbg(&alg_data->adapter.dev, 260 280 "%s(): Waking up xfer routine after zero-xfer.\n", 261 281 __func__); ··· 340 364 mcntrl_drmie | mcntrl_daie); 341 365 iowrite32(ctl, I2C_REG_CTL(alg_data)); 342 366 343 - /* Kill timer. 
*/ 344 - del_timer_sync(&alg_data->mif.timer); 345 367 complete(&alg_data->mif.complete); 346 368 } 347 369 } ··· 374 400 mcntrl_drmie); 375 401 iowrite32(ctl, I2C_REG_CTL(alg_data)); 376 402 377 - /* Stop timer, to prevent timeout. */ 378 - del_timer_sync(&alg_data->mif.timer); 379 403 complete(&alg_data->mif.complete); 380 404 } else if (stat & mstatus_nai) { 381 405 /* Slave did not acknowledge, generate a STOP */ ··· 391 419 /* Our return value. */ 392 420 alg_data->mif.ret = -EIO; 393 421 394 - /* Stop timer, to prevent timeout. */ 395 - del_timer_sync(&alg_data->mif.timer); 396 422 complete(&alg_data->mif.complete); 397 423 } else { 398 424 /* ··· 423 453 return IRQ_HANDLED; 424 454 } 425 455 426 - static void i2c_pnx_timeout(struct timer_list *t) 456 + static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data) 427 457 { 428 - struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer); 429 458 u32 ctl; 430 459 431 460 dev_err(&alg_data->adapter.dev, ··· 441 472 iowrite32(ctl, I2C_REG_CTL(alg_data)); 442 473 wait_reset(alg_data); 443 474 alg_data->mif.ret = -EIO; 444 - complete(&alg_data->mif.complete); 445 475 } 446 476 447 477 static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data) ··· 482 514 struct i2c_msg *pmsg; 483 515 int rc = 0, completed = 0, i; 484 516 struct i2c_pnx_algo_data *alg_data = adap->algo_data; 517 + unsigned long time_left; 485 518 u32 stat; 486 519 487 520 dev_dbg(&alg_data->adapter.dev, ··· 517 548 dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n", 518 549 __func__, alg_data->mif.mode, alg_data->mif.len); 519 550 520 - i2c_pnx_arm_timer(alg_data); 521 551 522 552 /* initialize the completion var */ 523 553 init_completion(&alg_data->mif.complete); ··· 532 564 break; 533 565 534 566 /* Wait for completion */ 535 - wait_for_completion(&alg_data->mif.complete); 567 + time_left = wait_for_completion_timeout(&alg_data->mif.complete, 568 + alg_data->timeout); 569 + if (time_left == 0) 570 + 
i2c_pnx_timeout(alg_data); 536 571 537 572 if (!(rc = alg_data->mif.ret)) 538 573 completed++; ··· 624 653 alg_data->adapter.algo_data = alg_data; 625 654 alg_data->adapter.nr = pdev->id; 626 655 627 - alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT; 656 + alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT); 657 + if (alg_data->timeout <= 1) 658 + alg_data->timeout = 2; 659 + 628 660 #ifdef CONFIG_OF 629 661 alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node); 630 662 if (pdev->dev.of_node) { ··· 646 672 alg_data->clk = devm_clk_get(&pdev->dev, NULL); 647 673 if (IS_ERR(alg_data->clk)) 648 674 return PTR_ERR(alg_data->clk); 649 - 650 - timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0); 651 675 652 676 snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name), 653 677 "%s", pdev->name);

+10 -13

drivers/net/dsa/lan9303-core.c

··· 1047 1047 return ARRAY_SIZE(lan9303_mib); 1048 1048 } 1049 1049 1050 - static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum) 1050 + static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum) 1051 1051 { 1052 1052 struct lan9303 *chip = ds->priv; 1053 1053 int phy_base = chip->phy_addr_base; 1054 1054 1055 - if (phy == phy_base) 1055 + if (port == 0) 1056 1056 return lan9303_virt_phy_reg_read(chip, regnum); 1057 - if (phy > phy_base + 2) 1057 + if (port > 2) 1058 1058 return -ENODEV; 1059 1059 1060 - return chip->ops->phy_read(chip, phy, regnum); 1060 + return chip->ops->phy_read(chip, phy_base + port, regnum); 1061 1061 } 1062 1062 1063 - static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, 1063 + static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum, 1064 1064 u16 val) 1065 1065 { 1066 1066 struct lan9303 *chip = ds->priv; 1067 1067 int phy_base = chip->phy_addr_base; 1068 1068 1069 - if (phy == phy_base) 1069 + if (port == 0) 1070 1070 return lan9303_virt_phy_reg_write(chip, regnum, val); 1071 - if (phy > phy_base + 2) 1071 + if (port > 2) 1072 1072 return -ENODEV; 1073 1073 1074 - return chip->ops->phy_write(chip, phy, regnum, val); 1074 + return chip->ops->phy_write(chip, phy_base + port, regnum, val); 1075 1075 } 1076 1076 1077 1077 static int lan9303_port_enable(struct dsa_switch *ds, int port, ··· 1099 1099 vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port); 1100 1100 1101 1101 lan9303_disable_processing_port(chip, port); 1102 - lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); 1102 + lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN); 1103 1103 } 1104 1104 1105 1105 static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, ··· 1374 1374 1375 1375 static int lan9303_register_switch(struct lan9303 *chip) 1376 1376 { 1377 - int base; 1378 - 1379 1377 chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL); 1380 1378 if (!chip->ds) 
1381 1379 return -ENOMEM; ··· 1383 1385 chip->ds->priv = chip; 1384 1386 chip->ds->ops = &lan9303_switch_ops; 1385 1387 chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops; 1386 - base = chip->phy_addr_base; 1387 - chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base); 1388 + chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0); 1388 1389 1389 1390 return dsa_register_switch(chip->ds); 1390 1391 }

+1

drivers/net/ethernet/broadcom/asp2/bcmasp.c

··· 1380 1380 dev_err(dev, "Cannot create eth interface %d\n", i); 1381 1381 bcmasp_remove_intfs(priv); 1382 1382 of_node_put(intf_node); 1383 + ret = -ENOMEM; 1383 1384 goto of_put_exit; 1384 1385 } 1385 1386 list_add_tail(&intf->list, &priv->intfs);

+15

drivers/net/ethernet/broadcom/bnxt/bnxt.c

··· 6282 6282 return max_ring; 6283 6283 } 6284 6284 6285 + u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp) 6286 + { 6287 + u16 i, tbl_size, max_ring = 0; 6288 + struct bnxt_rss_ctx *rss_ctx; 6289 + 6290 + tbl_size = bnxt_get_rxfh_indir_size(bp->dev); 6291 + 6292 + list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) { 6293 + for (i = 0; i < tbl_size; i++) 6294 + max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]); 6295 + } 6296 + 6297 + return max_ring; 6298 + } 6299 + 6285 6300 int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) 6286 6301 { 6287 6302 if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {

+1

drivers/net/ethernet/broadcom/bnxt/bnxt.h

··· 2814 2814 void bnxt_fill_ipv6_mask(__be32 mask[4]); 2815 2815 int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); 2816 2816 void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); 2817 + u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp); 2817 2818 int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); 2818 2819 int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); 2819 2820 int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic,

+6

drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c

··· 961 961 return rc; 962 962 } 963 963 964 + if (req_rx_rings < bp->rx_nr_rings && 965 + req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) { 966 + netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n"); 967 + return -EINVAL; 968 + } 969 + 964 970 if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != 965 971 bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && 966 972 netif_is_rxfh_configured(dev)) {

+53 -20

drivers/net/ethernet/intel/e1000e/ich8lan.c

··· 1109 1109 } 1110 1110 1111 1111 /** 1112 + * e1000e_force_smbus - Force interfaces to transition to SMBUS mode. 1113 + * @hw: pointer to the HW structure 1114 + * 1115 + * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already 1116 + * acquired. 1117 + * 1118 + * Return: 0 on success, negative errno on failure. 1119 + **/ 1120 + static s32 e1000e_force_smbus(struct e1000_hw *hw) 1121 + { 1122 + u16 smb_ctrl = 0; 1123 + u32 ctrl_ext; 1124 + s32 ret_val; 1125 + 1126 + /* Switching PHY interface always returns MDI error 1127 + * so disable retry mechanism to avoid wasting time 1128 + */ 1129 + e1000e_disable_phy_retry(hw); 1130 + 1131 + /* Force SMBus mode in the PHY */ 1132 + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl); 1133 + if (ret_val) { 1134 + e1000e_enable_phy_retry(hw); 1135 + return ret_val; 1136 + } 1137 + 1138 + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; 1139 + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl); 1140 + 1141 + e1000e_enable_phy_retry(hw); 1142 + 1143 + /* Force SMBus mode in the MAC */ 1144 + ctrl_ext = er32(CTRL_EXT); 1145 + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; 1146 + ew32(CTRL_EXT, ctrl_ext); 1147 + 1148 + return 0; 1149 + } 1150 + 1151 + /** 1112 1152 * e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP 1113 1153 * @hw: pointer to the HW structure 1114 1154 * @to_sx: boolean indicating a system power state transition to Sx ··· 1204 1164 ret_val = hw->phy.ops.acquire(hw); 1205 1165 if (ret_val) 1206 1166 goto out; 1167 + 1168 + if (hw->mac.type != e1000_pch_mtp) { 1169 + ret_val = e1000e_force_smbus(hw); 1170 + if (ret_val) { 1171 + e_dbg("Failed to force SMBUS: %d\n", ret_val); 1172 + goto release; 1173 + } 1174 + } 1207 1175 1208 1176 /* Si workaround for ULP entry flow on i127/rev6 h/w. 
Enable 1209 1177 * LPLU and disable Gig speed when entering ULP ··· 1273 1225 } 1274 1226 1275 1227 release: 1276 - /* Switching PHY interface always returns MDI error 1277 - * so disable retry mechanism to avoid wasting time 1278 - */ 1279 - e1000e_disable_phy_retry(hw); 1280 - 1281 - /* Force SMBus mode in PHY */ 1282 - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); 1283 - if (ret_val) { 1284 - e1000e_enable_phy_retry(hw); 1285 - hw->phy.ops.release(hw); 1286 - goto out; 1228 + if (hw->mac.type == e1000_pch_mtp) { 1229 + ret_val = e1000e_force_smbus(hw); 1230 + if (ret_val) 1231 + e_dbg("Failed to force SMBUS over MTL system: %d\n", 1232 + ret_val); 1287 1233 } 1288 - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; 1289 - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); 1290 - 1291 - e1000e_enable_phy_retry(hw); 1292 - 1293 - /* Force SMBus mode in MAC */ 1294 - mac_reg = er32(CTRL_EXT); 1295 - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; 1296 - ew32(CTRL_EXT, mac_reg); 1297 1234 1298 1235 hw->phy.ops.release(hw); 1299 1236 out:

+4 -5

drivers/net/ethernet/intel/i40e/i40e_main.c

··· 13293 13293 bool need_reset; 13294 13294 int i; 13295 13295 13296 + /* VSI shall be deleted in a moment, block loading new programs */ 13297 + if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) 13298 + return -EINVAL; 13299 + 13296 13300 /* Don't allow frames that span over multiple buffers */ 13297 13301 if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { 13298 13302 NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); ··· 13305 13301 13306 13302 /* When turning XDP on->off/off->on we reset and rebuild the rings. */ 13307 13303 need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); 13308 - 13309 13304 if (need_reset) 13310 13305 i40e_prep_for_reset(pf); 13311 - 13312 - /* VSI shall be deleted in a moment, just return EINVAL */ 13313 - if (test_bit(__I40E_IN_REMOVE, pf->state)) 13314 - return -EINVAL; 13315 13306 13316 13307 old_prog = xchg(&vsi->xdp_prog, prog); 13317 13308

+2 -2

drivers/net/ethernet/lantiq_etop.c

··· 217 217 if (ch->dma.irq) 218 218 free_irq(ch->dma.irq, priv); 219 219 if (IS_RX(ch->idx)) { 220 - int desc; 220 + struct ltq_dma_channel *dma = &ch->dma; 221 221 222 - for (desc = 0; desc < LTQ_DESC_NUM; desc++) 222 + for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++) 223 223 dev_kfree_skb_any(ch->skb[ch->dma.desc]); 224 224 } 225 225 }

+1 -1

drivers/net/ethernet/marvell/octeontx2/af/rvu.c

··· 1643 1643 if (req->ssow > block->lf.max) { 1644 1644 dev_err(&rvu->pdev->dev, 1645 1645 "Func 0x%x: Invalid SSOW req, %d > max %d\n", 1646 - pcifunc, req->sso, block->lf.max); 1646 + pcifunc, req->ssow, block->lf.max); 1647 1647 return -EINVAL; 1648 1648 } 1649 1649 mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);

+7

drivers/net/ethernet/mediatek/mtk_star_emac.c

··· 1524 1524 { 1525 1525 struct device_node *of_node; 1526 1526 struct mtk_star_priv *priv; 1527 + struct phy_device *phydev; 1527 1528 struct net_device *ndev; 1528 1529 struct device *dev; 1529 1530 void __iomem *base; ··· 1649 1648 1650 1649 netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll); 1651 1650 netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll); 1651 + 1652 + phydev = of_phy_find_device(priv->phy_node); 1653 + if (phydev) { 1654 + phydev->mac_managed_pm = true; 1655 + put_device(&phydev->mdio.dev); 1656 + } 1652 1657 1653 1658 return devm_register_netdev(dev, ndev); 1654 1659 }

+5 -5

drivers/net/ethernet/micrel/ks8851_common.c

··· 352 352 netif_dbg(ks, intr, ks->netdev, 353 353 "%s: txspace %d\n", __func__, tx_space); 354 354 355 - spin_lock(&ks->statelock); 355 + spin_lock_bh(&ks->statelock); 356 356 ks->tx_space = tx_space; 357 357 if (netif_queue_stopped(ks->netdev)) 358 358 netif_wake_queue(ks->netdev); 359 - spin_unlock(&ks->statelock); 359 + spin_unlock_bh(&ks->statelock); 360 360 } 361 361 362 362 if (status & IRQ_SPIBEI) { ··· 482 482 ks8851_wrreg16(ks, KS_IER, ks->rc_ier); 483 483 484 484 ks->queued_len = 0; 485 + ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); 485 486 netif_start_queue(ks->netdev); 486 487 487 488 netif_dbg(ks, ifup, ks->netdev, "network device up\n"); ··· 636 635 637 636 /* schedule work to do the actual set of the data if needed */ 638 637 639 - spin_lock(&ks->statelock); 638 + spin_lock_bh(&ks->statelock); 640 639 641 640 if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) { 642 641 memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl)); 643 642 schedule_work(&ks->rxctrl_work); 644 643 } 645 644 646 - spin_unlock(&ks->statelock); 645 + spin_unlock_bh(&ks->statelock); 647 646 } 648 647 649 648 static int ks8851_set_mac_address(struct net_device *dev, void *addr) ··· 1102 1101 int ret; 1103 1102 1104 1103 ks->netdev = netdev; 1105 - ks->tx_space = 6144; 1106 1104 1107 1105 ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); 1108 1106 ret = PTR_ERR_OR_ZERO(ks->gpio);

+2 -2

drivers/net/ethernet/micrel/ks8851_spi.c

··· 340 340 341 341 tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); 342 342 343 - spin_lock(&ks->statelock); 343 + spin_lock_bh(&ks->statelock); 344 344 ks->queued_len -= dequeued_len; 345 345 ks->tx_space = tx_space; 346 - spin_unlock(&ks->statelock); 346 + spin_unlock_bh(&ks->statelock); 347 347 348 348 ks8851_unlock_spi(ks, &flags); 349 349 }

+1 -1

drivers/net/phy/microchip_t1.c

··· 748 748 ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A, 749 749 lan87xx_cable_test_report_trans(detect)); 750 750 751 - return 0; 751 + return phy_init_hw(phydev); 752 752 } 753 753 754 754 static int lan87xx_cable_test_get_status(struct phy_device *phydev,

+15

drivers/net/ppp/ppp_generic.c

··· 70 70 #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ 71 71 72 72 #define PPP_PROTO_LEN 2 73 + #define PPP_LCP_HDRLEN 4 73 74 74 75 /* 75 76 * An instance of /dev/ppp can be associated with either a ppp ··· 494 493 return ret; 495 494 } 496 495 496 + static bool ppp_check_packet(struct sk_buff *skb, size_t count) 497 + { 498 + /* LCP packets must include LCP header which 4 bytes long: 499 + * 1-byte code, 1-byte identifier, and 2-byte length. 500 + */ 501 + return get_unaligned_be16(skb->data) != PPP_LCP || 502 + count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN; 503 + } 504 + 497 505 static ssize_t ppp_write(struct file *file, const char __user *buf, 498 506 size_t count, loff_t *ppos) 499 507 { ··· 522 512 skb_reserve(skb, pf->hdrlen); 523 513 ret = -EFAULT; 524 514 if (copy_from_user(skb_put(skb, count), buf, count)) { 515 + kfree_skb(skb); 516 + goto out; 517 + } 518 + ret = -EINVAL; 519 + if (unlikely(!ppp_check_packet(skb, count))) { 525 520 kfree_skb(skb); 526 521 goto out; 527 522 }

+2 -2

drivers/net/wireguard/allowedips.c

··· 15 15 if (bits == 32) { 16 16 *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); 17 17 } else if (bits == 128) { 18 - ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); 19 - ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); 18 + ((u64 *)dst)[0] = get_unaligned_be64(src); 19 + ((u64 *)dst)[1] = get_unaligned_be64(src + 8); 20 20 } 21 21 } 22 22

+2 -2

drivers/net/wireguard/queueing.h

··· 124 124 */ 125 125 static inline int wg_cpumask_next_online(int *last_cpu) 126 126 { 127 - int cpu = cpumask_next(*last_cpu, cpu_online_mask); 127 + int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask); 128 128 if (cpu >= nr_cpu_ids) 129 129 cpu = cpumask_first(cpu_online_mask); 130 - *last_cpu = cpu; 130 + WRITE_ONCE(*last_cpu, cpu); 131 131 return cpu; 132 132 } 133 133

+1 -1

drivers/net/wireguard/send.c

··· 222 222 { 223 223 struct sk_buff *skb; 224 224 225 - if (skb_queue_empty(&peer->staged_packet_queue)) { 225 + if (skb_queue_empty_lockless(&peer->staged_packet_queue)) { 226 226 skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, 227 227 GFP_ATOMIC); 228 228 if (unlikely(!skb))

+4 -14

drivers/of/irq.c

··· 81 81 /* 82 82 * These interrupt controllers abuse interrupt-map for unspeakable 83 83 * reasons and rely on the core code to *ignore* it (the drivers do 84 - * their own parsing of the property). 84 + * their own parsing of the property). The PAsemi entry covers a 85 + * non-sensical interrupt-map that is better left ignored. 85 86 * 86 87 * If you think of adding to the list for something *new*, think 87 88 * again. There is a high chance that you will be sent back to the ··· 96 95 "fsl,ls1043a-extirq", 97 96 "fsl,ls1088a-extirq", 98 97 "renesas,rza1-irqc", 98 + "pasemi,rootbus", 99 99 NULL, 100 100 }; 101 101 ··· 295 293 imaplen -= imap - oldimap; 296 294 pr_debug(" -> imaplen=%d\n", imaplen); 297 295 } 298 - if (!match) { 299 - if (intc) { 300 - /* 301 - * The PASEMI Nemo is a known offender, so 302 - * let's only warn for anyone else. 303 - */ 304 - WARN(!IS_ENABLED(CONFIG_PPC_PASEMI), 305 - "%pOF interrupt-map failed, using interrupt-controller\n", 306 - ipar); 307 - return 0; 308 - } 309 - 296 + if (!match) 310 297 goto fail; 311 - } 312 298 313 299 /* 314 300 * Successfully parsed an interrupt-map translation; copy new

+1 -1

drivers/perf/riscv_pmu.c

··· 167 167 unsigned long cmask; 168 168 u64 oldval, delta; 169 169 170 - if (!rvpmu->ctr_read) 170 + if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) 171 171 return 0; 172 172 173 173 cmask = riscv_pmu_ctr_get_width_mask(event);

+41 -3

drivers/perf/riscv_pmu_sbi.c

··· 20 20 #include <linux/cpu_pm.h> 21 21 #include <linux/sched/clock.h> 22 22 #include <linux/soc/andes/irq.h> 23 + #include <linux/workqueue.h> 23 24 24 25 #include <asm/errata_list.h> 25 26 #include <asm/sbi.h> ··· 115 114 }; 116 115 }; 117 116 118 - static const struct sbi_pmu_event_data pmu_hw_event_map[] = { 117 + static struct sbi_pmu_event_data pmu_hw_event_map[] = { 119 118 [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { 120 119 SBI_PMU_HW_CPU_CYCLES, 121 120 SBI_PMU_EVENT_TYPE_HW, 0}}, ··· 149 148 }; 150 149 151 150 #define C(x) PERF_COUNT_HW_CACHE_##x 152 - static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] 151 + static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] 153 152 [PERF_COUNT_HW_CACHE_OP_MAX] 154 153 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 155 154 [C(L1D)] = { ··· 293 292 }, 294 293 }, 295 294 }; 295 + 296 + static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) 297 + { 298 + struct sbiret ret; 299 + 300 + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 301 + 0, cmask, 0, edata->event_idx, 0, 0); 302 + if (!ret.error) { 303 + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 304 + ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); 305 + } else if (ret.error == SBI_ERR_NOT_SUPPORTED) { 306 + /* This event cannot be monitored by any counter */ 307 + edata->event_idx = -EINVAL; 308 + } 309 + } 310 + 311 + static void pmu_sbi_check_std_events(struct work_struct *work) 312 + { 313 + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) 314 + pmu_sbi_check_event(&pmu_hw_event_map[i]); 315 + 316 + for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) 317 + for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) 318 + for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) 319 + pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]); 320 + } 321 + 322 + static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events); 296 323 297 324 static int 
pmu_sbi_ctr_get_width(int idx) 298 325 { ··· 506 477 int bSoftware; 507 478 u64 raw_config_val; 508 479 int ret; 480 + 481 + /* 482 + * Ensure we are finished checking standard hardware events for 483 + * validity before allowing userspace to configure any events. 484 + */ 485 + flush_work(&check_std_events_work); 509 486 510 487 switch (type) { 511 488 case PERF_TYPE_HARDWARE: ··· 797 762 * which may include counters that are not enabled yet. 798 763 */ 799 764 sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 800 - 0, pmu->cmask, 0, 0, 0, 0); 765 + 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); 801 766 } 802 767 803 768 static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) ··· 1393 1358 ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); 1394 1359 if (ret) 1395 1360 goto out_unregister; 1361 + 1362 + /* Asynchronously check which standard events are available */ 1363 + schedule_work(&check_std_events_work); 1396 1364 1397 1365 return 0; 1398 1366

+1

drivers/platform/x86/toshiba_acpi.c

··· 3299 3299 }, 3300 3300 .driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART), 3301 3301 }, 3302 + { } 3302 3303 }; 3303 3304 3304 3305 static int toshiba_acpi_add(struct acpi_device *acpi_dev)

+2 -3

drivers/scsi/sd.c

··· 4119 4119 { 4120 4120 struct scsi_disk *sdkp = dev_get_drvdata(dev); 4121 4121 4122 - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 4123 - 4124 4122 if (opal_unlock_from_suspend(sdkp->opal_dev)) { 4125 4123 sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); 4126 4124 return -EIO; ··· 4135 4137 if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ 4136 4138 return 0; 4137 4139 4140 + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 4141 + 4138 4142 if (!sd_do_start_stop(sdkp->device, runtime)) { 4139 4143 sdkp->suspended = false; 4140 4144 return 0; 4141 4145 } 4142 4146 4143 - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 4144 4147 ret = sd_start_stop_device(sdkp, 1); 4145 4148 if (!ret) { 4146 4149 sd_resume(dev);

+3

drivers/thermal/gov_power_allocator.c

··· 759 759 return; 760 760 } 761 761 762 + if (!params->trip_max) 763 + return; 764 + 762 765 allocate_power(tz, params->trip_max->temperature); 763 766 params->update_cdevs = true; 764 767 }

+8 -7

drivers/thermal/thermal_core.c

··· 300 300 thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); 301 301 else if (tz->polling_delay_jiffies) 302 302 thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); 303 + else if (tz->temperature == THERMAL_TEMP_INVALID) 304 + thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); 303 305 } 304 306 305 307 static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz) ··· 484 482 thermal_governor_trip_crossed(governor, tz, trip, crossed_up); 485 483 } 486 484 487 - static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a, 485 + static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a, 488 486 const struct list_head *b) 489 487 { 490 488 struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc, 491 489 notify_list_node); 492 490 struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc, 493 491 notify_list_node); 494 - int ret = tdb->notify_temp - tda->notify_temp; 495 - 496 - return ascending ? 
ret : -ret; 492 + return tda->notify_temp - tdb->notify_temp; 497 493 } 498 494 499 495 void __thermal_zone_device_update(struct thermal_zone_device *tz, ··· 511 511 update_temperature(tz); 512 512 513 513 if (tz->temperature == THERMAL_TEMP_INVALID) 514 - return; 514 + goto monitor; 515 515 516 516 __thermal_zone_set_trips(tz); 517 517 ··· 520 520 for_each_trip_desc(tz, td) 521 521 handle_thermal_trip(tz, td, &way_up_list, &way_down_list); 522 522 523 - list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp); 523 + list_sort(NULL, &way_up_list, thermal_trip_notify_cmp); 524 524 list_for_each_entry(td, &way_up_list, notify_list_node) 525 525 thermal_trip_crossed(tz, &td->trip, governor, true); 526 526 527 527 list_sort(NULL, &way_down_list, thermal_trip_notify_cmp); 528 - list_for_each_entry(td, &way_down_list, notify_list_node) 528 + list_for_each_entry_reverse(td, &way_down_list, notify_list_node) 529 529 thermal_trip_crossed(tz, &td->trip, governor, false); 530 530 531 531 if (governor->manage) ··· 533 533 534 534 thermal_debug_update_trip_stats(tz); 535 535 536 + monitor: 536 537 monitor_thermal_zone(tz); 537 538 } 538 539

+6

drivers/thermal/thermal_core.h

··· 133 133 struct thermal_trip_desc trips[] __counted_by(num_trips); 134 134 }; 135 135 136 + /* 137 + * Default delay after a failing thermal zone temperature check before 138 + * attempting to check it again. 139 + */ 140 + #define THERMAL_RECHECK_DELAY_MS 250 141 + 136 142 /* Default Thermal Governor */ 137 143 #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) 138 144 #define DEFAULT_THERMAL_GOVERNOR "step_wise"

+6 -5

drivers/ufs/core/ufs-mcq.c

··· 105 105 * @hba: per adapter instance 106 106 * @req: pointer to the request to be issued 107 107 * 108 - * Return: the hardware queue instance on which the request would 109 - * be queued. 108 + * Return: the hardware queue instance on which the request will be or has 109 + * been queued. %NULL if the request has already been freed. 110 110 */ 111 111 struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, 112 112 struct request *req) 113 113 { 114 - u32 utag = blk_mq_unique_tag(req); 115 - u32 hwq = blk_mq_unique_tag_to_hwq(utag); 114 + struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx); 116 115 117 - return &hba->uhq[hwq]; 116 + return hctx ? &hba->uhq[hctx->queue_num] : NULL; 118 117 } 119 118 120 119 /** ··· 514 515 if (!cmd) 515 516 return -EINVAL; 516 517 hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); 518 + if (!hwq) 519 + return 0; 517 520 } else { 518 521 hwq = hba->dev_cmd_queue; 519 522 }

+2

drivers/ufs/core/ufshcd.c

··· 6456 6456 /* Release cmd in MCQ mode if abort succeeds */ 6457 6457 if (is_mcq_enabled(hba) && (*ret == 0)) { 6458 6458 hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd)); 6459 + if (!hwq) 6460 + return 0; 6459 6461 spin_lock_irqsave(&hwq->cq_lock, flags); 6460 6462 if (ufshcd_cmd_inflight(lrbp->cmd)) 6461 6463 ufshcd_release_scsi_cmd(hba, lrbp);

+1 -1

drivers/vfio/pci/vfio_pci_core.c

··· 1260 1260 struct vfio_pci_hot_reset_info hdr; 1261 1261 struct vfio_pci_fill_info fill = {}; 1262 1262 bool slot = false; 1263 - int ret, count; 1263 + int ret, count = 0; 1264 1264 1265 1265 if (copy_from_user(&hdr, arg, minsz)) 1266 1266 return -EFAULT;

+23 -25

fs/bcachefs/alloc_background.c

··· 3 3 #include "alloc_background.h" 4 4 #include "alloc_foreground.h" 5 5 #include "backpointers.h" 6 + #include "bkey_buf.h" 6 7 #include "btree_cache.h" 7 8 #include "btree_io.h" 8 9 #include "btree_key_cache.h" ··· 1554 1553 } 1555 1554 1556 1555 static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, 1557 - struct btree_iter *alloc_iter) 1556 + struct btree_iter *alloc_iter, 1557 + struct bkey_buf *last_flushed) 1558 1558 { 1559 1559 struct bch_fs *c = trans->c; 1560 - struct btree_iter lru_iter; 1561 1560 struct bch_alloc_v4 a_convert; 1562 1561 const struct bch_alloc_v4 *a; 1563 - struct bkey_s_c alloc_k, lru_k; 1562 + struct bkey_s_c alloc_k; 1564 1563 struct printbuf buf = PRINTBUF; 1565 1564 int ret; 1566 1565 ··· 1573 1572 return ret; 1574 1573 1575 1574 a = bch2_alloc_to_v4(alloc_k, &a_convert); 1575 + 1576 + if (a->fragmentation_lru) { 1577 + ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, 1578 + a->fragmentation_lru, 1579 + alloc_k, last_flushed); 1580 + if (ret) 1581 + return ret; 1582 + } 1576 1583 1577 1584 if (a->data_type != BCH_DATA_cached) 1578 1585 return 0; ··· 1606 1597 a = &a_mut->v; 1607 1598 } 1608 1599 1609 - lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, 1610 - lru_pos(alloc_k.k->p.inode, 1611 - bucket_to_u64(alloc_k.k->p), 1612 - a->io_time[READ]), 0); 1613 - ret = bkey_err(lru_k); 1600 + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ], 1601 + alloc_k, last_flushed); 1614 1602 if (ret) 1615 - return ret; 1616 - 1617 - if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, 1618 - alloc_key_to_missing_lru_entry, 1619 - "missing lru entry\n" 1620 - " %s", 1621 - (printbuf_reset(&buf), 1622 - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { 1623 - ret = bch2_lru_set(trans, 1624 - alloc_k.k->p.inode, 1625 - bucket_to_u64(alloc_k.k->p), 1626 - a->io_time[READ]); 1627 - if (ret) 1628 - goto err; 1629 - } 1603 + goto err; 1630 1604 err: 1631 1605 fsck_err: 1632 - 
bch2_trans_iter_exit(trans, &lru_iter); 1633 1606 printbuf_exit(&buf); 1634 1607 return ret; 1635 1608 } 1636 1609 1637 1610 int bch2_check_alloc_to_lru_refs(struct bch_fs *c) 1638 1611 { 1612 + struct bkey_buf last_flushed; 1613 + 1614 + bch2_bkey_buf_init(&last_flushed); 1615 + bkey_init(&last_flushed.k->k); 1616 + 1639 1617 int ret = bch2_trans_run(c, 1640 1618 for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, 1641 1619 POS_MIN, BTREE_ITER_prefetch, k, 1642 1620 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1643 - bch2_check_alloc_to_lru_ref(trans, &iter))); 1621 + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); 1622 + 1623 + bch2_bkey_buf_exit(&last_flushed, c); 1644 1624 bch_err_fn(c, ret); 1645 1625 return ret; 1646 1626 }

+2

fs/bcachefs/alloc_foreground.c

··· 1703 1703 for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) 1704 1704 nr[c->open_buckets[i].data_type]++; 1705 1705 1706 + printbuf_tabstops_reset(out); 1706 1707 printbuf_tabstop_push(out, 24); 1707 1708 1708 1709 percpu_down_read(&c->mark_lock); ··· 1737 1736 for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) 1738 1737 nr[c->open_buckets[i].data_type]++; 1739 1738 1739 + printbuf_tabstops_reset(out); 1740 1740 printbuf_tabstop_push(out, 12); 1741 1741 printbuf_tabstop_push(out, 16); 1742 1742 printbuf_tabstop_push(out, 16);

+26 -44

fs/bcachefs/backpointers.c

··· 434 434 return ret; 435 435 } 436 436 437 - static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 438 - { 439 - return bpos_eq(l.k->p, r.k->p) && 440 - bkey_bytes(l.k) == bkey_bytes(r.k) && 441 - !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 442 - } 443 - 444 437 struct extents_to_bp_state { 445 438 struct bpos bucket_start; 446 439 struct bpos bucket_end; ··· 529 536 struct btree_iter other_extent_iter = {}; 530 537 struct printbuf buf = PRINTBUF; 531 538 struct bkey_s_c bp_k; 532 - struct bkey_buf tmp; 533 539 int ret = 0; 534 - 535 - bch2_bkey_buf_init(&tmp); 536 540 537 541 struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); 538 542 if (!ca) { ··· 555 565 556 566 if (bp_k.k->type != KEY_TYPE_backpointer || 557 567 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 558 - bch2_bkey_buf_reassemble(&tmp, c, orig_k); 559 - 560 - if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { 561 - if (bp.level) { 562 - bch2_trans_unlock(trans); 563 - bch2_btree_interior_updates_flush(c); 564 - } 565 - 566 - ret = bch2_btree_write_buffer_flush_sync(trans); 567 - if (ret) 568 - goto err; 569 - 570 - bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); 571 - ret = -BCH_ERR_transaction_restart_write_buffer_flush; 572 - goto out; 573 - } 568 + ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 569 + if (ret) 570 + goto err; 574 571 575 572 goto check_existing_bp; 576 573 } ··· 566 589 fsck_err: 567 590 bch2_trans_iter_exit(trans, &other_extent_iter); 568 591 bch2_trans_iter_exit(trans, &bp_iter); 569 - bch2_bkey_buf_exit(&tmp, c); 570 592 bch2_dev_put(ca); 571 593 printbuf_exit(&buf); 572 594 return ret; ··· 770 794 !((1U << btree) & btree_interior_mask)) 771 795 continue; 772 796 797 + bch2_trans_begin(trans); 798 + 773 799 __for_each_btree_node(trans, iter, btree, 774 800 btree == start.btree ? 
start.pos : POS_MIN, 775 801 0, depth, BTREE_ITER_prefetch, b, ret) { ··· 883 905 struct bbpos start, 884 906 struct bbpos end, 885 907 struct bkey_s_c_backpointer bp, 886 - struct bpos *last_flushed_pos) 908 + struct bkey_buf *last_flushed) 887 909 { 888 910 struct bch_fs *c = trans->c; 889 911 struct btree_iter iter; ··· 903 925 if (ret) 904 926 return ret; 905 927 906 - if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { 907 - *last_flushed_pos = bp.k->p; 908 - ret = bch2_btree_write_buffer_flush_sync(trans) ?: 909 - -BCH_ERR_transaction_restart_write_buffer_flush; 910 - goto out; 911 - } 928 + if (!k.k) { 929 + ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); 930 + if (ret) 931 + goto out; 912 932 913 - if (fsck_err_on(!k.k, c, 914 - backpointer_to_missing_ptr, 915 - "backpointer for missing %s\n %s", 916 - bp.v->level ? "btree node" : "extent", 917 - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 918 - ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 919 - goto out; 933 + if (fsck_err(c, backpointer_to_missing_ptr, 934 + "backpointer for missing %s\n %s", 935 + bp.v->level ? 
"btree node" : "extent", 936 + (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 937 + ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 938 + goto out; 939 + } 920 940 } 921 941 out: 922 942 fsck_err: ··· 927 951 struct bbpos start, 928 952 struct bbpos end) 929 953 { 930 - struct bpos last_flushed_pos = SPOS_MAX; 954 + struct bkey_buf last_flushed; 931 955 932 - return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 956 + bch2_bkey_buf_init(&last_flushed); 957 + bkey_init(&last_flushed.k->k); 958 + 959 + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 933 960 POS_MIN, BTREE_ITER_prefetch, k, 934 961 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 935 962 check_one_backpointer(trans, start, end, 936 963 bkey_s_c_to_backpointer(k), 937 - &last_flushed_pos)); 964 + &last_flushed)); 965 + 966 + bch2_bkey_buf_exit(&last_flushed, trans->c); 967 + return ret; 938 968 } 939 969 940 970 int bch2_check_backpointers_to_extents(struct bch_fs *c)

+3 -2

fs/bcachefs/bkey.c

··· 660 660 bch2_bkey_format_field_overflows(f, i)) { 661 661 unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; 662 662 u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); 663 - u64 packed_max = f->bits_per_field[i] 664 - ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) 663 + unsigned packed_bits = min(64, f->bits_per_field[i]); 664 + u64 packed_max = packed_bits 665 + ? ~((~0ULL << 1) << (packed_bits - 1)) 665 666 : 0; 666 667 667 668 prt_printf(err, "field %u too large: %llu + %llu > %llu",

+7

fs/bcachefs/bkey.h

··· 194 194 return bkey_gt(l, r) ? l : r; 195 195 } 196 196 197 + static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 198 + { 199 + return bpos_eq(l.k->p, r.k->p) && 200 + bkey_bytes(l.k) == bkey_bytes(r.k) && 201 + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 202 + } 203 + 197 204 void bch2_bpos_swab(struct bpos *); 198 205 void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); 199 206

+11 -13

fs/bcachefs/btree_gc.c

··· 903 903 bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true); 904 904 percpu_up_read(&c->mark_lock); 905 905 906 + gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); 907 + 906 908 if (fsck_err_on(new.data_type != gc.data_type, c, 907 909 alloc_key_data_type_wrong, 908 910 "bucket %llu:%llu gen %u has wrong data_type" ··· 918 916 #define copy_bucket_field(_errtype, _f) \ 919 917 if (fsck_err_on(new._f != gc._f, c, _errtype, \ 920 918 "bucket %llu:%llu gen %u data type %s has wrong " #_f \ 921 - ": got %u, should be %u", \ 919 + ": got %llu, should be %llu", \ 922 920 iter->pos.inode, iter->pos.offset, \ 923 921 gc.gen, \ 924 922 bch2_data_type_str(gc.data_type), \ 925 - new._f, gc._f)) \ 923 + (u64) new._f, (u64) gc._f)) \ 926 924 new._f = gc._f; \ 927 925 928 - copy_bucket_field(alloc_key_gen_wrong, 929 - gen); 930 - copy_bucket_field(alloc_key_dirty_sectors_wrong, 931 - dirty_sectors); 932 - copy_bucket_field(alloc_key_cached_sectors_wrong, 933 - cached_sectors); 934 - copy_bucket_field(alloc_key_stripe_wrong, 935 - stripe); 936 - copy_bucket_field(alloc_key_stripe_redundancy_wrong, 937 - stripe_redundancy); 926 + copy_bucket_field(alloc_key_gen_wrong, gen); 927 + copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors); 928 + copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); 929 + copy_bucket_field(alloc_key_stripe_wrong, stripe); 930 + copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); 931 + copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); 938 932 #undef copy_bucket_field 939 933 940 934 if (!bch2_alloc_v4_cmp(*old, new)) ··· 944 946 a->v = new; 945 947 946 948 /* 947 - * The trigger normally makes sure this is set, but we're not running 949 + * The trigger normally makes sure these are set, but we're not running 948 950 * triggers: 949 951 */ 950 952 if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])

+37

fs/bcachefs/btree_write_buffer.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include "bcachefs.h" 4 + #include "bkey_buf.h" 4 5 #include "btree_locking.h" 5 6 #include "btree_update.h" 6 7 #include "btree_update_interior.h" 7 8 #include "btree_write_buffer.h" 8 9 #include "error.h" 10 + #include "extents.h" 9 11 #include "journal.h" 10 12 #include "journal_io.h" 11 13 #include "journal_reclaim.h" ··· 491 489 492 490 int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans); 493 491 bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); 492 + return ret; 493 + } 494 + 495 + /** 496 + * In check and repair code, when checking references to write buffer btrees we 497 + * need to issue a flush before we have a definitive error: this issues a flush 498 + * if this is a key we haven't yet checked. 499 + */ 500 + int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, 501 + struct bkey_s_c referring_k, 502 + struct bkey_buf *last_flushed) 503 + { 504 + struct bch_fs *c = trans->c; 505 + struct bkey_buf tmp; 506 + int ret = 0; 507 + 508 + bch2_bkey_buf_init(&tmp); 509 + 510 + if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { 511 + bch2_bkey_buf_reassemble(&tmp, c, referring_k); 512 + 513 + if (bkey_is_btree_ptr(referring_k.k)) { 514 + bch2_trans_unlock(trans); 515 + bch2_btree_interior_updates_flush(c); 516 + } 517 + 518 + ret = bch2_btree_write_buffer_flush_sync(trans); 519 + if (ret) 520 + goto err; 521 + 522 + bch2_bkey_buf_copy(last_flushed, c, tmp.k); 523 + ret = -BCH_ERR_transaction_restart_write_buffer_flush; 524 + } 525 + err: 526 + bch2_bkey_buf_exit(&tmp, c); 494 527 return ret; 495 528 } 496 529

+3

fs/bcachefs/btree_write_buffer.h

··· 23 23 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); 24 24 int bch2_btree_write_buffer_tryflush(struct btree_trans *); 25 25 26 + struct bkey_buf; 27 + int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *); 28 + 26 29 struct journal_keys_to_wb { 27 30 struct btree_write_buffer_keys *wb; 28 31 size_t room;

+2 -5

fs/bcachefs/clock.c

··· 132 132 { 133 133 struct io_timer *ret = NULL; 134 134 135 - spin_lock(&clock->timer_lock); 136 - 137 135 if (clock->timers.used && 138 136 time_after_eq(now, clock->timers.data[0]->expire)) 139 137 heap_pop(&clock->timers, ret, io_timer_cmp, NULL); 140 - 141 - spin_unlock(&clock->timer_lock); 142 - 143 138 return ret; 144 139 } 145 140 ··· 143 148 struct io_timer *timer; 144 149 unsigned long now = atomic64_add_return(sectors, &clock->now); 145 150 151 + spin_lock(&clock->timer_lock); 146 152 while ((timer = get_expired_timer(clock, now))) 147 153 timer->fn(timer); 154 + spin_unlock(&clock->timer_lock); 148 155 } 149 156 150 157 void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)

+44

fs/bcachefs/data_update.c

··· 5 5 #include "bkey_buf.h" 6 6 #include "btree_update.h" 7 7 #include "buckets.h" 8 + #include "compress.h" 8 9 #include "data_update.h" 10 + #include "disk_groups.h" 9 11 #include "ec.h" 10 12 #include "error.h" 11 13 #include "extents.h" ··· 456 454 } 457 455 } 458 456 457 + void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, 458 + struct bch_io_opts *io_opts, 459 + struct data_update_opts *data_opts) 460 + { 461 + printbuf_tabstop_push(out, 20); 462 + prt_str(out, "rewrite ptrs:\t"); 463 + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); 464 + prt_newline(out); 465 + 466 + prt_str(out, "kill ptrs:\t"); 467 + bch2_prt_u64_base2(out, data_opts->kill_ptrs); 468 + prt_newline(out); 469 + 470 + prt_str(out, "target:\t"); 471 + bch2_target_to_text(out, c, data_opts->target); 472 + prt_newline(out); 473 + 474 + prt_str(out, "compression:\t"); 475 + bch2_compression_opt_to_text(out, background_compression(*io_opts)); 476 + prt_newline(out); 477 + 478 + prt_str(out, "extra replicas:\t"); 479 + prt_u64(out, data_opts->extra_replicas); 480 + } 481 + 482 + void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) 483 + { 484 + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); 485 + prt_newline(out); 486 + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); 487 + } 488 + 459 489 int bch2_extent_drop_ptrs(struct btree_trans *trans, 460 490 struct btree_iter *iter, 461 491 struct bkey_s_c k, ··· 676 642 */ 677 643 if (!(durability_have + durability_removing)) 678 644 m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); 645 + 646 + if (!m->op.nr_replicas) { 647 + struct printbuf buf = PRINTBUF; 648 + 649 + bch2_data_update_to_text(&buf, m); 650 + WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); 651 + printbuf_exit(&buf); 652 + ret = -BCH_ERR_data_update_done; 653 + goto done; 654 + } 679 655 680 656 m->op.nr_replicas_required = m->op.nr_replicas; 681 657

+5

fs/bcachefs/data_update.h

··· 17 17 unsigned write_flags; 18 18 }; 19 19 20 + void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, 21 + struct bch_io_opts *, struct data_update_opts *); 22 + 20 23 struct data_update { 21 24 /* extent being updated: */ 22 25 enum btree_id btree_id; ··· 29 26 struct bch_move_stats *stats; 30 27 struct bch_write_op op; 31 28 }; 29 + 30 + void bch2_data_update_to_text(struct printbuf *, struct data_update *); 32 31 33 32 int bch2_data_update_index_update(struct bch_write_op *); 34 33

+6 -6

fs/bcachefs/debug.c

··· 610 610 list_sort(&c->btree_trans_list, list_ptr_order_cmp); 611 611 612 612 list_for_each_entry(trans, &c->btree_trans_list, list) { 613 - if ((ulong) trans < i->iter) 613 + if ((ulong) trans <= i->iter) 614 614 continue; 615 615 616 616 i->iter = (ulong) trans; ··· 832 832 static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) 833 833 { 834 834 struct btree_trans *trans; 835 - pid_t iter = 0; 835 + ulong iter = 0; 836 836 restart: 837 837 seqmutex_lock(&c->btree_trans_lock); 838 - list_for_each_entry(trans, &c->btree_trans_list, list) { 839 - struct task_struct *task = READ_ONCE(trans->locking_wait.task); 838 + list_sort(&c->btree_trans_list, list_ptr_order_cmp); 840 839 841 - if (!task || task->pid <= iter) 840 + list_for_each_entry(trans, &c->btree_trans_list, list) { 841 + if ((ulong) trans <= iter) 842 842 continue; 843 843 844 - iter = task->pid; 844 + iter = (ulong) trans; 845 845 846 846 if (!closure_get_not_zero(&trans->ref)) 847 847 continue;

+4 -2

fs/bcachefs/eytzinger.h

··· 48 48 49 49 static inline unsigned eytzinger1_first(unsigned size) 50 50 { 51 - return rounddown_pow_of_two(size); 51 + return size ? rounddown_pow_of_two(size) : 0; 52 52 } 53 53 54 54 static inline unsigned eytzinger1_last(unsigned size) ··· 101 101 102 102 static inline unsigned eytzinger1_extra(unsigned size) 103 103 { 104 - return (size + 1 - rounddown_pow_of_two(size)) << 1; 104 + return size 105 + ? (size + 1 - rounddown_pow_of_two(size)) << 1 106 + : 0; 105 107 } 106 108 107 109 static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,

+10 -1

fs/bcachefs/fs.c

··· 194 194 * discard_new_inode() expects it to be set... 195 195 */ 196 196 inode->v.i_flags |= I_NEW; 197 + /* 198 + * We don't want bch2_evict_inode() to delete the inode on disk, 199 + * we just raced and had another inode in cache. Normally new 200 + * inodes don't have nlink == 0 - except tmpfiles do... 201 + */ 202 + set_nlink(&inode->v, 1); 197 203 discard_new_inode(&inode->v); 198 204 inode = old; 199 205 } else { ··· 2032 2026 __bch2_fs_stop(c); 2033 2027 deactivate_locked_super(sb); 2034 2028 err: 2029 + if (ret) 2030 + pr_err("error: %s", bch2_err_str(ret)); 2035 2031 /* 2036 2032 * On an inconsistency error in recovery we might see an -EROFS derived 2037 2033 * errorcode (from the journal), but we don't want to return that to ··· 2073 2065 { 2074 2066 int ret = -ENOMEM; 2075 2067 2076 - bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); 2068 + bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT | 2069 + SLAB_ACCOUNT); 2077 2070 if (!bch2_inode_cache) 2078 2071 goto err; 2079 2072

+3 -1

fs/bcachefs/io_read.c

··· 389 389 390 390 bch2_bkey_buf_reassemble(&sk, c, k); 391 391 k = bkey_i_to_s_c(sk.k); 392 - bch2_trans_unlock(trans); 393 392 394 393 if (!bch2_bkey_matches_ptr(c, k, 395 394 rbio->pick.ptr, ··· 1002 1003 rbio->version = k.k->version; 1003 1004 rbio->promote = promote; 1004 1005 INIT_WORK(&rbio->work, NULL); 1006 + 1007 + if (flags & BCH_READ_NODECODE) 1008 + orig->pick = pick; 1005 1009 1006 1010 rbio->bio.bi_opf = orig->bio.bi_opf; 1007 1011 rbio->bio.bi_iter.bi_sector = pick.ptr.offset;

+10 -8

fs/bcachefs/journal.c

··· 1095 1095 return ret; 1096 1096 } 1097 1097 1098 - int bch2_dev_journal_alloc(struct bch_dev *ca) 1098 + int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) 1099 1099 { 1100 1100 unsigned nr; 1101 1101 int ret; ··· 1117 1117 min(1 << 13, 1118 1118 (1 << 24) / ca->mi.bucket_size)); 1119 1119 1120 - ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); 1120 + ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); 1121 1121 err: 1122 1122 bch_err_fn(ca, ret); 1123 1123 return ret; ··· 1129 1129 if (ca->journal.nr) 1130 1130 continue; 1131 1131 1132 - int ret = bch2_dev_journal_alloc(ca); 1132 + int ret = bch2_dev_journal_alloc(ca, true); 1133 1133 if (ret) { 1134 1134 percpu_ref_put(&ca->io_ref); 1135 1135 return ret; ··· 1184 1184 journal_quiesce(j); 1185 1185 cancel_delayed_work_sync(&j->write_work); 1186 1186 1187 - BUG_ON(!bch2_journal_error(j) && 1188 - test_bit(JOURNAL_replay_done, &j->flags) && 1189 - j->last_empty_seq != journal_cur_seq(j)); 1187 + WARN(!bch2_journal_error(j) && 1188 + test_bit(JOURNAL_replay_done, &j->flags) && 1189 + j->last_empty_seq != journal_cur_seq(j), 1190 + "journal shutdown error: cur seq %llu but last empty seq %llu", 1191 + journal_cur_seq(j), j->last_empty_seq); 1190 1192 1191 1193 if (!bch2_journal_error(j)) 1192 1194 clear_bit(JOURNAL_running, &j->flags); ··· 1420 1418 unsigned long now = jiffies; 1421 1419 u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; 1422 1420 1423 - if (!out->nr_tabstops) 1424 - printbuf_tabstop_push(out, 28); 1421 + printbuf_tabstops_reset(out); 1422 + printbuf_tabstop_push(out, 28); 1425 1423 out->atomic++; 1426 1424 1427 1425 rcu_read_lock();

+1 -1

fs/bcachefs/journal.h

··· 433 433 434 434 int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, 435 435 unsigned nr); 436 - int bch2_dev_journal_alloc(struct bch_dev *); 436 + int bch2_dev_journal_alloc(struct bch_dev *, bool); 437 437 int bch2_fs_journal_alloc(struct bch_fs *); 438 438 439 439 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);

+8 -4

fs/bcachefs/journal_io.c

··· 415 415 flags|BCH_VALIDATE_journal); 416 416 if (ret == FSCK_DELETED_KEY) 417 417 continue; 418 + else if (ret) 419 + return ret; 418 420 419 421 k = bkey_next(k); 420 422 } ··· 1764 1762 1765 1763 if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { 1766 1764 spin_lock(&j->lock); 1767 - closure_wait(&j->async_wait, cl); 1765 + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { 1766 + closure_wait(&j->async_wait, cl); 1767 + spin_unlock(&j->lock); 1768 + continue_at(cl, journal_write_preflush, j->wq); 1769 + return; 1770 + } 1768 1771 spin_unlock(&j->lock); 1769 - 1770 - continue_at(cl, journal_write_preflush, j->wq); 1771 - return; 1772 1772 } 1773 1773 1774 1774 if (w->separate_flush) {

+39

fs/bcachefs/lru.c

··· 77 77 NULL 78 78 }; 79 79 80 + int bch2_lru_check_set(struct btree_trans *trans, 81 + u16 lru_id, u64 time, 82 + struct bkey_s_c referring_k, 83 + struct bkey_buf *last_flushed) 84 + { 85 + struct bch_fs *c = trans->c; 86 + struct printbuf buf = PRINTBUF; 87 + struct btree_iter lru_iter; 88 + struct bkey_s_c lru_k = 89 + bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, 90 + lru_pos(lru_id, 91 + bucket_to_u64(referring_k.k->p), 92 + time), 0); 93 + int ret = bkey_err(lru_k); 94 + if (ret) 95 + return ret; 96 + 97 + if (lru_k.k->type != KEY_TYPE_set) { 98 + ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); 99 + if (ret) 100 + goto err; 101 + 102 + if (fsck_err(c, alloc_key_to_missing_lru_entry, 103 + "missing %s lru entry\n" 104 + " %s", 105 + bch2_lru_types[lru_type(lru_k)], 106 + (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { 107 + ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time); 108 + if (ret) 109 + goto err; 110 + } 111 + } 112 + err: 113 + fsck_err: 114 + bch2_trans_iter_exit(trans, &lru_iter); 115 + printbuf_exit(&buf); 116 + return ret; 117 + } 118 + 80 119 static int bch2_check_lru_key(struct btree_trans *trans, 81 120 struct btree_iter *lru_iter, 82 121 struct bkey_s_c lru_k,

+3

fs/bcachefs/lru.h

··· 61 61 int bch2_lru_set(struct btree_trans *, u16, u64, u64); 62 62 int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); 63 63 64 + struct bkey_buf; 65 + int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *); 66 + 64 67 int bch2_check_lrus(struct bch_fs *); 65 68 66 69 #endif /* _BCACHEFS_LRU_H */

-25

fs/bcachefs/move.c

··· 36 36 NULL 37 37 }; 38 38 39 - static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, 40 - struct bch_io_opts *io_opts, 41 - struct data_update_opts *data_opts) 42 - { 43 - printbuf_tabstop_push(out, 20); 44 - prt_str(out, "rewrite ptrs:\t"); 45 - bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); 46 - prt_newline(out); 47 - 48 - prt_str(out, "kill ptrs:\t"); 49 - bch2_prt_u64_base2(out, data_opts->kill_ptrs); 50 - prt_newline(out); 51 - 52 - prt_str(out, "target:\t"); 53 - bch2_target_to_text(out, c, data_opts->target); 54 - prt_newline(out); 55 - 56 - prt_str(out, "compression:\t"); 57 - bch2_compression_opt_to_text(out, background_compression(*io_opts)); 58 - prt_newline(out); 59 - 60 - prt_str(out, "extra replicas:\t"); 61 - prt_u64(out, data_opts->extra_replicas); 62 - } 63 - 64 39 static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, 65 40 struct bch_io_opts *io_opts, 66 41 struct data_update_opts *data_opts)

+2 -1

fs/bcachefs/sb-errors_format.h

··· 286 286 x(accounting_mismatch, 272, 0) \ 287 287 x(accounting_replicas_not_marked, 273, 0) \ 288 288 x(invalid_btree_id, 274, 0) \ 289 - x(alloc_key_io_time_bad, 275, 0) 289 + x(alloc_key_io_time_bad, 275, 0) \ 290 + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) 290 291 291 292 enum bch_sb_error_id { 292 293 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,

+7 -4

fs/bcachefs/super.c

··· 563 563 BUG_ON(atomic_read(&c->journal_keys.ref)); 564 564 bch2_fs_btree_write_buffer_exit(c); 565 565 percpu_free_rwsem(&c->mark_lock); 566 - EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved)); 567 - free_percpu(c->online_reserved); 566 + if (c->online_reserved) { 567 + u64 v = percpu_u64_get(c->online_reserved); 568 + WARN(v, "online_reserved not 0 at shutdown: %lli", v); 569 + free_percpu(c->online_reserved); 570 + } 568 571 569 572 darray_exit(&c->btree_roots_extra); 570 573 free_percpu(c->pcpu); ··· 1772 1769 if (ret) 1773 1770 goto err; 1774 1771 1775 - ret = bch2_dev_journal_alloc(ca); 1772 + ret = bch2_dev_journal_alloc(ca, true); 1776 1773 bch_err_msg(c, ret, "allocating journal"); 1777 1774 if (ret) 1778 1775 goto err; ··· 1932 1929 } 1933 1930 1934 1931 if (!ca->journal.nr) { 1935 - ret = bch2_dev_journal_alloc(ca); 1932 + ret = bch2_dev_journal_alloc(ca, false); 1936 1933 bch_err_msg(ca, ret, "allocating journal"); 1937 1934 if (ret) 1938 1935 goto err;

+1 -1

fs/btrfs/extent_io.c

··· 3553 3553 for (int i = 0; i < num_folios; i++) { 3554 3554 if (eb->folios[i]) { 3555 3555 detach_extent_buffer_folio(eb, eb->folios[i]); 3556 - __folio_put(eb->folios[i]); 3556 + folio_put(eb->folios[i]); 3557 3557 } 3558 3558 } 3559 3559 __free_extent_buffer(eb);

+1 -1

fs/btrfs/inode.c

··· 10385 10385 out_folios: 10386 10386 for (i = 0; i < nr_folios; i++) { 10387 10387 if (folios[i]) 10388 - __folio_put(folios[i]); 10388 + folio_put(folios[i]); 10389 10389 } 10390 10390 kvfree(folios); 10391 10391 out:

+8 -2

fs/btrfs/qgroup.c

··· 3062 3062 struct btrfs_qgroup_inherit *inherit, 3063 3063 size_t size) 3064 3064 { 3065 - if (!btrfs_qgroup_enabled(fs_info)) 3066 - return 0; 3067 3065 if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP) 3068 3066 return -EOPNOTSUPP; 3069 3067 if (size < sizeof(*inherit) || size > PAGE_SIZE) ··· 3081 3083 3082 3084 if (size != struct_size(inherit, qgroups, inherit->num_qgroups)) 3083 3085 return -EINVAL; 3086 + 3087 + /* 3088 + * Skip the inherit source qgroups check if qgroup is not enabled. 3089 + * Qgroup can still be later enabled causing problems, but in that case 3090 + * btrfs_qgroup_inherit() would just ignore those invalid ones. 3091 + */ 3092 + if (!btrfs_qgroup_enabled(fs_info)) 3093 + return 0; 3084 3094 3085 3095 /* 3086 3096 * Now check all the remaining qgroups, they should all:

+7 -2

fs/btrfs/ref-verify.c

··· 441 441 u32 item_size = btrfs_item_size(leaf, slot); 442 442 unsigned long end, ptr; 443 443 u64 offset, flags, count; 444 - int type, ret; 444 + int type; 445 + int ret = 0; 445 446 446 447 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 447 448 flags = btrfs_extent_flags(leaf, ei); ··· 487 486 key->objectid, key->offset); 488 487 break; 489 488 case BTRFS_EXTENT_OWNER_REF_KEY: 490 - WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); 489 + if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) { 490 + btrfs_err(fs_info, 491 + "found extent owner ref without simple quotas enabled"); 492 + ret = -EINVAL; 493 + } 491 494 break; 492 495 default: 493 496 btrfs_err(fs_info, "invalid key type in iref");

+21 -3

fs/btrfs/space-info.c

··· 373 373 * "optimal" chunk size based on the fs size. However when we actually 374 374 * allocate the chunk we will strip this down further, making it no more 375 375 * than 10% of the disk or 1G, whichever is smaller. 376 + * 377 + * On the zoned mode, we need to use zone_size (= 378 + * data_sinfo->chunk_size) as it is. 376 379 */ 377 380 data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 378 - data_chunk_size = min(data_sinfo->chunk_size, 379 - mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); 380 - data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); 381 + if (!btrfs_is_zoned(fs_info)) { 382 + data_chunk_size = min(data_sinfo->chunk_size, 383 + mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); 384 + data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); 385 + } else { 386 + data_chunk_size = data_sinfo->chunk_size; 387 + } 381 388 382 389 /* 383 390 * Since data allocations immediately use block groups as part of the ··· 412 405 avail >>= 3; 413 406 else 414 407 avail >>= 1; 408 + 409 + /* 410 + * On the zoned mode, we always allocate one zone as one chunk. 411 + * Returning non-zone size aligned bytes here will result in 412 + * less pressure for the async metadata reclaim process, and it 413 + * will over-commit too much leading to ENOSPC. Align down to the 414 + * zone size to avoid that. 415 + */ 416 + if (btrfs_is_zoned(fs_info)) 417 + avail = ALIGN_DOWN(avail, fs_info->zone_size); 418 + 415 419 return avail; 416 420 } 417 421

+44 -1

fs/cachefiles/cache.c

··· 8 8 #include <linux/slab.h> 9 9 #include <linux/statfs.h> 10 10 #include <linux/namei.h> 11 + #include <trace/events/fscache.h> 11 12 #include "internal.h" 12 13 13 14 /* ··· 313 312 } 314 313 315 314 /* 316 - * Withdraw volumes. 315 + * Withdraw fscache volumes. 316 + */ 317 + static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache) 318 + { 319 + struct list_head *cur; 320 + struct cachefiles_volume *volume; 321 + struct fscache_volume *vcookie; 322 + 323 + _enter(""); 324 + retry: 325 + spin_lock(&cache->object_list_lock); 326 + list_for_each(cur, &cache->volumes) { 327 + volume = list_entry(cur, struct cachefiles_volume, cache_link); 328 + 329 + if (atomic_read(&volume->vcookie->n_accesses) == 0) 330 + continue; 331 + 332 + vcookie = fscache_try_get_volume(volume->vcookie, 333 + fscache_volume_get_withdraw); 334 + if (vcookie) { 335 + spin_unlock(&cache->object_list_lock); 336 + fscache_withdraw_volume(vcookie); 337 + fscache_put_volume(vcookie, fscache_volume_put_withdraw); 338 + goto retry; 339 + } 340 + } 341 + spin_unlock(&cache->object_list_lock); 342 + 343 + _leave(""); 344 + } 345 + 346 + /* 347 + * Withdraw cachefiles volumes. 
317 348 */ 318 349 static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) 319 350 { 320 351 _enter(""); 321 352 322 353 for (;;) { 354 + struct fscache_volume *vcookie = NULL; 323 355 struct cachefiles_volume *volume = NULL; 324 356 325 357 spin_lock(&cache->object_list_lock); 326 358 if (!list_empty(&cache->volumes)) { 327 359 volume = list_first_entry(&cache->volumes, 328 360 struct cachefiles_volume, cache_link); 361 + vcookie = fscache_try_get_volume(volume->vcookie, 362 + fscache_volume_get_withdraw); 363 + if (!vcookie) { 364 + spin_unlock(&cache->object_list_lock); 365 + cpu_relax(); 366 + continue; 367 + } 329 368 list_del_init(&volume->cache_link); 330 369 } 331 370 spin_unlock(&cache->object_list_lock); ··· 373 332 break; 374 333 375 334 cachefiles_withdraw_volume(volume); 335 + fscache_put_volume(vcookie, fscache_volume_put_withdraw); 376 336 } 377 337 378 338 _leave(""); ··· 413 371 pr_info("File cache on %s unregistering\n", fscache->name); 414 372 415 373 fscache_withdraw_cache(fscache); 374 + cachefiles_withdraw_fscache_volumes(cache); 416 375 417 376 /* we now have to destroy all the active objects pertaining to this 418 377 * cache - which we do by passing them off to thread pool to be

+2 -2

fs/cachefiles/daemon.c

··· 366 366 367 367 if (cachefiles_in_ondemand_mode(cache)) { 368 368 if (!xa_empty(&cache->reqs)) { 369 - rcu_read_lock(); 369 + xas_lock(&xas); 370 370 xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { 371 371 if (!cachefiles_ondemand_is_reopening_read(req)) { 372 372 mask |= EPOLLIN; 373 373 break; 374 374 } 375 375 } 376 - rcu_read_unlock(); 376 + xas_unlock(&xas); 377 377 } 378 378 } else { 379 379 if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))

+3

fs/cachefiles/internal.h

··· 48 48 CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ 49 49 CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ 50 50 CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ 51 + CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */ 51 52 }; 52 53 53 54 struct cachefiles_ondemand_info { ··· 129 128 unsigned long req_id_next; 130 129 struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ 131 130 u32 ondemand_id_next; 131 + u32 msg_id_next; 132 132 }; 133 133 134 134 static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) ··· 337 335 CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); 338 336 CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); 339 337 CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); 338 + CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING); 340 339 341 340 static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) 342 341 {

+46 -6

fs/cachefiles/ondemand.c

··· 517 517 */ 518 518 xas_lock(&xas); 519 519 520 - if (test_bit(CACHEFILES_DEAD, &cache->flags)) { 520 + if (test_bit(CACHEFILES_DEAD, &cache->flags) || 521 + cachefiles_ondemand_object_is_dropping(object)) { 521 522 xas_unlock(&xas); 522 523 ret = -EIO; 523 524 goto out; ··· 528 527 smp_mb(); 529 528 530 529 if (opcode == CACHEFILES_OP_CLOSE && 531 - !cachefiles_ondemand_object_is_open(object)) { 530 + !cachefiles_ondemand_object_is_open(object)) { 532 531 WARN_ON_ONCE(object->ondemand->ondemand_id == 0); 533 532 xas_unlock(&xas); 534 533 ret = -EIO; 535 534 goto out; 536 535 } 537 536 538 - xas.xa_index = 0; 537 + /* 538 + * Cyclically find a free xas to avoid msg_id reuse that would 539 + * cause the daemon to successfully copen a stale msg_id. 540 + */ 541 + xas.xa_index = cache->msg_id_next; 539 542 xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); 543 + if (xas.xa_node == XAS_RESTART) { 544 + xas.xa_index = 0; 545 + xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK); 546 + } 540 547 if (xas.xa_node == XAS_RESTART) 541 548 xas_set_err(&xas, -EBUSY); 549 + 542 550 xas_store(&xas, req); 543 - xas_clear_mark(&xas, XA_FREE_MARK); 544 - xas_set_mark(&xas, CACHEFILES_REQ_NEW); 551 + if (xas_valid(&xas)) { 552 + cache->msg_id_next = xas.xa_index + 1; 553 + xas_clear_mark(&xas, XA_FREE_MARK); 554 + xas_set_mark(&xas, CACHEFILES_REQ_NEW); 555 + } 545 556 xas_unlock(&xas); 546 557 } while (xas_nomem(&xas, GFP_KERNEL)); 547 558 ··· 581 568 * If error occurs after creating the anonymous fd, 582 569 * cachefiles_ondemand_fd_release() will set object to close. 
583 570 */ 584 - if (opcode == CACHEFILES_OP_OPEN) 571 + if (opcode == CACHEFILES_OP_OPEN && 572 + !cachefiles_ondemand_object_is_dropping(object)) 585 573 cachefiles_ondemand_set_object_close(object); 586 574 kfree(req); 587 575 return ret; ··· 681 667 682 668 void cachefiles_ondemand_clean_object(struct cachefiles_object *object) 683 669 { 670 + unsigned long index; 671 + struct cachefiles_req *req; 672 + struct cachefiles_cache *cache; 673 + 674 + if (!object->ondemand) 675 + return; 676 + 684 677 cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, 685 678 cachefiles_ondemand_init_close_req, NULL); 679 + 680 + if (!object->ondemand->ondemand_id) 681 + return; 682 + 683 + /* Cancel all requests for the object that is being dropped. */ 684 + cache = object->volume->cache; 685 + xa_lock(&cache->reqs); 686 + cachefiles_ondemand_set_object_dropping(object); 687 + xa_for_each(&cache->reqs, index, req) { 688 + if (req->object == object) { 689 + req->error = -EIO; 690 + complete(&req->done); 691 + __xa_erase(&cache->reqs, index); 692 + } 693 + } 694 + xa_unlock(&cache->reqs); 695 + 696 + /* Wait for ondemand_object_worker() to finish to avoid UAF. */ 697 + cancel_work_sync(&object->ondemand->ondemand_work); 686 698 } 687 699 688 700 int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,

-1

fs/cachefiles/volume.c

··· 133 133 134 134 void cachefiles_withdraw_volume(struct cachefiles_volume *volume) 135 135 { 136 - fscache_withdraw_volume(volume->vcookie); 137 136 cachefiles_set_volume_xattr(volume); 138 137 __cachefiles_free_volume(volume); 139 138 }

+4 -1

fs/cachefiles/xattr.c

··· 110 110 if (xlen == 0) 111 111 xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen); 112 112 if (xlen != tlen) { 113 - if (xlen < 0) 113 + if (xlen < 0) { 114 + ret = xlen; 114 115 trace_cachefiles_vfs_error(object, file_inode(file), xlen, 115 116 cachefiles_trace_getxattr_error); 117 + } 116 118 if (xlen == -EIO) 117 119 cachefiles_io_error_obj( 118 120 object, ··· 254 252 xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len); 255 253 if (xlen != len) { 256 254 if (xlen < 0) { 255 + ret = xlen; 257 256 trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, 258 257 cachefiles_trace_getxattr_error); 259 258 if (xlen == -EIO)

+9 -3

fs/dcache.c

··· 355 355 flags &= ~DCACHE_ENTRY_TYPE; 356 356 WRITE_ONCE(dentry->d_flags, flags); 357 357 dentry->d_inode = NULL; 358 - if (flags & DCACHE_LRU_LIST) 358 + /* 359 + * The negative counter only tracks dentries on the LRU. Don't inc if 360 + * d_lru is on another list. 361 + */ 362 + if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) 359 363 this_cpu_inc(nr_dentry_negative); 360 364 } 361 365 ··· 1848 1844 1849 1845 spin_lock(&dentry->d_lock); 1850 1846 /* 1851 - * Decrement negative dentry count if it was in the LRU list. 1847 + * The negative counter only tracks dentries on the LRU. Don't dec if 1848 + * d_lru is on another list. 1852 1849 */ 1853 - if (dentry->d_flags & DCACHE_LRU_LIST) 1850 + if ((dentry->d_flags & 1851 + (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) 1854 1852 this_cpu_dec(nr_dentry_negative); 1855 1853 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1856 1854 raw_write_seqcount_begin(&dentry->d_seq);

+1 -1

fs/hfsplus/xattr.c

··· 696 696 return err; 697 697 } 698 698 699 - strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 699 + strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 700 700 XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); 701 701 if (!strbuf) { 702 702 res = -ENOMEM;

+1 -1

fs/locks.c

··· 1367 1367 locks_wake_up_blocks(&left->c); 1368 1368 } 1369 1369 out: 1370 + trace_posix_lock_inode(inode, request, error); 1370 1371 spin_unlock(&ctx->flc_lock); 1371 1372 percpu_up_read(&file_rwsem); 1372 - trace_posix_lock_inode(inode, request, error); 1373 1373 /* 1374 1374 * Free any unused locks. 1375 1375 */

+1 -2

fs/minix/namei.c

··· 213 213 if (!new_de) 214 214 goto out_dir; 215 215 err = minix_set_link(new_de, new_page, old_inode); 216 - kunmap(new_page); 217 - put_page(new_page); 216 + unmap_and_put_page(new_page, new_de); 218 217 if (err) 219 218 goto out_dir; 220 219 inode_set_ctime_current(new_inode);

+7 -7

fs/netfs/buffered_read.c

··· 117 117 if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { 118 118 if (folio->index == rreq->no_unlock_folio && 119 119 test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) 120 - _debug("no unlock"); 120 + kdebug("no unlock"); 121 121 else 122 122 folio_unlock(folio); 123 123 } ··· 204 204 struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); 205 205 int ret; 206 206 207 - _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); 207 + kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); 208 208 209 209 if (readahead_count(ractl) == 0) 210 210 return; ··· 268 268 struct folio *sink = NULL; 269 269 int ret; 270 270 271 - _enter("%lx", folio->index); 271 + kenter("%lx", folio->index); 272 272 273 273 rreq = netfs_alloc_request(mapping, file, 274 274 folio_file_pos(folio), folio_size(folio), ··· 508 508 509 509 have_folio: 510 510 *_folio = folio; 511 - _leave(" = 0"); 511 + kleave(" = 0"); 512 512 return 0; 513 513 514 514 error_put: ··· 518 518 folio_unlock(folio); 519 519 folio_put(folio); 520 520 } 521 - _leave(" = %d", ret); 521 + kleave(" = %d", ret); 522 522 return ret; 523 523 } 524 524 EXPORT_SYMBOL(netfs_write_begin); ··· 536 536 size_t flen = folio_size(folio); 537 537 int ret; 538 538 539 - _enter("%zx @%llx", flen, start); 539 + kenter("%zx @%llx", flen, start); 540 540 541 541 ret = -ENOMEM; 542 542 ··· 567 567 error_put: 568 568 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); 569 569 error: 570 - _leave(" = %d", ret); 570 + kleave(" = %d", ret); 571 571 return ret; 572 572 } 573 573

+6 -6

fs/netfs/buffered_write.c

··· 56 56 struct netfs_group *group = netfs_folio_group(folio); 57 57 loff_t pos = folio_file_pos(folio); 58 58 59 - _enter(""); 59 + kenter(""); 60 60 61 61 if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) 62 62 return NETFS_FLUSH_CONTENT; ··· 272 272 */ 273 273 howto = netfs_how_to_modify(ctx, file, folio, netfs_group, 274 274 flen, offset, part, maybe_trouble); 275 - _debug("howto %u", howto); 275 + kdebug("howto %u", howto); 276 276 switch (howto) { 277 277 case NETFS_JUST_PREFETCH: 278 278 ret = netfs_prefetch_for_write(file, folio, offset, part); 279 279 if (ret < 0) { 280 - _debug("prefetch = %zd", ret); 280 + kdebug("prefetch = %zd", ret); 281 281 goto error_folio_unlock; 282 282 } 283 283 break; ··· 418 418 } 419 419 420 420 iocb->ki_pos += written; 421 - _leave(" = %zd [%zd]", written, ret); 421 + kleave(" = %zd [%zd]", written, ret); 422 422 return written ? written : ret; 423 423 424 424 error_folio_unlock: ··· 491 491 struct netfs_inode *ictx = netfs_inode(inode); 492 492 ssize_t ret; 493 493 494 - _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); 494 + kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); 495 495 496 496 if (!iov_iter_count(from)) 497 497 return 0; ··· 529 529 vm_fault_t ret = VM_FAULT_RETRY; 530 530 int err; 531 531 532 - _enter("%lx", folio->index); 532 + kenter("%lx", folio->index); 533 533 534 534 sb_start_pagefault(inode->i_sb); 535 535

+1 -1

fs/netfs/direct_read.c

··· 33 33 size_t orig_count = iov_iter_count(iter); 34 34 bool async = !is_sync_kiocb(iocb); 35 35 36 - _enter(""); 36 + kenter(""); 37 37 38 38 if (!orig_count) 39 39 return 0; /* Don't update atime */

+4 -4

fs/netfs/direct_write.c

··· 37 37 size_t len = iov_iter_count(iter); 38 38 bool async = !is_sync_kiocb(iocb); 39 39 40 - _enter(""); 40 + kenter(""); 41 41 42 42 /* We're going to need a bounce buffer if what we transmit is going to 43 43 * be different in some way to the source buffer, e.g. because it gets ··· 45 45 */ 46 46 // TODO 47 47 48 - _debug("uw %llx-%llx", start, end); 48 + kdebug("uw %llx-%llx", start, end); 49 49 50 50 wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, 51 51 iocb->ki_flags & IOCB_DIRECT ? ··· 96 96 wreq->cleanup = netfs_cleanup_dio_write; 97 97 ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); 98 98 if (ret < 0) { 99 - _debug("begin = %zd", ret); 99 + kdebug("begin = %zd", ret); 100 100 goto out; 101 101 } 102 102 ··· 143 143 loff_t pos = iocb->ki_pos; 144 144 unsigned long long end = pos + iov_iter_count(from) - 1; 145 145 146 - _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); 146 + kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); 147 147 148 148 if (!iov_iter_count(from)) 149 149 return 0;

+2 -2

fs/netfs/fscache_cache.c

··· 237 237 { 238 238 int n_accesses; 239 239 240 - _enter("{%s,%s}", ops->name, cache->name); 240 + kenter("{%s,%s}", ops->name, cache->name); 241 241 242 242 BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING); 243 243 ··· 257 257 258 258 up_write(&fscache_addremove_sem); 259 259 pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name); 260 - _leave(" = 0 [%s]", cache->name); 260 + kleave(" = 0 [%s]", cache->name); 261 261 return 0; 262 262 } 263 263 EXPORT_SYMBOL(fscache_add_cache);

+14 -14

fs/netfs/fscache_cookie.c

··· 456 456 { 457 457 struct fscache_cookie *cookie; 458 458 459 - _enter("V=%x", volume->debug_id); 459 + kenter("V=%x", volume->debug_id); 460 460 461 461 if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255) 462 462 return NULL; ··· 484 484 485 485 trace_fscache_acquire(cookie); 486 486 fscache_stat(&fscache_n_acquires_ok); 487 - _leave(" = c=%08x", cookie->debug_id); 487 + kleave(" = c=%08x", cookie->debug_id); 488 488 return cookie; 489 489 } 490 490 EXPORT_SYMBOL(__fscache_acquire_cookie); ··· 505 505 enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed; 506 506 bool need_withdraw = false; 507 507 508 - _enter(""); 508 + kenter(""); 509 509 510 510 if (!cookie->volume->cache_priv) { 511 511 fscache_create_volume(cookie->volume, true); ··· 519 519 if (cookie->state != FSCACHE_COOKIE_STATE_FAILED) 520 520 fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); 521 521 need_withdraw = true; 522 - _leave(" [fail]"); 522 + kleave(" [fail]"); 523 523 goto out; 524 524 } 525 525 ··· 572 572 bool queue = false; 573 573 int n_active; 574 574 575 - _enter("c=%08x", cookie->debug_id); 575 + kenter("c=%08x", cookie->debug_id); 576 576 577 577 if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), 578 578 "Trying to use relinquished cookie\n")) ··· 636 636 spin_unlock(&cookie->lock); 637 637 if (queue) 638 638 fscache_queue_cookie(cookie, fscache_cookie_get_use_work); 639 - _leave(""); 639 + kleave(""); 640 640 } 641 641 EXPORT_SYMBOL(__fscache_use_cookie); 642 642 ··· 702 702 enum fscache_cookie_state state; 703 703 bool wake = false; 704 704 705 - _enter("c=%x", cookie->debug_id); 705 + kenter("c=%x", cookie->debug_id); 706 706 707 707 again: 708 708 spin_lock(&cookie->lock); ··· 820 820 spin_unlock(&cookie->lock); 821 821 if (wake) 822 822 wake_up_cookie_state(cookie); 823 - _leave(""); 823 + kleave(""); 824 824 } 825 825 826 826 static void fscache_cookie_worker(struct work_struct *work) ··· 867 867 
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); 868 868 spin_unlock(&cookie->lock); 869 869 fscache_stat(&fscache_n_cookies_lru_expired); 870 - _debug("lru c=%x", cookie->debug_id); 870 + kdebug("lru c=%x", cookie->debug_id); 871 871 __fscache_withdraw_cookie(cookie); 872 872 } 873 873 ··· 971 971 if (retire) 972 972 fscache_stat(&fscache_n_relinquishes_retire); 973 973 974 - _enter("c=%08x{%d},%d", 974 + kenter("c=%08x{%d},%d", 975 975 cookie->debug_id, atomic_read(&cookie->n_active), retire); 976 976 977 977 if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), ··· 1050 1050 { 1051 1051 bool is_caching; 1052 1052 1053 - _enter("c=%x", cookie->debug_id); 1053 + kenter("c=%x", cookie->debug_id); 1054 1054 1055 1055 fscache_stat(&fscache_n_invalidates); 1056 1056 ··· 1072 1072 case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */ 1073 1073 default: 1074 1074 spin_unlock(&cookie->lock); 1075 - _leave(" [no %u]", cookie->state); 1075 + kleave(" [no %u]", cookie->state); 1076 1076 return; 1077 1077 1078 1078 case FSCACHE_COOKIE_STATE_LOOKING_UP: ··· 1081 1081 fallthrough; 1082 1082 case FSCACHE_COOKIE_STATE_CREATING: 1083 1083 spin_unlock(&cookie->lock); 1084 - _leave(" [look %x]", cookie->inval_counter); 1084 + kleave(" [look %x]", cookie->inval_counter); 1085 1085 return; 1086 1086 1087 1087 case FSCACHE_COOKIE_STATE_ACTIVE: ··· 1094 1094 1095 1095 if (is_caching) 1096 1096 fscache_queue_cookie(cookie, fscache_cookie_get_inval_work); 1097 - _leave(" [inv]"); 1097 + kleave(" [inv]"); 1098 1098 return; 1099 1099 } 1100 1100 }

+6 -6

fs/netfs/fscache_io.c

··· 28 28 29 29 again: 30 30 if (!fscache_cache_is_live(cookie->volume->cache)) { 31 - _leave(" [broken]"); 31 + kleave(" [broken]"); 32 32 return false; 33 33 } 34 34 35 35 state = fscache_cookie_state(cookie); 36 - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); 36 + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); 37 37 38 38 switch (state) { 39 39 case FSCACHE_COOKIE_STATE_CREATING: ··· 52 52 case FSCACHE_COOKIE_STATE_DROPPED: 53 53 case FSCACHE_COOKIE_STATE_RELINQUISHING: 54 54 default: 55 - _leave(" [not live]"); 55 + kleave(" [not live]"); 56 56 return false; 57 57 } 58 58 ··· 92 92 spin_lock(&cookie->lock); 93 93 94 94 state = fscache_cookie_state(cookie); 95 - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); 95 + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); 96 96 97 97 switch (state) { 98 98 case FSCACHE_COOKIE_STATE_LOOKING_UP: ··· 140 140 cres->cache_priv = NULL; 141 141 cres->ops = NULL; 142 142 fscache_end_cookie_access(cookie, fscache_access_io_not_live); 143 - _leave(" = -ENOBUFS"); 143 + kleave(" = -ENOBUFS"); 144 144 return -ENOBUFS; 145 145 } 146 146 ··· 224 224 if (len == 0) 225 225 goto abandon; 226 226 227 - _enter("%llx,%zx", start, len); 227 + kenter("%llx,%zx", start, len); 228 228 229 229 wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS); 230 230 if (!wreq)

+1 -1

fs/netfs/fscache_main.c

··· 99 99 */ 100 100 void __exit fscache_exit(void) 101 101 { 102 - _enter(""); 102 + kenter(""); 103 103 104 104 kmem_cache_destroy(fscache_cookie_jar); 105 105 fscache_proc_cleanup();

+16 -2

fs/netfs/fscache_volume.c

··· 27 27 return volume; 28 28 } 29 29 30 + struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume, 31 + enum fscache_volume_trace where) 32 + { 33 + int ref; 34 + 35 + if (!__refcount_inc_not_zero(&volume->ref, &ref)) 36 + return NULL; 37 + 38 + trace_fscache_volume(volume->debug_id, ref + 1, where); 39 + return volume; 40 + } 41 + EXPORT_SYMBOL(fscache_try_get_volume); 42 + 30 43 static void fscache_see_volume(struct fscache_volume *volume, 31 44 enum fscache_volume_trace where) 32 45 { ··· 264 251 fscache_see_volume(volume, fscache_volume_new_acquire); 265 252 fscache_stat(&fscache_n_volumes); 266 253 up_write(&fscache_addremove_sem); 267 - _leave(" = v=%x", volume->debug_id); 254 + kleave(" = v=%x", volume->debug_id); 268 255 return volume; 269 256 270 257 err_vol: ··· 433 420 fscache_free_volume(volume); 434 421 } 435 422 } 423 + EXPORT_SYMBOL(fscache_put_volume); 436 424 437 425 /* 438 426 * Relinquish a volume representation cookie. ··· 466 452 { 467 453 int n_accesses; 468 454 469 - _debug("withdraw V=%x", volume->debug_id); 455 + kdebug("withdraw V=%x", volume->debug_id); 470 456 471 457 /* Allow wakeups on dec-to-0 */ 472 458 n_accesses = atomic_dec_return(&volume->n_accesses);

+1 -34

fs/netfs/internal.h

··· 34 34 /* 35 35 * main.c 36 36 */ 37 - extern unsigned int netfs_debug; 38 37 extern struct list_head netfs_io_requests; 39 38 extern spinlock_t netfs_proc_lock; 40 39 extern mempool_t netfs_request_pool; ··· 343 344 344 345 struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, 345 346 enum fscache_volume_trace where); 346 - void fscache_put_volume(struct fscache_volume *volume, 347 - enum fscache_volume_trace where); 348 347 bool fscache_begin_volume_access(struct fscache_volume *volume, 349 348 struct fscache_cookie *cookie, 350 349 enum fscache_access_trace why); ··· 353 356 * debug tracing 354 357 */ 355 358 #define dbgprintk(FMT, ...) \ 356 - printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) 359 + pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) 357 360 358 361 #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) 359 362 #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) 360 363 #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) 361 - 362 - #ifdef __KDEBUG 363 - #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) 364 - #define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) 365 - #define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) 366 - 367 - #elif defined(CONFIG_NETFS_DEBUG) 368 - #define _enter(FMT, ...) \ 369 - do { \ 370 - if (netfs_debug) \ 371 - kenter(FMT, ##__VA_ARGS__); \ 372 - } while (0) 373 - 374 - #define _leave(FMT, ...) \ 375 - do { \ 376 - if (netfs_debug) \ 377 - kleave(FMT, ##__VA_ARGS__); \ 378 - } while (0) 379 - 380 - #define _debug(FMT, ...) \ 381 - do { \ 382 - if (netfs_debug) \ 383 - kdebug(FMT, ##__VA_ARGS__); \ 384 - } while (0) 385 - 386 - #else 387 - #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) 388 - #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) 389 - #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) 390 - #endif 391 364 392 365 /* 393 366 * assertions

+6 -6

fs/netfs/io.c

··· 130 130 if (count == remaining) 131 131 return; 132 132 133 - _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", 133 + kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", 134 134 rreq->debug_id, subreq->debug_index, 135 135 iov_iter_count(&subreq->io_iter), subreq->transferred, 136 136 subreq->len, rreq->i_size, ··· 326 326 struct netfs_io_request *rreq = subreq->rreq; 327 327 int u; 328 328 329 - _enter("R=%x[%x]{%llx,%lx},%zd", 329 + kenter("R=%x[%x]{%llx,%lx},%zd", 330 330 rreq->debug_id, subreq->debug_index, 331 331 subreq->start, subreq->flags, transferred_or_error); 332 332 ··· 435 435 struct netfs_inode *ictx = netfs_inode(rreq->inode); 436 436 size_t lsize; 437 437 438 - _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); 438 + kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); 439 439 440 440 if (rreq->origin != NETFS_DIO_READ) { 441 441 source = netfs_cache_prepare_read(subreq, rreq->i_size); ··· 518 518 subreq->start = rreq->start + rreq->submitted; 519 519 subreq->len = io_iter->count; 520 520 521 - _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); 521 + kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); 522 522 list_add_tail(&subreq->rreq_link, &rreq->subrequests); 523 523 524 524 /* Call out to the cache to find out what it can do with the remaining ··· 570 570 struct iov_iter io_iter; 571 571 int ret; 572 572 573 - _enter("R=%x %llx-%llx", 573 + kenter("R=%x %llx-%llx", 574 574 rreq->debug_id, rreq->start, rreq->start + rreq->len - 1); 575 575 576 576 if (rreq->len == 0) { ··· 593 593 atomic_set(&rreq->nr_outstanding, 1); 594 594 io_iter = rreq->io_iter; 595 595 do { 596 - _debug("submit %llx + %llx >= %llx", 596 + kdebug("submit %llx + %llx >= %llx", 597 597 rreq->start, rreq->submitted, rreq->i_size); 598 598 if (rreq->origin == NETFS_DIO_READ && 599 599 rreq->start + rreq->submitted >= rreq->i_size)

-4

fs/netfs/main.c

··· 20 20 21 21 EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); 22 22 23 - unsigned netfs_debug; 24 - module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); 25 - MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); 26 - 27 23 static struct kmem_cache *netfs_request_slab; 28 24 static struct kmem_cache *netfs_subrequest_slab; 29 25 mempool_t netfs_request_pool;

+2 -2

fs/netfs/misc.c

··· 26 26 struct fscache_cookie *cookie = netfs_i_cookie(ictx); 27 27 bool need_use = false; 28 28 29 - _enter(""); 29 + kenter(""); 30 30 31 31 if (!filemap_dirty_folio(mapping, folio)) 32 32 return false; ··· 99 99 struct netfs_folio *finfo; 100 100 size_t flen = folio_size(folio); 101 101 102 - _enter("{%lx},%zx,%zx", folio->index, offset, length); 102 + kenter("{%lx},%zx,%zx", folio->index, offset, length); 103 103 104 104 if (!folio_test_private(folio)) 105 105 return;

+8 -8

fs/netfs/write_collect.c

··· 161 161 { 162 162 struct list_head *next; 163 163 164 - _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); 164 + kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); 165 165 166 166 if (list_empty(&stream->subrequests)) 167 167 return; ··· 374 374 unsigned int notes; 375 375 int s; 376 376 377 - _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); 377 + kenter("%llx-%llx", wreq->start, wreq->start + wreq->len); 378 378 trace_netfs_collect(wreq); 379 379 trace_netfs_rreq(wreq, netfs_rreq_trace_collect); 380 380 ··· 409 409 front = stream->front; 410 410 while (front) { 411 411 trace_netfs_collect_sreq(wreq, front); 412 - //_debug("sreq [%x] %llx %zx/%zx", 412 + //kdebug("sreq [%x] %llx %zx/%zx", 413 413 // front->debug_index, front->start, front->transferred, front->len); 414 414 415 415 /* Stall if there may be a discontinuity. */ ··· 598 598 out: 599 599 netfs_put_group_many(wreq->group, wreq->nr_group_rel); 600 600 wreq->nr_group_rel = 0; 601 - _leave(" = %x", notes); 601 + kleave(" = %x", notes); 602 602 return; 603 603 604 604 need_retry: ··· 606 606 * that any partially completed op will have had any wholly transferred 607 607 * folios removed from it. 
608 608 */ 609 - _debug("retry"); 609 + kdebug("retry"); 610 610 netfs_retry_writes(wreq); 611 611 goto out; 612 612 } ··· 621 621 size_t transferred; 622 622 int s; 623 623 624 - _enter("R=%x", wreq->debug_id); 624 + kenter("R=%x", wreq->debug_id); 625 625 626 626 netfs_see_request(wreq, netfs_rreq_trace_see_work); 627 627 if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { ··· 684 684 if (wreq->origin == NETFS_DIO_WRITE) 685 685 inode_dio_end(wreq->inode); 686 686 687 - _debug("finished"); 687 + kdebug("finished"); 688 688 trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); 689 689 clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); 690 690 wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); ··· 744 744 struct netfs_io_request *wreq = subreq->rreq; 745 745 struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; 746 746 747 - _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 747 + kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); 748 748 749 749 switch (subreq->source) { 750 750 case NETFS_UPLOAD_TO_SERVER:

+18 -18

fs/netfs/write_issue.c

··· 99 99 if (IS_ERR(wreq)) 100 100 return wreq; 101 101 102 - _enter("R=%x", wreq->debug_id); 102 + kenter("R=%x", wreq->debug_id); 103 103 104 104 ictx = netfs_inode(wreq->inode); 105 105 if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) ··· 159 159 subreq->max_nr_segs = INT_MAX; 160 160 subreq->stream_nr = stream->stream_nr; 161 161 162 - _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); 162 + kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index); 163 163 164 164 trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, 165 165 refcount_read(&subreq->ref), ··· 215 215 { 216 216 struct netfs_io_request *wreq = subreq->rreq; 217 217 218 - _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); 218 + kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); 219 219 220 220 if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 221 221 return netfs_write_subrequest_terminated(subreq, subreq->error, false); ··· 272 272 size_t part; 273 273 274 274 if (!stream->avail) { 275 - _leave("no write"); 275 + kleave("no write"); 276 276 return len; 277 277 } 278 278 279 - _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); 279 + kenter("R=%x[%x]", wreq->debug_id, subreq ? 
subreq->debug_index : 0); 280 280 281 281 if (subreq && start != subreq->start + subreq->len) { 282 282 netfs_issue_write(wreq, stream); ··· 288 288 subreq = stream->construct; 289 289 290 290 part = min(subreq->max_len - subreq->len, len); 291 - _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); 291 + kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); 292 292 subreq->len += part; 293 293 subreq->nr_segs++; 294 294 ··· 319 319 bool to_eof = false, streamw = false; 320 320 bool debug = false; 321 321 322 - _enter(""); 322 + kenter(""); 323 323 324 324 /* netfs_perform_write() may shift i_size around the page or from out 325 325 * of the page to beyond it, but cannot move i_size into or through the ··· 329 329 330 330 if (fpos >= i_size) { 331 331 /* mmap beyond eof. */ 332 - _debug("beyond eof"); 332 + kdebug("beyond eof"); 333 333 folio_start_writeback(folio); 334 334 folio_unlock(folio); 335 335 wreq->nr_group_rel += netfs_folio_written_back(folio); ··· 363 363 } 364 364 flen -= foff; 365 365 366 - _debug("folio %zx %zx %zx", foff, flen, fsize); 366 + kdebug("folio %zx %zx %zx", foff, flen, fsize); 367 367 368 368 /* Deal with discontinuities in the stream of dirty pages. These can 369 369 * arise from a number of sources: ··· 487 487 for (int s = 0; s < NR_IO_STREAMS; s++) 488 488 netfs_issue_write(wreq, &wreq->io_streams[s]); 489 489 490 - _leave(" = 0"); 490 + kleave(" = 0"); 491 491 return 0; 492 492 } 493 493 ··· 522 522 netfs_stat(&netfs_n_wh_writepages); 523 523 524 524 do { 525 - _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); 525 + kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); 526 526 527 527 /* It appears we don't have to handle cyclic writeback wrapping. 
*/ 528 528 WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); ··· 546 546 mutex_unlock(&ictx->wb_lock); 547 547 548 548 netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 549 - _leave(" = %d", error); 549 + kleave(" = %d", error); 550 550 return error; 551 551 552 552 couldnt_start: 553 553 netfs_kill_dirty_pages(mapping, wbc, folio); 554 554 out: 555 555 mutex_unlock(&ictx->wb_lock); 556 - _leave(" = %d", error); 556 + kleave(" = %d", error); 557 557 return error; 558 558 } 559 559 EXPORT_SYMBOL(netfs_writepages); ··· 590 590 struct folio *folio, size_t copied, bool to_page_end, 591 591 struct folio **writethrough_cache) 592 592 { 593 - _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", 593 + kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u", 594 594 wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); 595 595 596 596 if (!*writethrough_cache) { ··· 624 624 struct netfs_inode *ictx = netfs_inode(wreq->inode); 625 625 int ret; 626 626 627 - _enter("R=%x", wreq->debug_id); 627 + kenter("R=%x", wreq->debug_id); 628 628 629 629 if (writethrough_cache) 630 630 netfs_write_folio(wreq, wbc, writethrough_cache); ··· 657 657 loff_t start = wreq->start; 658 658 int error = 0; 659 659 660 - _enter("%zx", len); 660 + kenter("%zx", len); 661 661 662 662 if (wreq->origin == NETFS_DIO_WRITE) 663 663 inode_dio_begin(wreq->inode); ··· 665 665 while (len) { 666 666 // TODO: Prepare content encryption 667 667 668 - _debug("unbuffered %zx", len); 668 + kdebug("unbuffered %zx", len); 669 669 part = netfs_advance_write(wreq, upload, start, len, false); 670 670 start += part; 671 671 len -= part; ··· 684 684 if (list_empty(&upload->subrequests)) 685 685 netfs_wake_write_collector(wreq, false); 686 686 687 - _leave(" = %d", error); 687 + kleave(" = %d", error); 688 688 return error; 689 689 }

+30 -2

fs/nilfs2/dir.c

··· 383 383 384 384 struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) 385 385 { 386 - struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop); 386 + struct folio *folio; 387 + struct nilfs_dir_entry *de, *next_de; 388 + size_t limit; 389 + char *msg; 387 390 391 + de = nilfs_get_folio(dir, 0, &folio); 388 392 if (IS_ERR(de)) 389 393 return NULL; 390 - return nilfs_next_entry(de); 394 + 395 + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ 396 + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || 397 + !nilfs_match(1, ".", de))) { 398 + msg = "missing '.'"; 399 + goto fail; 400 + } 401 + 402 + next_de = nilfs_next_entry(de); 403 + /* 404 + * If "next_de" has not reached the end of the chunk, there is 405 + * at least one more record. Check whether it matches "..". 406 + */ 407 + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || 408 + !nilfs_match(2, "..", next_de))) { 409 + msg = "missing '..'"; 410 + goto fail; 411 + } 412 + *foliop = folio; 413 + return next_de; 414 + 415 + fail: 416 + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); 417 + folio_release_kmap(folio, de); 418 + return NULL; 391 419 } 392 420 393 421 ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)

-30

fs/smb/client/file.c

··· 246 246 } 247 247 248 248 /* 249 - * Expand the size of a readahead to the size of the rsize, if at least as 250 - * large as a page, allowing for the possibility that rsize is not pow-2 251 - * aligned. 252 - */ 253 - static void cifs_expand_readahead(struct netfs_io_request *rreq) 254 - { 255 - unsigned int rsize = rreq->rsize; 256 - loff_t misalignment, i_size = i_size_read(rreq->inode); 257 - 258 - if (rsize < PAGE_SIZE) 259 - return; 260 - 261 - if (rsize < INT_MAX) 262 - rsize = roundup_pow_of_two(rsize); 263 - else 264 - rsize = ((unsigned int)INT_MAX + 1) / 2; 265 - 266 - misalignment = rreq->start & (rsize - 1); 267 - if (misalignment) { 268 - rreq->start -= misalignment; 269 - rreq->len += misalignment; 270 - } 271 - 272 - rreq->len = round_up(rreq->len, rsize); 273 - if (rreq->start < i_size && rreq->len > i_size - rreq->start) 274 - rreq->len = i_size - rreq->start; 275 - } 276 - 277 - /* 278 249 * Completion of a request operation. 279 250 */ 280 251 static void cifs_rreq_done(struct netfs_io_request *rreq) ··· 300 329 .init_request = cifs_init_request, 301 330 .free_request = cifs_free_request, 302 331 .free_subrequest = cifs_free_subrequest, 303 - .expand_readahead = cifs_expand_readahead, 304 332 .clamp_length = cifs_clamp_length, 305 333 .issue_read = cifs_req_issue_read, 306 334 .done = cifs_rreq_done,

+34

fs/smb/common/smb2pdu.h

··· 917 917 __u8 Buffer[]; 918 918 } __packed; 919 919 920 + /* DeviceType Flags */ 921 + #define FILE_DEVICE_CD_ROM 0x00000002 922 + #define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003 923 + #define FILE_DEVICE_DFS 0x00000006 924 + #define FILE_DEVICE_DISK 0x00000007 925 + #define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008 926 + #define FILE_DEVICE_FILE_SYSTEM 0x00000009 927 + #define FILE_DEVICE_NAMED_PIPE 0x00000011 928 + #define FILE_DEVICE_NETWORK 0x00000012 929 + #define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014 930 + #define FILE_DEVICE_NULL 0x00000015 931 + #define FILE_DEVICE_PARALLEL_PORT 0x00000016 932 + #define FILE_DEVICE_PRINTER 0x00000018 933 + #define FILE_DEVICE_SERIAL_PORT 0x0000001b 934 + #define FILE_DEVICE_STREAMS 0x0000001e 935 + #define FILE_DEVICE_TAPE 0x0000001f 936 + #define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020 937 + #define FILE_DEVICE_VIRTUAL_DISK 0x00000024 938 + #define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028 939 + 940 + /* Device Characteristics */ 941 + #define FILE_REMOVABLE_MEDIA 0x00000001 942 + #define FILE_READ_ONLY_DEVICE 0x00000002 943 + #define FILE_FLOPPY_DISKETTE 0x00000004 944 + #define FILE_WRITE_ONCE_MEDIA 0x00000008 945 + #define FILE_REMOTE_DEVICE 0x00000010 946 + #define FILE_DEVICE_IS_MOUNTED 0x00000020 947 + #define FILE_VIRTUAL_VOLUME 0x00000040 948 + #define FILE_DEVICE_SECURE_OPEN 0x00000100 949 + #define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000 950 + #define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000 951 + #define FILE_PORTABLE_DEVICE 0x00004000 952 + #define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000 953 + 920 954 /* 921 955 * Maximum number of iovs we need for a set-info request. 922 956 * The largest one is rename/hardlink

+18 -4

fs/smb/server/smb2pdu.c

··· 2051 2051 * @access: file access flags 2052 2052 * @disposition: file disposition flags 2053 2053 * @may_flags: set with MAY_ flags 2054 + * @is_dir: is creating open flags for directory 2054 2055 * 2055 2056 * Return: file open flags 2056 2057 */ 2057 2058 static int smb2_create_open_flags(bool file_present, __le32 access, 2058 2059 __le32 disposition, 2059 - int *may_flags) 2060 + int *may_flags, 2061 + bool is_dir) 2060 2062 { 2061 2063 int oflags = O_NONBLOCK | O_LARGEFILE; 2064 + 2065 + if (is_dir) { 2066 + access &= ~FILE_WRITE_DESIRE_ACCESS_LE; 2067 + ksmbd_debug(SMB, "Discard write access to a directory\n"); 2068 + } 2062 2069 2063 2070 if (access & FILE_READ_DESIRED_ACCESS_LE && 2064 2071 access & FILE_WRITE_DESIRE_ACCESS_LE) { ··· 3174 3167 3175 3168 open_flags = smb2_create_open_flags(file_present, daccess, 3176 3169 req->CreateDisposition, 3177 - &may_flags); 3170 + &may_flags, 3171 + req->CreateOptions & FILE_DIRECTORY_FILE_LE || 3172 + (file_present && S_ISDIR(d_inode(path.dentry)->i_mode))); 3178 3173 3179 3174 if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { 3180 3175 if (open_flags & (O_CREAT | O_TRUNC)) { ··· 5323 5314 5324 5315 info = (struct filesystem_device_info *)rsp->Buffer; 5325 5316 5326 - info->DeviceType = cpu_to_le32(stfs.f_type); 5327 - info->DeviceCharacteristics = cpu_to_le32(0x00000020); 5317 + info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK); 5318 + info->DeviceCharacteristics = 5319 + cpu_to_le32(FILE_DEVICE_IS_MOUNTED); 5320 + if (!test_tree_conn_flag(work->tcon, 5321 + KSMBD_TREE_CONN_FLAG_WRITABLE)) 5322 + info->DeviceCharacteristics |= 5323 + cpu_to_le32(FILE_READ_ONLY_DEVICE); 5328 5324 rsp->OutputBufferLength = cpu_to_le32(8); 5329 5325 break; 5330 5326 }

+6 -1

fs/userfaultfd.c

··· 2057 2057 goto out; 2058 2058 features = uffdio_api.features; 2059 2059 ret = -EINVAL; 2060 - if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) 2060 + if (uffdio_api.api != UFFD_API) 2061 2061 goto err_out; 2062 2062 ret = -EPERM; 2063 2063 if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) ··· 2081 2081 uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; 2082 2082 uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; 2083 2083 #endif 2084 + 2085 + ret = -EINVAL; 2086 + if (features & ~uffdio_api.features) 2087 + goto err_out; 2088 + 2084 2089 uffdio_api.ioctls = UFFD_API_IOCTLS; 2085 2090 ret = -EFAULT; 2086 2091 if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))

+7

include/linux/closure.h

··· 159 159 #ifdef CONFIG_DEBUG_CLOSURES 160 160 #define CLOSURE_MAGIC_DEAD 0xc054dead 161 161 #define CLOSURE_MAGIC_ALIVE 0xc054a11e 162 + #define CLOSURE_MAGIC_STACK 0xc05451cc 162 163 163 164 unsigned int magic; 164 165 struct list_head all; ··· 324 323 { 325 324 memset(cl, 0, sizeof(struct closure)); 326 325 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); 326 + #ifdef CONFIG_DEBUG_CLOSURES 327 + cl->magic = CLOSURE_MAGIC_STACK; 328 + #endif 327 329 } 328 330 329 331 static inline void closure_init_stack_release(struct closure *cl) 330 332 { 331 333 memset(cl, 0, sizeof(struct closure)); 332 334 atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); 335 + #ifdef CONFIG_DEBUG_CLOSURES 336 + cl->magic = CLOSURE_MAGIC_STACK; 337 + #endif 333 338 } 334 339 335 340 /**

+6

include/linux/fscache-cache.h

··· 19 19 enum fscache_cache_trace; 20 20 enum fscache_cookie_trace; 21 21 enum fscache_access_trace; 22 + enum fscache_volume_trace; 22 23 23 24 enum fscache_cache_state { 24 25 FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ ··· 98 97 99 98 extern void fscache_io_error(struct fscache_cache *cache); 100 99 100 + extern struct fscache_volume * 101 + fscache_try_get_volume(struct fscache_volume *volume, 102 + enum fscache_volume_trace where); 103 + extern void fscache_put_volume(struct fscache_volume *volume, 104 + enum fscache_volume_trace where); 101 105 extern void fscache_end_volume_access(struct fscache_volume *volume, 102 106 struct fscache_cookie *cookie, 103 107 enum fscache_access_trace why);

+2 -1

include/linux/mmzone.h

··· 1979 1979 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) 1980 1980 { 1981 1981 int idx = subsection_map_index(pfn); 1982 + struct mem_section_usage *usage = READ_ONCE(ms->usage); 1982 1983 1983 - return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); 1984 + return usage ? test_bit(idx, usage->subsection_map) : 0; 1984 1985 } 1985 1986 #else 1986 1987 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)

+9 -48

include/linux/page_ref.h

··· 230 230 231 231 static inline bool page_ref_add_unless(struct page *page, int nr, int u) 232 232 { 233 - bool ret = atomic_add_unless(&page->_refcount, nr, u); 233 + bool ret = false; 234 + 235 + rcu_read_lock(); 236 + /* avoid writing to the vmemmap area being remapped */ 237 + if (!page_is_fake_head(page) && page_ref_count(page) != u) 238 + ret = atomic_add_unless(&page->_refcount, nr, u); 239 + rcu_read_unlock(); 234 240 235 241 if (page_ref_tracepoint_active(page_ref_mod_unless)) 236 242 __page_ref_mod_unless(page, nr, ret); ··· 264 258 return folio_ref_add_unless(folio, 1, 0); 265 259 } 266 260 267 - static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) 261 + static inline bool folio_ref_try_add(struct folio *folio, int count) 268 262 { 269 - #ifdef CONFIG_TINY_RCU 270 - /* 271 - * The caller guarantees the folio will not be freed from interrupt 272 - * context, so (on !SMP) we only need preemption to be disabled 273 - * and TINY_RCU does that for us. 274 - */ 275 - # ifdef CONFIG_PREEMPT_COUNT 276 - VM_BUG_ON(!in_atomic() && !irqs_disabled()); 277 - # endif 278 - VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); 279 - folio_ref_add(folio, count); 280 - #else 281 - if (unlikely(!folio_ref_add_unless(folio, count, 0))) { 282 - /* Either the folio has been freed, or will be freed. */ 283 - return false; 284 - } 285 - #endif 286 - return true; 287 - } 288 - 289 - /** 290 - * folio_try_get_rcu - Attempt to increase the refcount on a folio. 291 - * @folio: The folio. 292 - * 293 - * This is a version of folio_try_get() optimised for non-SMP kernels. 294 - * If you are still holding the rcu_read_lock() after looking up the 295 - * page and know that the page cannot have its refcount decreased to 296 - * zero in interrupt context, you can use this instead of folio_try_get(). 
297 - * 298 - * Example users include get_user_pages_fast() (as pages are not unmapped 299 - * from interrupt context) and the page cache lookups (as pages are not 300 - * truncated from interrupt context). We also know that pages are not 301 - * frozen in interrupt context for the purposes of splitting or migration. 302 - * 303 - * You can also use this function if you're holding a lock that prevents 304 - * pages being frozen & removed; eg the i_pages lock for the page cache 305 - * or the mmap_lock or page table lock for page tables. In this case, 306 - * it will always succeed, and you could have used a plain folio_get(), 307 - * but it's sometimes more convenient to have a common function called 308 - * from both locked and RCU-protected contexts. 309 - * 310 - * Return: True if the reference count was successfully incremented. 311 - */ 312 - static inline bool folio_try_get_rcu(struct folio *folio) 313 - { 314 - return folio_ref_try_add_rcu(folio, 1); 263 + return folio_ref_add_unless(folio, count, 0); 315 264 } 316 265 317 266 static inline int page_ref_freeze(struct page *page, int count)

+9 -2

include/linux/pagemap.h

··· 354 354 * a good order (that's 1MB if you're using 4kB pages) 355 355 */ 356 356 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 357 - #define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER 357 + #define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER 358 358 #else 359 - #define MAX_PAGECACHE_ORDER 8 359 + #define PREFERRED_MAX_PAGECACHE_ORDER 8 360 360 #endif 361 + 362 + /* 363 + * xas_split_alloc() does not support arbitrary orders. This implies no 364 + * 512MB THP on ARM64 with 64KB base page size. 365 + */ 366 + #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) 367 + #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) 361 368 362 369 /** 363 370 * mapping_set_large_folios() - Indicate the file supports large folios.

+2 -2

include/linux/sched.h

··· 2198 2198 extern void sched_set_stop_task(int cpu, struct task_struct *stop); 2199 2199 2200 2200 #ifdef CONFIG_MEM_ALLOC_PROFILING 2201 - static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) 2201 + static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) 2202 2202 { 2203 2203 swap(current->alloc_tag, tag); 2204 2204 return tag; 2205 2205 } 2206 2206 2207 - static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) 2207 + static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) 2208 2208 { 2209 2209 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG 2210 2210 WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");

+2 -1

include/linux/swap.h

··· 354 354 } 355 355 356 356 /* linux/mm/workingset.c */ 357 - bool workingset_test_recent(void *shadow, bool file, bool *workingset); 357 + bool workingset_test_recent(void *shadow, bool file, bool *workingset, 358 + bool flush); 358 359 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); 359 360 void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); 360 361 void workingset_refault(struct folio *folio, void *shadow);

+29 -54

include/linux/tpm.h

··· 490 490 { 491 491 } 492 492 #endif 493 - #ifdef CONFIG_TCG_TPM2_HMAC 494 493 495 - int tpm2_start_auth_session(struct tpm_chip *chip); 494 + static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) 495 + { 496 + #ifdef CONFIG_TCG_TPM2_HMAC 497 + return chip->auth; 498 + #else 499 + return NULL; 500 + #endif 501 + } 502 + 496 503 void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, 497 504 u32 handle, u8 *name); 498 505 void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, ··· 511 504 u8 *passphrase, 512 505 int passphraselen) 513 506 { 514 - tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, 515 - passphraselen); 507 + struct tpm_header *head; 508 + int offset; 509 + 510 + if (tpm2_chip_auth(chip)) { 511 + tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen); 512 + } else { 513 + offset = buf->handles * 4 + TPM_HEADER_SIZE; 514 + head = (struct tpm_header *)buf->data; 515 + 516 + /* 517 + * If the only sessions are optional, the command tag must change to 518 + * TPM2_ST_NO_SESSIONS. 
519 + */ 520 + if (tpm_buf_length(buf) == offset) 521 + head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); 522 + } 516 523 } 524 + 525 + #ifdef CONFIG_TCG_TPM2_HMAC 526 + 527 + int tpm2_start_auth_session(struct tpm_chip *chip); 517 528 void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); 518 529 int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, 519 530 int rc); ··· 545 520 } 546 521 static inline void tpm2_end_auth_session(struct tpm_chip *chip) 547 522 { 548 - } 549 - static inline void tpm_buf_append_name(struct tpm_chip *chip, 550 - struct tpm_buf *buf, 551 - u32 handle, u8 *name) 552 - { 553 - tpm_buf_append_u32(buf, handle); 554 - /* count the number of handles in the upper bits of flags */ 555 - buf->handles++; 556 - } 557 - static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, 558 - struct tpm_buf *buf, 559 - u8 attributes, u8 *passphrase, 560 - int passphraselen) 561 - { 562 - /* offset tells us where the sessions area begins */ 563 - int offset = buf->handles * 4 + TPM_HEADER_SIZE; 564 - u32 len = 9 + passphraselen; 565 - 566 - if (tpm_buf_length(buf) != offset) { 567 - /* not the first session so update the existing length */ 568 - len += get_unaligned_be32(&buf->data[offset]); 569 - put_unaligned_be32(len, &buf->data[offset]); 570 - } else { 571 - tpm_buf_append_u32(buf, len); 572 - } 573 - /* auth handle */ 574 - tpm_buf_append_u32(buf, TPM2_RS_PW); 575 - /* nonce */ 576 - tpm_buf_append_u16(buf, 0); 577 - /* attributes */ 578 - tpm_buf_append_u8(buf, 0); 579 - /* passphrase */ 580 - tpm_buf_append_u16(buf, passphraselen); 581 - tpm_buf_append(buf, passphrase, passphraselen); 582 - } 583 - static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, 584 - struct tpm_buf *buf, 585 - u8 attributes, 586 - u8 *passphrase, 587 - int passphraselen) 588 - { 589 - int offset = buf->handles * 4 + TPM_HEADER_SIZE; 590 - struct tpm_header *head = (struct tpm_header *) buf->data; 591 - 592 - 
/* 593 - * if the only sessions are optional, the command tag 594 - * must change to TPM2_ST_NO_SESSIONS 595 - */ 596 - if (tpm_buf_length(buf) == offset) 597 - head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); 598 523 } 599 524 static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, 600 525 struct tpm_buf *buf)

+9 -4

include/net/tcx.h

··· 13 13 struct tcx_entry { 14 14 struct mini_Qdisc __rcu *miniq; 15 15 struct bpf_mprog_bundle bundle; 16 - bool miniq_active; 16 + u32 miniq_active; 17 17 struct rcu_head rcu; 18 18 }; 19 19 ··· 125 125 tcx_dec(); 126 126 } 127 127 128 - static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry, 129 - const bool active) 128 + static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) 130 129 { 131 130 ASSERT_RTNL(); 132 - tcx_entry(entry)->miniq_active = active; 131 + tcx_entry(entry)->miniq_active++; 132 + } 133 + 134 + static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) 135 + { 136 + ASSERT_RTNL(); 137 + tcx_entry(entry)->miniq_active--; 133 138 } 134 139 135 140 static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)

+4

include/trace/events/fscache.h

··· 35 35 fscache_volume_get_cookie, 36 36 fscache_volume_get_create_work, 37 37 fscache_volume_get_hash_collision, 38 + fscache_volume_get_withdraw, 38 39 fscache_volume_free, 39 40 fscache_volume_new_acquire, 40 41 fscache_volume_put_cookie, 41 42 fscache_volume_put_create_work, 42 43 fscache_volume_put_hash_collision, 43 44 fscache_volume_put_relinquish, 45 + fscache_volume_put_withdraw, 44 46 fscache_volume_see_create_work, 45 47 fscache_volume_see_hash_wake, 46 48 fscache_volume_wait_create_work, ··· 122 120 EM(fscache_volume_get_cookie, "GET cook ") \ 123 121 EM(fscache_volume_get_create_work, "GET creat") \ 124 122 EM(fscache_volume_get_hash_collision, "GET hcoll") \ 123 + EM(fscache_volume_get_withdraw, "GET withd") \ 125 124 EM(fscache_volume_free, "FREE ") \ 126 125 EM(fscache_volume_new_acquire, "NEW acq ") \ 127 126 EM(fscache_volume_put_cookie, "PUT cook ") \ 128 127 EM(fscache_volume_put_create_work, "PUT creat") \ 129 128 EM(fscache_volume_put_hash_collision, "PUT hcoll") \ 130 129 EM(fscache_volume_put_relinquish, "PUT relnq") \ 130 + EM(fscache_volume_put_withdraw, "PUT withd") \ 131 131 EM(fscache_volume_see_create_work, "SEE creat") \ 132 132 EM(fscache_volume_see_hash_wake, "SEE hwake") \ 133 133 E_(fscache_volume_wait_create_work, "WAIT crea")

+5

include/uapi/drm/panthor_drm.h

··· 802 802 * Must be 64-bit/8-byte aligned (the size of a CS instruction) 803 803 * 804 804 * Can be zero if stream_addr is zero too. 805 + * 806 + * When the stream size is zero, the queue submit serves as a 807 + * synchronization point. 805 808 */ 806 809 __u32 stream_size; 807 810 ··· 825 822 * ensure the GPU doesn't get garbage when reading the indirect command 826 823 * stream buffers. If you want the cache flush to happen 827 824 * unconditionally, pass a zero here. 825 + * 826 + * Ignored when stream_size is zero. 828 827 */ 829 828 __u32 latest_flush; 830 829

+82 -17

kernel/bpf/helpers.c

··· 1084 1084 struct bpf_prog *prog; 1085 1085 void __rcu *callback_fn; 1086 1086 void *value; 1087 - struct rcu_head rcu; 1087 + union { 1088 + struct rcu_head rcu; 1089 + struct work_struct delete_work; 1090 + }; 1088 1091 u64 flags; 1089 1092 }; 1090 1093 ··· 1110 1107 struct bpf_hrtimer { 1111 1108 struct bpf_async_cb cb; 1112 1109 struct hrtimer timer; 1110 + atomic_t cancelling; 1113 1111 }; 1114 1112 1115 1113 struct bpf_work { ··· 1223 1219 kfree_rcu(w, cb.rcu); 1224 1220 } 1225 1221 1222 + static void bpf_timer_delete_work(struct work_struct *work) 1223 + { 1224 + struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work); 1225 + 1226 + /* Cancel the timer and wait for callback to complete if it was running. 1227 + * If hrtimer_cancel() can be safely called it's safe to call 1228 + * kfree_rcu(t) right after for both preallocated and non-preallocated 1229 + * maps. The async->cb = NULL was already done and no code path can see 1230 + * address 't' anymore. Timer if armed for existing bpf_hrtimer before 1231 + * bpf_timer_cancel_and_free will have been cancelled. 
1232 + */ 1233 + hrtimer_cancel(&t->timer); 1234 + kfree_rcu(t, cb.rcu); 1235 + } 1236 + 1226 1237 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, 1227 1238 enum bpf_async_type type) 1228 1239 { ··· 1281 1262 clockid = flags & (MAX_CLOCKS - 1); 1282 1263 t = (struct bpf_hrtimer *)cb; 1283 1264 1265 + atomic_set(&t->cancelling, 0); 1266 + INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work); 1284 1267 hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); 1285 1268 t->timer.function = bpf_timer_cb; 1286 1269 cb->value = (void *)async - map->record->timer_off; ··· 1461 1440 1462 1441 BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) 1463 1442 { 1464 - struct bpf_hrtimer *t; 1443 + struct bpf_hrtimer *t, *cur_t; 1444 + bool inc = false; 1465 1445 int ret = 0; 1466 1446 1467 1447 if (in_nmi()) ··· 1474 1452 ret = -EINVAL; 1475 1453 goto out; 1476 1454 } 1477 - if (this_cpu_read(hrtimer_running) == t) { 1455 + 1456 + cur_t = this_cpu_read(hrtimer_running); 1457 + if (cur_t == t) { 1478 1458 /* If bpf callback_fn is trying to bpf_timer_cancel() 1479 1459 * its own timer the hrtimer_cancel() will deadlock 1480 - * since it waits for callback_fn to finish 1460 + * since it waits for callback_fn to finish. 1481 1461 */ 1482 1462 ret = -EDEADLK; 1483 1463 goto out; 1484 1464 } 1465 + 1466 + /* Only account in-flight cancellations when invoked from a timer 1467 + * callback, since we want to avoid waiting only if other _callbacks_ 1468 + * are waiting on us, to avoid introducing lockups. Non-callback paths 1469 + * are ok, since nobody would synchronously wait for their completion. 1470 + */ 1471 + if (!cur_t) 1472 + goto drop; 1473 + atomic_inc(&t->cancelling); 1474 + /* Need full barrier after relaxed atomic_inc */ 1475 + smp_mb__after_atomic(); 1476 + inc = true; 1477 + if (atomic_read(&cur_t->cancelling)) { 1478 + /* We're cancelling timer t, while some other timer callback is 1479 + * attempting to cancel us. 
In such a case, it might be possible 1480 + * that timer t belongs to the other callback, or some other 1481 + * callback waiting upon it (creating transitive dependencies 1482 + * upon us), and we will enter a deadlock if we continue 1483 + * cancelling and waiting for it synchronously, since it might 1484 + * do the same. Bail! 1485 + */ 1486 + ret = -EDEADLK; 1487 + goto out; 1488 + } 1489 + drop: 1485 1490 drop_prog_refcnt(&t->cb); 1486 1491 out: 1487 1492 __bpf_spin_unlock_irqrestore(&timer->lock); ··· 1516 1467 * if it was running. 1517 1468 */ 1518 1469 ret = ret ?: hrtimer_cancel(&t->timer); 1470 + if (inc) 1471 + atomic_dec(&t->cancelling); 1519 1472 rcu_read_unlock(); 1520 1473 return ret; 1521 1474 } ··· 1563 1512 1564 1513 if (!t) 1565 1514 return; 1566 - /* Cancel the timer and wait for callback to complete if it was running. 1567 - * If hrtimer_cancel() can be safely called it's safe to call kfree(t) 1568 - * right after for both preallocated and non-preallocated maps. 1569 - * The async->cb = NULL was already done and no code path can 1570 - * see address 't' anymore. 1571 - * 1572 - * Check that bpf_map_delete/update_elem() wasn't called from timer 1573 - * callback_fn. In such case don't call hrtimer_cancel() (since it will 1574 - * deadlock) and don't call hrtimer_try_to_cancel() (since it will just 1575 - * return -1). Though callback_fn is still running on this cpu it's 1515 + /* We check that bpf_map_delete/update_elem() was called from timer 1516 + * callback_fn. In such case we don't call hrtimer_cancel() (since it 1517 + * will deadlock) and don't call hrtimer_try_to_cancel() (since it will 1518 + * just return -1). Though callback_fn is still running on this cpu it's 1576 1519 * safe to do kfree(t) because bpf_timer_cb() read everything it needed 1577 1520 * from 't'. The bpf subprog callback_fn won't be able to access 't', 1578 1521 * since async->cb = NULL was already done. 
The timer will be 1579 1522 * effectively cancelled because bpf_timer_cb() will return 1580 1523 * HRTIMER_NORESTART. 1524 + * 1525 + * However, it is possible the timer callback_fn calling us armed the 1526 + * timer _before_ calling us, such that failing to cancel it here will 1527 + * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. 1528 + * Therefore, we _need_ to cancel any outstanding timers before we do 1529 + * kfree_rcu, even though no more timers can be armed. 1530 + * 1531 + * Moreover, we need to schedule work even if timer does not belong to 1532 + * the calling callback_fn, as on two different CPUs, we can end up in a 1533 + * situation where both sides run in parallel, try to cancel one 1534 + * another, and we end up waiting on both sides in hrtimer_cancel 1535 + * without making forward progress, since timer1 depends on timer2 1536 + * callback to finish, and vice versa. 1537 + * 1538 + * CPU 1 (timer1_cb) CPU 2 (timer2_cb) 1539 + * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1) 1540 + * 1541 + * To avoid these issues, punt to workqueue context when we are in a 1542 + * timer callback. 1581 1543 */ 1582 - if (this_cpu_read(hrtimer_running) != t) 1583 - hrtimer_cancel(&t->timer); 1584 - kfree_rcu(t, cb.rcu); 1544 + if (this_cpu_read(hrtimer_running)) 1545 + queue_work(system_unbound_wq, &t->cb.delete_work); 1546 + else 1547 + bpf_timer_delete_work(&t->cb.delete_work); 1585 1548 } 1586 1549 1587 1550 /* This function is called by map_delete/update_elem for individual element and

+3 -1

lib/build_OID_registry

··· 38 38 # 39 39 open C_FILE, ">$ARGV[1]" or die; 40 40 print C_FILE "/*\n"; 41 - print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n"; 41 + my $scriptname = $0; 42 + $scriptname =~ s#^\Q$abs_srctree/\E##; 43 + print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n"; 42 44 print C_FILE " */\n"; 43 45 44 46 #

+3

lib/closure.c

··· 244 244 { 245 245 unsigned long flags; 246 246 247 + if (cl->magic == CLOSURE_MAGIC_STACK) 248 + return; 249 + 247 250 BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); 248 251 cl->magic = CLOSURE_MAGIC_DEAD; 249 252

+19 -2

mm/damon/core.c

··· 1358 1358 * access frequencies are similar. This is for minimizing the monitoring 1359 1359 * overhead under the dynamically changeable access pattern. If a merge was 1360 1360 * unnecessarily made, later 'kdamond_split_regions()' will revert it. 1361 + * 1362 + * The total number of regions could be higher than the user-defined limit, 1363 + * max_nr_regions, in some cases. For example, the user can update 1364 + * max_nr_regions to a number that is lower than the current number of regions 1365 + * while DAMON is running. For such a case, repeat merging until the limit is 1366 + * met while increasing @threshold up to the possible maximum level. 1361 1367 */ 1362 1368 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, 1363 1369 unsigned long sz_limit) 1364 1370 { 1365 1371 struct damon_target *t; 1372 + unsigned int nr_regions; 1373 + unsigned int max_thres; 1366 1374 1367 - damon_for_each_target(t, c) 1368 - damon_merge_regions_of(t, threshold, sz_limit); 1375 + max_thres = c->attrs.aggr_interval / 1376 + (c->attrs.sample_interval ? c->attrs.sample_interval : 1); 1377 + do { 1378 + nr_regions = 0; 1379 + damon_for_each_target(t, c) { 1380 + damon_merge_regions_of(t, threshold, sz_limit); 1381 + nr_regions += damon_nr_regions(t); 1382 + } 1383 + threshold = max(1, threshold * 2); 1384 + } while (nr_regions > c->attrs.max_nr_regions && 1385 + threshold / 2 < max_thres); 1369 1386 } 1370 1387 1371 1388 /*

+12 -8

mm/filemap.c

··· 1847 1847 if (!folio || xa_is_value(folio)) 1848 1848 goto out; 1849 1849 1850 - if (!folio_try_get_rcu(folio)) 1850 + if (!folio_try_get(folio)) 1851 1851 goto repeat; 1852 1852 1853 1853 if (unlikely(folio != xas_reload(&xas))) { ··· 2001 2001 if (!folio || xa_is_value(folio)) 2002 2002 return folio; 2003 2003 2004 - if (!folio_try_get_rcu(folio)) 2004 + if (!folio_try_get(folio)) 2005 2005 goto reset; 2006 2006 2007 2007 if (unlikely(folio != xas_reload(xas))) { ··· 2181 2181 if (xa_is_value(folio)) 2182 2182 goto update_start; 2183 2183 2184 - if (!folio_try_get_rcu(folio)) 2184 + if (!folio_try_get(folio)) 2185 2185 goto retry; 2186 2186 2187 2187 if (unlikely(folio != xas_reload(&xas))) ··· 2313 2313 break; 2314 2314 if (xa_is_sibling(folio)) 2315 2315 break; 2316 - if (!folio_try_get_rcu(folio)) 2316 + if (!folio_try_get(folio)) 2317 2317 goto retry; 2318 2318 2319 2319 if (unlikely(folio != xas_reload(&xas))) ··· 3124 3124 3125 3125 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 3126 3126 /* Use the readahead code, even if readahead is disabled */ 3127 - if (vm_flags & VM_HUGEPAGE) { 3127 + if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { 3128 3128 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3129 3129 ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); 3130 3130 ra->size = HPAGE_PMD_NR; ··· 3231 3231 if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) 3232 3232 return 0; 3233 3233 3234 - ptep = pte_offset_map(vmf->pmd, vmf->address); 3234 + ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address, 3235 + &vmf->ptl); 3235 3236 if (unlikely(!ptep)) 3236 3237 return VM_FAULT_NOPAGE; 3237 3238 ··· 3473 3472 continue; 3474 3473 if (folio_test_locked(folio)) 3475 3474 continue; 3476 - if (!folio_try_get_rcu(folio)) 3475 + if (!folio_try_get(folio)) 3477 3476 continue; 3478 3477 /* Has the page moved or been split? 
*/ 3479 3478 if (unlikely(folio != xas_reload(xas))) ··· 4249 4248 XA_STATE(xas, &mapping->i_pages, first_index); 4250 4249 struct folio *folio; 4251 4250 4251 + /* Flush stats (and potentially sleep) outside the RCU read section. */ 4252 + mem_cgroup_flush_stats_ratelimited(NULL); 4253 + 4252 4254 rcu_read_lock(); 4253 4255 xas_for_each(&xas, folio, last_index) { 4254 4256 int order; ··· 4315 4311 goto resched; 4316 4312 } 4317 4313 #endif 4318 - if (workingset_test_recent(shadow, true, &workingset)) 4314 + if (workingset_test_recent(shadow, true, &workingset, false)) 4319 4315 cs->nr_recently_evicted += nr_pages; 4320 4316 4321 4317 goto resched;

+154 -137

mm/gup.c

··· 76 76 folio = page_folio(page); 77 77 if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) 78 78 return NULL; 79 - if (unlikely(!folio_ref_try_add_rcu(folio, refs))) 79 + if (unlikely(!folio_ref_try_add(folio, refs))) 80 80 return NULL; 81 81 82 82 /* ··· 93 93 folio_put_refs(folio, refs); 94 94 goto retry; 95 95 } 96 - 97 - return folio; 98 - } 99 - 100 - /** 101 - * try_grab_folio() - Attempt to get or pin a folio. 102 - * @page: pointer to page to be grabbed 103 - * @refs: the value to (effectively) add to the folio's refcount 104 - * @flags: gup flags: these are the FOLL_* flag values. 105 - * 106 - * "grab" names in this file mean, "look at flags to decide whether to use 107 - * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. 108 - * 109 - * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the 110 - * same time. (That's true throughout the get_user_pages*() and 111 - * pin_user_pages*() APIs.) Cases: 112 - * 113 - * FOLL_GET: folio's refcount will be incremented by @refs. 114 - * 115 - * FOLL_PIN on large folios: folio's refcount will be incremented by 116 - * @refs, and its pincount will be incremented by @refs. 117 - * 118 - * FOLL_PIN on single-page folios: folio's refcount will be incremented by 119 - * @refs * GUP_PIN_COUNTING_BIAS. 120 - * 121 - * Return: The folio containing @page (with refcount appropriately 122 - * incremented) for success, or NULL upon failure. If neither FOLL_GET 123 - * nor FOLL_PIN was set, that's considered failure, and furthermore, 124 - * a likely bug in the caller, so a warning is also emitted. 
125 - */ 126 - struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) 127 - { 128 - struct folio *folio; 129 - 130 - if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) 131 - return NULL; 132 - 133 - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) 134 - return NULL; 135 - 136 - if (flags & FOLL_GET) 137 - return try_get_folio(page, refs); 138 - 139 - /* FOLL_PIN is set */ 140 - 141 - /* 142 - * Don't take a pin on the zero page - it's not going anywhere 143 - * and it is used in a *lot* of places. 144 - */ 145 - if (is_zero_page(page)) 146 - return page_folio(page); 147 - 148 - folio = try_get_folio(page, refs); 149 - if (!folio) 150 - return NULL; 151 - 152 - /* 153 - * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a 154 - * right zone, so fail and let the caller fall back to the slow 155 - * path. 156 - */ 157 - if (unlikely((flags & FOLL_LONGTERM) && 158 - !folio_is_longterm_pinnable(folio))) { 159 - if (!put_devmap_managed_folio_refs(folio, refs)) 160 - folio_put_refs(folio, refs); 161 - return NULL; 162 - } 163 - 164 - /* 165 - * When pinning a large folio, use an exact count to track it. 166 - * 167 - * However, be sure to *also* increment the normal folio 168 - * refcount field at least once, so that the folio really 169 - * is pinned. That's why the refcount from the earlier 170 - * try_get_folio() is left intact. 171 - */ 172 - if (folio_test_large(folio)) 173 - atomic_add(refs, &folio->_pincount); 174 - else 175 - folio_ref_add(folio, 176 - refs * (GUP_PIN_COUNTING_BIAS - 1)); 177 - /* 178 - * Adjust the pincount before re-checking the PTE for changes. 179 - * This is essentially a smp_mb() and is paired with a memory 180 - * barrier in folio_try_share_anon_rmap_*(). 
181 - */ 182 - smp_mb__after_atomic(); 183 - 184 - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); 185 96 186 97 return folio; 187 98 } ··· 114 203 } 115 204 116 205 /** 117 - * try_grab_page() - elevate a page's refcount by a flag-dependent amount 118 - * @page: pointer to page to be grabbed 119 - * @flags: gup flags: these are the FOLL_* flag values. 206 + * try_grab_folio() - add a folio's refcount by a flag-dependent amount 207 + * @folio: pointer to folio to be grabbed 208 + * @refs: the value to (effectively) add to the folio's refcount 209 + * @flags: gup flags: these are the FOLL_* flag values 120 210 * 121 211 * This might not do anything at all, depending on the flags argument. 122 212 * 123 213 * "grab" names in this file mean, "look at flags to decide whether to use 124 - * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. 214 + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. 125 215 * 126 216 * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same 127 - * time. Cases: please see the try_grab_folio() documentation, with 128 - * "refs=1". 217 + * time. 129 218 * 130 219 * Return: 0 for success, or if no action was required (if neither FOLL_PIN 131 220 * nor FOLL_GET was set, nothing is done). A negative error code for failure: 132 221 * 133 - * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not 222 + * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not 134 223 * be grabbed. 224 + * 225 + * It is called when we have a stable reference for the folio, typically in 226 + * GUP slow path. 
135 227 */ 136 - int __must_check try_grab_page(struct page *page, unsigned int flags) 228 + int __must_check try_grab_folio(struct folio *folio, int refs, 229 + unsigned int flags) 137 230 { 138 - struct folio *folio = page_folio(page); 139 - 140 231 if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) 141 232 return -ENOMEM; 142 233 143 - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) 234 + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page))) 144 235 return -EREMOTEIO; 145 236 146 237 if (flags & FOLL_GET) 147 - folio_ref_inc(folio); 238 + folio_ref_add(folio, refs); 148 239 else if (flags & FOLL_PIN) { 149 240 /* 150 241 * Don't take a pin on the zero page - it's not going anywhere 151 242 * and it is used in a *lot* of places. 152 243 */ 153 - if (is_zero_page(page)) 244 + if (is_zero_folio(folio)) 154 245 return 0; 155 246 156 247 /* 157 - * Similar to try_grab_folio(): be sure to *also* 158 - * increment the normal page refcount field at least once, 248 + * Increment the normal page refcount field at least once, 159 249 * so that the page really is pinned. 160 250 */ 161 251 if (folio_test_large(folio)) { 162 - folio_ref_add(folio, 1); 163 - atomic_add(1, &folio->_pincount); 252 + folio_ref_add(folio, refs); 253 + atomic_add(refs, &folio->_pincount); 164 254 } else { 165 - folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); 255 + folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS); 166 256 } 167 257 168 - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); 258 + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); 169 259 } 170 260 171 261 return 0; ··· 427 515 428 516 return nr; 429 517 } 518 + 519 + /** 520 + * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 521 + * @page: pointer to page to be grabbed 522 + * @refs: the value to (effectively) add to the folio's refcount 523 + * @flags: gup flags: these are the FOLL_* flag values. 
524 + * 525 + * "grab" names in this file mean, "look at flags to decide whether to use 526 + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. 527 + * 528 + * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the 529 + * same time. (That's true throughout the get_user_pages*() and 530 + * pin_user_pages*() APIs.) Cases: 531 + * 532 + * FOLL_GET: folio's refcount will be incremented by @refs. 533 + * 534 + * FOLL_PIN on large folios: folio's refcount will be incremented by 535 + * @refs, and its pincount will be incremented by @refs. 536 + * 537 + * FOLL_PIN on single-page folios: folio's refcount will be incremented by 538 + * @refs * GUP_PIN_COUNTING_BIAS. 539 + * 540 + * Return: The folio containing @page (with refcount appropriately 541 + * incremented) for success, or NULL upon failure. If neither FOLL_GET 542 + * nor FOLL_PIN was set, that's considered failure, and furthermore, 543 + * a likely bug in the caller, so a warning is also emitted. 544 + * 545 + * It uses add ref unless zero to elevate the folio refcount and must be called 546 + * in fast path only. 547 + */ 548 + static struct folio *try_grab_folio_fast(struct page *page, int refs, 549 + unsigned int flags) 550 + { 551 + struct folio *folio; 552 + 553 + /* Raise warn if it is not called in fast GUP */ 554 + VM_WARN_ON_ONCE(!irqs_disabled()); 555 + 556 + if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) 557 + return NULL; 558 + 559 + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) 560 + return NULL; 561 + 562 + if (flags & FOLL_GET) 563 + return try_get_folio(page, refs); 564 + 565 + /* FOLL_PIN is set */ 566 + 567 + /* 568 + * Don't take a pin on the zero page - it's not going anywhere 569 + * and it is used in a *lot* of places. 
570 + */ 571 + if (is_zero_page(page)) 572 + return page_folio(page); 573 + 574 + folio = try_get_folio(page, refs); 575 + if (!folio) 576 + return NULL; 577 + 578 + /* 579 + * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a 580 + * right zone, so fail and let the caller fall back to the slow 581 + * path. 582 + */ 583 + if (unlikely((flags & FOLL_LONGTERM) && 584 + !folio_is_longterm_pinnable(folio))) { 585 + if (!put_devmap_managed_folio_refs(folio, refs)) 586 + folio_put_refs(folio, refs); 587 + return NULL; 588 + } 589 + 590 + /* 591 + * When pinning a large folio, use an exact count to track it. 592 + * 593 + * However, be sure to *also* increment the normal folio 594 + * refcount field at least once, so that the folio really 595 + * is pinned. That's why the refcount from the earlier 596 + * try_get_folio() is left intact. 597 + */ 598 + if (folio_test_large(folio)) 599 + atomic_add(refs, &folio->_pincount); 600 + else 601 + folio_ref_add(folio, 602 + refs * (GUP_PIN_COUNTING_BIAS - 1)); 603 + /* 604 + * Adjust the pincount before re-checking the PTE for changes. 605 + * This is essentially a smp_mb() and is paired with a memory 606 + * barrier in folio_try_share_anon_rmap_*(). 
607 + */ 608 + smp_mb__after_atomic(); 609 + 610 + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); 611 + 612 + return folio; 613 + } 430 614 #endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */ 431 615 432 616 #ifdef CONFIG_ARCH_HAS_HUGEPD ··· 543 535 */ 544 536 static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz, 545 537 unsigned long addr, unsigned long end, unsigned int flags, 546 - struct page **pages, int *nr) 538 + struct page **pages, int *nr, bool fast) 547 539 { 548 540 unsigned long pte_end; 549 541 struct page *page; ··· 566 558 page = pte_page(pte); 567 559 refs = record_subpages(page, sz, addr, end, pages + *nr); 568 560 569 - folio = try_grab_folio(page, refs, flags); 570 - if (!folio) 571 - return 0; 561 + if (fast) { 562 + folio = try_grab_folio_fast(page, refs, flags); 563 + if (!folio) 564 + return 0; 565 + } else { 566 + folio = page_folio(page); 567 + if (try_grab_folio(folio, refs, flags)) 568 + return 0; 569 + } 572 570 573 571 if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { 574 572 gup_put_folio(folio, refs, flags); ··· 602 588 static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, 603 589 unsigned long addr, unsigned int pdshift, 604 590 unsigned long end, unsigned int flags, 605 - struct page **pages, int *nr) 591 + struct page **pages, int *nr, bool fast) 606 592 { 607 593 pte_t *ptep; 608 594 unsigned long sz = 1UL << hugepd_shift(hugepd); ··· 612 598 ptep = hugepte_offset(hugepd, addr, pdshift); 613 599 do { 614 600 next = hugepte_addr_end(addr, end, sz); 615 - ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr); 601 + ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr, 602 + fast); 616 603 if (ret != 1) 617 604 return ret; 618 605 } while (ptep++, addr = next, addr != end); ··· 640 625 ptep = hugepte_offset(hugepd, addr, pdshift); 641 626 ptl = huge_pte_lock(h, vma->vm_mm, ptep); 642 627 ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE, 643 
- flags, &page, &nr); 628 + flags, &page, &nr, false); 644 629 spin_unlock(ptl); 645 630 646 631 if (ret == 1) { ··· 657 642 static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, 658 643 unsigned long addr, unsigned int pdshift, 659 644 unsigned long end, unsigned int flags, 660 - struct page **pages, int *nr) 645 + struct page **pages, int *nr, bool fast) 661 646 { 662 647 return 0; 663 648 } ··· 744 729 gup_must_unshare(vma, flags, page)) 745 730 return ERR_PTR(-EMLINK); 746 731 747 - ret = try_grab_page(page, flags); 732 + ret = try_grab_folio(page_folio(page), 1, flags); 748 733 if (ret) 749 734 page = ERR_PTR(ret); 750 735 else ··· 821 806 VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && 822 807 !PageAnonExclusive(page), page); 823 808 824 - ret = try_grab_page(page, flags); 809 + ret = try_grab_folio(page_folio(page), 1, flags); 825 810 if (ret) 826 811 return ERR_PTR(ret); 827 812 ··· 983 968 VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && 984 969 !PageAnonExclusive(page), page); 985 970 986 - /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ 987 - ret = try_grab_page(page, flags); 971 + /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */ 972 + ret = try_grab_folio(page_folio(page), 1, flags); 988 973 if (unlikely(ret)) { 989 974 page = ERR_PTR(ret); 990 975 goto out; ··· 1248 1233 goto unmap; 1249 1234 *page = pte_page(entry); 1250 1235 } 1251 - ret = try_grab_page(*page, gup_flags); 1236 + ret = try_grab_folio(page_folio(*page), 1, gup_flags); 1252 1237 if (unlikely(ret)) 1253 1238 goto unmap; 1254 1239 out: ··· 1651 1636 * pages. 1652 1637 */ 1653 1638 if (page_increm > 1) { 1654 - struct folio *folio; 1639 + struct folio *folio = page_folio(page); 1655 1640 1656 1641 /* 1657 1642 * Since we already hold refcount on the 1658 1643 * large folio, this should never fail. 
1659 1644 */ 1660 - folio = try_grab_folio(page, page_increm - 1, 1661 - foll_flags); 1662 - if (WARN_ON_ONCE(!folio)) { 1645 + if (try_grab_folio(folio, page_increm - 1, 1646 + foll_flags)) { 1663 1647 /* 1664 1648 * Release the 1st page ref if the 1665 1649 * folio is problematic, fail hard. 1666 1650 */ 1667 - gup_put_folio(page_folio(page), 1, 1651 + gup_put_folio(folio, 1, 1668 1652 foll_flags); 1669 1653 ret = -EFAULT; 1670 1654 goto out; ··· 2811 2797 * This code is based heavily on the PowerPC implementation by Nick Piggin. 2812 2798 */ 2813 2799 #ifdef CONFIG_HAVE_GUP_FAST 2814 - 2815 2800 /* 2816 2801 * Used in the GUP-fast path to determine whether GUP is permitted to work on 2817 2802 * a specific folio. ··· 2975 2962 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 2976 2963 page = pte_page(pte); 2977 2964 2978 - folio = try_grab_folio(page, 1, flags); 2965 + folio = try_grab_folio_fast(page, 1, flags); 2979 2966 if (!folio) 2980 2967 goto pte_unmap; 2981 2968 ··· 3062 3049 break; 3063 3050 } 3064 3051 3065 - folio = try_grab_folio(page, 1, flags); 3052 + folio = try_grab_folio_fast(page, 1, flags); 3066 3053 if (!folio) { 3067 3054 gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); 3068 3055 break; ··· 3151 3138 page = pmd_page(orig); 3152 3139 refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); 3153 3140 3154 - folio = try_grab_folio(page, refs, flags); 3141 + folio = try_grab_folio_fast(page, refs, flags); 3155 3142 if (!folio) 3156 3143 return 0; 3157 3144 ··· 3195 3182 page = pud_page(orig); 3196 3183 refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); 3197 3184 3198 - folio = try_grab_folio(page, refs, flags); 3185 + folio = try_grab_folio_fast(page, refs, flags); 3199 3186 if (!folio) 3200 3187 return 0; 3201 3188 ··· 3235 3222 page = pgd_page(orig); 3236 3223 refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr); 3237 3224 3238 - folio = try_grab_folio(page, refs, flags); 3225 + folio = try_grab_folio_fast(page, refs, 
flags); 3239 3226 if (!folio) 3240 3227 return 0; 3241 3228 ··· 3289 3276 * pmd format and THP pmd format 3290 3277 */ 3291 3278 if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr, 3292 - PMD_SHIFT, next, flags, pages, nr) != 1) 3279 + PMD_SHIFT, next, flags, pages, nr, 3280 + true) != 1) 3293 3281 return 0; 3294 3282 } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, 3295 3283 pages, nr)) ··· 3320 3306 return 0; 3321 3307 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { 3322 3308 if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr, 3323 - PUD_SHIFT, next, flags, pages, nr) != 1) 3309 + PUD_SHIFT, next, flags, pages, nr, 3310 + true) != 1) 3324 3311 return 0; 3325 3312 } else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags, 3326 3313 pages, nr)) ··· 3348 3333 BUILD_BUG_ON(p4d_leaf(p4d)); 3349 3334 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { 3350 3335 if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr, 3351 - P4D_SHIFT, next, flags, pages, nr) != 1) 3336 + P4D_SHIFT, next, flags, pages, nr, 3337 + true) != 1) 3352 3338 return 0; 3353 3339 } else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags, 3354 3340 pages, nr)) ··· 3378 3362 return; 3379 3363 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { 3380 3364 if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr, 3381 - PGDIR_SHIFT, next, flags, pages, nr) != 1) 3365 + PGDIR_SHIFT, next, flags, pages, nr, 3366 + true) != 1) 3382 3367 return; 3383 3368 } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags, 3384 3369 pages, nr))

+1 -1

mm/huge_memory.c

··· 1331 1331 if (!*pgmap) 1332 1332 return ERR_PTR(-EFAULT); 1333 1333 page = pfn_to_page(pfn); 1334 - ret = try_grab_page(page, flags); 1334 + ret = try_grab_folio(page_folio(page), 1, flags); 1335 1335 if (ret) 1336 1336 page = ERR_PTR(ret); 1337 1337

+17 -53

mm/hugetlb.c

··· 1625 1625 * folio appears as just a compound page. Otherwise, wait until after 1626 1626 * allocating vmemmap to clear the flag. 1627 1627 * 1628 - * A reference is held on the folio, except in the case of demote. 1629 - * 1630 1628 * Must be called with hugetlb lock held. 1631 1629 */ 1632 - static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, 1633 - bool adjust_surplus, 1634 - bool demote) 1630 + static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, 1631 + bool adjust_surplus) 1635 1632 { 1636 1633 int nid = folio_nid(folio); 1637 1634 ··· 1642 1645 list_del(&folio->lru); 1643 1646 1644 1647 if (folio_test_hugetlb_freed(folio)) { 1648 + folio_clear_hugetlb_freed(folio); 1645 1649 h->free_huge_pages--; 1646 1650 h->free_huge_pages_node[nid]--; 1647 1651 } ··· 1659 1661 if (!folio_test_hugetlb_vmemmap_optimized(folio)) 1660 1662 __folio_clear_hugetlb(folio); 1661 1663 1662 - /* 1663 - * In the case of demote we do not ref count the page as it will soon 1664 - * be turned into a page of smaller size. 1665 - */ 1666 - if (!demote) 1667 - folio_ref_unfreeze(folio, 1); 1668 - 1669 1664 h->nr_huge_pages--; 1670 1665 h->nr_huge_pages_node[nid]--; 1671 - } 1672 - 1673 - static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, 1674 - bool adjust_surplus) 1675 - { 1676 - __remove_hugetlb_folio(h, folio, adjust_surplus, false); 1677 - } 1678 - 1679 - static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio, 1680 - bool adjust_surplus) 1681 - { 1682 - __remove_hugetlb_folio(h, folio, adjust_surplus, true); 1683 1666 } 1684 1667 1685 1668 static void add_hugetlb_folio(struct hstate *h, struct folio *folio, 1686 1669 bool adjust_surplus) 1687 1670 { 1688 - int zeroed; 1689 1671 int nid = folio_nid(folio); 1690 1672 1691 1673 VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio); ··· 1688 1710 * folio_change_private(folio, NULL) cleared it. 
1689 1711 */ 1690 1712 folio_set_hugetlb_vmemmap_optimized(folio); 1691 - 1692 - /* 1693 - * This folio is about to be managed by the hugetlb allocator and 1694 - * should have no users. Drop our reference, and check for others 1695 - * just in case. 1696 - */ 1697 - zeroed = folio_put_testzero(folio); 1698 - if (unlikely(!zeroed)) 1699 - /* 1700 - * It is VERY unlikely soneone else has taken a ref 1701 - * on the folio. In this case, we simply return as 1702 - * free_huge_folio() will be called when this other ref 1703 - * is dropped. 1704 - */ 1705 - return; 1706 1713 1707 1714 arch_clear_hugetlb_flags(folio); 1708 1715 enqueue_hugetlb_folio(h, folio); ··· 1726 1763 } 1727 1764 1728 1765 /* 1729 - * Move PageHWPoison flag from head page to the raw error pages, 1730 - * which makes any healthy subpages reusable. 1731 - */ 1732 - if (unlikely(folio_test_hwpoison(folio))) 1733 - folio_clear_hugetlb_hwpoison(folio); 1734 - 1735 - /* 1736 1766 * If vmemmap pages were allocated above, then we need to clear the 1737 1767 * hugetlb flag under the hugetlb lock. 1738 1768 */ ··· 1734 1778 __folio_clear_hugetlb(folio); 1735 1779 spin_unlock_irq(&hugetlb_lock); 1736 1780 } 1781 + 1782 + /* 1783 + * Move PageHWPoison flag from head page to the raw error pages, 1784 + * which makes any healthy subpages reusable. 1785 + */ 1786 + if (unlikely(folio_test_hwpoison(folio))) 1787 + folio_clear_hugetlb_hwpoison(folio); 1788 + 1789 + folio_ref_unfreeze(folio, 1); 1737 1790 1738 1791 /* 1739 1792 * Non-gigantic pages demoted from CMA allocated gigantic pages ··· 2162 2197 nid = numa_mem_id(); 2163 2198 retry: 2164 2199 folio = __folio_alloc(gfp_mask, order, nid, nmask); 2200 + /* Ensure hugetlb folio won't have large_rmappable flag set. 
*/ 2201 + if (folio) 2202 + folio_clear_large_rmappable(folio); 2165 2203 2166 2204 if (folio && !folio_ref_freeze(folio, 1)) { 2167 2205 folio_put(folio); ··· 3047 3079 3048 3080 free_new: 3049 3081 spin_unlock_irq(&hugetlb_lock); 3050 - if (new_folio) { 3051 - /* Folio has a zero ref count, but needs a ref to be freed */ 3052 - folio_ref_unfreeze(new_folio, 1); 3082 + if (new_folio) 3053 3083 update_and_free_hugetlb_folio(h, new_folio, false); 3054 - } 3055 3084 3056 3085 return ret; 3057 3086 } ··· 3903 3938 3904 3939 target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); 3905 3940 3906 - remove_hugetlb_folio_for_demote(h, folio, false); 3941 + remove_hugetlb_folio(h, folio, false); 3907 3942 spin_unlock_irq(&hugetlb_lock); 3908 3943 3909 3944 /* ··· 3917 3952 if (rc) { 3918 3953 /* Allocation of vmemmmap failed, we can not demote folio */ 3919 3954 spin_lock_irq(&hugetlb_lock); 3920 - folio_ref_unfreeze(folio, 1); 3921 3955 add_hugetlb_folio(h, folio, false); 3922 3956 return rc; 3923 3957 }

+16

mm/hugetlb_vmemmap.c

··· 446 446 unsigned long vmemmap_reuse; 447 447 448 448 VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); 449 + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); 450 + 449 451 if (!folio_test_hugetlb_vmemmap_optimized(folio)) 450 452 return 0; 451 453 ··· 483 481 */ 484 482 int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) 485 483 { 484 + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ 485 + synchronize_rcu(); 486 + 486 487 return __hugetlb_vmemmap_restore_folio(h, folio, 0); 487 488 } 488 489 ··· 509 504 struct folio *folio, *t_folio; 510 505 long restored = 0; 511 506 long ret = 0; 507 + 508 + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ 509 + synchronize_rcu(); 512 510 513 511 list_for_each_entry_safe(folio, t_folio, folio_list, lru) { 514 512 if (folio_test_hugetlb_vmemmap_optimized(folio)) { ··· 558 550 unsigned long vmemmap_reuse; 559 551 560 552 VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); 553 + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); 554 + 561 555 if (!vmemmap_should_optimize_folio(h, folio)) 562 556 return ret; 563 557 ··· 611 601 { 612 602 LIST_HEAD(vmemmap_pages); 613 603 604 + /* avoid writes from page_ref_add_unless() while folding vmemmap */ 605 + synchronize_rcu(); 606 + 614 607 __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0); 615 608 free_vmemmap_page_list(&vmemmap_pages); 616 609 } ··· 656 643 } 657 644 658 645 flush_tlb_all(); 646 + 647 + /* avoid writes from page_ref_add_unless() while folding vmemmap */ 648 + synchronize_rcu(); 659 649 660 650 list_for_each_entry(folio, folio_list, lru) { 661 651 int ret;

+2 -2

mm/internal.h

··· 1182 1182 /* 1183 1183 * mm/gup.c 1184 1184 */ 1185 - struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); 1186 - int __must_check try_grab_page(struct page *page, unsigned int flags); 1185 + int __must_check try_grab_folio(struct folio *folio, int refs, 1186 + unsigned int flags); 1187 1187 1188 1188 /* 1189 1189 * mm/huge_memory.c

-11

mm/memcontrol.c

··· 7823 7823 7824 7824 /* Transfer the charge and the css ref */ 7825 7825 commit_charge(new, memcg); 7826 - /* 7827 - * If the old folio is a large folio and is in the split queue, it needs 7828 - * to be removed from the split queue now, in case getting an incorrect 7829 - * split queue in destroy_large_folio() after the memcg of the old folio 7830 - * is cleared. 7831 - * 7832 - * In addition, the old folio is about to be freed after migration, so 7833 - * removing from the split queue a bit earlier seems reasonable. 7834 - */ 7835 - if (folio_test_large(old) && folio_test_large_rmappable(old)) 7836 - folio_undo_large_rmappable(old); 7837 7826 old->memcg_data = 0; 7838 7827 } 7839 7828

+13

mm/migrate.c

··· 415 415 if (folio_ref_count(folio) != expected_count) 416 416 return -EAGAIN; 417 417 418 + /* Take off deferred split queue while frozen and memcg set */ 419 + if (folio_test_large(folio) && 420 + folio_test_large_rmappable(folio)) { 421 + if (!folio_ref_freeze(folio, expected_count)) 422 + return -EAGAIN; 423 + folio_undo_large_rmappable(folio); 424 + folio_ref_unfreeze(folio, expected_count); 425 + } 426 + 418 427 /* No turning back from here */ 419 428 newfolio->index = folio->index; 420 429 newfolio->mapping = folio->mapping; ··· 441 432 xas_unlock_irq(&xas); 442 433 return -EAGAIN; 443 434 } 435 + 436 + /* Take off deferred split queue while frozen and memcg set */ 437 + if (folio_test_large(folio) && folio_test_large_rmappable(folio)) 438 + folio_undo_large_rmappable(folio); 444 439 445 440 /* 446 441 * Now we know that no one else is looking at the folio:

+4 -4

mm/readahead.c

··· 503 503 504 504 limit = min(limit, index + ra->size - 1); 505 505 506 - if (new_order < MAX_PAGECACHE_ORDER) { 506 + if (new_order < MAX_PAGECACHE_ORDER) 507 507 new_order += 2; 508 - new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); 509 - new_order = min_t(unsigned int, new_order, ilog2(ra->size)); 510 - } 508 + 509 + new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); 510 + new_order = min_t(unsigned int, new_order, ilog2(ra->size)); 511 511 512 512 /* See comment in page_cache_ra_unbounded() */ 513 513 nofs = memalloc_nofs_save();

+13 -2

mm/shmem.c

··· 541 541 542 542 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; 543 543 544 - bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, 545 - struct mm_struct *mm, unsigned long vm_flags) 544 + static bool __shmem_is_huge(struct inode *inode, pgoff_t index, 545 + bool shmem_huge_force, struct mm_struct *mm, 546 + unsigned long vm_flags) 546 547 { 547 548 loff_t i_size; 548 549 ··· 572 571 default: 573 572 return false; 574 573 } 574 + } 575 + 576 + bool shmem_is_huge(struct inode *inode, pgoff_t index, 577 + bool shmem_huge_force, struct mm_struct *mm, 578 + unsigned long vm_flags) 579 + { 580 + if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER) 581 + return false; 582 + 583 + return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags); 575 584 } 576 585 577 586 #if defined(CONFIG_SYSFS)

+9 -1

mm/vmalloc.c

··· 2543 2543 static struct xarray * 2544 2544 addr_to_vb_xa(unsigned long addr) 2545 2545 { 2546 - int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus(); 2546 + int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids; 2547 + 2548 + /* 2549 + * Please note, nr_cpu_ids points on a highest set 2550 + * possible bit, i.e. we never invoke cpumask_next() 2551 + * if an index points on it which is nr_cpu_ids - 1. 2552 + */ 2553 + if (!cpu_possible(index)) 2554 + index = cpumask_next(index, cpu_possible_mask); 2547 2555 2548 2556 return &per_cpu(vmap_block_queue, index).vmap_blocks; 2549 2557 }

+11 -3

mm/workingset.c

··· 412 412 * @file: whether the corresponding folio is from the file lru. 413 413 * @workingset: where the workingset value unpacked from shadow should 414 414 * be stored. 415 + * @flush: whether to flush cgroup rstat. 415 416 * 416 417 * Return: true if the shadow is for a recently evicted folio; false otherwise. 417 418 */ 418 - bool workingset_test_recent(void *shadow, bool file, bool *workingset) 419 + bool workingset_test_recent(void *shadow, bool file, bool *workingset, 420 + bool flush) 419 421 { 420 422 struct mem_cgroup *eviction_memcg; 421 423 struct lruvec *eviction_lruvec; ··· 469 467 470 468 /* 471 469 * Flush stats (and potentially sleep) outside the RCU read section. 470 + * 471 + * Note that workingset_test_recent() itself might be called in RCU read 472 + * section (for e.g, in cachestat) - these callers need to skip flushing 473 + * stats (via the flush argument). 474 + * 472 475 * XXX: With per-memcg flushing and thresholding, is ratelimiting 473 476 * still needed here? 474 477 */ 475 - mem_cgroup_flush_stats_ratelimited(eviction_memcg); 478 + if (flush) 479 + mem_cgroup_flush_stats_ratelimited(eviction_memcg); 476 480 477 481 eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); 478 482 refault = atomic_long_read(&eviction_lruvec->nonresident_age); ··· 566 558 567 559 mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); 568 560 569 - if (!workingset_test_recent(shadow, file, &workingset)) 561 + if (!workingset_test_recent(shadow, file, &workingset, true)) 570 562 return; 571 563 572 564 folio_set_active(folio);

+2 -1

net/core/datagram.c

··· 423 423 if (copy > len) 424 424 copy = len; 425 425 426 + n = 0; 426 427 skb_frag_foreach_page(frag, 427 428 skb_frag_off(frag) + offset - start, 428 429 copy, p, p_off, p_len, copied) { 429 430 vaddr = kmap_local_page(p); 430 - n = INDIRECT_CALL_1(cb, simple_copy_to_iter, 431 + n += INDIRECT_CALL_1(cb, simple_copy_to_iter, 431 432 vaddr + p_off, p_len, data, to); 432 433 kunmap_local(vaddr); 433 434 }

+2 -1

net/core/skmsg.c

··· 434 434 page = sg_page(sge); 435 435 if (copied + copy > len) 436 436 copy = len - copied; 437 - copy = copy_page_to_iter(page, sge->offset, copy, iter); 437 + if (copy) 438 + copy = copy_page_to_iter(page, sge->offset, copy, iter); 438 439 if (!copy) { 439 440 copied = copied ? copied : -EFAULT; 440 441 goto out;

+28 -13

net/ethtool/linkstate.c

··· 37 37 mutex_lock(&phydev->lock); 38 38 if (!phydev->drv || !phydev->drv->get_sqi) 39 39 ret = -EOPNOTSUPP; 40 + else if (!phydev->link) 41 + ret = -ENETDOWN; 40 42 else 41 43 ret = phydev->drv->get_sqi(phydev); 42 44 mutex_unlock(&phydev->lock); ··· 57 55 mutex_lock(&phydev->lock); 58 56 if (!phydev->drv || !phydev->drv->get_sqi_max) 59 57 ret = -EOPNOTSUPP; 58 + else if (!phydev->link) 59 + ret = -ENETDOWN; 60 60 else 61 61 ret = phydev->drv->get_sqi_max(phydev); 62 62 mutex_unlock(&phydev->lock); 63 63 64 64 return ret; 65 65 }; 66 + 67 + static bool linkstate_sqi_critical_error(int sqi) 68 + { 69 + return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN; 70 + } 71 + 72 + static bool linkstate_sqi_valid(struct linkstate_reply_data *data) 73 + { 74 + return data->sqi >= 0 && data->sqi_max >= 0 && 75 + data->sqi <= data->sqi_max; 76 + } 66 77 67 78 static int linkstate_get_link_ext_state(struct net_device *dev, 68 79 struct linkstate_reply_data *data) ··· 108 93 data->link = __ethtool_get_link(dev); 109 94 110 95 ret = linkstate_get_sqi(dev); 111 - if (ret < 0 && ret != -EOPNOTSUPP) 96 + if (linkstate_sqi_critical_error(ret)) 112 97 goto out; 113 98 data->sqi = ret; 114 99 115 100 ret = linkstate_get_sqi_max(dev); 116 - if (ret < 0 && ret != -EOPNOTSUPP) 101 + if (linkstate_sqi_critical_error(ret)) 117 102 goto out; 118 103 data->sqi_max = ret; 119 104 ··· 151 136 len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ 152 137 + 0; 153 138 154 - if (data->sqi != -EOPNOTSUPP) 155 - len += nla_total_size(sizeof(u32)); 156 - 157 - if (data->sqi_max != -EOPNOTSUPP) 158 - len += nla_total_size(sizeof(u32)); 139 + if (linkstate_sqi_valid(data)) { 140 + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */ 141 + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */ 142 + } 159 143 160 144 if (data->link_ext_state_provided) 161 145 len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ ··· 178 164 nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) 
179 165 return -EMSGSIZE; 180 166 181 - if (data->sqi != -EOPNOTSUPP && 182 - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) 183 - return -EMSGSIZE; 167 + if (linkstate_sqi_valid(data)) { 168 + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) 169 + return -EMSGSIZE; 184 170 185 - if (data->sqi_max != -EOPNOTSUPP && 186 - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) 187 - return -EMSGSIZE; 171 + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, 172 + data->sqi_max)) 173 + return -EMSGSIZE; 174 + } 188 175 189 176 if (data->link_ext_state_provided) { 190 177 if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,

+10 -1

net/ipv4/tcp_input.c

··· 2129 2129 static inline void tcp_init_undo(struct tcp_sock *tp) 2130 2130 { 2131 2131 tp->undo_marker = tp->snd_una; 2132 + 2132 2133 /* Retransmission still in flight may cause DSACKs later. */ 2133 - tp->undo_retrans = tp->retrans_out ? : -1; 2134 + /* First, account for regular retransmits in flight: */ 2135 + tp->undo_retrans = tp->retrans_out; 2136 + /* Next, account for TLP retransmits in flight: */ 2137 + if (tp->tlp_high_seq && tp->tlp_retrans) 2138 + tp->undo_retrans++; 2139 + /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */ 2140 + if (!tp->undo_retrans) 2141 + tp->undo_retrans = -1; 2134 2142 } 2135 2143 2136 2144 static bool tcp_is_rack(const struct sock *sk) ··· 2217 2209 2218 2210 tcp_set_ca_state(sk, TCP_CA_Loss); 2219 2211 tp->high_seq = tp->snd_nxt; 2212 + tp->tlp_high_seq = 0; 2220 2213 tcp_ecn_queue_cwr(tp); 2221 2214 2222 2215 /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous

+13 -4

net/ipv4/tcp_timer.c

··· 479 479 const struct sk_buff *skb, 480 480 u32 rtx_delta) 481 481 { 482 + const struct inet_connection_sock *icsk = inet_csk(sk); 483 + u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); 482 484 const struct tcp_sock *tp = tcp_sk(sk); 483 - const int timeout = TCP_RTO_MAX * 2; 485 + int timeout = TCP_RTO_MAX * 2; 484 486 s32 rcv_delta; 485 487 488 + if (user_timeout) { 489 + /* If user application specified a TCP_USER_TIMEOUT, 490 + * it does not want win 0 packets to 'reset the timer' 491 + * while retransmits are not making progress. 492 + */ 493 + if (rtx_delta > user_timeout) 494 + return true; 495 + timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); 496 + } 486 497 /* Note: timer interrupt might have been delayed by at least one jiffy, 487 498 * and tp->rcv_tstamp might very well have been written recently. 488 499 * rcv_delta can thus be negative. 489 500 */ 490 - rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; 501 + rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp; 491 502 if (rcv_delta <= timeout) 492 503 return false; 493 504 ··· 542 531 skb = tcp_rtx_queue_head(sk); 543 532 if (WARN_ON_ONCE(!skb)) 544 533 return; 545 - 546 - tp->tlp_high_seq = 0; 547 534 548 535 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && 549 536 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {

+3 -1

net/ipv4/udp.c

··· 326 326 goto fail_unlock; 327 327 } 328 328 329 + sock_set_flag(sk, SOCK_RCU_FREE); 330 + 329 331 sk_add_node_rcu(sk, &hslot->head); 330 332 hslot->count++; 331 333 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); ··· 344 342 hslot2->count++; 345 343 spin_unlock(&hslot2->lock); 346 344 } 347 - sock_set_flag(sk, SOCK_RCU_FREE); 345 + 348 346 error = 0; 349 347 fail_unlock: 350 348 spin_unlock_bh(&hslot->lock);

+13 -145

net/netfilter/nf_tables_api.c

··· 3866 3866 nf_tables_rule_destroy(ctx, rule); 3867 3867 } 3868 3868 3869 + /** nft_chain_validate - loop detection and hook validation 3870 + * 3871 + * @ctx: context containing call depth and base chain 3872 + * @chain: chain to validate 3873 + * 3874 + * Walk through the rules of the given chain and chase all jumps/gotos 3875 + * and set lookups until either the jump limit is hit or all reachable 3876 + * chains have been validated. 3877 + */ 3869 3878 int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) 3870 3879 { 3871 3880 struct nft_expr *expr, *last; ··· 3896 3887 if (!expr->ops->validate) 3897 3888 continue; 3898 3889 3890 + /* This may call nft_chain_validate() recursively, 3891 + * callers that do so must increment ctx->level. 3892 + */ 3899 3893 err = expr->ops->validate(ctx, expr, &data); 3900 3894 if (err < 0) 3901 3895 return err; ··· 10891 10879 } 10892 10880 EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); 10893 10881 10894 - /* 10895 - * Loop detection - walk through the ruleset beginning at the destination chain 10896 - * of a new jump until either the source chain is reached (loop) or all 10897 - * reachable chains have been traversed. 10898 - * 10899 - * The loop check is performed whenever a new jump verdict is added to an 10900 - * expression or verdict map or a verdict map is bound to a new chain. 
10901 - */ 10902 - 10903 - static int nf_tables_check_loops(const struct nft_ctx *ctx, 10904 - const struct nft_chain *chain); 10905 - 10906 - static int nft_check_loops(const struct nft_ctx *ctx, 10907 - const struct nft_set_ext *ext) 10908 - { 10909 - const struct nft_data *data; 10910 - int ret; 10911 - 10912 - data = nft_set_ext_data(ext); 10913 - switch (data->verdict.code) { 10914 - case NFT_JUMP: 10915 - case NFT_GOTO: 10916 - ret = nf_tables_check_loops(ctx, data->verdict.chain); 10917 - break; 10918 - default: 10919 - ret = 0; 10920 - break; 10921 - } 10922 - 10923 - return ret; 10924 - } 10925 - 10926 - static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, 10927 - struct nft_set *set, 10928 - const struct nft_set_iter *iter, 10929 - struct nft_elem_priv *elem_priv) 10930 - { 10931 - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 10932 - 10933 - if (!nft_set_elem_active(ext, iter->genmask)) 10934 - return 0; 10935 - 10936 - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && 10937 - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) 10938 - return 0; 10939 - 10940 - return nft_check_loops(ctx, ext); 10941 - } 10942 - 10943 - static int nft_set_catchall_loops(const struct nft_ctx *ctx, 10944 - struct nft_set *set) 10945 - { 10946 - u8 genmask = nft_genmask_next(ctx->net); 10947 - struct nft_set_elem_catchall *catchall; 10948 - struct nft_set_ext *ext; 10949 - int ret = 0; 10950 - 10951 - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { 10952 - ext = nft_set_elem_ext(set, catchall->elem); 10953 - if (!nft_set_elem_active(ext, genmask)) 10954 - continue; 10955 - 10956 - ret = nft_check_loops(ctx, ext); 10957 - if (ret < 0) 10958 - return ret; 10959 - } 10960 - 10961 - return ret; 10962 - } 10963 - 10964 - static int nf_tables_check_loops(const struct nft_ctx *ctx, 10965 - const struct nft_chain *chain) 10966 - { 10967 - const struct nft_rule *rule; 10968 - const struct nft_expr *expr, *last; 10969 - struct nft_set 
*set; 10970 - struct nft_set_binding *binding; 10971 - struct nft_set_iter iter; 10972 - 10973 - if (ctx->chain == chain) 10974 - return -ELOOP; 10975 - 10976 - if (fatal_signal_pending(current)) 10977 - return -EINTR; 10978 - 10979 - list_for_each_entry(rule, &chain->rules, list) { 10980 - nft_rule_for_each_expr(expr, last, rule) { 10981 - struct nft_immediate_expr *priv; 10982 - const struct nft_data *data; 10983 - int err; 10984 - 10985 - if (strcmp(expr->ops->type->name, "immediate")) 10986 - continue; 10987 - 10988 - priv = nft_expr_priv(expr); 10989 - if (priv->dreg != NFT_REG_VERDICT) 10990 - continue; 10991 - 10992 - data = &priv->data; 10993 - switch (data->verdict.code) { 10994 - case NFT_JUMP: 10995 - case NFT_GOTO: 10996 - err = nf_tables_check_loops(ctx, 10997 - data->verdict.chain); 10998 - if (err < 0) 10999 - return err; 11000 - break; 11001 - default: 11002 - break; 11003 - } 11004 - } 11005 - } 11006 - 11007 - list_for_each_entry(set, &ctx->table->sets, list) { 11008 - if (!nft_is_active_next(ctx->net, set)) 11009 - continue; 11010 - if (!(set->flags & NFT_SET_MAP) || 11011 - set->dtype != NFT_DATA_VERDICT) 11012 - continue; 11013 - 11014 - list_for_each_entry(binding, &set->bindings, list) { 11015 - if (!(binding->flags & NFT_SET_MAP) || 11016 - binding->chain != chain) 11017 - continue; 11018 - 11019 - iter.genmask = nft_genmask_next(ctx->net); 11020 - iter.type = NFT_ITER_UPDATE; 11021 - iter.skip = 0; 11022 - iter.count = 0; 11023 - iter.err = 0; 11024 - iter.fn = nf_tables_loop_check_setelem; 11025 - 11026 - set->ops->walk(ctx, set, &iter); 11027 - if (!iter.err) 11028 - iter.err = nft_set_catchall_loops(ctx, set); 11029 - 11030 - if (iter.err < 0) 11031 - return iter.err; 11032 - } 11033 - } 11034 - 11035 - return 0; 11036 - } 11037 - 11038 10882 /** 11039 10883 * nft_parse_u32_check - fetch u32 attribute and check for maximum value 11040 10884 * ··· 11003 11135 if (data != NULL && 11004 11136 (data->verdict.code == NFT_GOTO || 11005 11137 
data->verdict.code == NFT_JUMP)) { 11006 - err = nf_tables_check_loops(ctx, data->verdict.chain); 11138 + err = nft_chain_validate(ctx, data->verdict.chain); 11007 11139 if (err < 0) 11008 11140 return err; 11009 11141 }

+1 -1

net/netfilter/nfnetlink_queue.c

··· 325 325 hooks = nf_hook_entries_head(net, pf, entry->state.hook); 326 326 327 327 i = entry->hook_index; 328 - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { 328 + if (!hooks || i >= hooks->num_hook_entries) { 329 329 kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); 330 330 nf_queue_entry_free(entry); 331 331 return;

+8

net/sched/act_ct.c

··· 1081 1081 err = nf_conntrack_confirm(skb); 1082 1082 if (err != NF_ACCEPT) 1083 1083 goto nf_error; 1084 + 1085 + /* The ct may be dropped if a clash has been resolved, 1086 + * so it's necessary to retrieve it from skb again to 1087 + * prevent UAF. 1088 + */ 1089 + ct = nf_ct_get(skb, &ctinfo); 1090 + if (!ct) 1091 + skip_add = true; 1084 1092 } 1085 1093 1086 1094 if (!skip_add)

+6 -6

net/sched/sch_ingress.c

··· 91 91 entry = tcx_entry_fetch_or_create(dev, true, &created); 92 92 if (!entry) 93 93 return -ENOMEM; 94 - tcx_miniq_set_active(entry, true); 94 + tcx_miniq_inc(entry); 95 95 mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq); 96 96 if (created) 97 97 tcx_entry_update(dev, entry, true); ··· 121 121 tcf_block_put_ext(q->block, sch, &q->block_info); 122 122 123 123 if (entry) { 124 - tcx_miniq_set_active(entry, false); 124 + tcx_miniq_dec(entry); 125 125 if (!tcx_entry_is_active(entry)) { 126 126 tcx_entry_update(dev, NULL, true); 127 127 tcx_entry_free(entry); ··· 257 257 entry = tcx_entry_fetch_or_create(dev, true, &created); 258 258 if (!entry) 259 259 return -ENOMEM; 260 - tcx_miniq_set_active(entry, true); 260 + tcx_miniq_inc(entry); 261 261 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq); 262 262 if (created) 263 263 tcx_entry_update(dev, entry, true); ··· 276 276 entry = tcx_entry_fetch_or_create(dev, false, &created); 277 277 if (!entry) 278 278 return -ENOMEM; 279 - tcx_miniq_set_active(entry, true); 279 + tcx_miniq_inc(entry); 280 280 mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq); 281 281 if (created) 282 282 tcx_entry_update(dev, entry, false); ··· 302 302 tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); 303 303 304 304 if (ingress_entry) { 305 - tcx_miniq_set_active(ingress_entry, false); 305 + tcx_miniq_dec(ingress_entry); 306 306 if (!tcx_entry_is_active(ingress_entry)) { 307 307 tcx_entry_update(dev, NULL, true); 308 308 tcx_entry_free(ingress_entry); ··· 310 310 } 311 311 312 312 if (egress_entry) { 313 - tcx_miniq_set_active(egress_entry, false); 313 + tcx_miniq_dec(egress_entry); 314 314 if (!tcx_entry_is_active(egress_entry)) { 315 315 tcx_entry_update(dev, NULL, false); 316 316 tcx_entry_free(egress_entry);

+7

net/sunrpc/xprtsock.c

··· 2441 2441 transport->srcport = 0; 2442 2442 status = -EAGAIN; 2443 2443 break; 2444 + case -EPERM: 2445 + /* Happens, for instance, if a BPF program is preventing 2446 + * the connect. Remap the error so upper layers can better 2447 + * deal with it. 2448 + */ 2449 + status = -ECONNREFUSED; 2450 + fallthrough; 2444 2451 case -EINVAL: 2445 2452 /* Happens, for instance, if the user specified a link 2446 2453 * local IPv6 address without a scope-id.

+1 -2

security/integrity/ima/ima_fs.c

··· 427 427 428 428 kfree(lists); 429 429 } 430 - 431 - securityfs_measurement_list_count = 0; 432 430 } 433 431 434 432 static int __init create_securityfs_measurement_lists(void) ··· 623 625 securityfs_remove(binary_runtime_measurements); 624 626 remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists); 625 627 remove_securityfs_measurement_lists(binary_securityfs_measurement_lists); 628 + securityfs_measurement_list_count = 0; 626 629 securityfs_remove(ima_symlink); 627 630 securityfs_remove(ima_dir); 628 631

+18 -11

tools/perf/util/comm.c

··· 86 86 return result; 87 87 } 88 88 89 - static int comm_str__cmp(const void *_lhs, const void *_rhs) 90 - { 91 - const struct comm_str *lhs = *(const struct comm_str * const *)_lhs; 92 - const struct comm_str *rhs = *(const struct comm_str * const *)_rhs; 93 - 94 - return strcmp(comm_str__str(lhs), comm_str__str(rhs)); 95 - } 96 - 97 89 static int comm_str__search(const void *_key, const void *_member) 98 90 { 99 91 const char *key = _key; ··· 161 169 } 162 170 result = comm_str__new(str); 163 171 if (result) { 164 - comm_strs->strs[comm_strs->num_strs++] = result; 165 - qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *), 166 - comm_str__cmp); 172 + int low = 0, high = comm_strs->num_strs - 1; 173 + int insert = comm_strs->num_strs; /* Default to inserting at the end. */ 174 + 175 + while (low <= high) { 176 + int mid = low + (high - low) / 2; 177 + int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str); 178 + 179 + if (cmp < 0) { 180 + low = mid + 1; 181 + } else { 182 + high = mid - 1; 183 + insert = mid; 184 + } 185 + } 186 + memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert], 187 + (comm_strs->num_strs - insert) * sizeof(struct comm_str *)); 188 + comm_strs->num_strs++; 189 + comm_strs->strs[insert] = result; 167 190 } 168 191 } 169 192 up_write(&comm_strs->lock);

+21 -5

tools/perf/util/dsos.c

··· 203 203 dsos->dsos = temp; 204 204 dsos->allocated = to_allocate; 205 205 } 206 - dsos->dsos[dsos->cnt++] = dso__get(dso); 207 - if (dsos->cnt >= 2 && dsos->sorted) { 208 - dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2], 209 - &dsos->dsos[dsos->cnt - 1]) 210 - <= 0; 206 + if (!dsos->sorted) { 207 + dsos->dsos[dsos->cnt++] = dso__get(dso); 208 + } else { 209 + int low = 0, high = dsos->cnt - 1; 210 + int insert = dsos->cnt; /* Default to inserting at the end. */ 211 + 212 + while (low <= high) { 213 + int mid = low + (high - low) / 2; 214 + int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso); 215 + 216 + if (cmp < 0) { 217 + low = mid + 1; 218 + } else { 219 + high = mid - 1; 220 + insert = mid; 221 + } 222 + } 223 + memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert], 224 + (dsos->cnt - insert) * sizeof(struct dso *)); 225 + dsos->cnt++; 226 + dsos->dsos[insert] = dso__get(dso); 211 227 } 212 228 dso__set_dsos(dso, dsos); 213 229 return 0;

+3

tools/testing/selftests/bpf/config

··· 58 58 CONFIG_MPLS_IPTUNNEL=y 59 59 CONFIG_MPLS_ROUTING=y 60 60 CONFIG_MPTCP=y 61 + CONFIG_NET_ACT_SKBMOD=y 62 + CONFIG_NET_CLS=y 61 63 CONFIG_NET_CLS_ACT=y 62 64 CONFIG_NET_CLS_BPF=y 63 65 CONFIG_NET_CLS_FLOWER=y 66 + CONFIG_NET_CLS_MATCHALL=y 64 67 CONFIG_NET_FOU=y 65 68 CONFIG_NET_FOU_IP_TUNNELS=y 66 69 CONFIG_NET_IPGRE=y

+61

tools/testing/selftests/bpf/prog_tests/tc_links.c

··· 9 9 #define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" 10 10 11 11 #include "test_tc_link.skel.h" 12 + 13 + #include "netlink_helpers.h" 12 14 #include "tc_helpers.h" 13 15 14 16 void serial_test_tc_links_basic(void) ··· 1787 1785 test_tc_links_ingress(BPF_TCX_INGRESS, true, true); 1788 1786 test_tc_links_ingress(BPF_TCX_INGRESS, true, false); 1789 1787 test_tc_links_ingress(BPF_TCX_INGRESS, false, false); 1788 + } 1789 + 1790 + struct qdisc_req { 1791 + struct nlmsghdr n; 1792 + struct tcmsg t; 1793 + char buf[1024]; 1794 + }; 1795 + 1796 + static int qdisc_replace(int ifindex, const char *kind, bool block) 1797 + { 1798 + struct rtnl_handle rth = { .fd = -1 }; 1799 + struct qdisc_req req; 1800 + int err; 1801 + 1802 + err = rtnl_open(&rth, 0); 1803 + if (!ASSERT_OK(err, "open_rtnetlink")) 1804 + return err; 1805 + 1806 + memset(&req, 0, sizeof(req)); 1807 + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); 1808 + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; 1809 + req.n.nlmsg_type = RTM_NEWQDISC; 1810 + req.t.tcm_family = AF_UNSPEC; 1811 + req.t.tcm_ifindex = ifindex; 1812 + req.t.tcm_parent = 0xfffffff1; 1813 + 1814 + addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1); 1815 + if (block) 1816 + addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1); 1817 + 1818 + err = rtnl_talk(&rth, &req.n, NULL); 1819 + ASSERT_OK(err, "talk_rtnetlink"); 1820 + rtnl_close(&rth); 1821 + return err; 1822 + } 1823 + 1824 + void serial_test_tc_links_dev_chain0(void) 1825 + { 1826 + int err, ifindex; 1827 + 1828 + ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth"); 1829 + ifindex = if_nametoindex("foo"); 1830 + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); 1831 + err = qdisc_replace(ifindex, "ingress", true); 1832 + if (!ASSERT_OK(err, "attaching ingress")) 1833 + goto cleanup; 1834 + ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block"); 1835 + err = qdisc_replace(ifindex, 
"clsact", false); 1836 + if (!ASSERT_OK(err, "attaching clsact")) 1837 + goto cleanup; 1838 + /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s 1839 + * triggered the issue without the fix reliably 100% of the time. 1840 + */ 1841 + sleep(5); 1842 + ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter"); 1843 + cleanup: 1844 + ASSERT_OK(system("ip link del dev foo"), "del veth"); 1845 + ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed"); 1846 + ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed"); 1790 1847 } 1791 1848 1792 1849 static void test_tc_links_dev_mixed(int target)

+91

tools/testing/selftests/bpf/prog_tests/timer_lockup.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #define _GNU_SOURCE 4 + #include <sched.h> 5 + #include <test_progs.h> 6 + #include <pthread.h> 7 + #include <network_helpers.h> 8 + 9 + #include "timer_lockup.skel.h" 10 + 11 + static long cpu; 12 + static int *timer1_err; 13 + static int *timer2_err; 14 + static bool skip; 15 + 16 + volatile int k = 0; 17 + 18 + static void *timer_lockup_thread(void *arg) 19 + { 20 + LIBBPF_OPTS(bpf_test_run_opts, opts, 21 + .data_in = &pkt_v4, 22 + .data_size_in = sizeof(pkt_v4), 23 + .repeat = 1000, 24 + ); 25 + int i, prog_fd = *(int *)arg; 26 + cpu_set_t cpuset; 27 + 28 + CPU_ZERO(&cpuset); 29 + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); 30 + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), 31 + &cpuset), 32 + "cpu affinity"); 33 + 34 + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { 35 + bpf_prog_test_run_opts(prog_fd, &opts); 36 + /* Skip the test if we can't reproduce the race in a reasonable 37 + * amount of time. 
38 + */ 39 + if (i > 50) { 40 + WRITE_ONCE(skip, true); 41 + break; 42 + } 43 + } 44 + 45 + return NULL; 46 + } 47 + 48 + void test_timer_lockup(void) 49 + { 50 + int timer1_prog, timer2_prog; 51 + struct timer_lockup *skel; 52 + pthread_t thrds[2]; 53 + void *ret; 54 + 55 + skel = timer_lockup__open_and_load(); 56 + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) 57 + return; 58 + 59 + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); 60 + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); 61 + 62 + timer1_err = &skel->bss->timer1_err; 63 + timer2_err = &skel->bss->timer2_err; 64 + 65 + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, 66 + &timer1_prog), 67 + "pthread_create thread1")) 68 + goto out; 69 + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, 70 + &timer2_prog), 71 + "pthread_create thread2")) { 72 + pthread_exit(&thrds[0]); 73 + goto out; 74 + } 75 + 76 + pthread_join(thrds[1], &ret); 77 + pthread_join(thrds[0], &ret); 78 + 79 + if (skip) { 80 + test__skip(); 81 + goto out; 82 + } 83 + 84 + if (*timer1_err != -EDEADLK && *timer1_err != 0) 85 + ASSERT_FAIL("timer1_err bad value"); 86 + if (*timer2_err != -EDEADLK && *timer2_err != 0) 87 + ASSERT_FAIL("timer2_err bad value"); 88 + out: 89 + timer_lockup__destroy(skel); 90 + return; 91 + }

+87

tools/testing/selftests/bpf/progs/timer_lockup.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <time.h> 5 + #include <errno.h> 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_tracing.h> 8 + #include "bpf_misc.h" 9 + 10 + char _license[] SEC("license") = "GPL"; 11 + 12 + struct elem { 13 + struct bpf_timer t; 14 + }; 15 + 16 + struct { 17 + __uint(type, BPF_MAP_TYPE_ARRAY); 18 + __uint(max_entries, 1); 19 + __type(key, int); 20 + __type(value, struct elem); 21 + } timer1_map SEC(".maps"); 22 + 23 + struct { 24 + __uint(type, BPF_MAP_TYPE_ARRAY); 25 + __uint(max_entries, 1); 26 + __type(key, int); 27 + __type(value, struct elem); 28 + } timer2_map SEC(".maps"); 29 + 30 + int timer1_err; 31 + int timer2_err; 32 + 33 + static int timer_cb1(void *map, int *k, struct elem *v) 34 + { 35 + struct bpf_timer *timer; 36 + int key = 0; 37 + 38 + timer = bpf_map_lookup_elem(&timer2_map, &key); 39 + if (timer) 40 + timer2_err = bpf_timer_cancel(timer); 41 + 42 + return 0; 43 + } 44 + 45 + static int timer_cb2(void *map, int *k, struct elem *v) 46 + { 47 + struct bpf_timer *timer; 48 + int key = 0; 49 + 50 + timer = bpf_map_lookup_elem(&timer1_map, &key); 51 + if (timer) 52 + timer1_err = bpf_timer_cancel(timer); 53 + 54 + return 0; 55 + } 56 + 57 + SEC("tc") 58 + int timer1_prog(void *ctx) 59 + { 60 + struct bpf_timer *timer; 61 + int key = 0; 62 + 63 + timer = bpf_map_lookup_elem(&timer1_map, &key); 64 + if (timer) { 65 + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); 66 + bpf_timer_set_callback(timer, timer_cb1); 67 + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); 68 + } 69 + 70 + return 0; 71 + } 72 + 73 + SEC("tc") 74 + int timer2_prog(void *ctx) 75 + { 76 + struct bpf_timer *timer; 77 + int key = 0; 78 + 79 + timer = bpf_map_lookup_elem(&timer2_map, &key); 80 + if (timer) { 81 + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); 82 + bpf_timer_set_callback(timer, timer_cb2); 83 + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); 84 + } 85 + 86 + return 0; 87 + }

+1 -4

tools/testing/selftests/powerpc/flags.mk

··· 5 5 export GIT_VERSION 6 6 endif 7 7 8 - ifeq ($(CFLAGS),) 9 - CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) 8 + CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) 10 9 export CFLAGS 11 - endif 12 -

+1 -1

tools/testing/selftests/riscv/sigreturn/sigreturn.c

··· 51 51 52 52 asm(".option push \n\ 53 53 .option arch, +v \n\ 54 - vsetivli x0, 1, e32, ta, ma \n\ 54 + vsetivli x0, 1, e32, m1, ta, ma \n\ 55 55 vmv.s.x v0, %1 \n\ 56 56 # Generate SIGSEGV \n\ 57 57 lw a0, 0(x0) \n\

+3 -3

tools/testing/selftests/timens/exec.c

··· 30 30 31 31 for (i = 0; i < 2; i++) { 32 32 _gettime(CLOCK_MONOTONIC, &tst, i); 33 - if (abs(tst.tv_sec - now.tv_sec) > 5) 33 + if (labs(tst.tv_sec - now.tv_sec) > 5) 34 34 return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); 35 35 } 36 36 return 0; ··· 50 50 51 51 for (i = 0; i < 2; i++) { 52 52 _gettime(CLOCK_MONOTONIC, &tst, i); 53 - if (abs(tst.tv_sec - now.tv_sec) > 5) 53 + if (labs(tst.tv_sec - now.tv_sec) > 5) 54 54 return pr_fail("%ld %ld\n", 55 55 now.tv_sec, tst.tv_sec); 56 56 } ··· 70 70 /* Check that a child process is in the new timens. */ 71 71 for (i = 0; i < 2; i++) { 72 72 _gettime(CLOCK_MONOTONIC, &tst, i); 73 - if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) 73 + if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5) 74 74 return pr_fail("%ld %ld\n", 75 75 now.tv_sec + OFFSET, tst.tv_sec); 76 76 }

+1 -1

tools/testing/selftests/timens/timer.c

··· 56 56 return pr_perror("timerfd_gettime"); 57 57 58 58 elapsed = new_value.it_value.tv_sec; 59 - if (abs(elapsed - 3600) > 60) { 59 + if (llabs(elapsed - 3600) > 60) { 60 60 ksft_test_result_fail("clockid: %d elapsed: %lld\n", 61 61 clockid, elapsed); 62 62 return 1;

+1 -1

tools/testing/selftests/timens/timerfd.c

··· 61 61 return pr_perror("timerfd_gettime(%d)", clockid); 62 62 63 63 elapsed = new_value.it_value.tv_sec; 64 - if (abs(elapsed - 3600) > 60) { 64 + if (llabs(elapsed - 3600) > 60) { 65 65 ksft_test_result_fail("clockid: %d elapsed: %lld\n", 66 66 clockid, elapsed); 67 67 return 1;

+2 -2

tools/testing/selftests/timens/vfork_exec.c

··· 32 32 33 33 for (i = 0; i < 2; i++) { 34 34 _gettime(CLOCK_MONOTONIC, &tst, i); 35 - if (abs(tst.tv_sec - now->tv_sec) > 5) { 35 + if (labs(tst.tv_sec - now->tv_sec) > 5) { 36 36 pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", 37 37 args->tst_name, tst.tv_sec, now->tv_sec); 38 38 return (void *)1UL; ··· 64 64 65 65 for (i = 0; i < 2; i++) { 66 66 _gettime(CLOCK_MONOTONIC, &tst, i); 67 - if (abs(tst.tv_sec - now->tv_sec) > 5) 67 + if (labs(tst.tv_sec - now->tv_sec) > 5) 68 68 return pr_fail("%s: unexpected value: %ld (%ld)\n", 69 69 tst_name, tst.tv_sec, now->tv_sec); 70 70 }

+12 -17

tools/testing/selftests/vDSO/Makefile

··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 - include ../lib.mk 3 - 4 2 uname_M := $(shell uname -m 2>/dev/null || echo not) 5 3 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) 6 4 7 - TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu 8 - TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi 9 - TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres 5 + TEST_GEN_PROGS := vdso_test_gettimeofday 6 + TEST_GEN_PROGS += vdso_test_getcpu 7 + TEST_GEN_PROGS += vdso_test_abi 8 + TEST_GEN_PROGS += vdso_test_clock_getres 10 9 ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) 11 - TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 10 + TEST_GEN_PROGS += vdso_standalone_test_x86 12 11 endif 13 - TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness 12 + TEST_GEN_PROGS += vdso_test_correctness 14 13 15 14 CFLAGS := -std=gnu99 16 - CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector 17 - LDFLAGS_vdso_test_correctness := -ldl 15 + 18 16 ifeq ($(CONFIG_X86_32),y) 19 17 LDLIBS += -lgcc_s 20 18 endif 21 19 22 - all: $(TEST_GEN_PROGS) 20 + include ../lib.mk 23 21 $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c 24 22 $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c 25 23 $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c 26 24 $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c 25 + 27 26 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c 28 - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ 29 - vdso_standalone_test_x86.c parse_vdso.c \ 30 - -o $@ 27 + $(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector 28 + 31 29 $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c 32 - $(CC) $(CFLAGS) \ 33 - vdso_test_correctness.c \ 34 - -o $@ \ 35 - $(LDFLAGS_vdso_test_correctness) 30 + $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl

+11 -5

tools/testing/selftests/vDSO/parse_vdso.c

··· 55 55 ELF(Verdef) *verdef; 56 56 } vdso_info; 57 57 58 - /* Straight from the ELF specification. */ 59 - static unsigned long elf_hash(const unsigned char *name) 58 + /* 59 + * Straight from the ELF specification...and then tweaked slightly, in order to 60 + * avoid a few clang warnings. 61 + */ 62 + static unsigned long elf_hash(const char *name) 60 63 { 61 64 unsigned long h = 0, g; 62 - while (*name) 65 + const unsigned char *uch_name = (const unsigned char *)name; 66 + 67 + while (*uch_name) 63 68 { 64 - h = (h << 4) + *name++; 65 - if (g = h & 0xf0000000) 69 + h = (h << 4) + *uch_name++; 70 + g = h & 0xf0000000; 71 + if (g) 66 72 h ^= g >> 24; 67 73 h &= ~g; 68 74 }

+16 -2

tools/testing/selftests/vDSO/vdso_standalone_test_x86.c

··· 18 18 19 19 #include "parse_vdso.h" 20 20 21 - /* We need a libc functions... */ 21 + /* We need some libc functions... */ 22 22 int strcmp(const char *a, const char *b) 23 23 { 24 24 /* This implementation is buggy: it never returns -1. */ ··· 32 32 } 33 33 34 34 return 0; 35 + } 36 + 37 + /* 38 + * The clang build needs this, although gcc does not. 39 + * Stolen from lib/string.c. 40 + */ 41 + void *memcpy(void *dest, const void *src, size_t count) 42 + { 43 + char *tmp = dest; 44 + const char *s = src; 45 + 46 + while (count--) 47 + *tmp++ = *s++; 48 + return dest; 35 49 } 36 50 37 51 /* ...and two syscalls. This is x86-specific. */ ··· 84 70 } 85 71 } 86 72 87 - __attribute__((externally_visible)) void c_main(void **stack) 73 + void c_main(void **stack) 88 74 { 89 75 /* Parse the stack */ 90 76 long argc = (long)*stack;

+4 -4

tools/testing/selftests/wireguard/qemu/Makefile

··· 109 109 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage 110 110 QEMU_VPORT_RESULT := virtio-serial-device 111 111 ifeq ($(HOST_ARCH),$(ARCH)) 112 - QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi 112 + QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off 113 113 else 114 - QEMU_MACHINE := -cpu max -machine microvm -no-acpi 114 + QEMU_MACHINE := -cpu max -machine microvm,acpi=off 115 115 endif 116 116 else ifeq ($(ARCH),i686) 117 117 CHOST := i686-linux-musl ··· 120 120 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage 121 121 QEMU_VPORT_RESULT := virtio-serial-device 122 122 ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) 123 - QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi 123 + QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off 124 124 else 125 - QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi 125 + QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off 126 126 endif 127 127 else ifeq ($(ARCH),mips64) 128 128 CHOST := mips64-linux-musl