Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARCv2: Implement atomic64 based on LLOCKD/SCONDD instructions

ARCv2 ISA provides 64-bit exclusive load/stores so use them to implement
the 64-bit atomics and elide the spinlock based generic 64-bit atomics

boot tested with atomic64 self-test (and GOD bless the person who wrote
them, I realized my inline assembly is sloppy as hell)

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

+260 -3
+1 -1
arch/arc/Kconfig
··· 13 13 select CLKSRC_OF 14 14 select CLONE_BACKWARDS 15 15 select COMMON_CLK 16 - select GENERIC_ATOMIC64 16 + select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) 17 17 select GENERIC_CLOCKEVENTS 18 18 select GENERIC_FIND_FIRST_BIT 19 19 # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
+259 -2
arch/arc/include/asm/atomic.h
··· 20 20 #ifndef CONFIG_ARC_PLAT_EZNPS 21 21 22 22 #define atomic_read(v) READ_ONCE((v)->counter) 23 + #define ATOMIC_INIT(i) { (i) } 23 24 24 25 #ifdef CONFIG_ARC_HAS_LLSC 25 26 ··· 344 343 345 344 #define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) 346 345 347 - #define ATOMIC_INIT(i) { (i) } 346 + 347 + #ifdef CONFIG_GENERIC_ATOMIC64 348 348 349 349 #include <asm-generic/atomic64.h> 350 350 351 - #endif 351 + #else /* Kconfig ensures this is only enabled with needed h/w assist */ 352 + 353 + /* 354 + * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) 355 + * - The address HAS to be 64-bit aligned 356 + * - There are 2 semantics involved here: 357 + * = exclusive implies no interim update between load/store to same addr 358 + * = both words are observed/updated together: this is guaranteed even 359 + * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set() 360 + * is NOT required to use LLOCKD+SCONDD, STD suffices 361 + */ 362 + 363 + typedef struct { 364 + aligned_u64 counter; 365 + } atomic64_t; 366 + 367 + #define ATOMIC64_INIT(a) { (a) } 368 + 369 + static inline long long atomic64_read(const atomic64_t *v) 370 + { 371 + unsigned long long val; 372 + 373 + __asm__ __volatile__( 374 + " ldd %0, [%1] \n" 375 + : "=r"(val) 376 + : "r"(&v->counter)); 377 + 378 + return val; 379 + } 380 + 381 + static inline void atomic64_set(atomic64_t *v, long long a) 382 + { 383 + /* 384 + * This could have been a simple assignment in "C" but would need 385 + * explicit volatile. Otherwise gcc optimizers could elide the store 386 + * which borked atomic64 self-test 387 + * In the inline asm version, memory clobber needed for exact same 388 + * reason, to tell gcc about the store. 389 + * 390 + * This however is not needed for sibling atomic64_add() etc since both 391 + * load/store are explicitly done in inline asm. 
As long as API is used 392 + * for each access, gcc has no way to optimize away any load/store 393 + */ 394 + __asm__ __volatile__( 395 + " std %0, [%1] \n" 396 + : 397 + : "r"(a), "r"(&v->counter) 398 + : "memory"); 399 + } 400 + 401 + #define ATOMIC64_OP(op, op1, op2) \ 402 + static inline void atomic64_##op(long long a, atomic64_t *v) \ 403 + { \ 404 + unsigned long long val; \ 405 + \ 406 + __asm__ __volatile__( \ 407 + "1: \n" \ 408 + " llockd %0, [%1] \n" \ 409 + " " #op1 " %L0, %L0, %L2 \n" \ 410 + " " #op2 " %H0, %H0, %H2 \n" \ 411 + " scondd %0, [%1] \n" \ 412 + " bnz 1b \n" \ 413 + : "=&r"(val) \ 414 + : "r"(&v->counter), "ir"(a) \ 415 + : "cc"); \ 416 + } \ 417 + 418 + #define ATOMIC64_OP_RETURN(op, op1, op2) \ 419 + static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ 420 + { \ 421 + unsigned long long val; \ 422 + \ 423 + smp_mb(); \ 424 + \ 425 + __asm__ __volatile__( \ 426 + "1: \n" \ 427 + " llockd %0, [%1] \n" \ 428 + " " #op1 " %L0, %L0, %L2 \n" \ 429 + " " #op2 " %H0, %H0, %H2 \n" \ 430 + " scondd %0, [%1] \n" \ 431 + " bnz 1b \n" \ 432 + : [val] "=&r"(val) \ 433 + : "r"(&v->counter), "ir"(a) \ 434 + : "cc"); /* memory clobber comes from smp_mb() */ \ 435 + \ 436 + smp_mb(); \ 437 + \ 438 + return val; \ 439 + } 440 + 441 + #define ATOMIC64_FETCH_OP(op, op1, op2) \ 442 + static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \ 443 + { \ 444 + unsigned long long val, orig; \ 445 + \ 446 + smp_mb(); \ 447 + \ 448 + __asm__ __volatile__( \ 449 + "1: \n" \ 450 + " llockd %0, [%2] \n" \ 451 + " " #op1 " %L1, %L0, %L3 \n" \ 452 + " " #op2 " %H1, %H0, %H3 \n" \ 453 + " scondd %1, [%2] \n" \ 454 + " bnz 1b \n" \ 455 + : "=&r"(orig), "=&r"(val) \ 456 + : "r"(&v->counter), "ir"(a) \ 457 + : "cc"); /* memory clobber comes from smp_mb() */ \ 458 + \ 459 + smp_mb(); \ 460 + \ 461 + return orig; \ 462 + } 463 + 464 + #define ATOMIC64_OPS(op, op1, op2) \ 465 + ATOMIC64_OP(op, op1, op2) \ 466 + ATOMIC64_OP_RETURN(op, op1, 
op2) \ 467 + ATOMIC64_FETCH_OP(op, op1, op2) 468 + 469 + #define atomic64_andnot atomic64_andnot 470 + 471 + ATOMIC64_OPS(add, add.f, adc) 472 + ATOMIC64_OPS(sub, sub.f, sbc) 473 + ATOMIC64_OPS(and, and, and) 474 + ATOMIC64_OPS(andnot, bic, bic) 475 + ATOMIC64_OPS(or, or, or) 476 + ATOMIC64_OPS(xor, xor, xor) 477 + 478 + #undef ATOMIC64_OPS 479 + #undef ATOMIC64_FETCH_OP 480 + #undef ATOMIC64_OP_RETURN 481 + #undef ATOMIC64_OP 482 + 483 + static inline long long 484 + atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) 485 + { 486 + long long prev; 487 + 488 + smp_mb(); 489 + 490 + __asm__ __volatile__( 491 + "1: llockd %0, [%1] \n" 492 + " brne %L0, %L2, 2f \n" 493 + " brne %H0, %H2, 2f \n" 494 + " scondd %3, [%1] \n" 495 + " bnz 1b \n" 496 + "2: \n" 497 + : "=&r"(prev) 498 + : "r"(ptr), "ir"(expected), "r"(new) 499 + : "cc"); /* memory clobber comes from smp_mb() */ 500 + 501 + smp_mb(); 502 + 503 + return prev; 504 + } 505 + 506 + static inline long long atomic64_xchg(atomic64_t *ptr, long long new) 507 + { 508 + long long prev; 509 + 510 + smp_mb(); 511 + 512 + __asm__ __volatile__( 513 + "1: llockd %0, [%1] \n" 514 + " scondd %2, [%1] \n" 515 + " bnz 1b \n" 516 + "2: \n" 517 + : "=&r"(prev) 518 + : "r"(ptr), "r"(new) 519 + : "cc"); /* memory clobber comes from smp_mb() */ 520 + 521 + smp_mb(); 522 + 523 + return prev; 524 + } 525 + 526 + /** 527 + * atomic64_dec_if_positive - decrement by 1 if old value positive 528 + * @v: pointer of type atomic64_t 529 + * 530 + * The function returns the old value of *v minus 1, even if 531 + * the atomic variable, v, was not decremented. 
532 + */ 533 + 534 + static inline long long atomic64_dec_if_positive(atomic64_t *v) 535 + { 536 + long long val; 537 + 538 + smp_mb(); 539 + 540 + __asm__ __volatile__( 541 + "1: llockd %0, [%1] \n" 542 + " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" 543 + " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" 544 + " brlt %H0, 0, 2f \n" 545 + " scondd %0, [%1] \n" 546 + " bnz 1b \n" 547 + "2: \n" 548 + : "=&r"(val) 549 + : "r"(&v->counter) 550 + : "cc"); /* memory clobber comes from smp_mb() */ 551 + 552 + smp_mb(); 553 + 554 + return val; 555 + } 556 + 557 + /** 558 + * atomic64_add_unless - add unless the number is a given value 559 + * @v: pointer of type atomic64_t 560 + * @a: the amount to add to v... 561 + * @u: ...unless v is equal to u. 562 + * 563 + * if (v != u) { v += a; ret = 1} else {ret = 0} 564 + * Returns 1 iff @v was not @u (i.e. if add actually happened) 565 + */ 566 + static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) 567 + { 568 + long long val; 569 + int op_done; 570 + 571 + smp_mb(); 572 + 573 + __asm__ __volatile__( 574 + "1: llockd %0, [%2] \n" 575 + " mov %1, 1 \n" 576 + " brne %L0, %L4, 2f # continue to add since v != u \n" 577 + " breq.d %H0, %H4, 3f # return since v == u \n" 578 + " mov %1, 0 \n" 579 + "2: \n" 580 + " add.f %L0, %L0, %L3 \n" 581 + " adc %H0, %H0, %H3 \n" 582 + " scondd %0, [%2] \n" 583 + " bnz 1b \n" 584 + "3: \n" 585 + : "=&r"(val), "=&r" (op_done) 586 + : "r"(&v->counter), "r"(a), "r"(u) 587 + : "cc"); /* memory clobber comes from smp_mb() */ 588 + 589 + smp_mb(); 590 + 591 + return op_done; 592 + } 593 + 594 + #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) 595 + #define atomic64_inc(v) atomic64_add(1LL, (v)) 596 + #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) 597 + #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) 598 + #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) 599 + #define atomic64_dec(v) atomic64_sub(1LL, (v)) 
600 + #define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) 601 + #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) 602 + #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) 603 + 604 + #endif /* !CONFIG_GENERIC_ATOMIC64 */ 605 + 606 + #endif /* !__ASSEMBLY__ */ 352 607 353 608 #endif