src/libm/Source/Intel/ceill.S at fixPythonPipStalling

overby.me / darling-nix

fork atom

this repo has no description

fork atom

darling-nix / src / libm / Source / Intel / ceill.S

at fixPythonPipStalling 77 lines 2.7 kB view raw

wrap content

Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago

f228ae16

 1/*
 2 *  ceill.s
 3 *
 4 *      by Ian Ollmann
 5 *
 6 *  Copyright (c) Apple Inc. 2007. All Rights Reserved.
 7 *
 8 *  C99 implementation of ceill for __i386__ and __x86_64__.
 9 */
10 
11 
12#include <machine/asm.h>
13#include "abi.h"
14
15
//-----------------------------------------------------------------------
// long double ceill( long double x )
//
// Returns x rounded toward +infinity to an integral value, using integer
// and SSE2 bit manipulation on the 80-bit representation plus a few x87
// operations.
//
// In:    x is read from the stack at FRAME_SIZE( STACKP ): the 64-bit
//        significand at offset 0 and the 16-bit sign+exponent word at
//        offset 8.  FRAME_SIZE and STACKP are not defined in this file
//        (presumably they come from "abi.h" -- verify there).
// Out:   result in x87 st(0); every path pushes exactly one value.
// Clobb: eax, ecx, edx, xmm0, xmm1, xmm2, xmm7, EFLAGS, and the argument's
//        own stack slot (reused as scratch storage).
//
// Paths: fall-through    1.0 <= |x| < 0x1.0p63  (may have fraction bits)
//        label 2         |x| < 1.0
//        label 3         |x| >= 0x1.0p63, infinity or NaN
//-----------------------------------------------------------------------
16ENTRY( ceill )
17    movswl  8+FRAME_SIZE( STACKP ),     %eax        // load signed exponent (sign-extended, so bit 31 of eax = sign of x)
18    movl    %eax,                       %edx        // sign + exponent (kept in edx for the rounding fix-up below)
19    andl    $0x7fff,                    %eax        // exponent
20    subl    $0x3fff,                    %eax        // remove bias
21    cmpl    $63,                        %eax        // if( |x| >= 0x1.0p63 || |x| < 1.0 || isnan(x) )
22    jae     2f                                      //      goto 2  (unsigned test: a negative unbiased exponent also compares as >= 63)
23    
      // Here 0 <= exponent <= 62, so the significand holds 63-exponent fraction bits.
24    movl    $63,                        %ecx
25    movq    FRAME_SIZE( STACKP ),       %xmm0       // significand
26    subl    %eax,                       %ecx        // 63 - exponent
27    pcmpeqb %xmm1,                      %xmm1       // -1LL
28    movd    %ecx,                       %xmm7       // 63-exponent
29    pxor    %xmm2,                      %xmm2       // 0
30    psubq   %xmm1,                      %xmm2       // 1
31    movdqa  %xmm2,                      %xmm1       // 1
32    psllq   %xmm7,                      %xmm2       // one's bit
33    psubq   %xmm1,                      %xmm2       // fract mask
34    pandn   %xmm0,                      %xmm2       // xmm2 = ~fract mask & significand: significand with the fraction bits cleared
35    movq    %xmm2,                      FRAME_SIZE( STACKP )    // overwrite the argument's significand; sign+exponent at offset 8 is untouched, so the slot now holds x truncated toward zero
36    pcmpeqd %xmm2,                      %xmm0       // compare truncated vs. original significand, per 32-bit lane
37    pmovmskb   %xmm0,                   %eax        // gather the compare result into a 16-bit byte mask
38    cmp     $0xffff,                    %eax        // all lanes equal <=> x had no fraction bits set
39    fldt    FRAME_SIZE( STACKP )                    // result (truncated x); the load leaves the flags from cmp intact
40    je      1f                                      // x was already integral: return it as is
41    
42    //need to set the inexact flag and correct rounding
43    sarl    $31,                        %edx        // edx = -1 if x is negative, 0 otherwise
44    notl    %edx                                    // edx =  0 if x is negative, -1 otherwise
45    andl    $0x3f800000,                %edx        // 0x3f800000 is 1.0f: edx = 1.0f for positive x, +0.0f for negative x
46    movl    %edx,                       FRAME_SIZE(STACKP)      // spill the float addend into the (now dead) argument slot
47    fadds   FRAME_SIZE( STACKP )                                // correct rounding: truncated x + 1.0 for positive x, + 0.0 for negative x
48
49    // set inexact
50    fldpi                                           // { pi, result }
51    fmul    %st(0), %st(0)                          // { pi*pi, result }  product is not exactly representable, so this raises inexact
52    fstp    %st(0)                                  // { result }  discard the product
53    
541:  ret
55
56// |x| >= 0x1.0p63 || |x| < 1.0 || isnan(x)    
572:  jge     3f                                      // flags are still those of the cmpl against 63: signed >= means exponent >= 63
58    
59//  |x| < 1.0
60    fldt    FRAME_SIZE( STACKP )                    // { x }
61    fldz                                            // { 0, x }
62    fucomip %st(1), %st(0)                          // { x }  sets EFLAGS from comparing 0 with x, then pops the 0
63    je      1b                                      // x == +/-0: return x itself
64
      // 0 < |x| < 1.0: the result is 1 for positive x and -0 for negative x.
65    fistpl  FRAME_SIZE( STACKP )                    // { }  pops x; the stored integer is never read back (presumably done only so the conversion raises inexact)
66    fldz                                            //  { 0 }
67    fchs                                            //  { -0 }
68    fld1                                            //  { 1, -0 }
69    fcmova  %st(1), %st(0)                          //  EFLAGS still hold the fucomip result: if 0 > x (x negative) st(0) = -0, else st(0) stays 1
70    fstp    %st(1)                                  //  { result }  copy st(0) over st(1) and pop
71    ret
72
73// |x| >= 0x1.0p63 || isnan(x)    
743:  fldt    FRAME_SIZE(STACKP)                      // { x }
75    fldz                                            // { 0, x }
76    faddp                                           // { x + 0 }  x is already integral, infinite or NaN (presumably the add is there to quiet a signaling NaN -- confirm)
77    ret