AMD Athlon Processor x86 Optimization Manual page 80

X86 code optimization
Table of Contents

Advertisement

AMD Athlon™ Processor x86 Code Optimization
Example 5:
64
C code:
#define PI 3.14159265358979323
float x,y,xa,ya,r,res;
int xs,df;
xs = x < 0 ? 1 : 0;
xa = fabs(x);
ya = fabs(y);
df = (xa < ya);
if (xs && df) {
res = PI/2 + r;
}
else if (xs) {
res = PI - r;
}
else if (df) {
res = PI/2 - r;
}
else {
res = r;
}
3DNow! code:
;in:  MM0 = r
;     MM1 = y
;     MM2 = x
;out: MM0 = res
MOVQ     MM7, sgn    ;mask to extract sign bit
MOVQ     MM6, sgn    ;mask to extract sign bit
MOVQ     MM5, mabs   ;mask to clear sign bit
PAND     MM7, MM2    ;xs = sign(x)
PAND     MM1, MM5    ;ya = abs(y)
PAND     MM2, MM5    ;xa = abs(x)
MOVQ     MM6, MM1    ;y
PCMPGTD  MM6, MM2    ;df = (xa < ya) ? 0xffffffff : 0
PSLLD    MM6, 31     ;df = bit<31>
MOVQ     MM5, MM7    ;xs
PXOR     MM7, MM6    ;xs^df ? 0x80000000 : 0
MOVQ     MM3, npio2  ;-pi/2
PXOR     MM5, MM3    ;xs ? pi/2 : -pi/2
PSRAD    MM6, 31     ;df ? 0xffffffff : 0
PANDN    MM6, MM5    ;xs ? (df ? 0 : pi/2) : (df ? 0 : -pi/2)
PFSUB    MM6, MM3    ;pr = pi/2 + (xs ? (df ? 0 : pi/2) :
                     ;     (df ? 0 : -pi/2))
POR      MM0, MM7    ;ar = xs^df ? -r : r
PFADD    MM0, MM6    ;res = ar + pr
Replace Branches with Computation in 3DNow!™ Code
22007E/0—November 1999

Advertisement

Table of Contents
loading

Table of Contents