Intel ARCHITECTURE IA-32 Reference Manual page 273

Architecture optimization
Table of Contents

Advertisement

Example 5-3
Swizzling Data (continued)
y1 x1
movhps xmm7, [ecx+16]
movlps xmm0, [ecx+32]
movhps xmm0, [ecx+48]
movaps xmm6, xmm7
shufps xmm7, xmm0, 0x88
shufps xmm6, xmm0, 0xDD
movlps xmm2, [ecx+8]
movhps xmm2, [ecx+24]
movlps xmm1, [ecx+40]
movhps xmm1, [ecx+56]
movaps xmm0, xmm2
shufps xmm2, xmm1, 0x88
movlps xmm7, [ecx]
movaps [edx], xmm7
movaps [edx+16], xmm6
movaps [edx+32], xmm2
movaps [edx+48], xmm0
}
}
Optimizing for SIMD Floating-point Applications
// xmm7 = y2 x2 y1 x1
// xmm0 = -- -- y3 x3
// xmm0 = y4 x4 y3 x3
// xmm6 = y2 x2 y1 x1
// xmm7 = x1 x2 x3 x4 => X
// xmm6 = y1 y2 y3 y4 => Y
// xmm2 = -- -- w1 z1
// xmm2 = w2 z2 w1 z1
// xmm1 = -- -- w3 z3
// xmm1 = w4 z4 w3 z3
// xmm0 = w2 z2 w1 z1
// xmm2 = z1 z2 z3 z4 => Z
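// NOTE: this row is garbled in the listing; the stray "xmm7 = -- --" text does not belong here (presumably a leftover of the example's first row). Intended instruction: shufps xmm0, xmm1, 0xDD
// xmm0 = w1 w2 w3 w4 => W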
// store X
// store Y
// store Z
// store W
// SWIZZLE XYZ -> XXX
5
5-11

Advertisement

Table of Contents
loading

Table of Contents