AMD Athlon Processor x86 Optimization Manual page 132

X86 code optimization
Table of Contents

Advertisement

AMD Athlon™ Processor x86 Code Optimization
116
$xfer:
movq
mm0, [eax]
add
edx, 64
movq
mm1, [eax+8]
add
eax, 64
movq
mm2, [eax-48]
movq
[edx-64], mm0
movq
mm0, [eax-40]
movq
[edx-56], mm1
movq
mm1, [eax-32]
movq
[edx-48], mm2
movq
mm2, [eax-24]
movq
[edx-40], mm0
movq
mm0, [eax-16]
movq
[edx-32], mm1
movq
mm1, [eax-8]
movq
[edx-24], mm2
movq
[edx-16], mm0
dec
ecx
movq
[edx-8], mm1
jnz
$xfer
femms
}
/*
 * Block fill (destination QWORD aligned).
 *
 * Writes the 8-byte pattern held in fill_data to the memory at dst_ptr,
 * 64 bytes (eight MOVQ stores) per loop iteration.
 *
 * Inputs (C variables referenced from the inline assembly):
 *   dst_ptr   - destination address, loaded into EDX
 *   blk_size  - size in bytes; the loop runs blk_size >> 6 times, so a
 *               remainder of fewer than 64 bytes is not written
 *   fill_data - 64-bit fill pattern, loaded into MM0
 *
 * Precondition: blk_size >= 64. For smaller values the shift leaves ECX = 0,
 * the first DEC wraps it to 0FFFFFFFFh, and the loop keeps storing far past
 * the intended block.
 *
 * Registers written: EDX, ECX, MM0, EFLAGS.
 */
__asm {
        mov     edx, [dst_ptr]          ; edx = destination cursor
        mov     ecx, [blk_size]
        shr     ecx, 6                  ; ecx = number of 64-byte chunks
        movq    mm0, [fill_data]        ; mm0 = 8-byte fill pattern
        align 16
$fill:
        movq    [edx], mm0
        movq    [edx+8], mm0
        movq    [edx+16], mm0
        movq    [edx+24], mm0
        movq    [edx+32], mm0
        movq    [edx+40], mm0
        add     edx, 64                 ; advance early; last two stores use negative offsets
        movq    [edx-16], mm0           ; chunk offset 48
        dec     ecx                     ; was "decq": the counter is the 32-bit ECX
        movq    [edx-8], mm0            ; chunk offset 56; was "mov", which cannot store an MMX register.
                                        ; MOVQ leaves EFLAGS alone, so JNZ still tests the DEC result
        jnz     $fill
        femms                           ; leave MMX state before any following x87 code
}
Use MMX™ Instructions for Block Copies and Block Fills
22007E/0—November 1999

Advertisement

Table of Contents
loading

Table of Contents