AMD Athlon Processor x86 Optimization Manual page 140

X86 code optimization
Table of Contents

Advertisement

AMD Athlon™ Processor x86 Code Optimization
124
Example 1 (Avoid):
;-----------------------------------------------------------------------
; Example 1 listing, reflowed to one instruction per line
; (Intel/MASM operand order: op dst, src).
;
; Each pass of L1 loads 16 bytes from [ESI] and 16 bytes from [EDI],
; combines them, and stores the 16 result bytes back to [EDI].
; ECX is loaded with 16, so LOOP runs the body 16 times; ESI and EDI
; advance by EDX and EBX respectively after every pass.
;
; Combination performed on each 8-byte group (s from ESI, d from EDI):
;     ((s AND MM7) >> 1) + ((d AND MM7) >> 1) + ((s OR d) AND MM6)
; with the additions done byte-wise (PADDB).
;
; NOTE(review): the contents of ConstFEFE / Const0101 are defined outside
; this listing; presumably 0FEh and 01h replicated in every byte, which
; would make this a per-byte rounded-up average -- confirm at their
; definitions.
;
; Registers written: ESI, EDI, EDX, EBX, ECX, MM0-MM7.
;-----------------------------------------------------------------------
        MOV     ESI, DWORD PTR Src_MB           ; ESI = source pointer
        MOV     EDI, DWORD PTR Dst_MB           ; EDI = destination pointer (read and written)
        MOV     EDX, DWORD PTR SrcStride        ; EDX = per-pass step added to ESI
        MOV     EBX, DWORD PTR DstStride        ; EBX = per-pass step added to EDI
        MOVQ    MM7, QWORD PTR [ConstFEFE]      ; MM7 = mask applied before the shift
        MOVQ    MM6, QWORD PTR [Const0101]      ; MM6 = mask used for the adjustment term
        MOV     ECX, 16                         ; pass counter consumed by LOOP

L1:
        ; ---- first 8 bytes of the pass: [ESI] with [EDI] ----
        MOVQ    MM0, [ESI]                      ; MM0 = QWORD1 (source)
        MOVQ    MM1, [EDI]                      ; MM1 = QWORD3 (destination)
        MOVQ    MM2, MM0                        ; copies kept for the adjustment term
        MOVQ    MM3, MM1
        PAND    MM2, MM6                        ; MM2 = QWORD1 AND MM6
        PAND    MM3, MM6                        ; MM3 = QWORD3 AND MM6
        PAND    MM0, MM7                        ; MM0 = QWORD1 AND MM7
        PAND    MM1, MM7                        ; MM1 = QWORD3 AND MM7
        POR     MM2, MM3                        ; MM2 = adjustment: OR of the two masked copies
        PSRLQ   MM0, 1                          ; MM0 = (QWORD1 AND MM7) >> 1
        PSRLQ   MM1, 1                          ; MM1 = (QWORD3 AND MM7) >> 1
        PAND    MM2, MM6                        ; re-applies MM6; both OR inputs were already masked with it
        PADDB   MM0, MM1                        ; byte-wise sum of the two shifted values, no adjustment yet
        PADDB   MM0, MM2                        ; add the adjustment term
        MOVQ    [EDI], MM0                      ; store result over QWORD3

        ; ---- second 8 bytes of the pass: [ESI+8] with [EDI+8] ----
        MOVQ    MM4, [ESI+8]                    ; MM4 = QWORD2 (source)
        MOVQ    MM5, [EDI+8]                    ; MM5 = QWORD4 (destination)
        MOVQ    MM2, MM4                        ; copies kept for the adjustment term
        MOVQ    MM3, MM5
        PAND    MM2, MM6                        ; MM2 = QWORD2 AND MM6
        PAND    MM3, MM6                        ; MM3 = QWORD4 AND MM6
        PAND    MM4, MM7                        ; MM4 = QWORD2 AND MM7
        PAND    MM5, MM7                        ; MM5 = QWORD4 AND MM7
        POR     MM2, MM3                        ; MM2 = adjustment: OR of the two masked copies
        PSRLQ   MM4, 1                          ; MM4 = (QWORD2 AND MM7) >> 1
        PSRLQ   MM5, 1                          ; MM5 = (QWORD4 AND MM7) >> 1
        PAND    MM2, MM6                        ; re-applies MM6; both OR inputs were already masked with it
        PADDB   MM4, MM5                        ; byte-wise sum of the two shifted values, no adjustment yet
        PADDB   MM4, MM2                        ; add the adjustment term
        MOVQ    [EDI+8], MM4                    ; store result over QWORD4

        ; ---- step both pointers and repeat ----
        ADD     ESI, EDX                        ; ESI += SrcStride
        ADD     EDI, EBX                        ; EDI += DstStride
        LOOP    L1                              ; ECX -= 1; branch to L1 while ECX != 0
22007E/0—November 1999
;MM0=QWORD1
;MM1=QWORD3
;MM0 = QWORD1 & 0xfefefefe
;MM1 = QWORD3 & 0xfefefefe
;calculate adjustment
;MM0 = (QWORD1 & 0xfefefefe)/2
;MM1 = (QWORD3 & 0xfefefefe)/2
;MM0 = QWORD1/2 + QWORD3/2 w/o
; adjustment
;add lsb adjustment
;MM4=QWORD2
;MM5=QWORD4
;MM4 = QWORD2 & 0xfefefefe
;MM5 = QWORD4 & 0xfefefefe
;calculate adjustment
;MM4 = (QWORD2 & 0xfefefefe)/2
;MM5 = (QWORD4 & 0xfefefefe)/2
;MM4 = QWORD2/2 + QWORD4/2 w/o
; adjustment
;add lsb adjustment
Use 3DNow!™ PAVGUSB for MPEG-2 Motion

Advertisement

Table of Contents
loading

Table of Contents