; MASM-syntax 3DNow! example: y[i] = m * x[i] + b on four packed 32-bit reals
.586
.K3D ; 3D-Now optimized Code
; .. data section ..
; Inputs: four x values stored as two packed pairs (lower address first).
x_12    REAL4   1.0, 2.0                ; x[1], x[2]
x_34    REAL4   3.0, 4.0                ; x[3], x[4]
; The multiplier and the addend are each stored twice so that a single
; 64-bit load fills both 32-bit halves of a register with the same value.
m       REAL4   3.141592654, 3.141592654
b       REAL4   10.25, 10.25
; Outputs: uninitialized storage for the four results.
y_12    REAL4   ?, ?                    ; y[1], y[2]
y_34    REAL4   ?, ?                    ; y[3], y[4]
; .. code section ..
; Compute y[i] = m * x[i] + b for i = 1..4, two elements per MMX register.
; Reads:    x_12, x_34, m, b   (64 bits each, loaded as packed 32-bit reals)
; Writes:   y_12, y_34
; Clobbers: mm1-mm4; the closing FEMMS empties the shared MMX/x87 state
        MOVQ    mm1, QWORD PTR x_12     ; mm1 = x[2] : x[1]  (high : low)
        MOVQ    mm2, QWORD PTR x_34     ; mm2 = x[4] : x[3]
        MOVQ    mm3, QWORD PTR m        ; mm3 = multiplier in both halves
        MOVQ    mm4, QWORD PTR b        ; mm4 = addend in both halves
        PFMUL   mm1, mm3                ; mm1 = m * x[1..2]
        PFMUL   mm2, mm3                ; mm2 = m * x[3..4] (independent of mm1, so the two chains can overlap)
        PFADD   mm1, mm4                ; mm1 = m * x[1..2] + b
        PFADD   mm2, mm4                ; mm2 = m * x[3..4] + b
        MOVQ    QWORD PTR y_12, mm1     ; store y[1], y[2]
        MOVQ    QWORD PTR y_34, mm2     ; store y[3], y[4]
        FEMMS                           ; MMX registers alias the x87 stack: mark it empty so later x87 code is safe