/*---------------------------------------------------------------------------
xprod.asm: Vector Cross Product
-----------------------------------------------------------------------------
Description:

    The Vector Cross Product, which produces a vector perpendicular to the
    two vector operands, is a commonly used algorithm in 3D graphics shading
    and illumination.

    The cross product of vectors A and B, C = A X B, can be written as:

        cx = ay*bz - az*by
        cy = az*bx - ax*bz
        cz = ax*by - ay*bx

    To maximize looped performance, this subroutine simultaneously computes
    two vector cross products within a 12-instruction loop.  Since each
    cross product requires six multiplies, the effective rate reached here
    of six cycles per cross product is the best that can be achieved on a
    processor with a single parallel multiplier.

    Preprocessor "#defines" are used for register assignments to aid the
    readability of the code, but it is still a bit cryptic.

-----------------------------------------------------------------------------
Program Characteristics:

    Calling Values:

        REGISTER FILE:
            r11 = number of cross products to perform
                  (must be even and greater than 4)

                  The code below reads the count from r11, not r0: f0/r0 is
                  overwritten as AX2 before the loop counter is loaded.
                  The count is halved with a logical shift and then
                  decremented to form the loop counter, so an odd count
                  loses its low bit and one fewer product is computed.
                  NOTE(review): a count of 4 yields a loop counter of 1 and
                  a count of 2 yields 0 -- confirm against the processor's
                  hardware-loop rules whether 4 itself is acceptable.

        DAG1 (Data Memory):
            i0 = index to list of A vectors    l0 = 0    m0 = +1
            i1 = index to C output vectors     l1 = 0
                 (the output stores also use m0 as their modifier)

        DAG2 (Program Memory):
            i8 = index to list of B vectors    l8 = 0    m8 = +1

    Altered Registers:

        r11             halved and decremented, then copied to lcntr
        f0-f10, f12-f15 operand, product and result scratch (see #defines)
        i0, i8          stepped once per component read  (3 reads per vector)
        i1              stepped once per component written (3 writes per
                        vector)

    Data Layout (as consumed by the code):

        Each vector is read and written as three consecutive accesses in
        the order x, y, z.

    Computation Time = 6N+7 cycles, where N Cross Products are performed
                       (11 setup instructions + 12 per loop pass for
                        N/2 - 1 passes + 8 drain instructions)
                     = 240ns per Cross Product @ 25MHz
                     = 4.2 million Cross Products/sec @ 25MHz

-----------------------------------------------------------------------------
Author:  Jim Donahue, Analog Devices DSP Division
Revised: 13-AUG-91
----------------------------------------------------------------------------*/

.GLOBAL xprod;

/* Operand registers.  Suffix 1 and suffix 2 name the two vectors that are
   in flight at the same time.  Several names deliberately share a physical
   register (AY2/AZ1 = f3, AZ2/AY1 = f2, BY2/BZ1 = f7, BZ2/BY1 = f6); the
   instruction order below is arranged so that a shared register is only
   reloaded once its previous contents are no longer needed.  Do not reorder
   instructions without rechecking these overlaps. */
#define AX1 f1
#define AY1 f2
#define AZ1 f3
#define AX2 f0
#define AY2 f3
#define AZ2 f2
#define BX1 f5
#define BY1 f6
#define BZ1 f7
#define BX2 f4
#define BY2 f7
#define BZ2 f6

/* Multiplier results: one register per partial product. */
#define AXBY f8
#define AYBX f12
#define AYBZ f9
#define AZBY f13
#define AZBX f10
#define AXBZ f14

/* All three result components share f15.  Each component is stored by the
   same instruction whose subtract produces the next component. */
#define CX f15
#define CY f15
#define CZ f15

.SEGMENT /pm pm_code;

/* Setup: turn the product count into a loop count (count/2 - 1), load the
   first A/B vector pair, start its six multiplies, and load the second
   pair so that the loop can be entered with the pipeline full. */
xprod:  r11 = lshift r11 by -1;
        r11 = r11-1, AX1=dm(i0,m0), BX1=pm(i8,m8);
        AY1=dm(i0,m0), BY1=pm(i8,m8);
        AZ1=dm(i0,m0), BZ1=pm(i8,m8);
        AYBZ=AY1*BZ1;
        AZBY=AZ1*BY1, AX2=dm(i0,m0), BX2=pm(i8,m8);
        AZBX=AZ1*BX1, CX=AYBZ-AZBY;
        AXBZ=AX1*BZ1, AY2=dm(i0,m0), BY2=pm(i8,m8);
        AXBY=AX1*BY1, CY=AZBX-AXBZ, dm(i1,m0)=CX;
        AYBX=AY1*BX1, AZ2=dm(i0,m0), BZ2=pm(i8,m8);

/* Main loop: 12 instructions, two cross products per pass.  The loop body
   runs through the instruction labelled xlp. */
        lcntr=r11, do xlp until lce;

/* First half: multiply the "2" operands, finish and store the preceding
   "1" result, and load the next "1" operands. */
            AYBZ=AY2*BZ2, CZ=AXBY-AYBX, dm(i1,m0)=CY;
            AZBY=AZ2*BY2, AX1=dm(i0,m0), BX1=pm(i8,m8);
            AZBX=AZ2*BX2, CX=AYBZ-AZBY, dm(i1,m0)=CZ;
            AXBZ=AX2*BZ2, AY1=dm(i0,m0), BY1=pm(i8,m8);
            AXBY=AX2*BY2, CY=AZBX-AXBZ, dm(i1,m0)=CX;
            AYBX=AY2*BX2, AZ1=dm(i0,m0), BZ1=pm(i8,m8);

/* Second half: the same pattern with the roles of "1" and "2" swapped. */
            AYBZ=AY1*BZ1, CZ=AXBY-AYBX, dm(i1,m0)=CY;
            AZBY=AZ1*BY1, AX2=dm(i0,m0), BX2=pm(i8,m8);
            AZBX=AZ1*BX1, CX=AYBZ-AZBY, dm(i1,m0)=CZ;
            AXBZ=AX1*BZ1, AY2=dm(i0,m0), BY2=pm(i8,m8);
            AXBY=AX1*BY1, CY=AZBX-AXBZ, dm(i1,m0)=CX;
xlp:        AYBX=AY1*BX1, AZ2=dm(i0,m0), BZ2=pm(i8,m8);

/* Drain: finish the last "2" product without loading any further input. */
        AYBZ=AY2*BZ2, CZ=AXBY-AYBX, dm(i1,m0)=CY;
        AZBY=AZ2*BY2;
        AZBX=AZ2*BX2, CX=AYBZ-AZBY, dm(i1,m0)=CZ;
        AXBZ=AX2*BZ2;
        AXBY=AX2*BY2, CY=AZBX-AXBZ, dm(i1,m0)=CX;
/* Delayed return: the two instructions after rts(db) still execute and
   write the final CY and CZ. */
        rts(db), AYBX=AY2*BX2;
        CZ=AXBY-AYBX, dm(i1,m0)=CY;
        dm(i1,m0)=CZ;

.ENDSEG;