{________________________________________________________________________

MxNxNx1.ASM     ADSP-21020 Matrix times a Vector
                C[Mx1]=A[MxN]*B[Nx1]

Matrix dimensions are arbitrary. Matrix A is accessed as a circular
buffer so that the last iteration of the inner loop will do a dummy
read from a known location.

Revision: 25-APR-91 , Steven Cox , Analog Devices

Assembler Preprocessor Switches:
        -Dexample is used to include assembly of an example calling
         routine

Calling Information:
        Constants: m, n
        pm(mat_b[n]) row major,
        dm(mat_a[m*n]) row major,
        M1=1;
        M9=1;
        B0=mat_a; L0=@mat_a;
        B1=mat_c; L1=0;
        B8=mat_b; L8=@mat_b;

Results:
        dm(mat_c[m]) row major

Altered Registers:
        F0,F4,F8,F12, I0,I1,I8

Benchmark:
        mxnxnx1 cycles=6+M(3+N)+5 (entrance + core + 5 cache)

Memory Usage:
        pm code=8 words, pm data=n words, dm data=m*n+m words

NOTE(review): the routine as written below contains 7 instructions;
confirm the 8 word pm code figure above.

Operation summary:
        The routine is software pipelined. Two prologue instructions
        fetch the first operand pairs and form the first product. Each
        inner loop pass then adds the previous product into F8, forms
        the next product and fetches the operands for the pass after
        that. Including the prologue, M*N+2 values are fetched through
        each of I0 and I8, so the final two fetches on each bus are
        dummy reads. With the lengths L0 and L8 set as listed under
        Calling Information, both pointers are expected to wrap instead
        of running past the end of their arrays.
________________________________________________________________________}

{ dimension constants: A is M rows by N columns, B has N elements, C has M }
#define M 4
#define N 4

{ library build: export the entry point and import the caller-owned arrays }
#ifndef example
.GLOBAL mxnxnx1;
.EXTERN mat_a, mat_b,mat_c;
#endif

{ example build: define the three arrays locally and add a small harness }
#ifdef example

{ data memory: matrix A, initialised from a file, and the result vector C }
.SEGMENT/DM     dm_data;
.VAR    mat_a[M*N]="mat_a.dat";
.VAR    mat_c[M];
.ENDSEG;

{ program memory data: vector B, initialised from a file }
{ NOTE(review): the file name is mat_bb.dat, not mat_b.dat - confirm }
.SEGMENT/PM     pm_data;
.VAR    mat_b[N]="mat_bb.dat";
.ENDSEG;

{ presumably the reset service entry - segment placement comes from the
  architecture file, which is not visible here }
.SEGMENT/PM     rst_svc;
        dmwait=0x21;            { set dm waitstates to zero }
        pmwait=0x21;            { set pm waitstates to zero }
        jump setup;             { continue in the example calling code }
.ENDSEG;

{ example calling code: loads the registers listed under Calling
  Information, runs the routine once, then idles }
.SEGMENT/PM     pm_code;
setup:  m1=1;                   { modifier used with i0 and i1 }
        m9=1;                   { modifier used with i8 }
        b0=mat_a;               { i0 walks A ... }
        l0=@mat_a;              { ... as a circular buffer of length @mat_a }
        b1=mat_c;               { i1 walks the result vector C ... }
        l1=0;                   { ... with length 0, as listed in the header }
        b8=mat_b;               { i8 walks B ... }
        l8=@mat_b;              { ... as a circular buffer of length @mat_b }
        call mxnxnx1;
        idle;
.ENDSEG;

#endif

{ matrix multiply starts here }
.SEGMENT/PM     pm_code;

{ prologue, step 1: clear the accumulator f8 and fetch the first element
  of A through i0 and the first element of B through i8 }
mxnxnx1: r8=r8 xor r8, f0=dm(i0,m1), f4=pm(i8,m9);      { clear f8 }

{ prologue, step 2: form the first product in f12 and fetch the second
  operand pair, so that the loop starts with one product pending }
        f12=f0*f4, f0=dm(i0,m1), f4=pm(i8,m9);

{ outer loop: M passes, one per element of C, ending at label column }
        lcntr=M, do column until lce;

{ inner loop: N passes of the single instruction at label row }
                lcntr=N, do row until lce;

{ add the pending product into f8, form the next product, and fetch the
  operand pair after that. The add must see the f12 value from the
  previous pass, so this relies on all source registers of the
  instruction being read before any result is written. After the Nth
  pass f12 already holds the first product of the next row. }
row:            f12=f0*f4, f8=f8+f12, f0=dm(i0,m1), f4=pm(i8,m9);

{ store the finished dot product through i1 and clear f8 for the next
  row. The store must write the f8 value from before the clear, which
  relies on the same read-before-write ordering. }
column:         r8=r8 xor r8, dm(i1,m1)=f8;

        rts;
.ENDSEG;