{________________________________________________________________________

MxNxNxO.ASM     ADSP-21020 Matrix times a Matrix

        C[MxO] = A[MxN] * B[NxO]

The three matrices have arbitrary dimensions.  All three matrices are
accessed as circular buffers.  For matrix A this means that the last
iteration of the inner loop does its dummy read from a known location,
and for matrices B and C it means that stepping down a column wraps
back to the top of the same column.

Revision:
        25-APR-91, Steven Cox, Analog Devices

Assembler Preprocessor Switches:
        -Dexample is used to include assembly of an example calling
        routine

Calling Information:
        Constants: m, n, o
        pm(mat_b[n*o]) row major
        dm(mat_a[m*n]) row major
        dm(mat_c[m*o]) row major
        M1=1;
        M2=-2;
        M3=o;
        M9=-(o*2-1);
        M10=o;
        B0=mat_a;       L0=@mat_a;
        B1=mat_c;       L1=@mat_c;
        B8=mat_b;       L8=@mat_b;

        No data register has to be preset by the caller; the routine
        clears its own accumulator.

Results:
        dm(mat_c[m*o]) row major

Altered Registers:
        F0, F4, F8, F12, I0, I1, I8

Benchmark:
        mxnxnxo cycles=4+o(m(n+2)+5)+7   (entrance + core + 7 cache)

Memory Usage:
        pm code=11 words, pm data=nxo words, dm data=mxn+mxo words
________________________________________________________________________}

{ dimension constants }
#define M 4
#define N 4
#define O 4

#ifndef example
.GLOBAL mxnxnxo;
.EXTERN mat_a, mat_b, mat_c;
#endif

#ifdef example
.SEGMENT/DM     dm_data;
.VAR    mat_a[M*N]="mat_a.dat";
.VAR    mat_c[M*O];
.ENDSEG;

.SEGMENT/PM     pm_data;
.VAR    mat_b[N*O]="mat_b.dat";
.ENDSEG;

.SEGMENT/PM     rst_svc;                { reset vector }
        dmwait=0X21;                    { set dm waitstates to zero }
        pmwait=0X21;                    { set pm waitstates to zero }
        jump setup;
.ENDSEG;

{ example calling code }
.SEGMENT/PM     pm_code;
setup:  m1=1;                           { step along a row of mat_a }
        m2=-2;                          { undo the two look-ahead reads of mat_a }
        m3=O;                           { step down a column of mat_c }
        m9=-(O*2-1);                    { undo two look-ahead reads of mat_b, then advance one column }
        m10=O;                          { step down a column of mat_b }
        b0=mat_a;       l0=@mat_a;      { mat_a as a circular buffer }
        b1=mat_c;       l1=@mat_c;      { mat_c as a circular buffer }
        b8=mat_b;       l8=@mat_b;      { mat_b as a circular buffer }
        call mxnxnxo;
        idle;
.ENDSEG;
#endif

{ matrix multiply starts here }
.SEGMENT/PM     pm_code;
mxnxnxo:
        { outer loop: one pass per column of mat_b and mat_c }
        lcntr=O, do colrow until lce;
            { prime the pipeline: clear the accumulator f8 and fetch the
              first operand pair, then form the first product while
              fetching the second pair }
            r8=r8 xor r8, f0=dm(i0,m1), f4=pm(i8,m10);
            f12=f0*f4, f0=dm(i0,m1), f4=pm(i8,m10);

            { middle loop: one pass per row of mat_a }
            lcntr=M, do column until lce;

                { inner loop: n multiply-accumulates.  Each pass adds the
                  previous product to f8, forms the next product and
                  fetches the operand pair after that, so on exit f12
                  and f0/f4 already belong to the following row. }
                lcntr=N, do row until lce;
row:                f12=f0*f4, f8=f8+f12, f0=dm(i0,m1), f4=pm(i8,m10);

                { The store in this instruction writes the finished sum,
                  which is the value f8 held before the clear.  The
                  accumulator is cleared with xor, as in the priming
                  instruction, rather than copied from another register,
                  so the result does not depend on any register holding
                  zero on entry. }
column:         r8=r8 xor r8, dm(i1,m3)=f8;

            { modify with dummy fetches: step i0 back over the two
              look-ahead reads and move i8 to the top of the next column }
            f0=dm(i0,m2), f4=pm(i8,m9);

            { advance the result pointer to the next column of mat_c }
colrow:     modify(i1,1);

        rts;
.ENDSEG;