{_______________________________________________________________________________
  MATINV.ASM     inverts a square matrix using the Gauss-Jordan elimination
                 algorithm with full pivoting.
    
       Reference: P.M. Embree and B. Kimble. C Language Algorithms For Digital
                  Signal Processing. Chap. 6, Sect. 6.2.3, pp. 326-329.
                  Prentice-Hall, 1991

       Written by: Wassim G. Najm, Analog Devices, DSP Division, May 6 1991

       Calling Information:
	       dm(mat_a[n*n]) row major, dm(pf[n+1]), dm(sc[n]);
	       pm(swr[n]);     
               r14=n;     (n= number of rows (columns))
	       m0=1; m1=-1;
	       m8=1; m9=-1;
	       b0=mat_a; 
	       b1=pf;
	       b7=sc;  l7=0;
               b8=swr; l8=0;

       Results:
	       dm(mat_a[n*n]) row major;
               f12=0.0 -> matrix is singular

       Altered registers:
               f0 - f15,
	       i0 - i7, i8, m2

       Benchmark:
	      maximum number= worst case= 7.5n**3+25n**2+25.5n+23 (approximated)

       Memory usage:
	       pm code= 93 words, pm data= n words, dm data= n*n+2n+1 words

     ** To assemble the example below type the following command
                   asm21k -Dexample matinv                       **
_______________________________________________________________________________}

#include	"macros.h"

#define	n	3

#ifndef		example
.GLOBAL		mat_inv;
.EXTERN		mat_a;
#endif

#ifdef		example
.SEGMENT/DM	dm_data;
.VAR	mat_a[n*n]= "mat_a1.dat";
.VAR	pf[n+1];
.VAR	sc[n];
.ENDSEG;

.SEGMENT/PM rst_svc;
	    dmwait=0x21;	{set dm waitstates to zero}
            pmwait=0x21;	{set pm waitstates to zero}
	    jump setup;
.ENDSEG;

.SEGMENT/PM	pm_data;
.VAR	swr[n];
.ENDSEG;
		{ example calling code }

.SEGMENT/PM	pm_code;
setup:		b0=mat_a; {i0 -> a(row,col)}
		b1=pf;    {i1 -> pf= pivot_flag}
		b7=sc;    {i7 -> sc= swap_col}
		b8=swr;   {i8 -> sr= swap_row}
		l7=0;
		l8=0;
		m0=1;
		m1=-1;
		m8=1;
		m9=-1;
		r14=n;
		call mat_inv;
		idle;
.ENDSEG;
#endif

			{ Matrix inversion starts here }
.SEGMENT/PM	pm_code;
mat_inv:	r13=r14*r14(ssi), b3=b0;
		l0=r13;    {matrix in a circular data buffer}
		b4=b0;
		b5=b0;
		b6=b0;
		l3=l0;
		l4=l0;
		l5=l0;
		l6=l0;
		r13=r14+1, b2=b1;
		l1=r13;    {pf in a circular data buffer}
		l2=l1;
		f9=0.0;
		f8=2.0;    {2.0 is required for DIVIDE_macro}
		f7=1.0;    {1.0 is a numerator for DIVIDE_macro}
		r13=fix f9, m2=r14;
		lcntr=r14, do zero_index until lce;
		  dm(i7,m0)=r13, pm(i8,m8)=r13;
zero_index:	  dm(i1,m0)=r13;
		f0=pass f9, dm(i1,m0)=r13; {f0= big}

		lcntr=r14, do full_pivot until lce;
			{find the biggest pivot element}
		  r1=pass r13, r11=dm(i1,1); {r1= row no., r11= pf(row)}
		  lcntr=r14, do row_big until lce;
		    r11=pass r11, i4=i3; {check if pf(row) is zero}
		    if ne jump (PC,12), f4=dm(i0,m2); {i0 -> next row}
		    r5=pass r13, r15=dm(i2,1); {r5= col no., r15= pf(col)}
		    lcntr=r14, do column_big until lce;
		      r15=pass r15; {check if pf(col) is zero}
		      if ne jump column_big (db);
		      f4=dm(i0,1); {f4= a(row,col)}
		      f6=abs f4;
		      comp(f6,f0); {compare abs_element to big}
		      if lt jump column_big;
		      f0=pass f6, f12=f4; {f0= abs_element, f12= pivot_element}
		      r2=pass r1, r10=r5; {r2= irow, r10= icol}
column_big:	      r5=r5+1, r15=dm(i2,1);
row_big:	    r1=r1+1, r11=dm(i1,1);
			
		{swap rows to make this diagonal the biggest absolute pivot} 
		  f12=pass f12, m5=r10; {check if pivot is zero, m5= icol}
		  if eq rts; {if pivot is zero, matrix is singular}
		  r1=r2*r14 (ssi), dm(m5,i1)=r5; {pf(col) not zero}
		  r5=r10*r14 (ssi), m6=r1;                     
		  comp(r2,r10), r1=dm(i3,m6); {i3 -> a(irow,col)}
		  dm(i7,m0)=r10, pm(i8,m8)=r2; {store icol in sc and irow in sr}
                  if eq jump row_divide (db);
		  r2=pass r13, m7=r5; 
		  modify(i4,m7); {i4 -> a(icol,col)}
		  i5=i4;
		  lcntr=r14, do swap_row until lce;
		    f4=dm(i3,0);		{f4= temp= a(irow,col)}
		    f0=dm(i5,0);                {f0= a(icol,col)}
		    dm(i3,1)=f0;		{a(irow,col)= a(icol,col)}
swap_row:           dm(i5,1)=f4;		{a(icol,col)= temp}
		  
			{divide the row by the pivot}
row_divide:	  f6=pass f7, i5=i4;
		  DIVIDE(f1,f6,f12,f8,f3); {f1= pivot_inverse}
		  i6=i5;
		  f4=dm(i4,1);
		  lcntr=r14, do divide_row until lce;
		    f5=f1*f4, f4=dm(i4,1);
divide_row:	    dm(i6,1)=f5;
		  dm(m5,i5)=f1;

			{fix the other rows by subtracting}
        	  lcntr=r14, do fix_row until lce;
		    comp(r2,r10), i6=i5; {check if row= icol}
		    if eq jump (PC,8), f4=dm(i0,m2); {i0 -> next row}
		    f4=dm(m5,i0); {temp= a(row,icol)}
		    dm(m5,i0)=f9;
		    f3=dm(i6,1);
		    lcntr=r14, do fix_column until lce;
		      f3=f3*f4, f0=dm(i0,0);
		      f0=f0-f3, f3=dm(i6,1);
fix_column:	      dm(i0,1)=f0;
fix_row:	    r2=r2+1;
full_pivot:	f0=pass f9, i3=i0;

			{fix the affect of all the swaps for final answer} 
		r0=dm(i7,m1), r1=pm(i8,m9); {i7 -> sc(N-1), i8 -> sr(N-1)}
		r0=dm(i7,m1), r1=pm(i8,m9); {r0= sc(N-1), r1= sr(N-1)}
		lcntr=r14, do fix_swap until lce;
		  comp(r0,r1), m5=r0; {m5= sc(swap)}
		  if eq jump fix_swap;
		  m4=r1; {m4= sr(swap)}
		  lcntr=r14, do swap until lce;
		    f4=dm(m4,i0); {f4= temp= a(row,sr(swap))}
		    f0=dm(m5,i0); {f0= a(row,sc(swap))}
		    dm(m4,i0)=f0; {a(row,sr(swap))= a(row,sc(swap))}
		    dm(m5,i0)=f4; {a(row,sc(swap))= temp}
swap:               modify(i0,m2);
fix_swap:	  r0=dm(i7,m1), r1=pm(i8,m9);
		rts;
.ENDSEG;