{___________________________________________________________________________ FFTRAD2.ASM ADSP-21020 Radix-2 DIT Complex Fast Fourier Transform Calculates a radix-2 FFT. The FFT length (N) must be a power of 2 and a minimum of 32 points. Input data is not destroyed during the course of this routine. The input and output arrays are normal ordered. The real array is stored in DM, the imaginary array is stored in PM. The real twiddle factors are in an N/2 long Cosine table stored in PM, and the imaginary twiddle factors are in an N/2 long Sine Table in stored in DM. The twiddle factors are generated by the program TWIDRAD2. To implement a inverse FFT, one only has to (1) swap the real and imaginary of the incoming data, (2) take the forward FFT, (3) swap the real and imaginary of the outgoing data, and (4) scale the data by 1/N. Author: 10-SEP-90, Kapriel Karagozian, Analog Devices DSP Div.(617) 461-3672 Revision: 25-MAR-91, Steven Cox 25-APR-91, Ronnin Yee Calling Information: pm(cosine[N/2]) - real twiddle factors from TWIDRAD2 program dm(sine[N/2]) - imaginary twiddle factors from TWIDRAD2 program dm(refft[N]) - real input array, bitreversed to a working array dm(imfft[N]) - imaginary input array, bitreversed to a working array (Note: Because the bit reversed address mode is used with the arrays refft and imfft, they must start at addresses that are integer multiples of the length (N) of the transform, (i.e. 0,N,2N,3N,...). This is accomplished by specifing two segments starting at those addresses in the architecture file and placing the variables alone in their respective segments. These addresses must also be reflected in the preprocessor variables ORE and OIM in bit reversed format.) Results: dm(redata[N]) - real working array and output dm(imdata[N]) - imaginary working array and output Benchmarks: Radix-2, complex FFT Length cycles ms @ 20 MHz CLK ms @ 25 MHz CLK ---------- ------ --------------- --------------- 64 1002 .050 .040 128 2088 .104 .083 256 4486 .224 .179 512 9764 .488 .391 1024 21314 1.066 .853 2048 46432 2.322 1.857 4096 100734 5.037 4.029 8192 217504 10.875 8.700 First 2 Stages - 8 cycles per 2 butterflies Middle Stages - 4 cycles per butterfly 2nd to Last Stage - 9 cycles per 2 butterflies Last Stage - 5 cycles per butterfly group Memory Usage: pm code = 153 words, pm data = 1.5*N words, dm data = 3.5*N words ____________________________________________________________________________} { Include for symbolic definition of system register bits } #include "def21020.h" {_________The constants below must be changed for different length FFTs______ N = number of points in the FFT, must be a power of 2 STAGES = log2(N) BRMODIFY = bitrev(32 bit N/2) ORE = bitrev(32 bit addr of input real in dm), addr is 0,N,2N,3N,... OIM = bitrev(32 bit addr of input imag in dm), addr is 0,N,2N,3N,... ____________________________________________________________________________} #define N 256 #define STAGES 8 #define BRMODIFY 0x01000000 #define ORE 0x00000000 #define OIM 0x00020000 {________These constants are independent of the number of points____________} {Number of bttrfly in a group of 8} #define BFLY8 4 .SEGMENT/DM dm_data; .VAR sine[N/2]= "ts2.dat"; { imag twiddle factors, from TWIDRAD2 } .VAR refft[N]; { real result } .GLOBAL refft; .ENDSEG; .SEGMENT/DM dm_rdat; { This segment is an integer multiple of N } .VAR redata[N]= "inreal.dat"; { input real array } .GLOBAL redata; .ENDSEG; .SEGMENT/DM dm_idat; { This segment is an integer multiple of N } .VAR imdata[N]= "inimag.dat"; { input image array } .GLOBAL imdata; .ENDSEG; .SEGMENT/PM pm_data; .VAR cosine[N/2]= "tc2.dat"; { real twiddle factors, from TWIDRAD2 } .VAR imfft[N]; { imag result } .GLOBAL imfft; .ENDSEG; {______________________reset vector test call of fft______________________} .SEGMENT/PM rst_svc; { program starts at the reset vector } pmwait=0x0021; {pgsz=0,pmwtstates=0,intrn.wtstates only} dmwait=0x8421; {pgsz=0,pmwtstates=0,intrn.wtstates only} call fftrad2; stop: idle; nop; nop; .ENDSEG; .SEGMENT/PM pm_code; {______________________________begin FFT__________________________________} fftrad2: bit set mode1 BR0; { enable bit reverse of i0 } B0=OIM; { Points to input imaginary array } l0=0; m0=BRMODIFY; { Modifier for bitreverse counter} b8=imfft; { Working array and output } l8=N; m8=1; {First read imag and bit reverse to pm space} f0=dm(i0,m0); lcntr=N-1, do pmbr until lce; { Bit reverse from dm to pm } pmbr: f0=dm(i0,m0), pm(i8,m8)=f0; pm(i8,m8)=f0; { Do last transfer } {Now do bitrev real within first two stages} b0=ORE; { Points to input real array to be read in } { bit reversed order } b2=refft; l2=N; m1=1; { This loop increments forward +1} b8=imfft; l8=N; b10=imfft; l10=N; {Do the first two stages (actually a radix-4 FFT stage)} f0=dm(i0,m0), f1=pm(i8,m8); f2=dm(i0,m0), f3=pm(i8,m8); f0=f0+f2, f2=f0-f2, f4=dm(i0,m0), f5=pm(i8,m8); f1=f1+f3, f3=f1-f3, f6=dm(i0,m0), f7=pm(i8,m8); f4=f6+f4, f6=f6-f4; f5=f5+f7, f7=f5-f7; f8=f0+f4, f9=f0-f4; f10=f1+f5, f11=f1-f5; lcntr=N/4, do FSTAGE until lce; { do N/4 simple radix-4 butterflies } f12=f2+f7, f13=f2-f7, f0=dm(i0,m0), f1=pm(i8,m8); f14=f3+f6, f15=f3-f6, f2=dm(i0,m0), f3=pm(i8,m8); f0=f0+f2, f2=f0-f2, f4=dm(i0,m0), f5=pm(i8,m8); f1=f1+f3, f3=f1-f3, f6=dm(i0,m0), f7=pm(i8,m8); f4=f6+f4, f6=f6-f4, dm(i2,m1)=f8, pm(i10,m8)=f10; f5=f5+f7, f7=f5-f7, dm(i2,m1)=f12, pm(i10,m8)=f14; f8=f0+f4, f9=f0-f4, dm(i2,m1)=f9, pm(i10,m8)=f11; FSTAGE: f10=f1+f5, f11=f1-f5, dm(i2,m1)=f13, pm(i10,m8)=f15; {middle stages loop } bit clr mode1 BR0; {finished with bitreversal} b0=refft; l0=N; b1=sine; l1=@sine; b9=cosine; l9=@cosine; b11=imfft; l11=N; m0=-BFLY8; m1=-N/8; m2=-BFLY8-1; m9=-N/8; m11=-1; r2=2; r3=-BFLY8; {initializes m0,10 - incr for butterf branches } r5=BFLY8; {counts # butterflies per a group } r9=(-2*BFLY8)-1; {initializes m12 - wrap around to next grp + 1 } r10=-2*BFLY8; {initializes m8 - incr between groups } r13=-BFLY8-1; {initializes m2,13 - wrap to bgn of 1st group } r15=N/8; {# OF GROUPS IN THIRD STAGE} f1=dm(i1,m1), f7=pm(i9,m9); {set pointers to tables to 1st coeff. } lcntr=STAGES-4, do end_stage until lce; {# OF STAGES TO BE HANDLED = LOG2N-4} m8=r10; m10=r3; m12=r9; i0=refft+N-1; i2=refft+N-1; i8=imfft+N-1; i10=imfft+N-1; i11=imfft+N-1; r15=r15-r2, m13=r13; {CALCULATE # OF CORE } {BFLIES/GROUP IN THIS STAGE} f0=dm(i1,m1), f7=pm(i8,m8); f12=f0*f7, f6=dm(i0,m0), f1=pm(i9,m9); f8=f1*f6, modify(i11,m10); f11=f1*f7, f7=pm(i8,m8); f14=f0*f6, f12=f8+f12, f8=dm(i0,m0); f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0); {Each iteration does another set of bttrflys in each group} lcntr=r5, do end_group until lce; {# OF BUTTERFLIES/GROUP IN THIS STAGE} {core butterfly loop} lcntr=r15, do end_bfly until lce; {Do a butterfly in each group - 2} f8=f1*f6, f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m8); f11=f1*f7, f3=f9+f14, f9=f9-f14, dm(i2,m0)=f13, f7=pm(i8,m8); f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), pm(i10,m10)=f9; end_bfly: f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0), pm(i10,m10)=f3; {finish up last bttrfly and set up for next stage} f8=f1*f6, f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m8); f11=f1*f7, f4=f9+f14, f9=f9-f14, dm(i2,m0)=f13, f14=pm(i8,m11); f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), pm(i10,m10)=f9; f13=f8+f12, f10=f8-f12, f0=dm(i1,m1), f7=pm(i8,m8); {dm:sin} f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m12); {start on next butterfly in each group} f12=f0*f7, f3=f9+f14, f9=f9-f14, f6=dm(i0,m0), f1=pm(i9,m9); {pm:cos} f8=f1*f6, dm(i2,m2)=f13, pm(i10,m10)=f4; f11=f1*f7, pm(i10,m10)=f9; f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), f7=pm(i8,m8); end_group: f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0), pm(i10,m13)=f3; r4=r15+r2, i1=b1; {PREPARE R4 FOR #OF BFLIES CALC} r15=ashift r4 by -1; {# OF BFLIES/GRP IN NEXT STAGE} r4=-r15, i9=b9; m1=r4; {update inc for sin & cos } m9=r4; r5=ashift r5 by 1, f1=dm(i1,m1); {update # bttrfly in a grp} r3=-r5; { inc for bttrfly branch} r13=r3-1, m0=r3; { wrap to 1st grp } r10=ashift r3 by 1, f7=pm(i9,m9); { inc between grps } end_stage: r9=r10-1, m2=r13; { wrap to grp +1 } {_________ next to last stage__________} m1=-2; {modifier to sine table pntr } m8=r10; {incr between groups } m9=-2; {modifier to cosine table pntr } m10=r3; {incr between bttrfly branches } m12=r9; {wrap around to next grp + 1 } m13=r13; {wrap to bgn of 1st group } i0=refft+N-1; i1=sine+(N/2)-2; {pntr to 1st sine coeff } i2=refft+N-1; i8=imfft+N-1; i9=cosine+(N/2)-2; {pntr to 1st cosine coeff } i10=imfft+N-1; i11=imfft+N-1; f0=dm(i1,m1), f7=pm(i8,m8); f12=f0*f7, f6=dm(i0,m0), f1=pm(i9,m9); f8=f1*f6, modify(i11,m10); f11=f1*f7, f7=pm(i8,m12); f14=f0*f6, f12=f8+f12, f8=dm(i0,m0); f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0); {Do the N/4 butterflies in the two groups of this stage} lcntr=N/4, do end_group2 until lce; f8=f1*f6, f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m8); f11=f1*f7, f3=f9+f14, f9=f9-f14, dm(i2,m0)=f13, f1=pm(i9,m9); f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), pm(i10,m10)=f9; f13=f8+f12, f10=f8-f12, f0=dm(i1,m1), f7=pm(i8,m8); f12=f0*f7, f14=f11-f14, f6=dm(i0,m0), f9=pm(i11,m12); f8=f1*f6, f3=f9+f14, f9=f9-f14, dm(i2,m0)=f10, pm(i10,m10)=f3; f11=f1*f7, dm(i2,m2)=f13, pm(i10,m10)=f9; f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), f7=pm(i8,m12); end_group2: f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0), pm(i10,m13)=f3; { The last stage } m0=-N/2; m2=-N/2-1; m10=m0; m13=m2; i0=refft+N-1; i1=sine+(N/2)-1; {pntr to 1st sine coeff } i2=refft+N-1; i8=imfft+N-1; i9=cosine+(N/2)-1; {pntr to 1st cosine coeff } i10=imfft+N-1; i11=imfft+N-1; m1=-1; {modifiers to coeff tables } m9=-1; {start first bttrfly} f0=dm(i1,m1), f7=pm(i8,m11); f12=f0*f7, f6=dm(i0,m0), f1=pm(i9,m9); f8=f1*f6, modify(i11,m10); f11=f1*f7; f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), f9=pm(i11,m11); {do N/2 bttrflys in the last stage} lcntr=N/2, do last_stage until lce; f13=f8+f12, f10=f8-f12, f0=dm(i1,m1), f7=pm(i8,m11); f12=f0*f7, f14=f11-f14, f6=dm(i0,m0), f1=pm(i9,m9); f8=f1*f6, f3=f9+f14, f15=f9-f14, dm(i2,m0)=f10, f9=pm(i11,m11); f11=f1*f7, dm(i2,m2)=f13, pm(i10,m10)=f15; last_stage: f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), pm(i10,m13)=f3; rts; {finished} nop; nop; {_______________________________________________________________________} .ENDSEG;