{ Subroutine to compute x raise to the y power of its two floating point inputs. Y=POW(X,Y) Calling Registers F0 = X Input Value F1 = Y Input Value l_reg = 0 Result Registers F0 = exponential of input Altered Registers F0, F1, F2, F4, F5, F7, F8, F9, F10, F11, F12, F13, F14, F15 i_reg, ms_reg Computation Time 37 Cycles Version 0.04 7/6/90 Gordon A. Sterling } #include "asm_glob.h" #include "pow.h" .SEGMENT/PM Assembly_Library_Code_Space; .PRECISION=MACHINE_PRECISION; .GLOBAL pow; underflow: F0=0; {Set error also} RTS; x_neg_error: F0=0; {Set an error also} RTS; check_y: F1=PASS F1, F0=F0; {If y>0 return x} IF GT RTS; overflow: RTS; {Set an error also} pow: F0=PASS F0; {Test for x<=0} IF LT JUMP x_neg_error; {Report error} IF EQ JUMP check_y; {Test y input} determine_m: R2=LOGB F0; {Get exponent of x input} R2=R2+1; {Reduce to proper format} R3=-R2; {Used to produce f} determine_g: F15=SCALB F0 BY R3; { .5 <= g < 1} determine_p: i_reg=a1_values; R14=1; R13=9; R10=i_reg; F12=mem(9,i_reg); {Get A1(9)} COMP(F12,F15), F12=mem(5,i_reg); {A1(9) - g} F11=mem(13,i_reg); IF GE F12=F11; {Use A(13) next} IF GE R14=R13; {IF (g<=A1(9)) p=9} R9=R10+R14; i_reg=R9; R13=4; COMP(F12,F15), F12=mem(2,i_reg); {A1(p+4) - g} F11=mem(6,i_reg); IF GE F12=F11; IF GE R14=R14+R13; {IF (g<=A1(p+4)) p=p+4} R13=2; COMP(F12,F15); {A1(p+2) - g} IF GE R14=R14+R13; {IF (g<=A1(p+4)) p=p+2} determine_z: R14=R14+1, R4=R14; ms_reg=R14; i_reg=a1_values; R11=ASHIFT R14 BY -1; {Compute (p+1)/2} F12=mem(ms_reg,i_reg); {Fetch A1(p+1)} ms_reg=R11; i_reg=a2_values; {Correction array} F0=F12+F15; {g + A1(p+1)} F14=F15-F12, F11=mem(ms_reg,i_reg); F7=F14-F11, F12=F0; {[g-A1(p+1)]-A2((p+1)/2)} F11=2.0; __divide_: F0=RECIPS F12; {Get 4 bit seed R0=1/D} F12=F0*F12; {D(prime) = D*R0} F7=F0*F7, F0=F11-F12; {F0=R1=2-D(prime), F7=N*R0} F12=F0*F12; {F12=D(prime)=D(prime)*R1} F7=F0*F7, F0=F11-F12; {F7=N*R0*R1, F0=R2=2-D(prime)} F12=F0*F12; {F12=D(prime)=D(prime)*R2} F7=F0*F7, F0=F11-F12; {F7=N*R0*R1*R2, F0=R3=2-D(prime)} F7=F0*F7; {F7=N*R0*R1*R2*R3} F7=F7+F7; { z = z + z} determine_R: i_reg=power_array; F8=F7*F7, F9=mem(p2,i_reg); { v = z * z } F10=F8*F9, F9=mem(p1,i_reg); { p2*v } F10=F10+F9; { p2*v + p1 } F10=F10*F8; { (p2*v + p1) * v } F10=F10*F7, F9=mem(K,i_reg); { R(z) = (p2*v+p1)*v*z } determine_u2: F11=F10*F9; { K*R } F11=F10+F11; { K + K*R } F9=F9*F7; { z*K } F9=F9+F11; { R + z*K } F9=F9+F7; { (R + z*K) + z} determine_u1: R3=16; R2=R2*R3 (SSI); { m*16 } R2=R2-R4; { m*16-p } R3=-4; F2=FLOAT R2 BY R3; {FLOAT(m*16-p)*.0625} determine_w: R4=4; {Used in reduce} R8=FIX F1 BY R4; F8=FLOAT R8 BY R3; { y1=REDUCE(Y) } F7=F1-F8; { y2 = y - y1 } F15=F9*F1; { U2*Y } F14=F2*F7; { U1*Y2 } F15=F14+F15; { W = U2*Y + U1*Y2} R14=FIX F15 BY R4; F14=FLOAT R14 BY R3; { W1=REDUCE(W) } F13=F15-F14; { W2 = W - W1 } F12=F2*F8; { U1*Y1 } F12=F14+F12; { W = W1 + U1*Y1 } R14=FIX F12 BY R4; F14=FLOAT R14 BY R3; { W1 = REDUCE(W) } F11=F12-F14; { W-W1 } F13=F11+F13; { W2 = W2 + (W-W1) } R12=FIX F13 BY R4; F12=FLOAT R12 BY R3; { W = REDUCE(W2) } F10=F12+F14; R10=FIX F10 BY R4; { IW1 = INT(16*(W1+W)) } F13=F13-F12, R9=mem(bigx,i_reg); { W2 = W2 - W } COMP(R10,R9), R9=mem(smallx,i_reg); { Test for overflow } IF GE JUMP overflow; COMP(R10,R9); IF LE JUMP underflow; flow_to_a: F13=PASS F13; { W2 must be <=0 } IF LE JUMP determine_mp; F8=.0625; F13=F13-F8; R10=R10+1; determine_mp: R8=1; R10=PASS R10; IF LT R8=R8-R8; { I=0 if IWI < 0 } R6=ABS R10; { Take ABS for shift} R7=ASHIFT R6 BY -4; { IW1/16 } R6=PASS R10; IF LT R7=-R7; R7=R7+R8; { m(prime) = IW1/16 + I } R6=ASHIFT R7 BY 4; { m(prime)*16 } R6=R6-R10, F5=mem(q5,i_reg); { p(prime) = 16*m(prime) - IW1 } determine_Z: F4=F5*F13, F5=mem(q4,i_reg); { q5*W2 } F4=F4+F5, F5=mem(q3,i_reg); { q5*W2 + q4 } F4=F4*F13; { (q5*W2+q4)*W2 } F4=F4+F5, F5=mem(q2,i_reg); { (q5*W2+q4)*W2+q3 } F4=F4*F13; { ((q5*W2+q4)*W2+q3)*W2 } F4=F4+F5, F5=mem(q1,i_reg); { ((q5*W2+q4)*W2+q3)*W2+q2 } F4=F4*F13; { (((q5*W2+q4)*W2+q3)*W2+q2)*W2 } F4=F4+F5; { Q(W2)=(((q5*W2+q4)*W2+q3)*W2+q2)*W2+q1} F4=F4*F13; { Z = W2*Q(W2) } i_reg=a1_values; R6=R6+1; { Compute p(prime)+1 } ms_reg=R6; F5=mem(ms_reg,i_reg); { Fetch A1(p(prime)+1) } RTS (DB), F4=F4*F5; { A1(p(prime)+1)*Z } F4=F4+F5; { Z = A1(p(prime)+1) + A1(p(prime)+1)*Z } F0=SCALB F4 BY R7; { Result = ADX(Z,m(prime)) } .ENDSEG; .SEGMENT/SPACE Assembly_Library_Data_Space; .VAR a1_values[18] = 0, 0x3F800000, 0x3F75257D, 0x3F6AC0C6, 0x3F60CCDE, 0x3F5744FC, 0x3F4E248C, 0x3F45672A , 0x3F3D08A3 , 0x3F3504F3 , 0x3F2D583E , 0x3F25FED6 , 0x3F1EF532 , 0x3F1837F0 , 0x3F11C3D3 , 0x3F0B95C1 , 0x3F05AAC3 , 0x3F000000; .VAR a2_values[9] = 0, 0x31A92436, 0x336C2A95, 0x31A8FC24, 0x331F580C, 0x336A42A1, 0x32C12342, 0x32E75624, 0x32CF9891; .VAR power_array[10]= 0.833333286245E-1, { p1 } 0.125064850052E-1, { p2 } 0.693147180556341, { q1 } 0.240226506144710, { q2 } 0.555040488130765E-1, { q3 } 0.961620659583789E-2, { q4 } 0.130525515942810E-2, { q5 } 0.44269504088896340736, { K } 608, { bigx } -608; { smallx } .ENDSEG;