/************************************************************************/
/* recog.c                                                     4/10/95  */
/************************************************************************/
/* This program does all aspects of speech recognition, including       */
/* training for the recognition of a finite number of short,            */
/* isolated words.  After models for each of the words are chosen,      */
/* recognition is done by fitting the input pattern to be               */
/* classified by choosing the model that produces that pattern          */
/* with the highest probability.                                        */
/*                                                                      */
/* This program defines the port addresses for the                      */
/* particular input and output channel being used, sets up the          */
/* IRQ3 handler that processes each input sample, and waits for         */
/* that interrupt to signal the end of every frame.                     */
/*                                                                      */
/* As written, this file only contains the training phase: for each     */
/* word it records example utterances, seeds one Gaussian per state     */
/* and re-estimates the model until the transition matrix settles.      */
/* The waits on end_frame are busy-wait loops, not idle() calls.        */
/************************************************************************/

/*
 * NOTE(review): in the source as received only <21020.h> still carried
 * its header name; the other five #include directives had lost theirs.
 * The names below are reconstructed from the surviving trailing comments
 * and from the library functions this file uses (interrupt/signal,
 * segment, log10f/expf/powf/fabsf/sqrtf).  <stdlib.h> and <matrix.h> in
 * particular are guesses -- confirm against the original project.
 */
#include <21020.h>   /* For the idle() command */
#include <signal.h>  /* For the interrupt command */
#include <macros.h>  /* For the segment function */
#include <math.h>
#include <stdlib.h>
#include <matrix.h>

/* Dimensions shared by the tables below. */
enum {
    NUM_WORDS       = 6,    /* number of word models trained                */
    NUM_EXAMPLES    = 10,   /* training utterances recorded per word        */
    NUM_STATES      = 10,   /* HMM states per word model                    */
    NUM_COEF        = 10,   /* predictor coefficients per frame             */
    MAX_FRAMES      = 50,   /* frame capacity of one stored utterance       */
    FRAMES_PER_WORD = 48,   /* frames actually recorded per utterance       */
    FRAME_SAMPLES   = 160,  /* input samples that make up one frame         */
    HISTORY_LEN     = 20    /* size of the circular sample history          */
};

volatile int in_port segment(hip_reg0);  /* hip_reg0, hip_reg2, hip_reg3 are */
volatile int out_port segment(hip_reg2); /* used in architecture file        */
volatile int out_port2 segment(hip_reg3);

static int num_samples = 0;              /* samples seen in the current frame */
static float samples, sig_pow;
static float R_vec[11];                  /* autocorrelation vector */
static float R2_vec[10][1];
static float Rmat[10][10];               /* autocorrelation matrix */
static float inv_Rmat[10][10];
static float finalR[10];                 /* final autocorrelation vector of a frame */
static float a_vec[NUM_COEF];            /* vector of predictor coefficients */
static volatile float frame_coef[NUM_COEF]; /* a_vec as it stood at the end of the
                                               last complete frame (set by the ISR) */
static float nu;                         /* learning coefficient for LMS algorithm */

/* sig[example][coefficient][frame]: where predictor coefficients are saved */
static float sig[NUM_EXAMPLES][NUM_COEF][MAX_FRAMES];

/* mean/var[word][state][coefficient]: Gaussian parameters of each state */
static float mean[NUM_WORDS][NUM_STATES][NUM_COEF];
static float mean_num[NUM_STATES][NUM_COEF]; /* numerator of eq. to compute the mean */
static float new_mean[10][10];
static float var[NUM_WORDS][NUM_STATES][NUM_COEF];
static float var_num[NUM_STATES][NUM_COEF];  /* numerator of eq. to compute variance */
static float denom[NUM_STATES];          /* denom. of eq. to compute mean and variance */
static float new_var[10][10];

static float alpha[NUM_STATES][MAX_FRAMES]; /* scaled forward variables  */
static float ct[MAX_FRAMES];                /* per-frame scale factors    */
static float beta[NUM_STATES][MAX_FRAMES];  /* scaled backward variables */

/* transit[word][from][to]: transition matrices for each word */
static float transit[NUM_WORDS][NUM_STATES][NUM_STATES];
static float transit_num[NUM_STATES][NUM_STATES]; /* numerator of eq. to compute transitions */
static float transit_den[NUM_STATES];    /* denom. of eq. to comp. transitions */
static float new_transit[NUM_STATES][NUM_STATES];

static float probs[10];                  /* probability of each example word */
static int length[NUM_EXAMPLES];         /* length of each example, in frames */
static int start_word, end_word;
static float previous[HISTORY_LEN];      /* circular history of filtered samples */
static int index = 0;                    /* position of the newest entry in previous[] */
static int frame_num = 0;
static int word_num = 0;
static volatile int end_frame = 0;       /* set by process_input at each frame end;
                                            volatile because main polls it */
static int need_update;                  /* need to update the parameters again? */
static volatile int button = 0;          /* toggled by push_button (IRQ2) */

void process_input(int);
void push_button(int sig_num);
void compute_alphas(int word, int example);
void compute_betas(int word, int example);
float compute_prob(int word, int example);
float p_output(int word, int example, int state, int time);
void update_params(int word);

static void init_transitions(void);
static void record_example(int example);
static void init_gaussians(int word);
static void train_word(int word);

/**************************************************************************
 * main: set up the models and interrupt handlers, then for each word
 * record NUM_EXAMPLES utterances and train that word's model.
 *
 * FLAG1 is raised while an utterance is being recorded and again while
 * the model is being re-estimated.
 **************************************************************************/
void main(void)
{
    static int word, example, i;

    /* initialize LPC coefficients */
    for (i = 0; i < NUM_COEF; i++)
        a_vec[i] = 0;
    nu = 1.0e-11;

    init_transitions();

    interrupt(SIG_IRQ3, process_input);
    signal(SIG_IRQ2, push_button);
    set_flag(SET_FLAG1, CLR_FLAG);
    set_flag(SET_FLAG2, CLR_FLAG);
    set_flag(SET_FLAG3, CLR_FLAG);

    for (word = 0; word < NUM_WORDS; word++) {
        for (example = 0; example < NUM_EXAMPLES; example++)
            record_example(example);

        /* all examples read in: set up approximate mean and variance vectors */
        init_gaussians(word);

        /* turn on the light to indicate processing */
        set_flag(SET_FLAG1, SET_FLAG);
        train_word(word);
        set_flag(SET_FLAG1, CLR_FLAG);
    }
}

/* Give every word the same left-to-right starting transition matrix:
 * each state may stay, advance one state or skip one state, with the
 * last two states restricted to the moves that remain inside the model. */
static void init_transitions(void)
{
    static int w, r, c;

    for (w = 0; w < NUM_WORDS; w++) {
        for (r = 0; r < NUM_STATES; r++) {
            for (c = 0; c < NUM_STATES; c++)
                transit[w][r][c] = 0;

            if (r + 1 > NUM_STATES - 1) {
                transit[w][r][r] = 1.0;
            } else if (r + 2 > NUM_STATES - 1) {
                transit[w][r][r]     = 0.5;
                transit[w][r][r + 1] = 0.5;
            } else {
                transit[w][r][r]     = 0.333;
                transit[w][r][r + 1] = 0.333;
                transit[w][r][r + 2] = 0.333;
            }
        }
    }
}

/* Record one training utterance into sig[example][..][..] and set
 * length[example] to the number of frames stored (FRAMES_PER_WORD). */
static void record_example(int example)
{
    static int ex, coef;

    ex = example;

    /* presumably blocks until FLAG0 goes low-to-high (the start button);
     * confirm against the 21020.h documentation */
    poll_flag_in(READ_FLAG0, FLAG_IN_LO_TO_HI);

    /* Line up with the next frame boundary.  The flag must be cleared
     * first, otherwise a boundary that passed earlier ends the wait
     * immediately. */
    end_frame = 0;
    while (!end_frame) {
    }

    length[ex] = 0;

    /* turn on light */
    set_flag(SET_FLAG1, SET_FLAG);

    while (length[ex] < FRAMES_PER_WORD) {
        length[ex] = length[ex] + 1;
        start_word = 1;
        end_frame = 0;

        /* Save the predictor coefficients of the frame that just ended.
         * They are taken from frame_coef, not a_vec, because the ISR
         * clears a_vec in the same call that raises end_frame. */
        for (coef = 0; coef < NUM_COEF; coef++)
            sig[ex][coef][length[ex] - 1] = frame_coef[coef];

        while (!end_frame) {
        }
    }

    /* turn off light */
    set_flag(SET_FLAG1, CLR_FLAG);
    button = 0;
}

/* Seed the Gaussian of every state of `word` from single frames of the
 * recorded examples: state r takes its mean from example r at a frame
 * position proportional to r, and its variance from the squared
 * difference to a neighbouring frame.  Relies on NUM_EXAMPLES being at
 * least NUM_STATES because example r is used for state r. */
static void init_gaussians(int word)
{
    static int w, r, c, at, near;
    static float diff;

    w = word;

    for (r = 0; r < NUM_STATES; r++) {
        if (r == NUM_STATES - 1) {
            at   = length[r] - 1;
            near = length[r] - 2;
        } else {
            /* was indexed with length[j] after the recording loop, i.e.
             * one element past the end of length[] */
            at   = ((length[r] - 1) / NUM_STATES) * r;
            near = at + 1;
        }

        for (c = 0; c < NUM_COEF; c++) {
            mean[w][r][c] = sig[r][c][at];
            diff = sig[r][c][near] - mean[w][r][c];
            var[w][r][c] = diff * diff;
        }
    }
}

/* Re-estimate the model of `word` until no transition probability moves
 * by more than 0.05 in one pass. */
static void train_word(int word)
{
    static int w, j, k;

    w = word;

    need_update = 1;
    while (need_update) {
        update_params(w);
        need_update = 0;

        for (j = 0; j < NUM_STATES; j++) {
            for (k = 0; k < NUM_STATES; k++) {
                /* fabsf, not abs: abs() would truncate the difference to
                 * an integer and the test would never trigger */
                if (fabsf(new_transit[j][k] - transit[w][j][k]) > 0.05)
                    need_update = 1;
                transit[w][j][k] = new_transit[j][k];
            }
        }
    }
}

/**************************************************************************
 * Function: compute_alphas
 *     compute the scaled alpha coefficients for an input sequence
 *     recursively.
 *
 * word_p    : index of the model being evaluated
 * example_p : index of the recorded utterance in sig[]
 *
 * Fills alpha[state][time] and ct[time] for time 0..length[example]-1.
 * Every column of alpha is divided by its sum, and that sum is kept in
 * ct[time].  The model always starts in state 0.
 ***************************************************************************/
void compute_alphas(int dm word_p, int dm example_p)
{
    static int i, j, time;
    static float sum, reach;
    static int word;
    static int example;

    word = word_p;
    example = example_p;

    for (i = 0; i < NUM_STATES; i++)
        alpha[i][0] = 0;

    /* only state 0 is occupied at time 0, so its scaled value is 1 */
    ct[0] = p_output(word, example, 0, 0);
    alpha[0][0] = 1;

    for (time = 1; time < length[example]; time++) {
        sum = 0;
        for (i = 0; i < NUM_STATES; i++) {
            /* probability of arriving in state i from any state */
            reach = 0;
            for (j = 0; j < NUM_STATES; j++)
                reach = reach + alpha[j][time - 1] * transit[word][j][i];

            /* the emission probability is applied once, to the complete
             * sum; it used to be applied inside the loop above */
            alpha[i][time] = reach * p_output(word, example, i, time);
            sum = sum + alpha[i][time];
        }

        ct[time] = sum;
        for (i = 0; i < NUM_STATES; i++)
            alpha[i][time] = alpha[i][time] / sum;
    }
}

/**************************************************************************
 * Function: compute_betas
 *     compute the scaled beta coefficients for an input sequence,
 *     working backwards from the last frame.
 *
 * word_p    : index of the model being evaluated
 * example_p : index of the recorded utterance in sig[]
 *
 * Fills beta[state][time].  Uses ct[], so compute_alphas must have been
 * run for the same word and example first.
 ***************************************************************************/
void compute_betas(int dm word_p, int dm example_p)
{
    static int i, j, time;
    static int word, example;
    static float emit[NUM_STATES];
    static float acc;

    word = word_p;
    example = example_p;

    for (i = 0; i < NUM_STATES; i++)
        beta[i][length[example] - 1] = 1;

    for (time = length[example] - 2; time >= 0; time--) {
        /* the emission of frame time+1 does not depend on the source
         * state, so evaluate it once per destination state */
        for (j = 0; j < NUM_STATES; j++)
            emit[j] = p_output(word, example, j, time + 1);

        for (i = 0; i < NUM_STATES; i++) {
            acc = 0;
            for (j = 0; j < NUM_STATES; j++)
                acc = acc + beta[j][time + 1] * transit[word][i][j] * emit[j];
            beta[i][time] = acc / ct[time];
        }
    }
}

/**************************************************************************
 * compute_prob: compute probability of getting an input sequence
 *     given the model
 *
 * word_p    : corresponds to the model we are trying
 * example_p : index of the recorded utterance in sig[]
 *
 * Follows the single best state path in the log10 domain and returns
 * 10 raised to the best final score.  Transitions below 1e-10 cost a
 * fixed 20 decades instead of log10 of the value.  Not called anywhere
 * in this file.
 **************************************************************************/
float compute_prob(int dm word_p, int dm example_p)
{
    static int i, j, time;
    static float phi[NUM_STATES];
    static float new_phi[NUM_STATES];
    static float val;
    static float max;
    static int word, example;

    word = word_p;
    example = example_p;

    for (i = 0; i < NUM_STATES; i++)
        phi[i] = -30.0;
    phi[0] = log10(p_output(word, example, 0, 0));

    for (time = 1; time < length[example]; time++) {
        for (j = 0; j < NUM_STATES; j++) {
            new_phi[j] = -50;
            for (i = 0; i < NUM_STATES; i++) {
                val = phi[i];
                if (transit[word][i][j] < 1.0e-10)
                    val = val - 20;
                else
                    val = val + log10f(transit[word][i][j]);
                if (val > new_phi[j])
                    new_phi[j] = val;
            }
            new_phi[j] = new_phi[j] + log10f(p_output(word, example, j, time));
        }

        /* commit only after every state has been scored, so that all of
         * them are computed from the previous frame's values */
        for (j = 0; j < NUM_STATES; j++)
            phi[j] = new_phi[j];
    }

    /* log scores are usually negative, so start from a real score
     * rather than from 0 */
    max = phi[0];
    for (i = 1; i < NUM_STATES; i++)
        if (phi[i] > max)
            max = phi[i];

    return powf(10, max);
}

/*************************************************************************
 * p_output : compute the probability of a particular output vector for
 *     a specified state, using a Gaussian distribution with a diagonal
 *     covariance matrix
 *
 * word_p    : index of the model
 * example_p : index of the recorded utterance in sig[]
 * state_p   : state whose mean/variance are used
 * time_p    : frame of the utterance to evaluate
 *
 * Because the covariance is diagonal, the exponent is the sum of
 * (x - mean)^2 / variance over the coefficients, and the normalising
 * constant is the product of sqrt(2*pi*variance).  The product is built
 * one square root at a time to keep the intermediate values in range.
 *
 * NOTE(review): a zero variance still produces a division by zero here;
 * no variance floor is applied.
 **************************************************************************/
float p_output(int dm word_p, int dm example_p, int dm state_p, int dm time_p)
{
    static float norm;      /* normalising constant of the density */
    static float quad;      /* quadratic form in the exponent      */
    static float diff;
    static int i;
    static int word, example, state, time;

    word = word_p;
    example = example_p;
    state = state_p;
    time = time_p;

    norm = 1;
    quad = 0;
    for (i = 0; i < NUM_COEF; i++) {
        norm = norm * sqrtf(2 * (3.1415927) * var[word][state][i]);
        diff = sig[example][i][time] - mean[word][state][i];
        quad = quad + diff * (1 / (var[word][state][i])) * diff;
    }

    return expf(-0.5 * quad) / norm;
}

/*************************************************************************
 * update_params: update parameters of the hidden markov model for the
 *     specified word, using the training data.  Parameters are the state
 *     transition probabilities and the Gaussian means, variances for each
 *     state
 *
 * word_p : index of the model to re-estimate
 *
 * Writes mean[word] and var[word] directly; the new transition matrix is
 * left in new_transit[][] for the caller to compare and copy.
 * A state whose accumulated weight is zero keeps its previous values.
 *
 * NOTE(review): transit_num/transit_den are cleared for every example,
 * so only the last example determines new_transit, whereas the mean and
 * variance sums run over all examples.  Kept as in the original; confirm
 * which behaviour is intended.
 **************************************************************************/
void update_params(int dm word_p)
{
    static float occ, occ_den, occ_num;   /* state-occupancy weight */
    static float step, diff;
    static int i, j, k, time;
    static int word;

    word = word_p;

    for (i = 0; i < NUM_STATES; i++) {
        for (j = 0; j < NUM_COEF; j++) {
            mean_num[i][j] = 0;
            var_num[i][j] = 0;
        }
        for (j = 0; j < NUM_STATES; j++)
            transit_num[i][j] = 0;
        denom[i] = 0;
        transit_den[i] = 0;
    }

    for (i = 0; i < NUM_EXAMPLES; i++) {
        compute_alphas(word, i);
        compute_betas(word, i);

        for (j = 0; j < NUM_STATES; j++) {
            transit_den[j] = 0;
            for (k = 0; k < NUM_STATES; k++)
                transit_num[j][k] = 0;

            for (time = 0; time < length[i] - 1; time++) {
                /* weight of state j at this frame; the normaliser is
                 * rebuilt for every frame instead of growing across
                 * frames */
                occ_num = alpha[j][time] * beta[j][time];
                occ_den = 0;
                for (k = 0; k < NUM_STATES; k++)
                    occ_den += alpha[k][time] * beta[k][time];
                occ = (occ_den > 0) ? occ_num / occ_den : 0;

                for (k = 0; k < NUM_COEF; k++) {
                    diff = sig[i][k][time] - mean[word][j][k];
                    mean_num[j][k] += sig[i][k][time] * occ;
                    var_num[j][k] += diff * diff * occ;
                }
                denom[j] += occ;

                /* weight of the move j -> k between this frame and the
                 * next: the emission belongs to the destination state at
                 * the next frame, as in compute_betas */
                for (k = 0; k < NUM_STATES; k++) {
                    step = alpha[j][time] * beta[k][time + 1]
                         * transit[word][j][k]
                         * p_output(word, i, k, time + 1);
                    transit_den[j] += step;
                    transit_num[j][k] += step;
                }
            }
        }
    }

    /* now do final updating of parameters */
    for (j = 0; j < NUM_STATES; j++) {
        if (denom[j] > 0) {
            for (k = 0; k < NUM_COEF; k++) {
                mean[word][j][k] = mean_num[j][k] / denom[j];
                var[word][j][k] = var_num[j][k] / denom[j];
            }
        }

        for (k = 0; k < NUM_STATES; k++) {
            if (transit_den[j] > 0)
                new_transit[j][k] = transit_num[j][k] / transit_den[j];
            else
                new_transit[j][k] = transit[word][j][k];
        }
    }
}

/*************************************************************************
 * process_input: IRQ3 handler, run once per input sample.
 *
 * Reads a sample from in_port into the circular history, predicts it
 * from the ten preceding history entries with a_vec, adapts a_vec by the
 * LMS rule with step nu, and writes the prediction to out_port.  After
 * FRAME_SAMPLES samples it copies a_vec to frame_coef, clears a_vec and
 * raises end_frame.
 *
 * sig_number : signal number passed by the runtime (unused)
 **************************************************************************/
void process_input(int sig_number)
{
    static int ind, i;
    static float predicted, err;

    num_samples++;

    /* get a new sample; the history is filled from the top down and
     * wraps to its last valid slot */
    index--;
    if (index < 0)
        index = HISTORY_LEN - 1;
    previous[index] = in_port / 10000;
    ind = (index + 1) % HISTORY_LEN;

    /* pre-emphasis: subtract 0.95 times the preceding history entry */
    previous[index] = previous[index] - 0.95 * previous[ind];

    predicted = 0;
    for (i = 0; i < NUM_COEF; i++)
        predicted = predicted + previous[(ind + i) % HISTORY_LEN] * a_vec[i];

    err = previous[index] - predicted;
    for (i = 0; i < NUM_COEF; i++)
        a_vec[i] = a_vec[i] + nu * err * previous[(ind + i) % HISTORY_LEN];

    out_port = (int) (predicted * 10000);

    if (num_samples == FRAME_SAMPLES) {
        num_samples = 0;
        for (i = 0; i < NUM_COEF; i++) {
            /* hand the finished frame to main before starting over */
            frame_coef[i] = a_vec[i];
            a_vec[i] = 0;
        }
        end_frame = 1;
    }
}

/*************************************************************************
 * push_button: IRQ2 handler; toggles the button flag between 0 and 1.
 *
 * sig_num : signal number passed by the runtime (unused)
 **************************************************************************/
void push_button(int sig_num)
{
    button = (button == 0) ? 1 : 0;
}