diff options
Diffstat (limited to 'sine.c')
-rw-r--r-- | sine.c | 689 |
1 files changed, 689 insertions, 0 deletions
@@ -0,0 +1,689 @@ | |||
1 | /*---------------------------------------------------------------------------*\ | ||
2 | |||
3 | FILE........: sine.c | ||
4 | AUTHOR......: David Rowe | ||
5 | DATE CREATED: 19/8/2010 | ||
6 | |||
7 | Sinusoidal analysis and synthesis functions. | ||
8 | |||
9 | \*---------------------------------------------------------------------------*/ | ||
10 | |||
11 | /* | ||
12 | Copyright (C) 1990-2010 David Rowe | ||
13 | |||
14 | All rights reserved. | ||
15 | |||
16 | This program is free software; you can redistribute it and/or modify | ||
17 | it under the terms of the GNU Lesser General Public License version 2.1, as | ||
18 | published by the Free Software Foundation. This program is | ||
19 | distributed in the hope that it will be useful, but WITHOUT ANY | ||
20 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
21 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | ||
22 | License for more details. | ||
23 | |||
24 | You should have received a copy of the GNU Lesser General Public License | ||
25 | along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
26 | */ | ||
27 | |||
28 | /*---------------------------------------------------------------------------*\ | ||
29 | |||
30 | INCLUDES | ||
31 | |||
32 | \*---------------------------------------------------------------------------*/ | ||
33 | |||
34 | #include <stdlib.h> | ||
35 | #include <stdio.h> | ||
36 | #include <math.h> | ||
37 | |||
38 | #include "defines.h" | ||
39 | #include "sine.h" | ||
40 | #include "kiss_fft.h" | ||
41 | |||
42 | #define HPF_BETA 0.125 | ||
43 | |||
44 | /*---------------------------------------------------------------------------*\ | ||
45 | |||
46 | HEADERS | ||
47 | |||
48 | \*---------------------------------------------------------------------------*/ | ||
49 | |||
50 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, | ||
51 | float pstep); | ||
52 | |||
53 | /*---------------------------------------------------------------------------*\ | ||
54 | |||
55 | FUNCTIONS | ||
56 | |||
57 | \*---------------------------------------------------------------------------*/ | ||
58 | |||
59 | C2CONST c2const_create(int Fs, float framelength_s) { | ||
60 | C2CONST c2const; | ||
61 | |||
62 | assert((Fs == 8000) || (Fs = 16000)); | ||
63 | c2const.Fs = Fs; | ||
64 | c2const.n_samp = round(Fs*framelength_s); | ||
65 | c2const.max_amp = floor(Fs*P_MIN_S/2); | ||
66 | c2const.p_min = floor(Fs*P_MIN_S); | ||
67 | c2const.p_max = floor(Fs*P_MAX_S); | ||
68 | c2const.m_pitch = floor(Fs*M_PITCH_S); | ||
69 | c2const.Wo_min = TWO_PI/c2const.p_max; | ||
70 | c2const.Wo_max = TWO_PI/c2const.p_min; | ||
71 | |||
72 | if (Fs == 8000) { | ||
73 | c2const.nw = 279; | ||
74 | } else { | ||
75 | c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */ | ||
76 | } | ||
77 | |||
78 | c2const.tw = Fs*TW_S; | ||
79 | |||
80 | /* | ||
81 | fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); | ||
82 | fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); | ||
83 | fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); | ||
84 | fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); | ||
85 | */ | ||
86 | |||
87 | return c2const; | ||
88 | } | ||
89 | |||
90 | /*---------------------------------------------------------------------------*\ | ||
91 | |||
92 | FUNCTION....: make_analysis_window | ||
93 | AUTHOR......: David Rowe | ||
94 | DATE CREATED: 11/5/94 | ||
95 | |||
96 | Init function that generates the time domain analysis window and it's DFT. | ||
97 | |||
98 | \*---------------------------------------------------------------------------*/ | ||
99 | |||
100 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[]) | ||
101 | { | ||
102 | float m; | ||
103 | COMP wshift[FFT_ENC]; | ||
104 | COMP temp; | ||
105 | int i,j; | ||
106 | int m_pitch = c2const->m_pitch; | ||
107 | int nw = c2const->nw; | ||
108 | |||
109 | /* | ||
110 | Generate Hamming window centered on M-sample pitch analysis window | ||
111 | |||
112 | 0 M/2 M-1 | ||
113 | |-------------|-------------| | ||
114 | |-------|-------| | ||
115 | nw samples | ||
116 | |||
117 | All our analysis/synthsis is centred on the M/2 sample. | ||
118 | */ | ||
119 | |||
120 | m = 0.0; | ||
121 | for(i=0; i<m_pitch/2-nw/2; i++) | ||
122 | w[i] = 0.0; | ||
123 | for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) { | ||
124 | w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1)); | ||
125 | m += w[i]*w[i]; | ||
126 | } | ||
127 | for(i=m_pitch/2+nw/2; i<m_pitch; i++) | ||
128 | w[i] = 0.0; | ||
129 | |||
130 | /* Normalise - makes freq domain amplitude estimation straight | ||
131 | forward */ | ||
132 | |||
133 | m = 1.0/sqrtf(m*FFT_ENC); | ||
134 | for(i=0; i<m_pitch; i++) { | ||
135 | w[i] *= m; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | Generate DFT of analysis window, used for later processing. Note | ||
140 | we modulo FFT_ENC shift the time domain window w[], this makes the | ||
141 | imaginary part of the DFT W[] equal to zero as the shifted w[] is | ||
142 | even about the n=0 time axis if nw is odd. Having the imag part | ||
143 | of the DFT W[] makes computation easier. | ||
144 | |||
145 | 0 FFT_ENC-1 | ||
146 | |-------------------------| | ||
147 | |||
148 | ----\ /---- | ||
149 | \ / | ||
150 | \ / <- shifted version of window w[n] | ||
151 | \ / | ||
152 | \ / | ||
153 | ------- | ||
154 | |||
155 | |---------| |---------| | ||
156 | nw/2 nw/2 | ||
157 | */ | ||
158 | |||
159 | for(i=0; i<FFT_ENC; i++) { | ||
160 | wshift[i].real = 0.0; | ||
161 | wshift[i].imag = 0.0; | ||
162 | } | ||
163 | for(i=0; i<nw/2; i++) | ||
164 | wshift[i].real = w[i+m_pitch/2]; | ||
165 | for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) | ||
166 | wshift[i].real = w[j]; | ||
167 | |||
168 | codec2_fft(fft_fwd_cfg, wshift, W); | ||
169 | |||
170 | /* | ||
171 | Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later | ||
172 | analysis convenient. | ||
173 | |||
174 | Before: | ||
175 | |||
176 | |||
177 | 0 FFT_ENC-1 | ||
178 | |----------|---------| | ||
179 | __ _ | ||
180 | \ / | ||
181 | \_______________/ | ||
182 | |||
183 | After: | ||
184 | |||
185 | 0 FFT_ENC-1 | ||
186 | |----------|---------| | ||
187 | ___ | ||
188 | / \ | ||
189 | ________/ \_______ | ||
190 | |||
191 | */ | ||
192 | |||
193 | |||
194 | for(i=0; i<FFT_ENC/2; i++) { | ||
195 | temp.real = W[i].real; | ||
196 | temp.imag = W[i].imag; | ||
197 | W[i].real = W[i+FFT_ENC/2].real; | ||
198 | W[i].imag = W[i+FFT_ENC/2].imag; | ||
199 | W[i+FFT_ENC/2].real = temp.real; | ||
200 | W[i+FFT_ENC/2].imag = temp.imag; | ||
201 | } | ||
202 | |||
203 | } | ||
204 | |||
205 | /*---------------------------------------------------------------------------*\ | ||
206 | |||
207 | FUNCTION....: hpf | ||
208 | AUTHOR......: David Rowe | ||
209 | DATE CREATED: 16 Nov 2010 | ||
210 | |||
211 | High pass filter with a -3dB point of about 160Hz. | ||
212 | |||
213 | y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1) | ||
214 | |||
215 | \*---------------------------------------------------------------------------*/ | ||
216 | |||
217 | float hpf(float x, float states[]) | ||
218 | { | ||
219 | states[0] = -HPF_BETA*states[0] + x - states[1]; | ||
220 | states[1] = x; | ||
221 | |||
222 | return states[0]; | ||
223 | } | ||
224 | |||
225 | /*---------------------------------------------------------------------------*\ | ||
226 | |||
227 | FUNCTION....: dft_speech | ||
228 | AUTHOR......: David Rowe | ||
229 | DATE CREATED: 27/5/94 | ||
230 | |||
231 | Finds the DFT of the current speech input speech frame. | ||
232 | |||
233 | \*---------------------------------------------------------------------------*/ | ||
234 | |||
235 | // TODO: we can either go for a faster FFT using fftr and some stack usage | ||
236 | // or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace | ||
237 | #if 1 | ||
238 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]) | ||
239 | { | ||
240 | int i; | ||
241 | int m_pitch = c2const->m_pitch; | ||
242 | int nw = c2const->nw; | ||
243 | |||
244 | for(i=0; i<FFT_ENC; i++) { | ||
245 | Sw[i].real = 0.0; | ||
246 | Sw[i].imag = 0.0; | ||
247 | } | ||
248 | |||
249 | /* Centre analysis window on time axis, we need to arrange input | ||
250 | to FFT this way to make FFT phases correct */ | ||
251 | |||
252 | /* move 2nd half to start of FFT input vector */ | ||
253 | |||
254 | for(i=0; i<nw/2; i++) | ||
255 | Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | ||
256 | |||
257 | /* move 1st half to end of FFT input vector */ | ||
258 | |||
259 | for(i=0; i<nw/2; i++) | ||
260 | Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | ||
261 | |||
262 | codec2_fft_inplace(fft_fwd_cfg, Sw); | ||
263 | } | ||
264 | #else | ||
265 | void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | ||
266 | { | ||
267 | int i; | ||
268 | float sw[FFT_ENC]; | ||
269 | |||
270 | for(i=0; i<FFT_ENC; i++) { | ||
271 | sw[i] = 0.0; | ||
272 | } | ||
273 | |||
274 | /* Centre analysis window on time axis, we need to arrange input | ||
275 | to FFT this way to make FFT phases correct */ | ||
276 | |||
277 | /* move 2nd half to start of FFT input vector */ | ||
278 | |||
279 | for(i=0; i<nw/2; i++) | ||
280 | sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | ||
281 | |||
282 | /* move 1st half to end of FFT input vector */ | ||
283 | |||
284 | for(i=0; i<nw/2; i++) | ||
285 | sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | ||
286 | |||
287 | codec2_fftr(fftr_fwd_cfg, sw, Sw); | ||
288 | } | ||
289 | #endif | ||
290 | |||
291 | |||
292 | /*---------------------------------------------------------------------------*\ | ||
293 | |||
294 | FUNCTION....: two_stage_pitch_refinement | ||
295 | AUTHOR......: David Rowe | ||
296 | DATE CREATED: 27/5/94 | ||
297 | |||
298 | Refines the current pitch estimate using the harmonic sum pitch | ||
299 | estimation technique. | ||
300 | |||
301 | \*---------------------------------------------------------------------------*/ | ||
302 | |||
303 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) | ||
304 | { | ||
305 | float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */ | ||
306 | |||
307 | /* Coarse refinement */ | ||
308 | |||
309 | pmax = TWO_PI/model->Wo + 5; | ||
310 | pmin = TWO_PI/model->Wo - 5; | ||
311 | pstep = 1.0; | ||
312 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | ||
313 | |||
314 | /* Fine refinement */ | ||
315 | |||
316 | pmax = TWO_PI/model->Wo + 1; | ||
317 | pmin = TWO_PI/model->Wo - 1; | ||
318 | pstep = 0.25; | ||
319 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | ||
320 | |||
321 | /* Limit range */ | ||
322 | |||
323 | if (model->Wo < TWO_PI/c2const->p_max) | ||
324 | model->Wo = TWO_PI/c2const->p_max; | ||
325 | if (model->Wo > TWO_PI/c2const->p_min) | ||
326 | model->Wo = TWO_PI/c2const->p_min; | ||
327 | |||
328 | model->L = floorf(PI/model->Wo); | ||
329 | |||
330 | /* trap occasional round off issues with floorf() */ | ||
331 | if (model->Wo*model->L >= 0.95*PI) { | ||
332 | model->L--; | ||
333 | } | ||
334 | assert(model->Wo*model->L < PI); | ||
335 | } | ||
336 | |||
337 | /*---------------------------------------------------------------------------*\ | ||
338 | |||
339 | FUNCTION....: hs_pitch_refinement | ||
340 | AUTHOR......: David Rowe | ||
341 | DATE CREATED: 27/5/94 | ||
342 | |||
343 | Harmonic sum pitch refinement function. | ||
344 | |||
345 | pmin pitch search range minimum | ||
346 | pmax pitch search range maximum | ||
347 | step pitch search step size | ||
348 | model current pitch estimate in model.Wo | ||
349 | |||
350 | model refined pitch estimate in model.Wo | ||
351 | |||
352 | \*---------------------------------------------------------------------------*/ | ||
353 | |||
354 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) | ||
355 | { | ||
356 | int m; /* loop variable */ | ||
357 | int b; /* bin for current harmonic centre */ | ||
358 | float E; /* energy for current pitch*/ | ||
359 | float Wo; /* current "test" fundamental freq. */ | ||
360 | float Wom; /* Wo that maximises E */ | ||
361 | float Em; /* mamimum energy */ | ||
362 | float r, one_on_r; /* number of rads/bin */ | ||
363 | float p; /* current pitch */ | ||
364 | |||
365 | /* Initialisation */ | ||
366 | |||
367 | model->L = PI/model->Wo; /* use initial pitch est. for L */ | ||
368 | Wom = model->Wo; | ||
369 | Em = 0.0; | ||
370 | r = TWO_PI/FFT_ENC; | ||
371 | one_on_r = 1.0/r; | ||
372 | |||
373 | /* Determine harmonic sum for a range of Wo values */ | ||
374 | |||
375 | for(p=pmin; p<=pmax; p+=pstep) { | ||
376 | E = 0.0; | ||
377 | Wo = TWO_PI/p; | ||
378 | |||
379 | /* Sum harmonic magnitudes */ | ||
380 | for(m=1; m<=model->L; m++) { | ||
381 | b = (int)(m*Wo*one_on_r + 0.5); | ||
382 | E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; | ||
383 | } | ||
384 | /* Compare to see if this is a maximum */ | ||
385 | |||
386 | if (E > Em) { | ||
387 | Em = E; | ||
388 | Wom = Wo; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | model->Wo = Wom; | ||
393 | } | ||
394 | |||
395 | /*---------------------------------------------------------------------------*\ | ||
396 | |||
397 | FUNCTION....: estimate_amplitudes | ||
398 | AUTHOR......: David Rowe | ||
399 | DATE CREATED: 27/5/94 | ||
400 | |||
401 | Estimates the complex amplitudes of the harmonics. | ||
402 | |||
403 | \*---------------------------------------------------------------------------*/ | ||
404 | |||
405 | void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase) | ||
406 | { | ||
407 | int i,m; /* loop variables */ | ||
408 | int am,bm; /* bounds of current harmonic */ | ||
409 | int b; /* DFT bin of centre of current harmonic */ | ||
410 | float den; /* denominator of amplitude expression */ | ||
411 | float r, one_on_r; /* number of rads/bin */ | ||
412 | int offset; | ||
413 | COMP Am; | ||
414 | |||
415 | r = TWO_PI/FFT_ENC; | ||
416 | one_on_r = 1.0/r; | ||
417 | |||
418 | for(m=1; m<=model->L; m++) { | ||
419 | den = 0.0; | ||
420 | am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); | ||
421 | bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); | ||
422 | b = (int)(m*model->Wo/r + 0.5); | ||
423 | |||
424 | /* Estimate ampltude of harmonic */ | ||
425 | |||
426 | den = 0.0; | ||
427 | Am.real = Am.imag = 0.0; | ||
428 | offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5); | ||
429 | for(i=am; i<bm; i++) { | ||
430 | den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; | ||
431 | Am.real += Sw[i].real*W[i + offset].real; | ||
432 | Am.imag += Sw[i].imag*W[i + offset].real; | ||
433 | } | ||
434 | |||
435 | model->A[m] = sqrtf(den); | ||
436 | |||
437 | if (est_phase) { | ||
438 | |||
439 | /* Estimate phase of harmonic, this is expensive in CPU for | ||
440 | embedded devicesso we make it an option */ | ||
441 | |||
442 | model->phi[m] = atan2f(Sw[b].imag,Sw[b].real); | ||
443 | } | ||
444 | } | ||
445 | } | ||
446 | |||
447 | /*---------------------------------------------------------------------------*\ | ||
448 | |||
449 | est_voicing_mbe() | ||
450 | |||
451 | Returns the error of the MBE cost function for a fiven F0. | ||
452 | |||
453 | Note: I think a lot of the operations below can be simplified as | ||
454 | W[].imag = 0 and has been normalised such that den always equals 1. | ||
455 | |||
456 | \*---------------------------------------------------------------------------*/ | ||
457 | |||
458 | float est_voicing_mbe( | ||
459 | C2CONST *c2const, | ||
460 | MODEL *model, | ||
461 | COMP Sw[], | ||
462 | COMP W[] | ||
463 | ) | ||
464 | { | ||
465 | int l,al,bl,m; /* loop variables */ | ||
466 | COMP Am; /* amplitude sample for this band */ | ||
467 | int offset; /* centers Hw[] about current harmonic */ | ||
468 | float den; /* denominator of Am expression */ | ||
469 | float error; /* accumulated error between original and synthesised */ | ||
470 | float Wo; | ||
471 | float sig, snr; | ||
472 | float elow, ehigh, eratio; | ||
473 | float sixty; | ||
474 | COMP Ew; | ||
475 | Ew.real = 0; | ||
476 | Ew.imag = 0; | ||
477 | |||
478 | int l_1000hz = model->L*1000.0/(c2const->Fs/2); | ||
479 | sig = 1E-4; | ||
480 | for(l=1; l<=l_1000hz; l++) { | ||
481 | sig += model->A[l]*model->A[l]; | ||
482 | } | ||
483 | |||
484 | Wo = model->Wo; | ||
485 | error = 1E-4; | ||
486 | |||
487 | /* Just test across the harmonics in the first 1000 Hz */ | ||
488 | |||
489 | for(l=1; l<=l_1000hz; l++) { | ||
490 | Am.real = 0.0; | ||
491 | Am.imag = 0.0; | ||
492 | den = 0.0; | ||
493 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | ||
494 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | ||
495 | |||
496 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | ||
497 | |||
498 | offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
499 | for(m=al; m<bl; m++) { | ||
500 | Am.real += Sw[m].real*W[offset+m].real; | ||
501 | Am.imag += Sw[m].imag*W[offset+m].real; | ||
502 | den += W[offset+m].real*W[offset+m].real; | ||
503 | } | ||
504 | |||
505 | Am.real = Am.real/den; | ||
506 | Am.imag = Am.imag/den; | ||
507 | |||
508 | /* Determine error between estimated harmonic and original */ | ||
509 | |||
510 | // Redundant! offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
511 | for(m=al; m<bl; m++) { | ||
512 | Ew.real = Sw[m].real - Am.real*W[offset+m].real; | ||
513 | Ew.imag = Sw[m].imag - Am.imag*W[offset+m].real; | ||
514 | error += Ew.real*Ew.real; | ||
515 | error += Ew.imag*Ew.imag; | ||
516 | } | ||
517 | } | ||
518 | |||
519 | snr = 10.0*log10f(sig/error); | ||
520 | if (snr > V_THRESH) | ||
521 | model->voiced = 1; | ||
522 | else | ||
523 | model->voiced = 0; | ||
524 | |||
525 | /* post processing, helps clean up some voicing errors ------------------*/ | ||
526 | |||
527 | /* | ||
528 | Determine the ratio of low freqency to high frequency energy, | ||
529 | voiced speech tends to be dominated by low frequency energy, | ||
530 | unvoiced by high frequency. This measure can be used to | ||
531 | determine if we have made any gross errors. | ||
532 | */ | ||
533 | |||
534 | int l_2000hz = model->L*2000.0/(c2const->Fs/2); | ||
535 | int l_4000hz = model->L*4000.0/(c2const->Fs/2); | ||
536 | elow = ehigh = 1E-4; | ||
537 | for(l=1; l<=l_2000hz; l++) { | ||
538 | elow += model->A[l]*model->A[l]; | ||
539 | } | ||
540 | for(l=l_2000hz; l<=l_4000hz; l++) { | ||
541 | ehigh += model->A[l]*model->A[l]; | ||
542 | } | ||
543 | eratio = 10.0*log10f(elow/ehigh); | ||
544 | |||
545 | /* Look for Type 1 errors, strongly V speech that has been | ||
546 | accidentally declared UV */ | ||
547 | |||
548 | if (model->voiced == 0) | ||
549 | if (eratio > 10.0) | ||
550 | model->voiced = 1; | ||
551 | |||
552 | /* Look for Type 2 errors, strongly UV speech that has been | ||
553 | accidentally declared V */ | ||
554 | |||
555 | if (model->voiced == 1) { | ||
556 | if (eratio < -10.0) | ||
557 | model->voiced = 0; | ||
558 | |||
559 | /* A common source of Type 2 errors is the pitch estimator | ||
560 | gives a low (50Hz) estimate for UV speech, which gives a | ||
561 | good match with noise due to the close harmoonic spacing. | ||
562 | These errors are much more common than people with 50Hz3 | ||
563 | pitch, so we have just a small eratio threshold. */ | ||
564 | |||
565 | sixty = 60.0*TWO_PI/c2const->Fs; | ||
566 | if ((eratio < -4.0) && (model->Wo <= sixty)) | ||
567 | model->voiced = 0; | ||
568 | } | ||
569 | //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); | ||
570 | |||
571 | return snr; | ||
572 | } | ||
573 | |||
574 | /*---------------------------------------------------------------------------*\ | ||
575 | |||
576 | FUNCTION....: make_synthesis_window | ||
577 | AUTHOR......: David Rowe | ||
578 | DATE CREATED: 11/5/94 | ||
579 | |||
580 | Init function that generates the trapezoidal (Parzen) sythesis window. | ||
581 | |||
582 | \*---------------------------------------------------------------------------*/ | ||
583 | |||
584 | void make_synthesis_window(C2CONST *c2const, float Pn[]) | ||
585 | { | ||
586 | int i; | ||
587 | float win; | ||
588 | int n_samp = c2const->n_samp; | ||
589 | int tw = c2const->tw; | ||
590 | |||
591 | /* Generate Parzen window in time domain */ | ||
592 | |||
593 | win = 0.0; | ||
594 | for(i=0; i<n_samp/2-tw; i++) | ||
595 | Pn[i] = 0.0; | ||
596 | win = 0.0; | ||
597 | for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ ) | ||
598 | Pn[i] = win; | ||
599 | for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++) | ||
600 | Pn[i] = 1.0; | ||
601 | win = 1.0; | ||
602 | for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++) | ||
603 | Pn[i] = win; | ||
604 | for(i=3*n_samp/2+tw; i<2*n_samp; i++) | ||
605 | Pn[i] = 0.0; | ||
606 | } | ||
607 | |||
608 | /*---------------------------------------------------------------------------*\ | ||
609 | |||
610 | FUNCTION....: synthesise | ||
611 | AUTHOR......: David Rowe | ||
612 | DATE CREATED: 20/2/95 | ||
613 | |||
614 | Synthesise a speech signal in the frequency domain from the | ||
615 | sinusodal model parameters. Uses overlap-add with a trapezoidal | ||
616 | window to smoothly interpolate betwen frames. | ||
617 | |||
618 | \*---------------------------------------------------------------------------*/ | ||
619 | |||
620 | void synthesise( | ||
621 | int n_samp, | ||
622 | codec2_fftr_cfg fftr_inv_cfg, | ||
623 | float Sn_[], /* time domain synthesised signal */ | ||
624 | MODEL *model, /* ptr to model parameters for this frame */ | ||
625 | float Pn[], /* time domain Parzen window */ | ||
626 | int shift /* flag used to handle transition frames */ | ||
627 | ) | ||
628 | { | ||
629 | int i,l,j,b; /* loop variables */ | ||
630 | COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */ | ||
631 | float sw_[FFT_DEC]; /* synthesised signal */ | ||
632 | |||
633 | if (shift) { | ||
634 | /* Update memories */ | ||
635 | for(i=0; i<n_samp-1; i++) { | ||
636 | Sn_[i] = Sn_[i+n_samp]; | ||
637 | } | ||
638 | Sn_[n_samp-1] = 0.0; | ||
639 | } | ||
640 | |||
641 | for(i=0; i<FFT_DEC/2+1; i++) { | ||
642 | Sw_[i].real = 0.0; | ||
643 | Sw_[i].imag = 0.0; | ||
644 | } | ||
645 | |||
646 | /* Now set up frequency domain synthesised speech */ | ||
647 | |||
648 | for(l=1; l<=model->L; l++) { | ||
649 | b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); | ||
650 | if (b > ((FFT_DEC/2)-1)) { | ||
651 | b = (FFT_DEC/2)-1; | ||
652 | } | ||
653 | Sw_[b].real = model->A[l]*cosf(model->phi[l]); | ||
654 | Sw_[b].imag = model->A[l]*sinf(model->phi[l]); | ||
655 | } | ||
656 | |||
657 | /* Perform inverse DFT */ | ||
658 | |||
659 | codec2_fftri(fftr_inv_cfg, Sw_,sw_); | ||
660 | |||
661 | /* Overlap add to previous samples */ | ||
662 | |||
663 | #ifdef USE_KISS_FFT | ||
664 | #define FFTI_FACTOR ((float)1.0) | ||
665 | #else | ||
666 | #define FFTI_FACTOR ((float32_t)FFT_DEC) | ||
667 | #endif | ||
668 | |||
669 | for(i=0; i<n_samp-1; i++) { | ||
670 | Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR; | ||
671 | } | ||
672 | |||
673 | if (shift) | ||
674 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | ||
675 | Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR; | ||
676 | else | ||
677 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | ||
678 | Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR; | ||
679 | } | ||
680 | |||
681 | |||
682 | /* todo: this should probably be in some states rather than a static */ | ||
683 | static unsigned long next = 1; | ||
684 | |||
685 | int codec2_rand(void) { | ||
686 | next = next * 1103515245 + 12345; | ||
687 | return((unsigned)(next/65536) % 32768); | ||
688 | } | ||
689 | |||