summaryrefslogtreecommitdiff
path: root/sine.c
diff options
context:
space:
mode:
authorerdgeist <erdgeist@erdgeist.org>2025-08-15 12:42:40 +0200
committererdgeist <erdgeist@erdgeist.org>2025-08-15 12:42:40 +0200
commit30325d24d107dbf133da39f7c96d1510fd1c9449 (patch)
tree932baa5b2a4475821f16dccf9e3e05011daa6d92 /sine.c
parent9022d768021bbe15c7815cc6f8b64218b46f0e10 (diff)
Bump to codec2 version 1.2.0erdgeist-bump-to-1.2.0
Diffstat (limited to 'sine.c')
-rw-r--r--sine.c591
1 files changed, 286 insertions, 305 deletions
diff --git a/sine.c b/sine.c
index d912cd7..814c269 100644
--- a/sine.c
+++ b/sine.c
@@ -27,64 +27,66 @@
27 27
28/*---------------------------------------------------------------------------*\ 28/*---------------------------------------------------------------------------*\
29 29
30 INCLUDES 30 INCLUDES
31 31
32\*---------------------------------------------------------------------------*/ 32\*---------------------------------------------------------------------------*/
33 33
34#include <stdlib.h> 34#include "sine.h"
35#include <stdio.h> 35
36#include <math.h> 36#include <math.h>
37#include <stdio.h>
38#include <stdlib.h>
37 39
38#include "defines.h" 40#include "defines.h"
39#include "sine.h"
40#include "kiss_fft.h" 41#include "kiss_fft.h"
41 42
42#define HPF_BETA 0.125 43#define HPF_BETA 0.125
43 44
44/*---------------------------------------------------------------------------*\ 45/*---------------------------------------------------------------------------*\
45 46
46 HEADERS 47 HEADERS
47 48
48\*---------------------------------------------------------------------------*/ 49\*---------------------------------------------------------------------------*/
49 50
50void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, 51void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax,
51 float pstep); 52 float pstep);
52 53
53/*---------------------------------------------------------------------------*\ 54/*---------------------------------------------------------------------------*\
54 55
55 FUNCTIONS 56 FUNCTIONS
56 57
57\*---------------------------------------------------------------------------*/ 58\*---------------------------------------------------------------------------*/
58 59
59C2CONST c2const_create(int Fs, float framelength_s) { 60C2CONST c2const_create(int Fs, float framelength_s) {
60 C2CONST c2const; 61 C2CONST c2const;
61 62
62 assert((Fs == 8000) || (Fs = 16000)); 63 assert((Fs == 8000) || (Fs == 16000));
63 c2const.Fs = Fs; 64 c2const.Fs = Fs;
64 c2const.n_samp = round(Fs*framelength_s); 65 c2const.n_samp = round(Fs * framelength_s);
65 c2const.max_amp = floor(Fs*P_MIN_S/2); 66 c2const.max_amp = floor(Fs * P_MAX_S / 2);
66 c2const.p_min = floor(Fs*P_MIN_S); 67 c2const.p_min = floor(Fs * P_MIN_S);
67 c2const.p_max = floor(Fs*P_MAX_S); 68 c2const.p_max = floor(Fs * P_MAX_S);
68 c2const.m_pitch = floor(Fs*M_PITCH_S); 69 c2const.m_pitch = floor(Fs * M_PITCH_S);
69 c2const.Wo_min = TWO_PI/c2const.p_max; 70 c2const.Wo_min = TWO_PI / c2const.p_max;
70 c2const.Wo_max = TWO_PI/c2const.p_min; 71 c2const.Wo_max = TWO_PI / c2const.p_min;
71 72
72 if (Fs == 8000) { 73 if (Fs == 8000) {
73 c2const.nw = 279; 74 c2const.nw = 279;
74 } else { 75 } else {
75 c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */ 76 c2const.nw = 511; /* actually a bit shorter in time but lets us maintain
76 } 77 constant FFT size */
78 }
77 79
78 c2const.tw = Fs*TW_S; 80 c2const.tw = Fs * TW_S;
79 81
80 /* 82 /*
81 fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); 83 fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
82 fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); 84 fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
83 fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); 85 fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
84 fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); 86 fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
85 */ 87 */
86 88
87 return c2const; 89 return c2const;
88} 90}
89 91
90/*---------------------------------------------------------------------------*\ 92/*---------------------------------------------------------------------------*\
@@ -97,13 +99,13 @@ C2CONST c2const_create(int Fs, float framelength_s) {
97 99
98\*---------------------------------------------------------------------------*/ 100\*---------------------------------------------------------------------------*/
99 101
100void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], float W[]) 102void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg,
101{ 103 float w[], float W[]) {
102 float m; 104 float m;
103 COMP wshift[FFT_ENC]; 105 COMP wshift[FFT_ENC];
104 int i,j; 106 int i, j;
105 int m_pitch = c2const->m_pitch; 107 int m_pitch = c2const->m_pitch;
106 int nw = c2const->nw; 108 int nw = c2const->nw;
107 109
108 /* 110 /*
109 Generate Hamming window centered on M-sample pitch analysis window 111 Generate Hamming window centered on M-sample pitch analysis window
@@ -117,20 +119,18 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[
117 */ 119 */
118 120
119 m = 0.0; 121 m = 0.0;
120 for(i=0; i<m_pitch/2-nw/2; i++) 122 for (i = 0; i < m_pitch / 2 - nw / 2; i++) w[i] = 0.0;
121 w[i] = 0.0; 123 for (i = m_pitch / 2 - nw / 2, j = 0; i < m_pitch / 2 + nw / 2; i++, j++) {
122 for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) { 124 w[i] = 0.5 - 0.5 * cosf(TWO_PI * j / (nw - 1));
123 w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1)); 125 m += w[i] * w[i];
124 m += w[i]*w[i];
125 } 126 }
126 for(i=m_pitch/2+nw/2; i<m_pitch; i++) 127 for (i = m_pitch / 2 + nw / 2; i < m_pitch; i++) w[i] = 0.0;
127 w[i] = 0.0;
128 128
129 /* Normalise - makes freq domain amplitude estimation straight 129 /* Normalise - makes freq domain amplitude estimation straight
130 forward */ 130 forward */
131 131
132 m = 1.0/sqrtf(m*FFT_ENC); 132 m = 1.0 / sqrtf(m * FFT_ENC);
133 for(i=0; i<m_pitch; i++) { 133 for (i = 0; i < m_pitch; i++) {
134 w[i] *= m; 134 w[i] *= m;
135 } 135 }
136 136
@@ -157,14 +157,13 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[
157 157
158 COMP temp[FFT_ENC]; 158 COMP temp[FFT_ENC];
159 159
160 for(i=0; i<FFT_ENC; i++) { 160 for (i = 0; i < FFT_ENC; i++) {
161 wshift[i].real = 0.0; 161 wshift[i].real = 0.0;
162 wshift[i].imag = 0.0; 162 wshift[i].imag = 0.0;
163 } 163 }
164 for(i=0; i<nw/2; i++) 164 for (i = 0; i < nw / 2; i++) wshift[i].real = w[i + m_pitch / 2];
165 wshift[i].real = w[i+m_pitch/2]; 165 for (i = FFT_ENC - nw / 2, j = m_pitch / 2 - nw / 2; i < FFT_ENC; i++, j++)
166 for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) 166 wshift[i].real = w[j];
167 wshift[i].real = w[j];
168 167
169 codec2_fft(fft_fwd_cfg, wshift, temp); 168 codec2_fft(fft_fwd_cfg, wshift, temp);
170 169
@@ -191,12 +190,10 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[
191 190
192 */ 191 */
193 192
194 193 for (i = 0; i < FFT_ENC / 2; i++) {
195 for(i=0; i<FFT_ENC/2; i++) { 194 W[i] = temp[i + FFT_ENC / 2].real;
196 W[i] = temp[i + FFT_ENC / 2].real; 195 W[i + FFT_ENC / 2] = temp[i].real;
197 W[i + FFT_ENC / 2] = temp[i].real;
198 } 196 }
199
200} 197}
201 198
202/*---------------------------------------------------------------------------*\ 199/*---------------------------------------------------------------------------*\
@@ -211,12 +208,11 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[
211 208
212\*---------------------------------------------------------------------------*/ 209\*---------------------------------------------------------------------------*/
213 210
214float hpf(float x, float states[]) 211float hpf(float x, float states[]) {
215{ 212 states[0] = -HPF_BETA * states[0] + x - states[1];
216 states[0] = -HPF_BETA*states[0] + x - states[1]; 213 states[1] = x;
217 states[1] = x;
218 214
219 return states[0]; 215 return states[0];
220} 216}
221 217
222/*---------------------------------------------------------------------------*\ 218/*---------------------------------------------------------------------------*\
@@ -230,41 +226,43 @@ float hpf(float x, float states[])
230\*---------------------------------------------------------------------------*/ 226\*---------------------------------------------------------------------------*/
231 227
232// TODO: we can either go for a faster FFT using fftr and some stack usage 228// TODO: we can either go for a faster FFT using fftr and some stack usage
233// or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace 229// or we can reduce stack usage to almost zero on STM32 by switching to
230// fft_inplace
234#if 1 231#if 1
235void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]) 232void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[],
236{ 233 float Sn[], float w[]) {
237 int i; 234 int i;
238 int m_pitch = c2const->m_pitch; 235 int m_pitch = c2const->m_pitch;
239 int nw = c2const->nw; 236 int nw = c2const->nw;
240 237
241 for(i=0; i<FFT_ENC; i++) { 238 for (i = 0; i < FFT_ENC; i++) {
242 Sw[i].real = 0.0; 239 Sw[i].real = 0.0;
243 Sw[i].imag = 0.0; 240 Sw[i].imag = 0.0;
244 } 241 }
245 242
246 /* Centre analysis window on time axis, we need to arrange input 243 /* Centre analysis window on time axis, we need to arrange input
247 to FFT this way to make FFT phases correct */ 244 to FFT this way to make FFT phases correct */
248 245
249 /* move 2nd half to start of FFT input vector */ 246 /* move 2nd half to start of FFT input vector */
250 247
251 for(i=0; i<nw/2; i++) 248 for (i = 0; i < nw / 2; i++)
252 Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2]; 249 Sw[i].real = Sn[i + m_pitch / 2] * w[i + m_pitch / 2];
253 250
254 /* move 1st half to end of FFT input vector */ 251 /* move 1st half to end of FFT input vector */
255 252
256 for(i=0; i<nw/2; i++) 253 for (i = 0; i < nw / 2; i++)
257 Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; 254 Sw[FFT_ENC - nw / 2 + i].real =
255 Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2];
258 256
259 codec2_fft_inplace(fft_fwd_cfg, Sw); 257 codec2_fft_inplace(fft_fwd_cfg, Sw);
260} 258}
261#else 259#else
262void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) 260void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[],
263{ 261 float w[]) {
264 int i; 262 int i;
265 float sw[FFT_ENC]; 263 float sw[FFT_ENC];
266 264
267 for(i=0; i<FFT_ENC; i++) { 265 for (i = 0; i < FFT_ENC; i++) {
268 sw[i] = 0.0; 266 sw[i] = 0.0;
269 } 267 }
270 268
@@ -273,19 +271,18 @@ void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
273 271
274 /* move 2nd half to start of FFT input vector */ 272 /* move 2nd half to start of FFT input vector */
275 273
276 for(i=0; i<nw/2; i++) 274 for (i = 0; i < nw / 2; i++) sw[i] = Sn[i + m_pitch / 2] * w[i + m_pitch / 2];
277 sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2];
278 275
279 /* move 1st half to end of FFT input vector */ 276 /* move 1st half to end of FFT input vector */
280 277
281 for(i=0; i<nw/2; i++) 278 for (i = 0; i < nw / 2; i++)
282 sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; 279 sw[FFT_ENC - nw / 2 + i] =
280 Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2];
283 281
284 codec2_fftr(fftr_fwd_cfg, sw, Sw); 282 codec2_fftr(fftr_fwd_cfg, sw, Sw);
285} 283}
286#endif 284#endif
287 285
288
289/*---------------------------------------------------------------------------*\ 286/*---------------------------------------------------------------------------*\
290 287
291 FUNCTION....: two_stage_pitch_refinement 288 FUNCTION....: two_stage_pitch_refinement
@@ -297,38 +294,35 @@ void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
297 294
298\*---------------------------------------------------------------------------*/ 295\*---------------------------------------------------------------------------*/
299 296
300void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) 297void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) {
301{ 298 float pmin, pmax, pstep; /* pitch refinement minimum, maximum and step */
302 float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */
303 299
304 /* Coarse refinement */ 300 /* Coarse refinement */
305 301
306 pmax = TWO_PI/model->Wo + 5; 302 pmax = TWO_PI / model->Wo + 5;
307 pmin = TWO_PI/model->Wo - 5; 303 pmin = TWO_PI / model->Wo - 5;
308 pstep = 1.0; 304 pstep = 1.0;
309 hs_pitch_refinement(model,Sw,pmin,pmax,pstep); 305 hs_pitch_refinement(model, Sw, pmin, pmax, pstep);
310 306
311 /* Fine refinement */ 307 /* Fine refinement */
312 308
313 pmax = TWO_PI/model->Wo + 1; 309 pmax = TWO_PI / model->Wo + 1;
314 pmin = TWO_PI/model->Wo - 1; 310 pmin = TWO_PI / model->Wo - 1;
315 pstep = 0.25; 311 pstep = 0.25;
316 hs_pitch_refinement(model,Sw,pmin,pmax,pstep); 312 hs_pitch_refinement(model, Sw, pmin, pmax, pstep);
317 313
318 /* Limit range */ 314 /* Limit range */
319 315
320 if (model->Wo < TWO_PI/c2const->p_max) 316 if (model->Wo < TWO_PI / c2const->p_max) model->Wo = TWO_PI / c2const->p_max;
321 model->Wo = TWO_PI/c2const->p_max; 317 if (model->Wo > TWO_PI / c2const->p_min) model->Wo = TWO_PI / c2const->p_min;
322 if (model->Wo > TWO_PI/c2const->p_min)
323 model->Wo = TWO_PI/c2const->p_min;
324 318
325 model->L = floorf(PI/model->Wo); 319 model->L = floorf(PI / model->Wo);
326 320
327 /* trap occasional round off issues with floorf() */ 321 /* trap occasional round off issues with floorf() */
328 if (model->Wo*model->L >= 0.95*PI) { 322 if (model->Wo * model->L >= 0.95 * PI) {
329 model->L--; 323 model->L--;
330 } 324 }
331 assert(model->Wo*model->L < PI); 325 assert(model->Wo * model->L < PI);
332} 326}
333 327
334/*---------------------------------------------------------------------------*\ 328/*---------------------------------------------------------------------------*\
@@ -348,35 +342,39 @@ void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[])
348 342
349\*---------------------------------------------------------------------------*/ 343\*---------------------------------------------------------------------------*/
350 344
351void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) 345void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax,
352{ 346 float pstep) {
353 int m; /* loop variable */ 347 int m; /* loop variable */
354 int b; /* bin for current harmonic centre */ 348 int b; /* bin for current harmonic centre */
355 float E; /* energy for current pitch*/ 349 float E; /* energy for current pitch*/
356 float Wo; /* current "test" fundamental freq. */ 350 float Wo; /* current "test" fundamental freq. */
357 float Wom; /* Wo that maximises E */ 351 float Wom; /* Wo that maximises E */
358 float Em; /* mamimum energy */ 352 float Em; /* mamimum energy */
359 float r, one_on_r; /* number of rads/bin */ 353 float r, one_on_r; /* number of rads/bin */
360 float p; /* current pitch */ 354 float p; /* current pitch */
361 355
362 /* Initialisation */ 356 /* Initialisation */
363 357
364 model->L = PI/model->Wo; /* use initial pitch est. for L */ 358 model->L = PI / model->Wo; /* use initial pitch est. for L */
365 Wom = model->Wo; 359 Wom = model->Wo;
366 Em = 0.0; 360 Em = 0.0;
367 r = TWO_PI/FFT_ENC; 361 r = TWO_PI / FFT_ENC;
368 one_on_r = 1.0/r; 362 one_on_r = 1.0 / r;
369 363
370 /* Determine harmonic sum for a range of Wo values */ 364 /* Determine harmonic sum for a range of Wo values */
371 365
372 for(p=pmin; p<=pmax; p+=pstep) { 366 for (p = pmin; p <= pmax; p += pstep) {
373 E = 0.0; 367 E = 0.0;
374 Wo = TWO_PI/p; 368 Wo = TWO_PI / p;
369
370 float bFloat = Wo * one_on_r;
371 float currentBFloat = bFloat;
375 372
376 /* Sum harmonic magnitudes */ 373 /* Sum harmonic magnitudes */
377 for(m=1; m<=model->L; m++) { 374 for (m = 1; m <= model->L; m++) {
378 b = (int)(m*Wo*one_on_r + 0.5); 375 b = (int)(currentBFloat + 0.5);
379 E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; 376 E += Sw[b].real * Sw[b].real + Sw[b].imag * Sw[b].imag;
377 currentBFloat += bFloat;
380 } 378 }
381 /* Compare to see if this is a maximum */ 379 /* Compare to see if this is a maximum */
382 380
@@ -399,35 +397,35 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float
399 397
400\*---------------------------------------------------------------------------*/ 398\*---------------------------------------------------------------------------*/
401 399
402void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) 400void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) {
403{ 401 int i, m; /* loop variables */
404 int i,m; /* loop variables */ 402 int am, bm; /* bounds of current harmonic */
405 int am,bm; /* bounds of current harmonic */ 403 float den; /* denominator of amplitude expression */
406 float den; /* denominator of amplitude expression */
407 404
408 float r = TWO_PI/FFT_ENC; 405 float r = TWO_PI / FFT_ENC;
409 float one_on_r = 1.0/r; 406 float one_on_r = 1.0 / r;
410 407
411 for(m=1; m<=model->L; m++) { 408 for (m = 1; m <= model->L; m++) {
412 /* Estimate ampltude of harmonic */ 409 /* Estimate ampltude of harmonic */
413 410
414 den = 0.0; 411 den = 0.0;
415 am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); 412 am = (int)((m - 0.5) * model->Wo * one_on_r + 0.5);
416 bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); 413 bm = (int)((m + 0.5) * model->Wo * one_on_r + 0.5);
417 414
418 for(i=am; i<bm; i++) { 415 for (i = am; i < bm; i++) {
419 den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; 416 den += Sw[i].real * Sw[i].real + Sw[i].imag * Sw[i].imag;
420 } 417 }
421 418
422 model->A[m] = sqrtf(den); 419 model->A[m] = sqrtf(den);
423 420
424 if (est_phase) { 421 if (est_phase) {
425 int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */ 422 int b = (int)(m * model->Wo / r +
423 0.5); /* DFT bin of centre of current harmonic */
426 424
427 /* Estimate phase of harmonic, this is expensive in CPU for 425 /* Estimate phase of harmonic, this is expensive in CPU for
428 embedded devicesso we make it an option */ 426 embedded devicesso we make it an option */
429 427
430 model->phi[m] = atan2f(Sw[b].imag,Sw[b].real); 428 model->phi[m] = atan2f(Sw[b].imag, Sw[b].real);
431 } 429 }
432 } 430 }
433} 431}
@@ -443,119 +441,110 @@ void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase)
443 441
444\*---------------------------------------------------------------------------*/ 442\*---------------------------------------------------------------------------*/
445 443
446float est_voicing_mbe( 444float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], float W[]) {
447 C2CONST *c2const, 445 int l, al, bl, m; /* loop variables */
448 MODEL *model, 446 COMP Am; /* amplitude sample for this band */
449 COMP Sw[], 447 int offset; /* centers Hw[] about current harmonic */
450 float W[] 448 float den; /* denominator of Am expression */
451 ) 449 float error; /* accumulated error between original and synthesised */
452{ 450 float Wo;
453 int l,al,bl,m; /* loop variables */ 451 float sig, snr;
454 COMP Am; /* amplitude sample for this band */ 452 float elow, ehigh, eratio;
455 int offset; /* centers Hw[] about current harmonic */ 453 float sixty;
456 float den; /* denominator of Am expression */ 454 COMP Ew;
457 float error; /* accumulated error between original and synthesised */ 455 Ew.real = 0;
458 float Wo; 456 Ew.imag = 0;
459 float sig, snr; 457
460 float elow, ehigh, eratio; 458 int l_1000hz = model->L * 1000.0 / (c2const->Fs / 2);
461 float sixty; 459 sig = 1E-4;
462 COMP Ew; 460 for (l = 1; l <= l_1000hz; l++) {
463 Ew.real = 0; 461 sig += model->A[l] * model->A[l];
464 Ew.imag = 0; 462 }
465
466 int l_1000hz = model->L*1000.0/(c2const->Fs/2);
467 sig = 1E-4;
468 for(l=1; l<=l_1000hz; l++) {
469 sig += model->A[l]*model->A[l];
470 }
471 463
472 Wo = model->Wo; 464 Wo = model->Wo;
473 error = 1E-4; 465 error = 1E-4;
474 466
475 /* Just test across the harmonics in the first 1000 Hz */ 467 /* Just test across the harmonics in the first 1000 Hz */
476 468
477 for(l=1; l<=l_1000hz; l++) { 469 for (l = 1; l <= l_1000hz; l++) {
478 Am.real = 0.0; 470 Am.real = 0.0;
479 Am.imag = 0.0; 471 Am.imag = 0.0;
480 den = 0.0; 472 den = 0.0;
481 al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); 473 al = ceilf((l - 0.5) * Wo * FFT_ENC / TWO_PI);
482 bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); 474 bl = ceilf((l + 0.5) * Wo * FFT_ENC / TWO_PI);
483 475
484 /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ 476 /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
485 477
486 offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; 478 offset = FFT_ENC / 2 - l * Wo * FFT_ENC / TWO_PI + 0.5;
487 for(m=al; m<bl; m++) { 479 for (m = al; m < bl; m++) {
488 Am.real += Sw[m].real*W[offset+m]; 480 Am.real += Sw[m].real * W[offset + m];
489 Am.imag += Sw[m].imag*W[offset+m]; 481 Am.imag += Sw[m].imag * W[offset + m];
490 den += W[offset+m]*W[offset+m]; 482 den += W[offset + m] * W[offset + m];
491 } 483 }
492 484
493 Am.real = Am.real/den; 485 Am.real = Am.real / den;
494 Am.imag = Am.imag/den; 486 Am.imag = Am.imag / den;
495 487
496 /* Determine error between estimated harmonic and original */ 488 /* Determine error between estimated harmonic and original */
497 489
498 for(m=al; m<bl; m++) { 490 for (m = al; m < bl; m++) {
499 Ew.real = Sw[m].real - Am.real*W[offset+m]; 491 Ew.real = Sw[m].real - Am.real * W[offset + m];
500 Ew.imag = Sw[m].imag - Am.imag*W[offset+m]; 492 Ew.imag = Sw[m].imag - Am.imag * W[offset + m];
501 error += Ew.real*Ew.real; 493 error += Ew.real * Ew.real;
502 error += Ew.imag*Ew.imag; 494 error += Ew.imag * Ew.imag;
503 }
504 } 495 }
496 }
505 497
506 snr = 10.0*log10f(sig/error); 498 snr = 10.0 * log10f(sig / error);
507 if (snr > V_THRESH) 499 if (snr > V_THRESH)
508 model->voiced = 1; 500 model->voiced = 1;
509 else 501 else
510 model->voiced = 0; 502 model->voiced = 0;
511 503
512 /* post processing, helps clean up some voicing errors ------------------*/ 504 /* post processing, helps clean up some voicing errors ------------------*/
513
514 /*
515 Determine the ratio of low freqency to high frequency energy,
516 voiced speech tends to be dominated by low frequency energy,
517 unvoiced by high frequency. This measure can be used to
518 determine if we have made any gross errors.
519 */
520
521 int l_2000hz = model->L*2000.0/(c2const->Fs/2);
522 int l_4000hz = model->L*4000.0/(c2const->Fs/2);
523 elow = ehigh = 1E-4;
524 for(l=1; l<=l_2000hz; l++) {
525 elow += model->A[l]*model->A[l];
526 }
527 for(l=l_2000hz; l<=l_4000hz; l++) {
528 ehigh += model->A[l]*model->A[l];
529 }
530 eratio = 10.0*log10f(elow/ehigh);
531 505
532 /* Look for Type 1 errors, strongly V speech that has been 506 /*
533 accidentally declared UV */ 507 Determine the ratio of low frequency to high frequency energy,
508 voiced speech tends to be dominated by low frequency energy,
509 unvoiced by high frequency. This measure can be used to
510 determine if we have made any gross errors.
511 */
512
513 int l_2000hz = model->L * 2000.0 / (c2const->Fs / 2);
514 int l_4000hz = model->L * 4000.0 / (c2const->Fs / 2);
515 elow = ehigh = 1E-4;
516 for (l = 1; l <= l_2000hz; l++) {
517 elow += model->A[l] * model->A[l];
518 }
519 for (l = l_2000hz; l <= l_4000hz; l++) {
520 ehigh += model->A[l] * model->A[l];
521 }
522 eratio = 10.0 * log10f(elow / ehigh);
534 523
535 if (model->voiced == 0) 524 /* Look for Type 1 errors, strongly V speech that has been
536 if (eratio > 10.0) 525 accidentally declared UV */
537 model->voiced = 1;
538 526
539 /* Look for Type 2 errors, strongly UV speech that has been 527 if (model->voiced == 0)
540 accidentally declared V */ 528 if (eratio > 10.0) model->voiced = 1;
541 529
542 if (model->voiced == 1) { 530 /* Look for Type 2 errors, strongly UV speech that has been
543 if (eratio < -10.0) 531 accidentally declared V */
544 model->voiced = 0;
545 532
546 /* A common source of Type 2 errors is the pitch estimator 533 if (model->voiced == 1) {
547 gives a low (50Hz) estimate for UV speech, which gives a 534 if (eratio < -10.0) model->voiced = 0;
548 good match with noise due to the close harmoonic spacing.
549 These errors are much more common than people with 50Hz3
550 pitch, so we have just a small eratio threshold. */
551 535
552 sixty = 60.0*TWO_PI/c2const->Fs; 536 /* A common source of Type 2 errors is the pitch estimator
553 if ((eratio < -4.0) && (model->Wo <= sixty)) 537 gives a low (50Hz) estimate for UV speech, which gives a
554 model->voiced = 0; 538 good match with noise due to the close harmoonic spacing.
555 } 539 These errors are much more common than people with 50Hz3
556 //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); 540 pitch, so we have just a small eratio threshold. */
557 541
558 return snr; 542 sixty = 60.0 * TWO_PI / c2const->Fs;
543 if ((eratio < -4.0) && (model->Wo <= sixty)) model->voiced = 0;
544 }
545 // printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
546
547 return snr;
559} 548}
560 549
561/*---------------------------------------------------------------------------*\ 550/*---------------------------------------------------------------------------*\
@@ -564,32 +553,29 @@ float est_voicing_mbe(
564 AUTHOR......: David Rowe 553 AUTHOR......: David Rowe
565 DATE CREATED: 11/5/94 554 DATE CREATED: 11/5/94
566 555
567 Init function that generates the trapezoidal (Parzen) sythesis window. 556 Init function that generates the trapezoidal (Parzen) synthesis window.
568 557
569\*---------------------------------------------------------------------------*/ 558\*---------------------------------------------------------------------------*/
570 559
571void make_synthesis_window(C2CONST *c2const, float Pn[]) 560void make_synthesis_window(C2CONST *c2const, float Pn[]) {
572{ 561 int i;
573 int i;
574 float win; 562 float win;
575 int n_samp = c2const->n_samp; 563 int n_samp = c2const->n_samp;
576 int tw = c2const->tw; 564 int tw = c2const->tw;
577 565
578 /* Generate Parzen window in time domain */ 566 /* Generate Parzen window in time domain */
579 567
580 win = 0.0; 568 win = 0.0;
581 for(i=0; i<n_samp/2-tw; i++) 569 for (i = 0; i < n_samp / 2 - tw; i++) Pn[i] = 0.0;
582 Pn[i] = 0.0;
583 win = 0.0; 570 win = 0.0;
584 for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ ) 571 for (i = n_samp / 2 - tw; i < n_samp / 2 + tw; win += 1.0 / (2 * tw), i++)
585 Pn[i] = win; 572 Pn[i] = win;
586 for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++) 573 for (i = n_samp / 2 + tw; i < 3 * n_samp / 2 - tw; i++) Pn[i] = 1.0;
587 Pn[i] = 1.0;
588 win = 1.0; 574 win = 1.0;
589 for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++) 575 for (i = 3 * n_samp / 2 - tw; i < 3 * n_samp / 2 + tw;
576 win -= 1.0 / (2 * tw), i++)
590 Pn[i] = win; 577 Pn[i] = win;
591 for(i=3*n_samp/2+tw; i<2*n_samp; i++) 578 for (i = 3 * n_samp / 2 + tw; i < 2 * n_samp; i++) Pn[i] = 0.0;
592 Pn[i] = 0.0;
593} 579}
594 580
595/*---------------------------------------------------------------------------*\ 581/*---------------------------------------------------------------------------*\
@@ -600,77 +586,72 @@ void make_synthesis_window(C2CONST *c2const, float Pn[])
600 586
601 Synthesise a speech signal in the frequency domain from the 587 Synthesise a speech signal in the frequency domain from the
602 sinusodal model parameters. Uses overlap-add with a trapezoidal 588 sinusodal model parameters. Uses overlap-add with a trapezoidal
603 window to smoothly interpolate betwen frames. 589 window to smoothly interpolate between frames.
604 590
605\*---------------------------------------------------------------------------*/ 591\*---------------------------------------------------------------------------*/
606 592
607void synthesise( 593void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg,
608 int n_samp, 594 float Sn_[], /* time domain synthesised signal */
609 codec2_fftr_cfg fftr_inv_cfg, 595 MODEL *model, /* ptr to model parameters for this frame */
610 float Sn_[], /* time domain synthesised signal */ 596 float Pn[], /* time domain Parzen window */
611 MODEL *model, /* ptr to model parameters for this frame */ 597 int shift /* flag used to handle transition frames */
612 float Pn[], /* time domain Parzen window */ 598) {
613 int shift /* flag used to handle transition frames */ 599 int i, l, j, b; /* loop variables */
614) 600 COMP Sw_[FFT_DEC / 2 + 1]; /* DFT of synthesised signal */
615{ 601 float sw_[FFT_DEC]; /* synthesised signal */
616 int i,l,j,b; /* loop variables */ 602
617 COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */ 603 if (shift) {
618 float sw_[FFT_DEC]; /* synthesised signal */ 604 /* Update memories */
619 605 for (i = 0; i < n_samp - 1; i++) {
620 if (shift) { 606 Sn_[i] = Sn_[i + n_samp];
621 /* Update memories */
622 for(i=0; i<n_samp-1; i++) {
623 Sn_[i] = Sn_[i+n_samp];
624 }
625 Sn_[n_samp-1] = 0.0;
626 } 607 }
608 Sn_[n_samp - 1] = 0.0;
609 }
627 610
628 for(i=0; i<FFT_DEC/2+1; i++) { 611 for (i = 0; i < FFT_DEC / 2 + 1; i++) {
629 Sw_[i].real = 0.0; 612 Sw_[i].real = 0.0;
630 Sw_[i].imag = 0.0; 613 Sw_[i].imag = 0.0;
631 } 614 }
632 615
633 /* Now set up frequency domain synthesised speech */ 616 /* Now set up frequency domain synthesised speech */
634 617
635 for(l=1; l<=model->L; l++) { 618 for (l = 1; l <= model->L; l++) {
636 b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); 619 b = (int)(l * model->Wo * FFT_DEC / TWO_PI + 0.5);
637 if (b > ((FFT_DEC/2)-1)) { 620 if (b > ((FFT_DEC / 2) - 1)) {
638 b = (FFT_DEC/2)-1; 621 b = (FFT_DEC / 2) - 1;
639 }
640 Sw_[b].real = model->A[l]*cosf(model->phi[l]);
641 Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
642 } 622 }
623 Sw_[b].real = model->A[l] * cosf(model->phi[l]);
624 Sw_[b].imag = model->A[l] * sinf(model->phi[l]);
625 }
643 626
644 /* Perform inverse DFT */ 627 /* Perform inverse DFT */
645 628
646 codec2_fftri(fftr_inv_cfg, Sw_,sw_); 629 codec2_fftri(fftr_inv_cfg, Sw_, sw_);
647 630
648 /* Overlap add to previous samples */ 631 /* Overlap add to previous samples */
649 632
650 #ifdef USE_KISS_FFT 633#ifdef USE_KISS_FFT
651 #define FFTI_FACTOR ((float)1.0) 634#define FFTI_FACTOR ((float)1.0)
652 #else 635#else
653 #define FFTI_FACTOR ((float32_t)FFT_DEC) 636#define FFTI_FACTOR ((float32_t)FFT_DEC)
654 #endif 637#endif
655 638
656 for(i=0; i<n_samp-1; i++) { 639 for (i = 0; i < n_samp - 1; i++) {
657 Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR; 640 Sn_[i] += sw_[FFT_DEC - n_samp + 1 + i] * Pn[i] * FFTI_FACTOR;
658 } 641 }
659 642
660 if (shift) 643 if (shift)
661 for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) 644 for (i = n_samp - 1, j = 0; i < 2 * n_samp; i++, j++)
662 Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR; 645 Sn_[i] = sw_[j] * Pn[i] * FFTI_FACTOR;
663 else 646 else
664 for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) 647 for (i = n_samp - 1, j = 0; i < 2 * n_samp; i++, j++)
665 Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR; 648 Sn_[i] += sw_[j] * Pn[i] * FFTI_FACTOR;
666} 649}
667 650
668
669/* todo: this should probably be in some states rather than a static */ 651/* todo: this should probably be in some states rather than a static */
670static unsigned long next = 1; 652static unsigned long next = 1;
671 653
672int codec2_rand(void) { 654int codec2_rand(void) {
673 next = next * 1103515245 + 12345; 655 next = next * 1103515245 + 12345;
674 return((unsigned)(next/65536) % 32768); 656 return ((unsigned)(next / 65536) % 32768);
675} 657}
676