diff options
Diffstat (limited to 'sine.c')
-rw-r--r-- | sine.c | 591 |
1 files changed, 286 insertions, 305 deletions
@@ -27,64 +27,66 @@ | |||
27 | 27 | ||
28 | /*---------------------------------------------------------------------------*\ | 28 | /*---------------------------------------------------------------------------*\ |
29 | 29 | ||
30 | INCLUDES | 30 | INCLUDES |
31 | 31 | ||
32 | \*---------------------------------------------------------------------------*/ | 32 | \*---------------------------------------------------------------------------*/ |
33 | 33 | ||
34 | #include <stdlib.h> | 34 | #include "sine.h" |
35 | #include <stdio.h> | 35 | |
36 | #include <math.h> | 36 | #include <math.h> |
37 | #include <stdio.h> | ||
38 | #include <stdlib.h> | ||
37 | 39 | ||
38 | #include "defines.h" | 40 | #include "defines.h" |
39 | #include "sine.h" | ||
40 | #include "kiss_fft.h" | 41 | #include "kiss_fft.h" |
41 | 42 | ||
42 | #define HPF_BETA 0.125 | 43 | #define HPF_BETA 0.125 |
43 | 44 | ||
44 | /*---------------------------------------------------------------------------*\ | 45 | /*---------------------------------------------------------------------------*\ |
45 | 46 | ||
46 | HEADERS | 47 | HEADERS |
47 | 48 | ||
48 | \*---------------------------------------------------------------------------*/ | 49 | \*---------------------------------------------------------------------------*/ |
49 | 50 | ||
50 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, | 51 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, |
51 | float pstep); | 52 | float pstep); |
52 | 53 | ||
53 | /*---------------------------------------------------------------------------*\ | 54 | /*---------------------------------------------------------------------------*\ |
54 | 55 | ||
55 | FUNCTIONS | 56 | FUNCTIONS |
56 | 57 | ||
57 | \*---------------------------------------------------------------------------*/ | 58 | \*---------------------------------------------------------------------------*/ |
58 | 59 | ||
59 | C2CONST c2const_create(int Fs, float framelength_s) { | 60 | C2CONST c2const_create(int Fs, float framelength_s) { |
60 | C2CONST c2const; | 61 | C2CONST c2const; |
61 | 62 | ||
62 | assert((Fs == 8000) || (Fs = 16000)); | 63 | assert((Fs == 8000) || (Fs == 16000)); |
63 | c2const.Fs = Fs; | 64 | c2const.Fs = Fs; |
64 | c2const.n_samp = round(Fs*framelength_s); | 65 | c2const.n_samp = round(Fs * framelength_s); |
65 | c2const.max_amp = floor(Fs*P_MIN_S/2); | 66 | c2const.max_amp = floor(Fs * P_MAX_S / 2); |
66 | c2const.p_min = floor(Fs*P_MIN_S); | 67 | c2const.p_min = floor(Fs * P_MIN_S); |
67 | c2const.p_max = floor(Fs*P_MAX_S); | 68 | c2const.p_max = floor(Fs * P_MAX_S); |
68 | c2const.m_pitch = floor(Fs*M_PITCH_S); | 69 | c2const.m_pitch = floor(Fs * M_PITCH_S); |
69 | c2const.Wo_min = TWO_PI/c2const.p_max; | 70 | c2const.Wo_min = TWO_PI / c2const.p_max; |
70 | c2const.Wo_max = TWO_PI/c2const.p_min; | 71 | c2const.Wo_max = TWO_PI / c2const.p_min; |
71 | 72 | ||
72 | if (Fs == 8000) { | 73 | if (Fs == 8000) { |
73 | c2const.nw = 279; | 74 | c2const.nw = 279; |
74 | } else { | 75 | } else { |
75 | c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */ | 76 | c2const.nw = 511; /* actually a bit shorter in time but lets us maintain |
76 | } | 77 | constant FFT size */ |
78 | } | ||
77 | 79 | ||
78 | c2const.tw = Fs*TW_S; | 80 | c2const.tw = Fs * TW_S; |
79 | 81 | ||
80 | /* | 82 | /* |
81 | fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); | 83 | fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); |
82 | fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); | 84 | fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); |
83 | fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); | 85 | fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); |
84 | fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); | 86 | fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); |
85 | */ | 87 | */ |
86 | 88 | ||
87 | return c2const; | 89 | return c2const; |
88 | } | 90 | } |
89 | 91 | ||
90 | /*---------------------------------------------------------------------------*\ | 92 | /*---------------------------------------------------------------------------*\ |
@@ -97,13 +99,13 @@ C2CONST c2const_create(int Fs, float framelength_s) { | |||
97 | 99 | ||
98 | \*---------------------------------------------------------------------------*/ | 100 | \*---------------------------------------------------------------------------*/ |
99 | 101 | ||
100 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], float W[]) | 102 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, |
101 | { | 103 | float w[], float W[]) { |
102 | float m; | 104 | float m; |
103 | COMP wshift[FFT_ENC]; | 105 | COMP wshift[FFT_ENC]; |
104 | int i,j; | 106 | int i, j; |
105 | int m_pitch = c2const->m_pitch; | 107 | int m_pitch = c2const->m_pitch; |
106 | int nw = c2const->nw; | 108 | int nw = c2const->nw; |
107 | 109 | ||
108 | /* | 110 | /* |
109 | Generate Hamming window centered on M-sample pitch analysis window | 111 | Generate Hamming window centered on M-sample pitch analysis window |
@@ -117,20 +119,18 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
117 | */ | 119 | */ |
118 | 120 | ||
119 | m = 0.0; | 121 | m = 0.0; |
120 | for(i=0; i<m_pitch/2-nw/2; i++) | 122 | for (i = 0; i < m_pitch / 2 - nw / 2; i++) w[i] = 0.0; |
121 | w[i] = 0.0; | 123 | for (i = m_pitch / 2 - nw / 2, j = 0; i < m_pitch / 2 + nw / 2; i++, j++) { |
122 | for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) { | 124 | w[i] = 0.5 - 0.5 * cosf(TWO_PI * j / (nw - 1)); |
123 | w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1)); | 125 | m += w[i] * w[i]; |
124 | m += w[i]*w[i]; | ||
125 | } | 126 | } |
126 | for(i=m_pitch/2+nw/2; i<m_pitch; i++) | 127 | for (i = m_pitch / 2 + nw / 2; i < m_pitch; i++) w[i] = 0.0; |
127 | w[i] = 0.0; | ||
128 | 128 | ||
129 | /* Normalise - makes freq domain amplitude estimation straight | 129 | /* Normalise - makes freq domain amplitude estimation straight |
130 | forward */ | 130 | forward */ |
131 | 131 | ||
132 | m = 1.0/sqrtf(m*FFT_ENC); | 132 | m = 1.0 / sqrtf(m * FFT_ENC); |
133 | for(i=0; i<m_pitch; i++) { | 133 | for (i = 0; i < m_pitch; i++) { |
134 | w[i] *= m; | 134 | w[i] *= m; |
135 | } | 135 | } |
136 | 136 | ||
@@ -157,14 +157,13 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
157 | 157 | ||
158 | COMP temp[FFT_ENC]; | 158 | COMP temp[FFT_ENC]; |
159 | 159 | ||
160 | for(i=0; i<FFT_ENC; i++) { | 160 | for (i = 0; i < FFT_ENC; i++) { |
161 | wshift[i].real = 0.0; | 161 | wshift[i].real = 0.0; |
162 | wshift[i].imag = 0.0; | 162 | wshift[i].imag = 0.0; |
163 | } | 163 | } |
164 | for(i=0; i<nw/2; i++) | 164 | for (i = 0; i < nw / 2; i++) wshift[i].real = w[i + m_pitch / 2]; |
165 | wshift[i].real = w[i+m_pitch/2]; | 165 | for (i = FFT_ENC - nw / 2, j = m_pitch / 2 - nw / 2; i < FFT_ENC; i++, j++) |
166 | for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) | 166 | wshift[i].real = w[j]; |
167 | wshift[i].real = w[j]; | ||
168 | 167 | ||
169 | codec2_fft(fft_fwd_cfg, wshift, temp); | 168 | codec2_fft(fft_fwd_cfg, wshift, temp); |
170 | 169 | ||
@@ -191,12 +190,10 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
191 | 190 | ||
192 | */ | 191 | */ |
193 | 192 | ||
194 | 193 | for (i = 0; i < FFT_ENC / 2; i++) { | |
195 | for(i=0; i<FFT_ENC/2; i++) { | 194 | W[i] = temp[i + FFT_ENC / 2].real; |
196 | W[i] = temp[i + FFT_ENC / 2].real; | 195 | W[i + FFT_ENC / 2] = temp[i].real; |
197 | W[i + FFT_ENC / 2] = temp[i].real; | ||
198 | } | 196 | } |
199 | |||
200 | } | 197 | } |
201 | 198 | ||
202 | /*---------------------------------------------------------------------------*\ | 199 | /*---------------------------------------------------------------------------*\ |
@@ -211,12 +208,11 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
211 | 208 | ||
212 | \*---------------------------------------------------------------------------*/ | 209 | \*---------------------------------------------------------------------------*/ |
213 | 210 | ||
214 | float hpf(float x, float states[]) | 211 | float hpf(float x, float states[]) { |
215 | { | 212 | states[0] = -HPF_BETA * states[0] + x - states[1]; |
216 | states[0] = -HPF_BETA*states[0] + x - states[1]; | 213 | states[1] = x; |
217 | states[1] = x; | ||
218 | 214 | ||
219 | return states[0]; | 215 | return states[0]; |
220 | } | 216 | } |
221 | 217 | ||
222 | /*---------------------------------------------------------------------------*\ | 218 | /*---------------------------------------------------------------------------*\ |
@@ -230,41 +226,43 @@ float hpf(float x, float states[]) | |||
230 | \*---------------------------------------------------------------------------*/ | 226 | \*---------------------------------------------------------------------------*/ |
231 | 227 | ||
232 | // TODO: we can either go for a faster FFT using fftr and some stack usage | 228 | // TODO: we can either go for a faster FFT using fftr and some stack usage |
233 | // or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace | 229 | // or we can reduce stack usage to almost zero on STM32 by switching to |
230 | // fft_inplace | ||
234 | #if 1 | 231 | #if 1 |
235 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]) | 232 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], |
236 | { | 233 | float Sn[], float w[]) { |
237 | int i; | 234 | int i; |
238 | int m_pitch = c2const->m_pitch; | 235 | int m_pitch = c2const->m_pitch; |
239 | int nw = c2const->nw; | 236 | int nw = c2const->nw; |
240 | 237 | ||
241 | for(i=0; i<FFT_ENC; i++) { | 238 | for (i = 0; i < FFT_ENC; i++) { |
242 | Sw[i].real = 0.0; | 239 | Sw[i].real = 0.0; |
243 | Sw[i].imag = 0.0; | 240 | Sw[i].imag = 0.0; |
244 | } | 241 | } |
245 | 242 | ||
246 | /* Centre analysis window on time axis, we need to arrange input | 243 | /* Centre analysis window on time axis, we need to arrange input |
247 | to FFT this way to make FFT phases correct */ | 244 | to FFT this way to make FFT phases correct */ |
248 | 245 | ||
249 | /* move 2nd half to start of FFT input vector */ | 246 | /* move 2nd half to start of FFT input vector */ |
250 | 247 | ||
251 | for(i=0; i<nw/2; i++) | 248 | for (i = 0; i < nw / 2; i++) |
252 | Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | 249 | Sw[i].real = Sn[i + m_pitch / 2] * w[i + m_pitch / 2]; |
253 | 250 | ||
254 | /* move 1st half to end of FFT input vector */ | 251 | /* move 1st half to end of FFT input vector */ |
255 | 252 | ||
256 | for(i=0; i<nw/2; i++) | 253 | for (i = 0; i < nw / 2; i++) |
257 | Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | 254 | Sw[FFT_ENC - nw / 2 + i].real = |
255 | Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2]; | ||
258 | 256 | ||
259 | codec2_fft_inplace(fft_fwd_cfg, Sw); | 257 | codec2_fft_inplace(fft_fwd_cfg, Sw); |
260 | } | 258 | } |
261 | #else | 259 | #else |
262 | void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | 260 | void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], |
263 | { | 261 | float w[]) { |
264 | int i; | 262 | int i; |
265 | float sw[FFT_ENC]; | 263 | float sw[FFT_ENC]; |
266 | 264 | ||
267 | for(i=0; i<FFT_ENC; i++) { | 265 | for (i = 0; i < FFT_ENC; i++) { |
268 | sw[i] = 0.0; | 266 | sw[i] = 0.0; |
269 | } | 267 | } |
270 | 268 | ||
@@ -273,19 +271,18 @@ void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | |||
273 | 271 | ||
274 | /* move 2nd half to start of FFT input vector */ | 272 | /* move 2nd half to start of FFT input vector */ |
275 | 273 | ||
276 | for(i=0; i<nw/2; i++) | 274 | for (i = 0; i < nw / 2; i++) sw[i] = Sn[i + m_pitch / 2] * w[i + m_pitch / 2]; |
277 | sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | ||
278 | 275 | ||
279 | /* move 1st half to end of FFT input vector */ | 276 | /* move 1st half to end of FFT input vector */ |
280 | 277 | ||
281 | for(i=0; i<nw/2; i++) | 278 | for (i = 0; i < nw / 2; i++) |
282 | sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | 279 | sw[FFT_ENC - nw / 2 + i] = |
280 | Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2]; | ||
283 | 281 | ||
284 | codec2_fftr(fftr_fwd_cfg, sw, Sw); | 282 | codec2_fftr(fftr_fwd_cfg, sw, Sw); |
285 | } | 283 | } |
286 | #endif | 284 | #endif |
287 | 285 | ||
288 | |||
289 | /*---------------------------------------------------------------------------*\ | 286 | /*---------------------------------------------------------------------------*\ |
290 | 287 | ||
291 | FUNCTION....: two_stage_pitch_refinement | 288 | FUNCTION....: two_stage_pitch_refinement |
@@ -297,38 +294,35 @@ void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | |||
297 | 294 | ||
298 | \*---------------------------------------------------------------------------*/ | 295 | \*---------------------------------------------------------------------------*/ |
299 | 296 | ||
300 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) | 297 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) { |
301 | { | 298 | float pmin, pmax, pstep; /* pitch refinement minimum, maximum and step */ |
302 | float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */ | ||
303 | 299 | ||
304 | /* Coarse refinement */ | 300 | /* Coarse refinement */ |
305 | 301 | ||
306 | pmax = TWO_PI/model->Wo + 5; | 302 | pmax = TWO_PI / model->Wo + 5; |
307 | pmin = TWO_PI/model->Wo - 5; | 303 | pmin = TWO_PI / model->Wo - 5; |
308 | pstep = 1.0; | 304 | pstep = 1.0; |
309 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | 305 | hs_pitch_refinement(model, Sw, pmin, pmax, pstep); |
310 | 306 | ||
311 | /* Fine refinement */ | 307 | /* Fine refinement */ |
312 | 308 | ||
313 | pmax = TWO_PI/model->Wo + 1; | 309 | pmax = TWO_PI / model->Wo + 1; |
314 | pmin = TWO_PI/model->Wo - 1; | 310 | pmin = TWO_PI / model->Wo - 1; |
315 | pstep = 0.25; | 311 | pstep = 0.25; |
316 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | 312 | hs_pitch_refinement(model, Sw, pmin, pmax, pstep); |
317 | 313 | ||
318 | /* Limit range */ | 314 | /* Limit range */ |
319 | 315 | ||
320 | if (model->Wo < TWO_PI/c2const->p_max) | 316 | if (model->Wo < TWO_PI / c2const->p_max) model->Wo = TWO_PI / c2const->p_max; |
321 | model->Wo = TWO_PI/c2const->p_max; | 317 | if (model->Wo > TWO_PI / c2const->p_min) model->Wo = TWO_PI / c2const->p_min; |
322 | if (model->Wo > TWO_PI/c2const->p_min) | ||
323 | model->Wo = TWO_PI/c2const->p_min; | ||
324 | 318 | ||
325 | model->L = floorf(PI/model->Wo); | 319 | model->L = floorf(PI / model->Wo); |
326 | 320 | ||
327 | /* trap occasional round off issues with floorf() */ | 321 | /* trap occasional round off issues with floorf() */ |
328 | if (model->Wo*model->L >= 0.95*PI) { | 322 | if (model->Wo * model->L >= 0.95 * PI) { |
329 | model->L--; | 323 | model->L--; |
330 | } | 324 | } |
331 | assert(model->Wo*model->L < PI); | 325 | assert(model->Wo * model->L < PI); |
332 | } | 326 | } |
333 | 327 | ||
334 | /*---------------------------------------------------------------------------*\ | 328 | /*---------------------------------------------------------------------------*\ |
@@ -348,35 +342,39 @@ void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) | |||
348 | 342 | ||
349 | \*---------------------------------------------------------------------------*/ | 343 | \*---------------------------------------------------------------------------*/ |
350 | 344 | ||
351 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) | 345 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, |
352 | { | 346 | float pstep) { |
353 | int m; /* loop variable */ | 347 | int m; /* loop variable */ |
354 | int b; /* bin for current harmonic centre */ | 348 | int b; /* bin for current harmonic centre */ |
355 | float E; /* energy for current pitch*/ | 349 | float E; /* energy for current pitch*/ |
356 | float Wo; /* current "test" fundamental freq. */ | 350 | float Wo; /* current "test" fundamental freq. */ |
357 | float Wom; /* Wo that maximises E */ | 351 | float Wom; /* Wo that maximises E */ |
358 | float Em; /* mamimum energy */ | 352 | float Em; /* mamimum energy */ |
359 | float r, one_on_r; /* number of rads/bin */ | 353 | float r, one_on_r; /* number of rads/bin */ |
360 | float p; /* current pitch */ | 354 | float p; /* current pitch */ |
361 | 355 | ||
362 | /* Initialisation */ | 356 | /* Initialisation */ |
363 | 357 | ||
364 | model->L = PI/model->Wo; /* use initial pitch est. for L */ | 358 | model->L = PI / model->Wo; /* use initial pitch est. for L */ |
365 | Wom = model->Wo; | 359 | Wom = model->Wo; |
366 | Em = 0.0; | 360 | Em = 0.0; |
367 | r = TWO_PI/FFT_ENC; | 361 | r = TWO_PI / FFT_ENC; |
368 | one_on_r = 1.0/r; | 362 | one_on_r = 1.0 / r; |
369 | 363 | ||
370 | /* Determine harmonic sum for a range of Wo values */ | 364 | /* Determine harmonic sum for a range of Wo values */ |
371 | 365 | ||
372 | for(p=pmin; p<=pmax; p+=pstep) { | 366 | for (p = pmin; p <= pmax; p += pstep) { |
373 | E = 0.0; | 367 | E = 0.0; |
374 | Wo = TWO_PI/p; | 368 | Wo = TWO_PI / p; |
369 | |||
370 | float bFloat = Wo * one_on_r; | ||
371 | float currentBFloat = bFloat; | ||
375 | 372 | ||
376 | /* Sum harmonic magnitudes */ | 373 | /* Sum harmonic magnitudes */ |
377 | for(m=1; m<=model->L; m++) { | 374 | for (m = 1; m <= model->L; m++) { |
378 | b = (int)(m*Wo*one_on_r + 0.5); | 375 | b = (int)(currentBFloat + 0.5); |
379 | E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; | 376 | E += Sw[b].real * Sw[b].real + Sw[b].imag * Sw[b].imag; |
377 | currentBFloat += bFloat; | ||
380 | } | 378 | } |
381 | /* Compare to see if this is a maximum */ | 379 | /* Compare to see if this is a maximum */ |
382 | 380 | ||
@@ -399,35 +397,35 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float | |||
399 | 397 | ||
400 | \*---------------------------------------------------------------------------*/ | 398 | \*---------------------------------------------------------------------------*/ |
401 | 399 | ||
402 | void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) | 400 | void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) { |
403 | { | 401 | int i, m; /* loop variables */ |
404 | int i,m; /* loop variables */ | 402 | int am, bm; /* bounds of current harmonic */ |
405 | int am,bm; /* bounds of current harmonic */ | 403 | float den; /* denominator of amplitude expression */ |
406 | float den; /* denominator of amplitude expression */ | ||
407 | 404 | ||
408 | float r = TWO_PI/FFT_ENC; | 405 | float r = TWO_PI / FFT_ENC; |
409 | float one_on_r = 1.0/r; | 406 | float one_on_r = 1.0 / r; |
410 | 407 | ||
411 | for(m=1; m<=model->L; m++) { | 408 | for (m = 1; m <= model->L; m++) { |
412 | /* Estimate ampltude of harmonic */ | 409 | /* Estimate ampltude of harmonic */ |
413 | 410 | ||
414 | den = 0.0; | 411 | den = 0.0; |
415 | am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); | 412 | am = (int)((m - 0.5) * model->Wo * one_on_r + 0.5); |
416 | bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); | 413 | bm = (int)((m + 0.5) * model->Wo * one_on_r + 0.5); |
417 | 414 | ||
418 | for(i=am; i<bm; i++) { | 415 | for (i = am; i < bm; i++) { |
419 | den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; | 416 | den += Sw[i].real * Sw[i].real + Sw[i].imag * Sw[i].imag; |
420 | } | 417 | } |
421 | 418 | ||
422 | model->A[m] = sqrtf(den); | 419 | model->A[m] = sqrtf(den); |
423 | 420 | ||
424 | if (est_phase) { | 421 | if (est_phase) { |
425 | int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */ | 422 | int b = (int)(m * model->Wo / r + |
423 | 0.5); /* DFT bin of centre of current harmonic */ | ||
426 | 424 | ||
427 | /* Estimate phase of harmonic, this is expensive in CPU for | 425 | /* Estimate phase of harmonic, this is expensive in CPU for |
428 | embedded devicesso we make it an option */ | 426 | embedded devicesso we make it an option */ |
429 | 427 | ||
430 | model->phi[m] = atan2f(Sw[b].imag,Sw[b].real); | 428 | model->phi[m] = atan2f(Sw[b].imag, Sw[b].real); |
431 | } | 429 | } |
432 | } | 430 | } |
433 | } | 431 | } |
@@ -443,119 +441,110 @@ void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) | |||
443 | 441 | ||
444 | \*---------------------------------------------------------------------------*/ | 442 | \*---------------------------------------------------------------------------*/ |
445 | 443 | ||
446 | float est_voicing_mbe( | 444 | float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], float W[]) { |
447 | C2CONST *c2const, | 445 | int l, al, bl, m; /* loop variables */ |
448 | MODEL *model, | 446 | COMP Am; /* amplitude sample for this band */ |
449 | COMP Sw[], | 447 | int offset; /* centers Hw[] about current harmonic */ |
450 | float W[] | 448 | float den; /* denominator of Am expression */ |
451 | ) | 449 | float error; /* accumulated error between original and synthesised */ |
452 | { | 450 | float Wo; |
453 | int l,al,bl,m; /* loop variables */ | 451 | float sig, snr; |
454 | COMP Am; /* amplitude sample for this band */ | 452 | float elow, ehigh, eratio; |
455 | int offset; /* centers Hw[] about current harmonic */ | 453 | float sixty; |
456 | float den; /* denominator of Am expression */ | 454 | COMP Ew; |
457 | float error; /* accumulated error between original and synthesised */ | 455 | Ew.real = 0; |
458 | float Wo; | 456 | Ew.imag = 0; |
459 | float sig, snr; | 457 | |
460 | float elow, ehigh, eratio; | 458 | int l_1000hz = model->L * 1000.0 / (c2const->Fs / 2); |
461 | float sixty; | 459 | sig = 1E-4; |
462 | COMP Ew; | 460 | for (l = 1; l <= l_1000hz; l++) { |
463 | Ew.real = 0; | 461 | sig += model->A[l] * model->A[l]; |
464 | Ew.imag = 0; | 462 | } |
465 | |||
466 | int l_1000hz = model->L*1000.0/(c2const->Fs/2); | ||
467 | sig = 1E-4; | ||
468 | for(l=1; l<=l_1000hz; l++) { | ||
469 | sig += model->A[l]*model->A[l]; | ||
470 | } | ||
471 | 463 | ||
472 | Wo = model->Wo; | 464 | Wo = model->Wo; |
473 | error = 1E-4; | 465 | error = 1E-4; |
474 | 466 | ||
475 | /* Just test across the harmonics in the first 1000 Hz */ | 467 | /* Just test across the harmonics in the first 1000 Hz */ |
476 | 468 | ||
477 | for(l=1; l<=l_1000hz; l++) { | 469 | for (l = 1; l <= l_1000hz; l++) { |
478 | Am.real = 0.0; | 470 | Am.real = 0.0; |
479 | Am.imag = 0.0; | 471 | Am.imag = 0.0; |
480 | den = 0.0; | 472 | den = 0.0; |
481 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | 473 | al = ceilf((l - 0.5) * Wo * FFT_ENC / TWO_PI); |
482 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | 474 | bl = ceilf((l + 0.5) * Wo * FFT_ENC / TWO_PI); |
483 | 475 | ||
484 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | 476 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ |
485 | 477 | ||
486 | offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | 478 | offset = FFT_ENC / 2 - l * Wo * FFT_ENC / TWO_PI + 0.5; |
487 | for(m=al; m<bl; m++) { | 479 | for (m = al; m < bl; m++) { |
488 | Am.real += Sw[m].real*W[offset+m]; | 480 | Am.real += Sw[m].real * W[offset + m]; |
489 | Am.imag += Sw[m].imag*W[offset+m]; | 481 | Am.imag += Sw[m].imag * W[offset + m]; |
490 | den += W[offset+m]*W[offset+m]; | 482 | den += W[offset + m] * W[offset + m]; |
491 | } | 483 | } |
492 | 484 | ||
493 | Am.real = Am.real/den; | 485 | Am.real = Am.real / den; |
494 | Am.imag = Am.imag/den; | 486 | Am.imag = Am.imag / den; |
495 | 487 | ||
496 | /* Determine error between estimated harmonic and original */ | 488 | /* Determine error between estimated harmonic and original */ |
497 | 489 | ||
498 | for(m=al; m<bl; m++) { | 490 | for (m = al; m < bl; m++) { |
499 | Ew.real = Sw[m].real - Am.real*W[offset+m]; | 491 | Ew.real = Sw[m].real - Am.real * W[offset + m]; |
500 | Ew.imag = Sw[m].imag - Am.imag*W[offset+m]; | 492 | Ew.imag = Sw[m].imag - Am.imag * W[offset + m]; |
501 | error += Ew.real*Ew.real; | 493 | error += Ew.real * Ew.real; |
502 | error += Ew.imag*Ew.imag; | 494 | error += Ew.imag * Ew.imag; |
503 | } | ||
504 | } | 495 | } |
496 | } | ||
505 | 497 | ||
506 | snr = 10.0*log10f(sig/error); | 498 | snr = 10.0 * log10f(sig / error); |
507 | if (snr > V_THRESH) | 499 | if (snr > V_THRESH) |
508 | model->voiced = 1; | 500 | model->voiced = 1; |
509 | else | 501 | else |
510 | model->voiced = 0; | 502 | model->voiced = 0; |
511 | 503 | ||
512 | /* post processing, helps clean up some voicing errors ------------------*/ | 504 | /* post processing, helps clean up some voicing errors ------------------*/ |
513 | |||
514 | /* | ||
515 | Determine the ratio of low freqency to high frequency energy, | ||
516 | voiced speech tends to be dominated by low frequency energy, | ||
517 | unvoiced by high frequency. This measure can be used to | ||
518 | determine if we have made any gross errors. | ||
519 | */ | ||
520 | |||
521 | int l_2000hz = model->L*2000.0/(c2const->Fs/2); | ||
522 | int l_4000hz = model->L*4000.0/(c2const->Fs/2); | ||
523 | elow = ehigh = 1E-4; | ||
524 | for(l=1; l<=l_2000hz; l++) { | ||
525 | elow += model->A[l]*model->A[l]; | ||
526 | } | ||
527 | for(l=l_2000hz; l<=l_4000hz; l++) { | ||
528 | ehigh += model->A[l]*model->A[l]; | ||
529 | } | ||
530 | eratio = 10.0*log10f(elow/ehigh); | ||
531 | 505 | ||
532 | /* Look for Type 1 errors, strongly V speech that has been | 506 | /* |
533 | accidentally declared UV */ | 507 | Determine the ratio of low frequency to high frequency energy, |
508 | voiced speech tends to be dominated by low frequency energy, | ||
509 | unvoiced by high frequency. This measure can be used to | ||
510 | determine if we have made any gross errors. | ||
511 | */ | ||
512 | |||
513 | int l_2000hz = model->L * 2000.0 / (c2const->Fs / 2); | ||
514 | int l_4000hz = model->L * 4000.0 / (c2const->Fs / 2); | ||
515 | elow = ehigh = 1E-4; | ||
516 | for (l = 1; l <= l_2000hz; l++) { | ||
517 | elow += model->A[l] * model->A[l]; | ||
518 | } | ||
519 | for (l = l_2000hz; l <= l_4000hz; l++) { | ||
520 | ehigh += model->A[l] * model->A[l]; | ||
521 | } | ||
522 | eratio = 10.0 * log10f(elow / ehigh); | ||
534 | 523 | ||
535 | if (model->voiced == 0) | 524 | /* Look for Type 1 errors, strongly V speech that has been |
536 | if (eratio > 10.0) | 525 | accidentally declared UV */ |
537 | model->voiced = 1; | ||
538 | 526 | ||
539 | /* Look for Type 2 errors, strongly UV speech that has been | 527 | if (model->voiced == 0) |
540 | accidentally declared V */ | 528 | if (eratio > 10.0) model->voiced = 1; |
541 | 529 | ||
542 | if (model->voiced == 1) { | 530 | /* Look for Type 2 errors, strongly UV speech that has been |
543 | if (eratio < -10.0) | 531 | accidentally declared V */ |
544 | model->voiced = 0; | ||
545 | 532 | ||
546 | /* A common source of Type 2 errors is the pitch estimator | 533 | if (model->voiced == 1) { |
547 | gives a low (50Hz) estimate for UV speech, which gives a | 534 | if (eratio < -10.0) model->voiced = 0; |
548 | good match with noise due to the close harmoonic spacing. | ||
549 | These errors are much more common than people with 50Hz3 | ||
550 | pitch, so we have just a small eratio threshold. */ | ||
551 | 535 | ||
552 | sixty = 60.0*TWO_PI/c2const->Fs; | 536 | /* A common source of Type 2 errors is the pitch estimator |
553 | if ((eratio < -4.0) && (model->Wo <= sixty)) | 537 | gives a low (50Hz) estimate for UV speech, which gives a |
554 | model->voiced = 0; | 538 | good match with noise due to the close harmoonic spacing. |
555 | } | 539 | These errors are much more common than people with 50Hz3 |
556 | //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); | 540 | pitch, so we have just a small eratio threshold. */ |
557 | 541 | ||
558 | return snr; | 542 | sixty = 60.0 * TWO_PI / c2const->Fs; |
543 | if ((eratio < -4.0) && (model->Wo <= sixty)) model->voiced = 0; | ||
544 | } | ||
545 | // printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); | ||
546 | |||
547 | return snr; | ||
559 | } | 548 | } |
560 | 549 | ||
561 | /*---------------------------------------------------------------------------*\ | 550 | /*---------------------------------------------------------------------------*\ |
@@ -564,32 +553,29 @@ float est_voicing_mbe( | |||
564 | AUTHOR......: David Rowe | 553 | AUTHOR......: David Rowe |
565 | DATE CREATED: 11/5/94 | 554 | DATE CREATED: 11/5/94 |
566 | 555 | ||
567 | Init function that generates the trapezoidal (Parzen) sythesis window. | 556 | Init function that generates the trapezoidal (Parzen) synthesis window. |
568 | 557 | ||
569 | \*---------------------------------------------------------------------------*/ | 558 | \*---------------------------------------------------------------------------*/ |
570 | 559 | ||
571 | void make_synthesis_window(C2CONST *c2const, float Pn[]) | 560 | void make_synthesis_window(C2CONST *c2const, float Pn[]) { |
572 | { | 561 | int i; |
573 | int i; | ||
574 | float win; | 562 | float win; |
575 | int n_samp = c2const->n_samp; | 563 | int n_samp = c2const->n_samp; |
576 | int tw = c2const->tw; | 564 | int tw = c2const->tw; |
577 | 565 | ||
578 | /* Generate Parzen window in time domain */ | 566 | /* Generate Parzen window in time domain */ |
579 | 567 | ||
580 | win = 0.0; | 568 | win = 0.0; |
581 | for(i=0; i<n_samp/2-tw; i++) | 569 | for (i = 0; i < n_samp / 2 - tw; i++) Pn[i] = 0.0; |
582 | Pn[i] = 0.0; | ||
583 | win = 0.0; | 570 | win = 0.0; |
584 | for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ ) | 571 | for (i = n_samp / 2 - tw; i < n_samp / 2 + tw; win += 1.0 / (2 * tw), i++) |
585 | Pn[i] = win; | 572 | Pn[i] = win; |
586 | for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++) | 573 | for (i = n_samp / 2 + tw; i < 3 * n_samp / 2 - tw; i++) Pn[i] = 1.0; |
587 | Pn[i] = 1.0; | ||
588 | win = 1.0; | 574 | win = 1.0; |
589 | for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++) | 575 | for (i = 3 * n_samp / 2 - tw; i < 3 * n_samp / 2 + tw; |
576 | win -= 1.0 / (2 * tw), i++) | ||
590 | Pn[i] = win; | 577 | Pn[i] = win; |
591 | for(i=3*n_samp/2+tw; i<2*n_samp; i++) | 578 | for (i = 3 * n_samp / 2 + tw; i < 2 * n_samp; i++) Pn[i] = 0.0; |
592 | Pn[i] = 0.0; | ||
593 | } | 579 | } |
594 | 580 | ||
595 | /*---------------------------------------------------------------------------*\ | 581 | /*---------------------------------------------------------------------------*\ |
@@ -600,77 +586,72 @@ void make_synthesis_window(C2CONST *c2const, float Pn[]) | |||
600 | 586 | ||
601 | Synthesise a speech signal in the frequency domain from the | 587 | Synthesise a speech signal in the frequency domain from the |
602 | sinusodal model parameters. Uses overlap-add with a trapezoidal | 588 | sinusodal model parameters. Uses overlap-add with a trapezoidal |
603 | window to smoothly interpolate betwen frames. | 589 | window to smoothly interpolate between frames. |
604 | 590 | ||
605 | \*---------------------------------------------------------------------------*/ | 591 | \*---------------------------------------------------------------------------*/ |
606 | 592 | ||
607 | void synthesise( | 593 | void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg, |
608 | int n_samp, | 594 | float Sn_[], /* time domain synthesised signal */ |
609 | codec2_fftr_cfg fftr_inv_cfg, | 595 | MODEL *model, /* ptr to model parameters for this frame */ |
610 | float Sn_[], /* time domain synthesised signal */ | 596 | float Pn[], /* time domain Parzen window */ |
611 | MODEL *model, /* ptr to model parameters for this frame */ | 597 | int shift /* flag used to handle transition frames */ |
612 | float Pn[], /* time domain Parzen window */ | 598 | ) { |
613 | int shift /* flag used to handle transition frames */ | 599 | int i, l, j, b; /* loop variables */ |
614 | ) | 600 | COMP Sw_[FFT_DEC / 2 + 1]; /* DFT of synthesised signal */ |
615 | { | 601 | float sw_[FFT_DEC]; /* synthesised signal */ |
616 | int i,l,j,b; /* loop variables */ | 602 | |
617 | COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */ | 603 | if (shift) { |
618 | float sw_[FFT_DEC]; /* synthesised signal */ | 604 | /* Update memories */ |
619 | 605 | for (i = 0; i < n_samp - 1; i++) { | |
620 | if (shift) { | 606 | Sn_[i] = Sn_[i + n_samp]; |
621 | /* Update memories */ | ||
622 | for(i=0; i<n_samp-1; i++) { | ||
623 | Sn_[i] = Sn_[i+n_samp]; | ||
624 | } | ||
625 | Sn_[n_samp-1] = 0.0; | ||
626 | } | 607 | } |
608 | Sn_[n_samp - 1] = 0.0; | ||
609 | } | ||
627 | 610 | ||
628 | for(i=0; i<FFT_DEC/2+1; i++) { | 611 | for (i = 0; i < FFT_DEC / 2 + 1; i++) { |
629 | Sw_[i].real = 0.0; | 612 | Sw_[i].real = 0.0; |
630 | Sw_[i].imag = 0.0; | 613 | Sw_[i].imag = 0.0; |
631 | } | 614 | } |
632 | 615 | ||
633 | /* Now set up frequency domain synthesised speech */ | 616 | /* Now set up frequency domain synthesised speech */ |
634 | 617 | ||
635 | for(l=1; l<=model->L; l++) { | 618 | for (l = 1; l <= model->L; l++) { |
636 | b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); | 619 | b = (int)(l * model->Wo * FFT_DEC / TWO_PI + 0.5); |
637 | if (b > ((FFT_DEC/2)-1)) { | 620 | if (b > ((FFT_DEC / 2) - 1)) { |
638 | b = (FFT_DEC/2)-1; | 621 | b = (FFT_DEC / 2) - 1; |
639 | } | ||
640 | Sw_[b].real = model->A[l]*cosf(model->phi[l]); | ||
641 | Sw_[b].imag = model->A[l]*sinf(model->phi[l]); | ||
642 | } | 622 | } |
623 | Sw_[b].real = model->A[l] * cosf(model->phi[l]); | ||
624 | Sw_[b].imag = model->A[l] * sinf(model->phi[l]); | ||
625 | } | ||
643 | 626 | ||
644 | /* Perform inverse DFT */ | 627 | /* Perform inverse DFT */ |
645 | 628 | ||
646 | codec2_fftri(fftr_inv_cfg, Sw_,sw_); | 629 | codec2_fftri(fftr_inv_cfg, Sw_, sw_); |
647 | 630 | ||
648 | /* Overlap add to previous samples */ | 631 | /* Overlap add to previous samples */ |
649 | 632 | ||
650 | #ifdef USE_KISS_FFT | 633 | #ifdef USE_KISS_FFT |
651 | #define FFTI_FACTOR ((float)1.0) | 634 | #define FFTI_FACTOR ((float)1.0) |
652 | #else | 635 | #else |
653 | #define FFTI_FACTOR ((float32_t)FFT_DEC) | 636 | #define FFTI_FACTOR ((float32_t)FFT_DEC) |
654 | #endif | 637 | #endif |
655 | 638 | ||
656 | for(i=0; i<n_samp-1; i++) { | 639 | for (i = 0; i < n_samp - 1; i++) { |
657 | Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR; | 640 | Sn_[i] += sw_[FFT_DEC - n_samp + 1 + i] * Pn[i] * FFTI_FACTOR; |
658 | } | 641 | } |
659 | 642 | ||
660 | if (shift) | 643 | if (shift) |
661 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | 644 | for (i = n_samp - 1, j = 0; i < 2 * n_samp; i++, j++) |
662 | Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR; | 645 | Sn_[i] = sw_[j] * Pn[i] * FFTI_FACTOR; |
663 | else | 646 | else |
664 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | 647 | for (i = n_samp - 1, j = 0; i < 2 * n_samp; i++, j++) |
665 | Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR; | 648 | Sn_[i] += sw_[j] * Pn[i] * FFTI_FACTOR; |
666 | } | 649 | } |
667 | 650 | ||
668 | |||
669 | /* todo: this should probably be in some states rather than a static */ | 651 | /* todo: this should probably be in some states rather than a static */ |
670 | static unsigned long next = 1; | 652 | static unsigned long next = 1; |
671 | 653 | ||
672 | int codec2_rand(void) { | 654 | int codec2_rand(void) { |
673 | next = next * 1103515245 + 12345; | 655 | next = next * 1103515245 + 12345; |
674 | return((unsigned)(next/65536) % 32768); | 656 | return ((unsigned)(next / 65536) % 32768); |
675 | } | 657 | } |
676 | |||