diff options
Diffstat (limited to 'sine.c')
| -rw-r--r-- | sine.c | 689 |
1 files changed, 689 insertions, 0 deletions
| @@ -0,0 +1,689 @@ | |||
| 1 | /*---------------------------------------------------------------------------*\ | ||
| 2 | |||
| 3 | FILE........: sine.c | ||
| 4 | AUTHOR......: David Rowe | ||
| 5 | DATE CREATED: 19/8/2010 | ||
| 6 | |||
| 7 | Sinusoidal analysis and synthesis functions. | ||
| 8 | |||
| 9 | \*---------------------------------------------------------------------------*/ | ||
| 10 | |||
| 11 | /* | ||
| 12 | Copyright (C) 1990-2010 David Rowe | ||
| 13 | |||
| 14 | All rights reserved. | ||
| 15 | |||
| 16 | This program is free software; you can redistribute it and/or modify | ||
| 17 | it under the terms of the GNU Lesser General Public License version 2.1, as | ||
| 18 | published by the Free Software Foundation. This program is | ||
| 19 | distributed in the hope that it will be useful, but WITHOUT ANY | ||
| 20 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 21 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | ||
| 22 | License for more details. | ||
| 23 | |||
| 24 | You should have received a copy of the GNU Lesser General Public License | ||
| 25 | along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 26 | */ | ||
| 27 | |||
| 28 | /*---------------------------------------------------------------------------*\ | ||
| 29 | |||
| 30 | INCLUDES | ||
| 31 | |||
| 32 | \*---------------------------------------------------------------------------*/ | ||
| 33 | |||
| 34 | #include <stdlib.h> | ||
| 35 | #include <stdio.h> | ||
| 36 | #include <math.h> | ||
| 37 | |||
| 38 | #include "defines.h" | ||
| 39 | #include "sine.h" | ||
| 40 | #include "kiss_fft.h" | ||
| 41 | |||
| 42 | #define HPF_BETA 0.125 | ||
| 43 | |||
| 44 | /*---------------------------------------------------------------------------*\ | ||
| 45 | |||
| 46 | HEADERS | ||
| 47 | |||
| 48 | \*---------------------------------------------------------------------------*/ | ||
| 49 | |||
| 50 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, | ||
| 51 | float pstep); | ||
| 52 | |||
| 53 | /*---------------------------------------------------------------------------*\ | ||
| 54 | |||
| 55 | FUNCTIONS | ||
| 56 | |||
| 57 | \*---------------------------------------------------------------------------*/ | ||
| 58 | |||
| 59 | C2CONST c2const_create(int Fs, float framelength_s) { | ||
| 60 | C2CONST c2const; | ||
| 61 | |||
| 62 | assert((Fs == 8000) || (Fs = 16000)); | ||
| 63 | c2const.Fs = Fs; | ||
| 64 | c2const.n_samp = round(Fs*framelength_s); | ||
| 65 | c2const.max_amp = floor(Fs*P_MIN_S/2); | ||
| 66 | c2const.p_min = floor(Fs*P_MIN_S); | ||
| 67 | c2const.p_max = floor(Fs*P_MAX_S); | ||
| 68 | c2const.m_pitch = floor(Fs*M_PITCH_S); | ||
| 69 | c2const.Wo_min = TWO_PI/c2const.p_max; | ||
| 70 | c2const.Wo_max = TWO_PI/c2const.p_min; | ||
| 71 | |||
| 72 | if (Fs == 8000) { | ||
| 73 | c2const.nw = 279; | ||
| 74 | } else { | ||
| 75 | c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */ | ||
| 76 | } | ||
| 77 | |||
| 78 | c2const.tw = Fs*TW_S; | ||
| 79 | |||
| 80 | /* | ||
| 81 | fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); | ||
| 82 | fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); | ||
| 83 | fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); | ||
| 84 | fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); | ||
| 85 | */ | ||
| 86 | |||
| 87 | return c2const; | ||
| 88 | } | ||
| 89 | |||
| 90 | /*---------------------------------------------------------------------------*\ | ||
| 91 | |||
| 92 | FUNCTION....: make_analysis_window | ||
| 93 | AUTHOR......: David Rowe | ||
| 94 | DATE CREATED: 11/5/94 | ||
| 95 | |||
| 96 | Init function that generates the time domain analysis window and it's DFT. | ||
| 97 | |||
| 98 | \*---------------------------------------------------------------------------*/ | ||
| 99 | |||
| 100 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[]) | ||
| 101 | { | ||
| 102 | float m; | ||
| 103 | COMP wshift[FFT_ENC]; | ||
| 104 | COMP temp; | ||
| 105 | int i,j; | ||
| 106 | int m_pitch = c2const->m_pitch; | ||
| 107 | int nw = c2const->nw; | ||
| 108 | |||
| 109 | /* | ||
| 110 | Generate Hamming window centered on M-sample pitch analysis window | ||
| 111 | |||
| 112 | 0 M/2 M-1 | ||
| 113 | |-------------|-------------| | ||
| 114 | |-------|-------| | ||
| 115 | nw samples | ||
| 116 | |||
| 117 | All our analysis/synthsis is centred on the M/2 sample. | ||
| 118 | */ | ||
| 119 | |||
| 120 | m = 0.0; | ||
| 121 | for(i=0; i<m_pitch/2-nw/2; i++) | ||
| 122 | w[i] = 0.0; | ||
| 123 | for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) { | ||
| 124 | w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1)); | ||
| 125 | m += w[i]*w[i]; | ||
| 126 | } | ||
| 127 | for(i=m_pitch/2+nw/2; i<m_pitch; i++) | ||
| 128 | w[i] = 0.0; | ||
| 129 | |||
| 130 | /* Normalise - makes freq domain amplitude estimation straight | ||
| 131 | forward */ | ||
| 132 | |||
| 133 | m = 1.0/sqrtf(m*FFT_ENC); | ||
| 134 | for(i=0; i<m_pitch; i++) { | ||
| 135 | w[i] *= m; | ||
| 136 | } | ||
| 137 | |||
| 138 | /* | ||
| 139 | Generate DFT of analysis window, used for later processing. Note | ||
| 140 | we modulo FFT_ENC shift the time domain window w[], this makes the | ||
| 141 | imaginary part of the DFT W[] equal to zero as the shifted w[] is | ||
| 142 | even about the n=0 time axis if nw is odd. Having the imag part | ||
| 143 | of the DFT W[] makes computation easier. | ||
| 144 | |||
| 145 | 0 FFT_ENC-1 | ||
| 146 | |-------------------------| | ||
| 147 | |||
| 148 | ----\ /---- | ||
| 149 | \ / | ||
| 150 | \ / <- shifted version of window w[n] | ||
| 151 | \ / | ||
| 152 | \ / | ||
| 153 | ------- | ||
| 154 | |||
| 155 | |---------| |---------| | ||
| 156 | nw/2 nw/2 | ||
| 157 | */ | ||
| 158 | |||
| 159 | for(i=0; i<FFT_ENC; i++) { | ||
| 160 | wshift[i].real = 0.0; | ||
| 161 | wshift[i].imag = 0.0; | ||
| 162 | } | ||
| 163 | for(i=0; i<nw/2; i++) | ||
| 164 | wshift[i].real = w[i+m_pitch/2]; | ||
| 165 | for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) | ||
| 166 | wshift[i].real = w[j]; | ||
| 167 | |||
| 168 | codec2_fft(fft_fwd_cfg, wshift, W); | ||
| 169 | |||
| 170 | /* | ||
| 171 | Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later | ||
| 172 | analysis convenient. | ||
| 173 | |||
| 174 | Before: | ||
| 175 | |||
| 176 | |||
| 177 | 0 FFT_ENC-1 | ||
| 178 | |----------|---------| | ||
| 179 | __ _ | ||
| 180 | \ / | ||
| 181 | \_______________/ | ||
| 182 | |||
| 183 | After: | ||
| 184 | |||
| 185 | 0 FFT_ENC-1 | ||
| 186 | |----------|---------| | ||
| 187 | ___ | ||
| 188 | / \ | ||
| 189 | ________/ \_______ | ||
| 190 | |||
| 191 | */ | ||
| 192 | |||
| 193 | |||
| 194 | for(i=0; i<FFT_ENC/2; i++) { | ||
| 195 | temp.real = W[i].real; | ||
| 196 | temp.imag = W[i].imag; | ||
| 197 | W[i].real = W[i+FFT_ENC/2].real; | ||
| 198 | W[i].imag = W[i+FFT_ENC/2].imag; | ||
| 199 | W[i+FFT_ENC/2].real = temp.real; | ||
| 200 | W[i+FFT_ENC/2].imag = temp.imag; | ||
| 201 | } | ||
| 202 | |||
| 203 | } | ||
| 204 | |||
| 205 | /*---------------------------------------------------------------------------*\ | ||
| 206 | |||
| 207 | FUNCTION....: hpf | ||
| 208 | AUTHOR......: David Rowe | ||
| 209 | DATE CREATED: 16 Nov 2010 | ||
| 210 | |||
| 211 | High pass filter with a -3dB point of about 160Hz. | ||
| 212 | |||
| 213 | y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1) | ||
| 214 | |||
| 215 | \*---------------------------------------------------------------------------*/ | ||
| 216 | |||
| 217 | float hpf(float x, float states[]) | ||
| 218 | { | ||
| 219 | states[0] = -HPF_BETA*states[0] + x - states[1]; | ||
| 220 | states[1] = x; | ||
| 221 | |||
| 222 | return states[0]; | ||
| 223 | } | ||
| 224 | |||
| 225 | /*---------------------------------------------------------------------------*\ | ||
| 226 | |||
| 227 | FUNCTION....: dft_speech | ||
| 228 | AUTHOR......: David Rowe | ||
| 229 | DATE CREATED: 27/5/94 | ||
| 230 | |||
| 231 | Finds the DFT of the current speech input speech frame. | ||
| 232 | |||
| 233 | \*---------------------------------------------------------------------------*/ | ||
| 234 | |||
| 235 | // TODO: we can either go for a faster FFT using fftr and some stack usage | ||
| 236 | // or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace | ||
| 237 | #if 1 | ||
| 238 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]) | ||
| 239 | { | ||
| 240 | int i; | ||
| 241 | int m_pitch = c2const->m_pitch; | ||
| 242 | int nw = c2const->nw; | ||
| 243 | |||
| 244 | for(i=0; i<FFT_ENC; i++) { | ||
| 245 | Sw[i].real = 0.0; | ||
| 246 | Sw[i].imag = 0.0; | ||
| 247 | } | ||
| 248 | |||
| 249 | /* Centre analysis window on time axis, we need to arrange input | ||
| 250 | to FFT this way to make FFT phases correct */ | ||
| 251 | |||
| 252 | /* move 2nd half to start of FFT input vector */ | ||
| 253 | |||
| 254 | for(i=0; i<nw/2; i++) | ||
| 255 | Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | ||
| 256 | |||
| 257 | /* move 1st half to end of FFT input vector */ | ||
| 258 | |||
| 259 | for(i=0; i<nw/2; i++) | ||
| 260 | Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | ||
| 261 | |||
| 262 | codec2_fft_inplace(fft_fwd_cfg, Sw); | ||
| 263 | } | ||
| 264 | #else | ||
| 265 | void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | ||
| 266 | { | ||
| 267 | int i; | ||
| 268 | float sw[FFT_ENC]; | ||
| 269 | |||
| 270 | for(i=0; i<FFT_ENC; i++) { | ||
| 271 | sw[i] = 0.0; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* Centre analysis window on time axis, we need to arrange input | ||
| 275 | to FFT this way to make FFT phases correct */ | ||
| 276 | |||
| 277 | /* move 2nd half to start of FFT input vector */ | ||
| 278 | |||
| 279 | for(i=0; i<nw/2; i++) | ||
| 280 | sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | ||
| 281 | |||
| 282 | /* move 1st half to end of FFT input vector */ | ||
| 283 | |||
| 284 | for(i=0; i<nw/2; i++) | ||
| 285 | sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | ||
| 286 | |||
| 287 | codec2_fftr(fftr_fwd_cfg, sw, Sw); | ||
| 288 | } | ||
| 289 | #endif | ||
| 290 | |||
| 291 | |||
| 292 | /*---------------------------------------------------------------------------*\ | ||
| 293 | |||
| 294 | FUNCTION....: two_stage_pitch_refinement | ||
| 295 | AUTHOR......: David Rowe | ||
| 296 | DATE CREATED: 27/5/94 | ||
| 297 | |||
| 298 | Refines the current pitch estimate using the harmonic sum pitch | ||
| 299 | estimation technique. | ||
| 300 | |||
| 301 | \*---------------------------------------------------------------------------*/ | ||
| 302 | |||
| 303 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) | ||
| 304 | { | ||
| 305 | float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */ | ||
| 306 | |||
| 307 | /* Coarse refinement */ | ||
| 308 | |||
| 309 | pmax = TWO_PI/model->Wo + 5; | ||
| 310 | pmin = TWO_PI/model->Wo - 5; | ||
| 311 | pstep = 1.0; | ||
| 312 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | ||
| 313 | |||
| 314 | /* Fine refinement */ | ||
| 315 | |||
| 316 | pmax = TWO_PI/model->Wo + 1; | ||
| 317 | pmin = TWO_PI/model->Wo - 1; | ||
| 318 | pstep = 0.25; | ||
| 319 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | ||
| 320 | |||
| 321 | /* Limit range */ | ||
| 322 | |||
| 323 | if (model->Wo < TWO_PI/c2const->p_max) | ||
| 324 | model->Wo = TWO_PI/c2const->p_max; | ||
| 325 | if (model->Wo > TWO_PI/c2const->p_min) | ||
| 326 | model->Wo = TWO_PI/c2const->p_min; | ||
| 327 | |||
| 328 | model->L = floorf(PI/model->Wo); | ||
| 329 | |||
| 330 | /* trap occasional round off issues with floorf() */ | ||
| 331 | if (model->Wo*model->L >= 0.95*PI) { | ||
| 332 | model->L--; | ||
| 333 | } | ||
| 334 | assert(model->Wo*model->L < PI); | ||
| 335 | } | ||
| 336 | |||
| 337 | /*---------------------------------------------------------------------------*\ | ||
| 338 | |||
| 339 | FUNCTION....: hs_pitch_refinement | ||
| 340 | AUTHOR......: David Rowe | ||
| 341 | DATE CREATED: 27/5/94 | ||
| 342 | |||
| 343 | Harmonic sum pitch refinement function. | ||
| 344 | |||
| 345 | pmin pitch search range minimum | ||
| 346 | pmax pitch search range maximum | ||
| 347 | step pitch search step size | ||
| 348 | model current pitch estimate in model.Wo | ||
| 349 | |||
| 350 | model refined pitch estimate in model.Wo | ||
| 351 | |||
| 352 | \*---------------------------------------------------------------------------*/ | ||
| 353 | |||
| 354 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) | ||
| 355 | { | ||
| 356 | int m; /* loop variable */ | ||
| 357 | int b; /* bin for current harmonic centre */ | ||
| 358 | float E; /* energy for current pitch*/ | ||
| 359 | float Wo; /* current "test" fundamental freq. */ | ||
| 360 | float Wom; /* Wo that maximises E */ | ||
| 361 | float Em; /* mamimum energy */ | ||
| 362 | float r, one_on_r; /* number of rads/bin */ | ||
| 363 | float p; /* current pitch */ | ||
| 364 | |||
| 365 | /* Initialisation */ | ||
| 366 | |||
| 367 | model->L = PI/model->Wo; /* use initial pitch est. for L */ | ||
| 368 | Wom = model->Wo; | ||
| 369 | Em = 0.0; | ||
| 370 | r = TWO_PI/FFT_ENC; | ||
| 371 | one_on_r = 1.0/r; | ||
| 372 | |||
| 373 | /* Determine harmonic sum for a range of Wo values */ | ||
| 374 | |||
| 375 | for(p=pmin; p<=pmax; p+=pstep) { | ||
| 376 | E = 0.0; | ||
| 377 | Wo = TWO_PI/p; | ||
| 378 | |||
| 379 | /* Sum harmonic magnitudes */ | ||
| 380 | for(m=1; m<=model->L; m++) { | ||
| 381 | b = (int)(m*Wo*one_on_r + 0.5); | ||
| 382 | E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; | ||
| 383 | } | ||
| 384 | /* Compare to see if this is a maximum */ | ||
| 385 | |||
| 386 | if (E > Em) { | ||
| 387 | Em = E; | ||
| 388 | Wom = Wo; | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | model->Wo = Wom; | ||
| 393 | } | ||
| 394 | |||
| 395 | /*---------------------------------------------------------------------------*\ | ||
| 396 | |||
| 397 | FUNCTION....: estimate_amplitudes | ||
| 398 | AUTHOR......: David Rowe | ||
| 399 | DATE CREATED: 27/5/94 | ||
| 400 | |||
| 401 | Estimates the complex amplitudes of the harmonics. | ||
| 402 | |||
| 403 | \*---------------------------------------------------------------------------*/ | ||
| 404 | |||
| 405 | void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase) | ||
| 406 | { | ||
| 407 | int i,m; /* loop variables */ | ||
| 408 | int am,bm; /* bounds of current harmonic */ | ||
| 409 | int b; /* DFT bin of centre of current harmonic */ | ||
| 410 | float den; /* denominator of amplitude expression */ | ||
| 411 | float r, one_on_r; /* number of rads/bin */ | ||
| 412 | int offset; | ||
| 413 | COMP Am; | ||
| 414 | |||
| 415 | r = TWO_PI/FFT_ENC; | ||
| 416 | one_on_r = 1.0/r; | ||
| 417 | |||
| 418 | for(m=1; m<=model->L; m++) { | ||
| 419 | den = 0.0; | ||
| 420 | am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); | ||
| 421 | bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); | ||
| 422 | b = (int)(m*model->Wo/r + 0.5); | ||
| 423 | |||
| 424 | /* Estimate ampltude of harmonic */ | ||
| 425 | |||
| 426 | den = 0.0; | ||
| 427 | Am.real = Am.imag = 0.0; | ||
| 428 | offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5); | ||
| 429 | for(i=am; i<bm; i++) { | ||
| 430 | den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; | ||
| 431 | Am.real += Sw[i].real*W[i + offset].real; | ||
| 432 | Am.imag += Sw[i].imag*W[i + offset].real; | ||
| 433 | } | ||
| 434 | |||
| 435 | model->A[m] = sqrtf(den); | ||
| 436 | |||
| 437 | if (est_phase) { | ||
| 438 | |||
| 439 | /* Estimate phase of harmonic, this is expensive in CPU for | ||
| 440 | embedded devicesso we make it an option */ | ||
| 441 | |||
| 442 | model->phi[m] = atan2f(Sw[b].imag,Sw[b].real); | ||
| 443 | } | ||
| 444 | } | ||
| 445 | } | ||
| 446 | |||
| 447 | /*---------------------------------------------------------------------------*\ | ||
| 448 | |||
| 449 | est_voicing_mbe() | ||
| 450 | |||
| 451 | Returns the error of the MBE cost function for a fiven F0. | ||
| 452 | |||
| 453 | Note: I think a lot of the operations below can be simplified as | ||
| 454 | W[].imag = 0 and has been normalised such that den always equals 1. | ||
| 455 | |||
| 456 | \*---------------------------------------------------------------------------*/ | ||
| 457 | |||
| 458 | float est_voicing_mbe( | ||
| 459 | C2CONST *c2const, | ||
| 460 | MODEL *model, | ||
| 461 | COMP Sw[], | ||
| 462 | COMP W[] | ||
| 463 | ) | ||
| 464 | { | ||
| 465 | int l,al,bl,m; /* loop variables */ | ||
| 466 | COMP Am; /* amplitude sample for this band */ | ||
| 467 | int offset; /* centers Hw[] about current harmonic */ | ||
| 468 | float den; /* denominator of Am expression */ | ||
| 469 | float error; /* accumulated error between original and synthesised */ | ||
| 470 | float Wo; | ||
| 471 | float sig, snr; | ||
| 472 | float elow, ehigh, eratio; | ||
| 473 | float sixty; | ||
| 474 | COMP Ew; | ||
| 475 | Ew.real = 0; | ||
| 476 | Ew.imag = 0; | ||
| 477 | |||
| 478 | int l_1000hz = model->L*1000.0/(c2const->Fs/2); | ||
| 479 | sig = 1E-4; | ||
| 480 | for(l=1; l<=l_1000hz; l++) { | ||
| 481 | sig += model->A[l]*model->A[l]; | ||
| 482 | } | ||
| 483 | |||
| 484 | Wo = model->Wo; | ||
| 485 | error = 1E-4; | ||
| 486 | |||
| 487 | /* Just test across the harmonics in the first 1000 Hz */ | ||
| 488 | |||
| 489 | for(l=1; l<=l_1000hz; l++) { | ||
| 490 | Am.real = 0.0; | ||
| 491 | Am.imag = 0.0; | ||
| 492 | den = 0.0; | ||
| 493 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | ||
| 494 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | ||
| 495 | |||
| 496 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | ||
| 497 | |||
| 498 | offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
| 499 | for(m=al; m<bl; m++) { | ||
| 500 | Am.real += Sw[m].real*W[offset+m].real; | ||
| 501 | Am.imag += Sw[m].imag*W[offset+m].real; | ||
| 502 | den += W[offset+m].real*W[offset+m].real; | ||
| 503 | } | ||
| 504 | |||
| 505 | Am.real = Am.real/den; | ||
| 506 | Am.imag = Am.imag/den; | ||
| 507 | |||
| 508 | /* Determine error between estimated harmonic and original */ | ||
| 509 | |||
| 510 | // Redundant! offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
| 511 | for(m=al; m<bl; m++) { | ||
| 512 | Ew.real = Sw[m].real - Am.real*W[offset+m].real; | ||
| 513 | Ew.imag = Sw[m].imag - Am.imag*W[offset+m].real; | ||
| 514 | error += Ew.real*Ew.real; | ||
| 515 | error += Ew.imag*Ew.imag; | ||
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 519 | snr = 10.0*log10f(sig/error); | ||
| 520 | if (snr > V_THRESH) | ||
| 521 | model->voiced = 1; | ||
| 522 | else | ||
| 523 | model->voiced = 0; | ||
| 524 | |||
| 525 | /* post processing, helps clean up some voicing errors ------------------*/ | ||
| 526 | |||
| 527 | /* | ||
| 528 | Determine the ratio of low freqency to high frequency energy, | ||
| 529 | voiced speech tends to be dominated by low frequency energy, | ||
| 530 | unvoiced by high frequency. This measure can be used to | ||
| 531 | determine if we have made any gross errors. | ||
| 532 | */ | ||
| 533 | |||
| 534 | int l_2000hz = model->L*2000.0/(c2const->Fs/2); | ||
| 535 | int l_4000hz = model->L*4000.0/(c2const->Fs/2); | ||
| 536 | elow = ehigh = 1E-4; | ||
| 537 | for(l=1; l<=l_2000hz; l++) { | ||
| 538 | elow += model->A[l]*model->A[l]; | ||
| 539 | } | ||
| 540 | for(l=l_2000hz; l<=l_4000hz; l++) { | ||
| 541 | ehigh += model->A[l]*model->A[l]; | ||
| 542 | } | ||
| 543 | eratio = 10.0*log10f(elow/ehigh); | ||
| 544 | |||
| 545 | /* Look for Type 1 errors, strongly V speech that has been | ||
| 546 | accidentally declared UV */ | ||
| 547 | |||
| 548 | if (model->voiced == 0) | ||
| 549 | if (eratio > 10.0) | ||
| 550 | model->voiced = 1; | ||
| 551 | |||
| 552 | /* Look for Type 2 errors, strongly UV speech that has been | ||
| 553 | accidentally declared V */ | ||
| 554 | |||
| 555 | if (model->voiced == 1) { | ||
| 556 | if (eratio < -10.0) | ||
| 557 | model->voiced = 0; | ||
| 558 | |||
| 559 | /* A common source of Type 2 errors is the pitch estimator | ||
| 560 | gives a low (50Hz) estimate for UV speech, which gives a | ||
| 561 | good match with noise due to the close harmoonic spacing. | ||
| 562 | These errors are much more common than people with 50Hz3 | ||
| 563 | pitch, so we have just a small eratio threshold. */ | ||
| 564 | |||
| 565 | sixty = 60.0*TWO_PI/c2const->Fs; | ||
| 566 | if ((eratio < -4.0) && (model->Wo <= sixty)) | ||
| 567 | model->voiced = 0; | ||
| 568 | } | ||
| 569 | //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); | ||
| 570 | |||
| 571 | return snr; | ||
| 572 | } | ||
| 573 | |||
| 574 | /*---------------------------------------------------------------------------*\ | ||
| 575 | |||
| 576 | FUNCTION....: make_synthesis_window | ||
| 577 | AUTHOR......: David Rowe | ||
| 578 | DATE CREATED: 11/5/94 | ||
| 579 | |||
| 580 | Init function that generates the trapezoidal (Parzen) sythesis window. | ||
| 581 | |||
| 582 | \*---------------------------------------------------------------------------*/ | ||
| 583 | |||
| 584 | void make_synthesis_window(C2CONST *c2const, float Pn[]) | ||
| 585 | { | ||
| 586 | int i; | ||
| 587 | float win; | ||
| 588 | int n_samp = c2const->n_samp; | ||
| 589 | int tw = c2const->tw; | ||
| 590 | |||
| 591 | /* Generate Parzen window in time domain */ | ||
| 592 | |||
| 593 | win = 0.0; | ||
| 594 | for(i=0; i<n_samp/2-tw; i++) | ||
| 595 | Pn[i] = 0.0; | ||
| 596 | win = 0.0; | ||
| 597 | for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ ) | ||
| 598 | Pn[i] = win; | ||
| 599 | for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++) | ||
| 600 | Pn[i] = 1.0; | ||
| 601 | win = 1.0; | ||
| 602 | for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++) | ||
| 603 | Pn[i] = win; | ||
| 604 | for(i=3*n_samp/2+tw; i<2*n_samp; i++) | ||
| 605 | Pn[i] = 0.0; | ||
| 606 | } | ||
| 607 | |||
| 608 | /*---------------------------------------------------------------------------*\ | ||
| 609 | |||
| 610 | FUNCTION....: synthesise | ||
| 611 | AUTHOR......: David Rowe | ||
| 612 | DATE CREATED: 20/2/95 | ||
| 613 | |||
| 614 | Synthesise a speech signal in the frequency domain from the | ||
| 615 | sinusodal model parameters. Uses overlap-add with a trapezoidal | ||
| 616 | window to smoothly interpolate betwen frames. | ||
| 617 | |||
| 618 | \*---------------------------------------------------------------------------*/ | ||
| 619 | |||
| 620 | void synthesise( | ||
| 621 | int n_samp, | ||
| 622 | codec2_fftr_cfg fftr_inv_cfg, | ||
| 623 | float Sn_[], /* time domain synthesised signal */ | ||
| 624 | MODEL *model, /* ptr to model parameters for this frame */ | ||
| 625 | float Pn[], /* time domain Parzen window */ | ||
| 626 | int shift /* flag used to handle transition frames */ | ||
| 627 | ) | ||
| 628 | { | ||
| 629 | int i,l,j,b; /* loop variables */ | ||
| 630 | COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */ | ||
| 631 | float sw_[FFT_DEC]; /* synthesised signal */ | ||
| 632 | |||
| 633 | if (shift) { | ||
| 634 | /* Update memories */ | ||
| 635 | for(i=0; i<n_samp-1; i++) { | ||
| 636 | Sn_[i] = Sn_[i+n_samp]; | ||
| 637 | } | ||
| 638 | Sn_[n_samp-1] = 0.0; | ||
| 639 | } | ||
| 640 | |||
| 641 | for(i=0; i<FFT_DEC/2+1; i++) { | ||
| 642 | Sw_[i].real = 0.0; | ||
| 643 | Sw_[i].imag = 0.0; | ||
| 644 | } | ||
| 645 | |||
| 646 | /* Now set up frequency domain synthesised speech */ | ||
| 647 | |||
| 648 | for(l=1; l<=model->L; l++) { | ||
| 649 | b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); | ||
| 650 | if (b > ((FFT_DEC/2)-1)) { | ||
| 651 | b = (FFT_DEC/2)-1; | ||
| 652 | } | ||
| 653 | Sw_[b].real = model->A[l]*cosf(model->phi[l]); | ||
| 654 | Sw_[b].imag = model->A[l]*sinf(model->phi[l]); | ||
| 655 | } | ||
| 656 | |||
| 657 | /* Perform inverse DFT */ | ||
| 658 | |||
| 659 | codec2_fftri(fftr_inv_cfg, Sw_,sw_); | ||
| 660 | |||
| 661 | /* Overlap add to previous samples */ | ||
| 662 | |||
| 663 | #ifdef USE_KISS_FFT | ||
| 664 | #define FFTI_FACTOR ((float)1.0) | ||
| 665 | #else | ||
| 666 | #define FFTI_FACTOR ((float32_t)FFT_DEC) | ||
| 667 | #endif | ||
| 668 | |||
| 669 | for(i=0; i<n_samp-1; i++) { | ||
| 670 | Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR; | ||
| 671 | } | ||
| 672 | |||
| 673 | if (shift) | ||
| 674 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | ||
| 675 | Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR; | ||
| 676 | else | ||
| 677 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | ||
| 678 | Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR; | ||
| 679 | } | ||
| 680 | |||
| 681 | |||
| 682 | /* todo: this should probably be in some states rather than a static */ | ||
| 683 | static unsigned long next = 1; | ||
| 684 | |||
| 685 | int codec2_rand(void) { | ||
| 686 | next = next * 1103515245 + 12345; | ||
| 687 | return((unsigned)(next/65536) % 32768); | ||
| 688 | } | ||
| 689 | |||
