summaryrefslogtreecommitdiff
path: root/sine.c
diff options
context:
space:
mode:
Diffstat (limited to 'sine.c')
-rw-r--r--sine.c689
1 files changed, 689 insertions, 0 deletions
diff --git a/sine.c b/sine.c
new file mode 100644
index 0000000..4e31f37
--- /dev/null
+++ b/sine.c
@@ -0,0 +1,689 @@
1/*---------------------------------------------------------------------------*\
2
3 FILE........: sine.c
4 AUTHOR......: David Rowe
5 DATE CREATED: 19/8/2010
6
7 Sinusoidal analysis and synthesis functions.
8
9\*---------------------------------------------------------------------------*/
10
11/*
12 Copyright (C) 1990-2010 David Rowe
13
14 All rights reserved.
15
16 This program is free software; you can redistribute it and/or modify
17 it under the terms of the GNU Lesser General Public License version 2.1, as
18 published by the Free Software Foundation. This program is
19 distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
22 License for more details.
23
24 You should have received a copy of the GNU Lesser General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
26*/
27
28/*---------------------------------------------------------------------------*\
29
30 INCLUDES
31
32\*---------------------------------------------------------------------------*/
33
34#include <stdlib.h>
35#include <stdio.h>
36#include <math.h>
37
38#include "defines.h"
39#include "sine.h"
40#include "kiss_fft.h"
41
42#define HPF_BETA 0.125
43
44/*---------------------------------------------------------------------------*\
45
46 HEADERS
47
48\*---------------------------------------------------------------------------*/
49
50void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax,
51 float pstep);
52
53/*---------------------------------------------------------------------------*\
54
55 FUNCTIONS
56
57\*---------------------------------------------------------------------------*/
58
59C2CONST c2const_create(int Fs, float framelength_s) {
60 C2CONST c2const;
61
62 assert((Fs == 8000) || (Fs = 16000));
63 c2const.Fs = Fs;
64 c2const.n_samp = round(Fs*framelength_s);
65 c2const.max_amp = floor(Fs*P_MIN_S/2);
66 c2const.p_min = floor(Fs*P_MIN_S);
67 c2const.p_max = floor(Fs*P_MAX_S);
68 c2const.m_pitch = floor(Fs*M_PITCH_S);
69 c2const.Wo_min = TWO_PI/c2const.p_max;
70 c2const.Wo_max = TWO_PI/c2const.p_min;
71
72 if (Fs == 8000) {
73 c2const.nw = 279;
74 } else {
75 c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */
76 }
77
78 c2const.tw = Fs*TW_S;
79
80 /*
81 fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
82 fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
83 fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
84 fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
85 */
86
87 return c2const;
88}
89
90/*---------------------------------------------------------------------------*\
91
92 FUNCTION....: make_analysis_window
93 AUTHOR......: David Rowe
94 DATE CREATED: 11/5/94
95
96 Init function that generates the time domain analysis window and it's DFT.
97
98\*---------------------------------------------------------------------------*/
99
100void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[])
101{
102 float m;
103 COMP wshift[FFT_ENC];
104 COMP temp;
105 int i,j;
106 int m_pitch = c2const->m_pitch;
107 int nw = c2const->nw;
108
109 /*
110 Generate Hamming window centered on M-sample pitch analysis window
111
112 0 M/2 M-1
113 |-------------|-------------|
114 |-------|-------|
115 nw samples
116
117 All our analysis/synthsis is centred on the M/2 sample.
118 */
119
120 m = 0.0;
121 for(i=0; i<m_pitch/2-nw/2; i++)
122 w[i] = 0.0;
123 for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) {
124 w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1));
125 m += w[i]*w[i];
126 }
127 for(i=m_pitch/2+nw/2; i<m_pitch; i++)
128 w[i] = 0.0;
129
130 /* Normalise - makes freq domain amplitude estimation straight
131 forward */
132
133 m = 1.0/sqrtf(m*FFT_ENC);
134 for(i=0; i<m_pitch; i++) {
135 w[i] *= m;
136 }
137
138 /*
139 Generate DFT of analysis window, used for later processing. Note
140 we modulo FFT_ENC shift the time domain window w[], this makes the
141 imaginary part of the DFT W[] equal to zero as the shifted w[] is
142 even about the n=0 time axis if nw is odd. Having the imag part
143 of the DFT W[] makes computation easier.
144
145 0 FFT_ENC-1
146 |-------------------------|
147
148 ----\ /----
149 \ /
150 \ / <- shifted version of window w[n]
151 \ /
152 \ /
153 -------
154
155 |---------| |---------|
156 nw/2 nw/2
157 */
158
159 for(i=0; i<FFT_ENC; i++) {
160 wshift[i].real = 0.0;
161 wshift[i].imag = 0.0;
162 }
163 for(i=0; i<nw/2; i++)
164 wshift[i].real = w[i+m_pitch/2];
165 for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++)
166 wshift[i].real = w[j];
167
168 codec2_fft(fft_fwd_cfg, wshift, W);
169
170 /*
171 Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later
172 analysis convenient.
173
174 Before:
175
176
177 0 FFT_ENC-1
178 |----------|---------|
179 __ _
180 \ /
181 \_______________/
182
183 After:
184
185 0 FFT_ENC-1
186 |----------|---------|
187 ___
188 / \
189 ________/ \_______
190
191 */
192
193
194 for(i=0; i<FFT_ENC/2; i++) {
195 temp.real = W[i].real;
196 temp.imag = W[i].imag;
197 W[i].real = W[i+FFT_ENC/2].real;
198 W[i].imag = W[i+FFT_ENC/2].imag;
199 W[i+FFT_ENC/2].real = temp.real;
200 W[i+FFT_ENC/2].imag = temp.imag;
201 }
202
203}
204
205/*---------------------------------------------------------------------------*\
206
207 FUNCTION....: hpf
208 AUTHOR......: David Rowe
209 DATE CREATED: 16 Nov 2010
210
211 High pass filter with a -3dB point of about 160Hz.
212
213 y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1)
214
215\*---------------------------------------------------------------------------*/
216
217float hpf(float x, float states[])
218{
219 states[0] = -HPF_BETA*states[0] + x - states[1];
220 states[1] = x;
221
222 return states[0];
223}
224
225/*---------------------------------------------------------------------------*\
226
227 FUNCTION....: dft_speech
228 AUTHOR......: David Rowe
229 DATE CREATED: 27/5/94
230
231 Finds the DFT of the current speech input speech frame.
232
233\*---------------------------------------------------------------------------*/
234
235// TODO: we can either go for a faster FFT using fftr and some stack usage
236// or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace
237#if 1
238void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[])
239{
240 int i;
241 int m_pitch = c2const->m_pitch;
242 int nw = c2const->nw;
243
244 for(i=0; i<FFT_ENC; i++) {
245 Sw[i].real = 0.0;
246 Sw[i].imag = 0.0;
247 }
248
249 /* Centre analysis window on time axis, we need to arrange input
250 to FFT this way to make FFT phases correct */
251
252 /* move 2nd half to start of FFT input vector */
253
254 for(i=0; i<nw/2; i++)
255 Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2];
256
257 /* move 1st half to end of FFT input vector */
258
259 for(i=0; i<nw/2; i++)
260 Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
261
262 codec2_fft_inplace(fft_fwd_cfg, Sw);
263}
264#else
265void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
266{
267 int i;
268 float sw[FFT_ENC];
269
270 for(i=0; i<FFT_ENC; i++) {
271 sw[i] = 0.0;
272 }
273
274 /* Centre analysis window on time axis, we need to arrange input
275 to FFT this way to make FFT phases correct */
276
277 /* move 2nd half to start of FFT input vector */
278
279 for(i=0; i<nw/2; i++)
280 sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2];
281
282 /* move 1st half to end of FFT input vector */
283
284 for(i=0; i<nw/2; i++)
285 sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
286
287 codec2_fftr(fftr_fwd_cfg, sw, Sw);
288}
289#endif
290
291
292/*---------------------------------------------------------------------------*\
293
294 FUNCTION....: two_stage_pitch_refinement
295 AUTHOR......: David Rowe
296 DATE CREATED: 27/5/94
297
298 Refines the current pitch estimate using the harmonic sum pitch
299 estimation technique.
300
301\*---------------------------------------------------------------------------*/
302
303void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[])
304{
305 float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */
306
307 /* Coarse refinement */
308
309 pmax = TWO_PI/model->Wo + 5;
310 pmin = TWO_PI/model->Wo - 5;
311 pstep = 1.0;
312 hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
313
314 /* Fine refinement */
315
316 pmax = TWO_PI/model->Wo + 1;
317 pmin = TWO_PI/model->Wo - 1;
318 pstep = 0.25;
319 hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
320
321 /* Limit range */
322
323 if (model->Wo < TWO_PI/c2const->p_max)
324 model->Wo = TWO_PI/c2const->p_max;
325 if (model->Wo > TWO_PI/c2const->p_min)
326 model->Wo = TWO_PI/c2const->p_min;
327
328 model->L = floorf(PI/model->Wo);
329
330 /* trap occasional round off issues with floorf() */
331 if (model->Wo*model->L >= 0.95*PI) {
332 model->L--;
333 }
334 assert(model->Wo*model->L < PI);
335}
336
337/*---------------------------------------------------------------------------*\
338
339 FUNCTION....: hs_pitch_refinement
340 AUTHOR......: David Rowe
341 DATE CREATED: 27/5/94
342
343 Harmonic sum pitch refinement function.
344
345 pmin pitch search range minimum
346 pmax pitch search range maximum
347 step pitch search step size
348 model current pitch estimate in model.Wo
349
350 model refined pitch estimate in model.Wo
351
352\*---------------------------------------------------------------------------*/
353
354void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep)
355{
356 int m; /* loop variable */
357 int b; /* bin for current harmonic centre */
358 float E; /* energy for current pitch*/
359 float Wo; /* current "test" fundamental freq. */
360 float Wom; /* Wo that maximises E */
361 float Em; /* mamimum energy */
362 float r, one_on_r; /* number of rads/bin */
363 float p; /* current pitch */
364
365 /* Initialisation */
366
367 model->L = PI/model->Wo; /* use initial pitch est. for L */
368 Wom = model->Wo;
369 Em = 0.0;
370 r = TWO_PI/FFT_ENC;
371 one_on_r = 1.0/r;
372
373 /* Determine harmonic sum for a range of Wo values */
374
375 for(p=pmin; p<=pmax; p+=pstep) {
376 E = 0.0;
377 Wo = TWO_PI/p;
378
379 /* Sum harmonic magnitudes */
380 for(m=1; m<=model->L; m++) {
381 b = (int)(m*Wo*one_on_r + 0.5);
382 E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
383 }
384 /* Compare to see if this is a maximum */
385
386 if (E > Em) {
387 Em = E;
388 Wom = Wo;
389 }
390 }
391
392 model->Wo = Wom;
393}
394
395/*---------------------------------------------------------------------------*\
396
397 FUNCTION....: estimate_amplitudes
398 AUTHOR......: David Rowe
399 DATE CREATED: 27/5/94
400
401 Estimates the complex amplitudes of the harmonics.
402
403\*---------------------------------------------------------------------------*/
404
405void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase)
406{
407 int i,m; /* loop variables */
408 int am,bm; /* bounds of current harmonic */
409 int b; /* DFT bin of centre of current harmonic */
410 float den; /* denominator of amplitude expression */
411 float r, one_on_r; /* number of rads/bin */
412 int offset;
413 COMP Am;
414
415 r = TWO_PI/FFT_ENC;
416 one_on_r = 1.0/r;
417
418 for(m=1; m<=model->L; m++) {
419 den = 0.0;
420 am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
421 bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
422 b = (int)(m*model->Wo/r + 0.5);
423
424 /* Estimate ampltude of harmonic */
425
426 den = 0.0;
427 Am.real = Am.imag = 0.0;
428 offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5);
429 for(i=am; i<bm; i++) {
430 den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag;
431 Am.real += Sw[i].real*W[i + offset].real;
432 Am.imag += Sw[i].imag*W[i + offset].real;
433 }
434
435 model->A[m] = sqrtf(den);
436
437 if (est_phase) {
438
439 /* Estimate phase of harmonic, this is expensive in CPU for
440 embedded devicesso we make it an option */
441
442 model->phi[m] = atan2f(Sw[b].imag,Sw[b].real);
443 }
444 }
445}
446
447/*---------------------------------------------------------------------------*\
448
449 est_voicing_mbe()
450
451 Returns the error of the MBE cost function for a fiven F0.
452
453 Note: I think a lot of the operations below can be simplified as
454 W[].imag = 0 and has been normalised such that den always equals 1.
455
456\*---------------------------------------------------------------------------*/
457
458float est_voicing_mbe(
459 C2CONST *c2const,
460 MODEL *model,
461 COMP Sw[],
462 COMP W[]
463 )
464{
465 int l,al,bl,m; /* loop variables */
466 COMP Am; /* amplitude sample for this band */
467 int offset; /* centers Hw[] about current harmonic */
468 float den; /* denominator of Am expression */
469 float error; /* accumulated error between original and synthesised */
470 float Wo;
471 float sig, snr;
472 float elow, ehigh, eratio;
473 float sixty;
474 COMP Ew;
475 Ew.real = 0;
476 Ew.imag = 0;
477
478 int l_1000hz = model->L*1000.0/(c2const->Fs/2);
479 sig = 1E-4;
480 for(l=1; l<=l_1000hz; l++) {
481 sig += model->A[l]*model->A[l];
482 }
483
484 Wo = model->Wo;
485 error = 1E-4;
486
487 /* Just test across the harmonics in the first 1000 Hz */
488
489 for(l=1; l<=l_1000hz; l++) {
490 Am.real = 0.0;
491 Am.imag = 0.0;
492 den = 0.0;
493 al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
494 bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
495
496 /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
497
498 offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
499 for(m=al; m<bl; m++) {
500 Am.real += Sw[m].real*W[offset+m].real;
501 Am.imag += Sw[m].imag*W[offset+m].real;
502 den += W[offset+m].real*W[offset+m].real;
503 }
504
505 Am.real = Am.real/den;
506 Am.imag = Am.imag/den;
507
508 /* Determine error between estimated harmonic and original */
509
510// Redundant! offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
511 for(m=al; m<bl; m++) {
512 Ew.real = Sw[m].real - Am.real*W[offset+m].real;
513 Ew.imag = Sw[m].imag - Am.imag*W[offset+m].real;
514 error += Ew.real*Ew.real;
515 error += Ew.imag*Ew.imag;
516 }
517 }
518
519 snr = 10.0*log10f(sig/error);
520 if (snr > V_THRESH)
521 model->voiced = 1;
522 else
523 model->voiced = 0;
524
525 /* post processing, helps clean up some voicing errors ------------------*/
526
527 /*
528 Determine the ratio of low freqency to high frequency energy,
529 voiced speech tends to be dominated by low frequency energy,
530 unvoiced by high frequency. This measure can be used to
531 determine if we have made any gross errors.
532 */
533
534 int l_2000hz = model->L*2000.0/(c2const->Fs/2);
535 int l_4000hz = model->L*4000.0/(c2const->Fs/2);
536 elow = ehigh = 1E-4;
537 for(l=1; l<=l_2000hz; l++) {
538 elow += model->A[l]*model->A[l];
539 }
540 for(l=l_2000hz; l<=l_4000hz; l++) {
541 ehigh += model->A[l]*model->A[l];
542 }
543 eratio = 10.0*log10f(elow/ehigh);
544
545 /* Look for Type 1 errors, strongly V speech that has been
546 accidentally declared UV */
547
548 if (model->voiced == 0)
549 if (eratio > 10.0)
550 model->voiced = 1;
551
552 /* Look for Type 2 errors, strongly UV speech that has been
553 accidentally declared V */
554
555 if (model->voiced == 1) {
556 if (eratio < -10.0)
557 model->voiced = 0;
558
559 /* A common source of Type 2 errors is the pitch estimator
560 gives a low (50Hz) estimate for UV speech, which gives a
561 good match with noise due to the close harmoonic spacing.
562 These errors are much more common than people with 50Hz3
563 pitch, so we have just a small eratio threshold. */
564
565 sixty = 60.0*TWO_PI/c2const->Fs;
566 if ((eratio < -4.0) && (model->Wo <= sixty))
567 model->voiced = 0;
568 }
569 //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
570
571 return snr;
572}
573
574/*---------------------------------------------------------------------------*\
575
576 FUNCTION....: make_synthesis_window
577 AUTHOR......: David Rowe
578 DATE CREATED: 11/5/94
579
580 Init function that generates the trapezoidal (Parzen) sythesis window.
581
582\*---------------------------------------------------------------------------*/
583
584void make_synthesis_window(C2CONST *c2const, float Pn[])
585{
586 int i;
587 float win;
588 int n_samp = c2const->n_samp;
589 int tw = c2const->tw;
590
591 /* Generate Parzen window in time domain */
592
593 win = 0.0;
594 for(i=0; i<n_samp/2-tw; i++)
595 Pn[i] = 0.0;
596 win = 0.0;
597 for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ )
598 Pn[i] = win;
599 for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++)
600 Pn[i] = 1.0;
601 win = 1.0;
602 for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++)
603 Pn[i] = win;
604 for(i=3*n_samp/2+tw; i<2*n_samp; i++)
605 Pn[i] = 0.0;
606}
607
608/*---------------------------------------------------------------------------*\
609
610 FUNCTION....: synthesise
611 AUTHOR......: David Rowe
612 DATE CREATED: 20/2/95
613
614 Synthesise a speech signal in the frequency domain from the
615 sinusodal model parameters. Uses overlap-add with a trapezoidal
616 window to smoothly interpolate betwen frames.
617
618\*---------------------------------------------------------------------------*/
619
620void synthesise(
621 int n_samp,
622 codec2_fftr_cfg fftr_inv_cfg,
623 float Sn_[], /* time domain synthesised signal */
624 MODEL *model, /* ptr to model parameters for this frame */
625 float Pn[], /* time domain Parzen window */
626 int shift /* flag used to handle transition frames */
627)
628{
629 int i,l,j,b; /* loop variables */
630 COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */
631 float sw_[FFT_DEC]; /* synthesised signal */
632
633 if (shift) {
634 /* Update memories */
635 for(i=0; i<n_samp-1; i++) {
636 Sn_[i] = Sn_[i+n_samp];
637 }
638 Sn_[n_samp-1] = 0.0;
639 }
640
641 for(i=0; i<FFT_DEC/2+1; i++) {
642 Sw_[i].real = 0.0;
643 Sw_[i].imag = 0.0;
644 }
645
646 /* Now set up frequency domain synthesised speech */
647
648 for(l=1; l<=model->L; l++) {
649 b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
650 if (b > ((FFT_DEC/2)-1)) {
651 b = (FFT_DEC/2)-1;
652 }
653 Sw_[b].real = model->A[l]*cosf(model->phi[l]);
654 Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
655 }
656
657 /* Perform inverse DFT */
658
659 codec2_fftri(fftr_inv_cfg, Sw_,sw_);
660
661 /* Overlap add to previous samples */
662
663 #ifdef USE_KISS_FFT
664 #define FFTI_FACTOR ((float)1.0)
665 #else
666 #define FFTI_FACTOR ((float32_t)FFT_DEC)
667 #endif
668
669 for(i=0; i<n_samp-1; i++) {
670 Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR;
671 }
672
673 if (shift)
674 for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
675 Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR;
676 else
677 for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
678 Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR;
679}
680
681
682/* todo: this should probably be in some states rather than a static */
683static unsigned long next = 1;
684
685int codec2_rand(void) {
686 next = next * 1103515245 + 12345;
687 return((unsigned)(next/65536) % 32768);
688}
689