summaryrefslogtreecommitdiff
path: root/nlp.c
diff options
context:
space:
mode:
Diffstat (limited to 'nlp.c')
-rw-r--r--nlp.c609
1 files changed, 279 insertions, 330 deletions
diff --git a/nlp.c b/nlp.c
index 036f6be..53cf040 100644
--- a/nlp.c
+++ b/nlp.c
@@ -25,115 +25,79 @@
25 along with this program; if not, see <http://www.gnu.org/licenses/>. 25 along with this program; if not, see <http://www.gnu.org/licenses/>.
26*/ 26*/
27 27
28#include "defines.h"
29#include "nlp.h" 28#include "nlp.h"
30#include "dump.h" 29
31#include "codec2_fft.h" 30#include "codec2_fft.h"
31#include "defines.h"
32#include "dump.h"
32#undef PROFILE 33#undef PROFILE
33#include "machdep.h"
34#include "os.h"
35
36#include <assert.h> 34#include <assert.h>
37#include <math.h> 35#include <math.h>
38#include <stdlib.h> 36#include <stdlib.h>
39 37
38#include "machdep.h"
39#include "os.h"
40
40/*---------------------------------------------------------------------------*\ 41/*---------------------------------------------------------------------------*\
41 42
42 DEFINES 43 DEFINES
43 44
44\*---------------------------------------------------------------------------*/ 45\*---------------------------------------------------------------------------*/
45 46
46#define PMAX_M 320 /* maximum NLP analysis window size */ 47#define PMAX_M 320 /* maximum NLP analysis window size */
47#define COEFF 0.95 /* notch filter parameter */ 48#define COEFF 0.95 /* notch filter parameter */
48#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */ 49#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */
49#define DEC 5 /* decimation factor */ 50#define DEC 5 /* decimation factor */
50#define SAMPLE_RATE 8000 51#define SAMPLE_RATE 8000
51#define PI 3.141592654 /* mathematical constant */ 52#define PI 3.141592654 /* mathematical constant */
52#define T 0.1 /* threshold for local minima candidate */ 53#define T 0.1 /* threshold for local minima candidate */
53#define F0_MAX 500 54#define F0_MAX 500
54#define CNLP 0.3 /* post processor constant */ 55#define CNLP 0.3 /* post processor constant */
55#define NLP_NTAP 48 /* Decimation LPF order */ 56#define NLP_NTAP 48 /* Decimation LPF order */
56 57
57/* 8 to 16 kHz sample rate conversion */ 58/* 8 to 16 kHz sample rate conversion */
58 59
59#define FDMDV_OS 2 /* oversampling rate */ 60#define FDMDV_OS 2 /* oversampling rate */
60#define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */ 61#define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */
61#define FDMDV_OS_TAPS_8K (FDMDV_OS_TAPS_16K/FDMDV_OS) /* number of OS filter taps at 8kHz */ 62#define FDMDV_OS_TAPS_8K \
63 (FDMDV_OS_TAPS_16K / FDMDV_OS) /* number of OS filter taps at 8kHz */
62 64
63/*---------------------------------------------------------------------------*\ 65/*---------------------------------------------------------------------------*\
64 66
65 GLOBALS 67 GLOBALS
66 68
67\*---------------------------------------------------------------------------*/ 69\*---------------------------------------------------------------------------*/
68 70
69/* 48 tap 600Hz low pass FIR filter coefficients */ 71/* 48 tap 600Hz low pass FIR filter coefficients */
70 72
71const float nlp_fir[] = { 73const float nlp_fir[] = {
72 -1.0818124e-03, 74 -1.0818124e-03, -1.1008344e-03, -9.2768838e-04, -4.2289438e-04,
73 -1.1008344e-03, 75 5.5034190e-04, 2.0029849e-03, 3.7058509e-03, 5.1449415e-03,
74 -9.2768838e-04, 76 5.5924666e-03, 4.3036754e-03, 8.0284511e-04, -4.8204610e-03,
75 -4.2289438e-04, 77 -1.1705810e-02, -1.8199275e-02, -2.2065282e-02, -2.0920610e-02,
76 5.5034190e-04, 78 -1.2808831e-02, 3.2204775e-03, 2.6683811e-02, 5.5520624e-02,
77 2.0029849e-03, 79 8.6305944e-02, 1.1480192e-01, 1.3674206e-01, 1.4867556e-01,
78 3.7058509e-03, 80 1.4867556e-01, 1.3674206e-01, 1.1480192e-01, 8.6305944e-02,
79 5.1449415e-03, 81 5.5520624e-02, 2.6683811e-02, 3.2204775e-03, -1.2808831e-02,
80 5.5924666e-03, 82 -2.0920610e-02, -2.2065282e-02, -1.8199275e-02, -1.1705810e-02,
81 4.3036754e-03, 83 -4.8204610e-03, 8.0284511e-04, 4.3036754e-03, 5.5924666e-03,
82 8.0284511e-04, 84 5.1449415e-03, 3.7058509e-03, 2.0029849e-03, 5.5034190e-04,
83 -4.8204610e-03, 85 -4.2289438e-04, -9.2768838e-04, -1.1008344e-03, -1.0818124e-03};
84 -1.1705810e-02,
85 -1.8199275e-02,
86 -2.2065282e-02,
87 -2.0920610e-02,
88 -1.2808831e-02,
89 3.2204775e-03,
90 2.6683811e-02,
91 5.5520624e-02,
92 8.6305944e-02,
93 1.1480192e-01,
94 1.3674206e-01,
95 1.4867556e-01,
96 1.4867556e-01,
97 1.3674206e-01,
98 1.1480192e-01,
99 8.6305944e-02,
100 5.5520624e-02,
101 2.6683811e-02,
102 3.2204775e-03,
103 -1.2808831e-02,
104 -2.0920610e-02,
105 -2.2065282e-02,
106 -1.8199275e-02,
107 -1.1705810e-02,
108 -4.8204610e-03,
109 8.0284511e-04,
110 4.3036754e-03,
111 5.5924666e-03,
112 5.1449415e-03,
113 3.7058509e-03,
114 2.0029849e-03,
115 5.5034190e-04,
116 -4.2289438e-04,
117 -9.2768838e-04,
118 -1.1008344e-03,
119 -1.0818124e-03
120};
121 86
122typedef struct { 87typedef struct {
123 int Fs; /* sample rate in Hz */ 88 int Fs; /* sample rate in Hz */
124 int m; 89 int m;
125 float w[PMAX_M/DEC]; /* DFT window */ 90 float w[PMAX_M / DEC]; /* DFT window */
126 float sq[PMAX_M]; /* squared speech samples */ 91 float sq[PMAX_M]; /* squared speech samples */
127 float mem_x,mem_y; /* memory for notch filter */ 92 float mem_x, mem_y; /* memory for notch filter */
128 float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */ 93 float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
129 codec2_fft_cfg fft_cfg; /* kiss FFT config */ 94 codec2_fft_cfg fft_cfg; /* kiss FFT config */
130 float *Sn16k; /* Fs=16kHz input speech vector */ 95 float *Sn16k; /* Fs=16kHz input speech vector */
131 FILE *f; 96 FILE *f;
132} NLP; 97} NLP;
133 98
134float post_process_sub_multiples(COMP Fw[], 99float post_process_sub_multiples(COMP Fw[], int pmin, int pmax, float gmax,
135 int pmin, int pmax, float gmax, int gmax_bin, 100 int gmax_bin, float *prev_f0);
136 float *prev_f0);
137static void fdmdv_16_to_8(float out8k[], float in16k[], int n); 101static void fdmdv_16_to_8(float out8k[], float in16k[], int n);
138 102
139/*---------------------------------------------------------------------------*\ 103/*---------------------------------------------------------------------------*\
@@ -144,56 +108,53 @@ static void fdmdv_16_to_8(float out8k[], float in16k[], int n);
144 108
145\*---------------------------------------------------------------------------*/ 109\*---------------------------------------------------------------------------*/
146 110
147void *nlp_create(C2CONST *c2const) 111void *nlp_create(C2CONST *c2const) {
148{ 112 NLP *nlp;
149 NLP *nlp; 113 int i;
150 int i; 114 int m = c2const->m_pitch;
151 int m = c2const->m_pitch; 115 int Fs = c2const->Fs;
152 int Fs = c2const->Fs; 116
117 nlp = (NLP *)malloc(sizeof(NLP));
118 if (nlp == NULL) return NULL;
153 119
154 nlp = (NLP*)malloc(sizeof(NLP)); 120 assert((Fs == 8000) || (Fs == 16000));
155 if (nlp == NULL) 121 nlp->Fs = Fs;
156 return NULL;
157 122
158 assert((Fs == 8000) || (Fs == 16000)); 123 nlp->m = m;
159 nlp->Fs = Fs;
160 124
161 nlp->m = m; 125 /* if running at 16kHz allocate storage for decimating filter memory */
162 126
163 /* if running at 16kHz allocate storage for decimating filter memory */ 127 if (Fs == 16000) {
128 nlp->Sn16k =
129 (float *)malloc(sizeof(float) * (FDMDV_OS_TAPS_16K + c2const->n_samp));
130 for (i = 0; i < FDMDV_OS_TAPS_16K; i++) {
131 nlp->Sn16k[i] = 0.0;
132 }
133 if (nlp->Sn16k == NULL) {
134 free(nlp);
135 return NULL;
136 }
164 137
165 if (Fs == 16000) { 138 /* most processing occurs at 8 kHz sample rate so halve m */
166 nlp->Sn16k = (float*)malloc(sizeof(float)*(FDMDV_OS_TAPS_16K + c2const->n_samp));
167 for(i=0; i<FDMDV_OS_TAPS_16K; i++) {
168 nlp->Sn16k[i] = 0.0;
169 }
170 if (nlp->Sn16k == NULL) {
171 free(nlp);
172 return NULL;
173 }
174 139
175 /* most processing occurs at 8 kHz sample rate so halve m */ 140 m /= 2;
141 }
176 142
177 m /= 2; 143 assert(m <= PMAX_M);
178 }
179 144
180 assert(m <= PMAX_M); 145 for (i = 0; i < m / DEC; i++) {
181 146 nlp->w[i] = 0.5 - 0.5 * cosf(2 * PI * i / (m / DEC - 1));
182 for(i=0; i<m/DEC; i++) { 147 }
183 nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
184 }
185 148
186 for(i=0; i<PMAX_M; i++) 149 for (i = 0; i < PMAX_M; i++) nlp->sq[i] = 0.0;
187 nlp->sq[i] = 0.0; 150 nlp->mem_x = 0.0;
188 nlp->mem_x = 0.0; 151 nlp->mem_y = 0.0;
189 nlp->mem_y = 0.0; 152 for (i = 0; i < NLP_NTAP; i++) nlp->mem_fir[i] = 0.0;
190 for(i=0; i<NLP_NTAP; i++)
191 nlp->mem_fir[i] = 0.0;
192 153
193 nlp->fft_cfg = codec2_fft_alloc (PE_FFT_SIZE, 0, NULL, NULL); 154 nlp->fft_cfg = codec2_fft_alloc(PE_FFT_SIZE, 0, NULL, NULL);
194 assert(nlp->fft_cfg != NULL); 155 assert(nlp->fft_cfg != NULL);
195 156
196 return (void*)nlp; 157 return (void *)nlp;
197} 158}
198 159
199/*---------------------------------------------------------------------------*\ 160/*---------------------------------------------------------------------------*\
@@ -204,17 +165,16 @@ void *nlp_create(C2CONST *c2const)
204 165
205\*---------------------------------------------------------------------------*/ 166\*---------------------------------------------------------------------------*/
206 167
207void nlp_destroy(void *nlp_state) 168void nlp_destroy(void *nlp_state) {
208{ 169 NLP *nlp;
209 NLP *nlp; 170 assert(nlp_state != NULL);
210 assert(nlp_state != NULL); 171 nlp = (NLP *)nlp_state;
211 nlp = (NLP*)nlp_state;
212 172
213 codec2_fft_free(nlp->fft_cfg); 173 codec2_fft_free(nlp->fft_cfg);
214 if (nlp->Fs == 16000) { 174 if (nlp->Fs == 16000) {
215 free(nlp->Sn16k); 175 free(nlp->Sn16k);
216 } 176 }
217 free(nlp_state); 177 free(nlp_state);
218} 178}
219 179
220/*---------------------------------------------------------------------------*\ 180/*---------------------------------------------------------------------------*\
@@ -248,162 +208,157 @@ void nlp_destroy(void *nlp_state)
248\*---------------------------------------------------------------------------*/ 208\*---------------------------------------------------------------------------*/
249 209
250float nlp( 210float nlp(
251 void *nlp_state, 211 void *nlp_state, float Sn[], /* input speech vector */
252 float Sn[], /* input speech vector */ 212 int n, /* frames shift (no. new samples in Sn[]) */
253 int n, /* frames shift (no. new samples in Sn[]) */ 213 float *pitch, /* estimated pitch period in samples at current Fs */
254 float *pitch, /* estimated pitch period in samples at current Fs */ 214 COMP Sw[], /* Freq domain version of Sn[] */
255 COMP Sw[], /* Freq domain version of Sn[] */ 215 float W[], /* Freq domain window */
256 float W[], /* Freq domain window */ 216 float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */
257 float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ 217) {
258) 218 NLP *nlp;
259{ 219 float notch; /* current notch filter output */
260 NLP *nlp; 220 COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */
261 float notch; /* current notch filter output */ 221 float gmax;
262 COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */ 222 int gmax_bin;
263 float gmax; 223 int m, i, j;
264 int gmax_bin; 224 float best_f0;
265 int m, i, j; 225 PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
266 float best_f0; 226
267 PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem); 227 assert(nlp_state != NULL);
268 228 nlp = (NLP *)nlp_state;
269 assert(nlp_state != NULL); 229 m = nlp->m;
270 nlp = (NLP*)nlp_state; 230
271 m = nlp->m; 231 /* Square, notch filter at DC, and LP filter vector */
272 232
273 /* Square, notch filter at DC, and LP filter vector */ 233 /* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for
274 234 Fs = 8kHz. The decimating filter introduces about 3ms of delay,
275 /* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for 235 that shouldn't be a problem as pitch changes slowly. */
276 Fs = 8kHz. The decimating filter introduces about 3ms of delay, 236
277 that shouldn't be a problem as pitch changes slowly. */ 237 if (nlp->Fs == 8000) {
278 238 /* Square latest input samples */
279 if (nlp->Fs == 8000) { 239
280 /* Square latest input samples */ 240 for (i = m - n; i < m; i++) {
281 241 nlp->sq[i] = Sn[i] * Sn[i];
282 for(i=m-n; i<m; i++) {
283 nlp->sq[i] = Sn[i]*Sn[i];
284 }
285 } 242 }
286 else { 243 } else {
287 assert(nlp->Fs == 16000); 244 assert(nlp->Fs == 16000);
288
289 /* re-sample at 8 KHz */
290
291 for(i=0; i<n; i++) {
292 nlp->Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i];
293 }
294
295 m /= 2; n /= 2;
296 245
297 float Sn8k[n]; 246 /* re-sample at 8 KHz */
298 fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n);
299 247
300 /* Square latest input samples */ 248 for (i = 0; i < n; i++) {
301 249 nlp->Sn16k[FDMDV_OS_TAPS_16K + i] = Sn[m - n + i];
302 for(i=m-n, j=0; i<m; i++, j++) {
303 nlp->sq[i] = Sn8k[j]*Sn8k[j];
304 }
305 assert(j <= n);
306 }
307 //fprintf(stderr, "n: %d m: %d\n", n, m);
308
309 PROFILE_SAMPLE(start);
310
311 for(i=m-n; i<m; i++) { /* notch filter at DC */
312 notch = nlp->sq[i] - nlp->mem_x;
313 notch += COEFF*nlp->mem_y;
314 nlp->mem_x = nlp->sq[i];
315 nlp->mem_y = notch;
316 nlp->sq[i] = notch + 1.0; /* With 0 input vectors to codec,
317 kiss_fft() would take a long
318 time to execute when running in
319 real time. Problem was traced
320 to kiss_fft function call in
321 this function. Adding this small
322 constant fixed problem. Not
323 exactly sure why. */
324 } 250 }
325 251
326 PROFILE_SAMPLE_AND_LOG(tnotch, start, " square and notch"); 252 m /= 2;
253 n /= 2;
327 254
328 for(i=m-n; i<m; i++) { /* FIR filter vector */ 255 float Sn8k[n];
256 fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n);
329 257
330 for(j=0; j<NLP_NTAP-1; j++) 258 /* Square latest input samples */
331 nlp->mem_fir[j] = nlp->mem_fir[j+1];
332 nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
333 259
334 nlp->sq[i] = 0.0; 260 for (i = m - n, j = 0; i < m; i++, j++) {
335 for(j=0; j<NLP_NTAP; j++) 261 nlp->sq[i] = Sn8k[j] * Sn8k[j];
336 nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
337 } 262 }
338 263 assert(j <= n);
339 PROFILE_SAMPLE_AND_LOG(filter, tnotch, " filter"); 264 }
340 265 // fprintf(stderr, "n: %d m: %d\n", n, m);
341 /* Decimate and DFT */ 266
342 267 PROFILE_SAMPLE(start);
343 for(i=0; i<PE_FFT_SIZE; i++) { 268
344 Fw[i].real = 0.0; 269 for (i = m - n; i < m; i++) { /* notch filter at DC */
345 Fw[i].imag = 0.0; 270 notch = nlp->sq[i] - nlp->mem_x;
346 } 271 notch += COEFF * nlp->mem_y;
347 for(i=0; i<m/DEC; i++) { 272 nlp->mem_x = nlp->sq[i];
348 Fw[i].real = nlp->sq[i*DEC]*nlp->w[i]; 273 nlp->mem_y = notch;
349 } 274 nlp->sq[i] = notch + 1.0; /* With 0 input vectors to codec,
350 PROFILE_SAMPLE_AND_LOG(window, filter, " window"); 275 kiss_fft() would take a long
351 #ifdef DUMP 276 time to execute when running in
352 dump_dec(Fw); 277 real time. Problem was traced
353 #endif 278 to kiss_fft function call in
354 279 this function. Adding this small
355 // FIXME: check if this can be converted to a real fft 280 constant fixed problem. Not
356 // since all imag inputs are 0 281 exactly sure why. */
357 codec2_fft_inplace(nlp->fft_cfg, Fw); 282 }
358 PROFILE_SAMPLE_AND_LOG(fft, window, " fft"); 283
359 284 PROFILE_SAMPLE_AND_LOG(tnotch, start, " square and notch");
360 for(i=0; i<PE_FFT_SIZE; i++) 285
361 Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag; 286 for (i = m - n; i < m; i++) { /* FIR filter vector */
362 287
363 PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq"); 288 for (j = 0; j < NLP_NTAP - 1; j++) nlp->mem_fir[j] = nlp->mem_fir[j + 1];
364 #ifdef DUMP 289 nlp->mem_fir[NLP_NTAP - 1] = nlp->sq[i];
365 dump_sq(m, nlp->sq); 290
366 dump_Fw(Fw); 291 nlp->sq[i] = 0.0;
367 #endif 292 for (j = 0; j < NLP_NTAP; j++) nlp->sq[i] += nlp->mem_fir[j] * nlp_fir[j];
368 293 }
369 /* todo: express everything in f0, as pitch in samples is dep on Fs */ 294
370 295 PROFILE_SAMPLE_AND_LOG(filter, tnotch, " filter");
371 int pmin = floor(SAMPLE_RATE*P_MIN_S); 296
372 int pmax = floor(SAMPLE_RATE*P_MAX_S); 297 /* Decimate and DFT */
373 298
374 /* find global peak */ 299 for (i = 0; i < PE_FFT_SIZE; i++) {
375 300 Fw[i].real = 0.0;
376 gmax = 0.0; 301 Fw[i].imag = 0.0;
377 gmax_bin = PE_FFT_SIZE*DEC/pmax; 302 }
378 for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) { 303 for (i = 0; i < m / DEC; i++) {
379 if (Fw[i].real > gmax) { 304 Fw[i].real = nlp->sq[i * DEC] * nlp->w[i];
380 gmax = Fw[i].real; 305 }
381 gmax_bin = i; 306 PROFILE_SAMPLE_AND_LOG(window, filter, " window");
382 } 307#ifdef DUMP
308 dump_dec(Fw);
309#endif
310
311 // FIXME: check if this can be converted to a real fft
312 // since all imag inputs are 0
313 codec2_fft_inplace(nlp->fft_cfg, Fw);
314 PROFILE_SAMPLE_AND_LOG(fft, window, " fft");
315
316 for (i = 0; i < PE_FFT_SIZE; i++)
317 Fw[i].real = Fw[i].real * Fw[i].real + Fw[i].imag * Fw[i].imag;
318
319 PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq");
320#ifdef DUMP
321 dump_sq(m, nlp->sq);
322 dump_Fw(Fw);
323#endif
324
325 /* todo: express everything in f0, as pitch in samples is dep on Fs */
326
327 int pmin = floor(SAMPLE_RATE * P_MIN_S);
328 int pmax = floor(SAMPLE_RATE * P_MAX_S);
329
330 /* find global peak */
331
332 gmax = 0.0;
333 gmax_bin = PE_FFT_SIZE * DEC / pmax;
334 for (i = PE_FFT_SIZE * DEC / pmax; i <= PE_FFT_SIZE * DEC / pmin; i++) {
335 if (Fw[i].real > gmax) {
336 gmax = Fw[i].real;
337 gmax_bin = i;
383 } 338 }
339 }
384 340
385 PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); 341 PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick");
386 342
387 best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); 343 best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0);
388 344
389 PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); 345 PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process");
390 346
391 /* Shift samples in buffer to make room for new samples */ 347 /* Shift samples in buffer to make room for new samples */
392 348
393 for(i=0; i<m-n; i++) 349 for (i = 0; i < m - n; i++) nlp->sq[i] = nlp->sq[i + n];
394 nlp->sq[i] = nlp->sq[i+n];
395 350
396 /* return pitch period in samples and F0 estimate */ 351 /* return pitch period in samples and F0 estimate */
397 352
398 *pitch = (float)nlp->Fs/best_f0; 353 *pitch = (float)nlp->Fs / best_f0;
399 354
400 PROFILE_SAMPLE_AND_LOG2(shiftmem, " shift mem"); 355 PROFILE_SAMPLE_AND_LOG2(shiftmem, " shift mem");
401 356
402 PROFILE_SAMPLE_AND_LOG2(start, " nlp int"); 357 PROFILE_SAMPLE_AND_LOG2(start, " nlp int");
403 358
404 *prev_f0 = best_f0; 359 *prev_f0 = best_f0;
405 360
406 return(best_f0); 361 return (best_f0);
407} 362}
408 363
409/*---------------------------------------------------------------------------*\ 364/*---------------------------------------------------------------------------*\
@@ -427,59 +382,55 @@ float nlp(
427 382
428\*---------------------------------------------------------------------------*/ 383\*---------------------------------------------------------------------------*/
429 384
430float post_process_sub_multiples(COMP Fw[], 385float post_process_sub_multiples(COMP Fw[], int pmin, int pmax, float gmax,
431 int pmin, int pmax, float gmax, int gmax_bin, 386 int gmax_bin, float *prev_f0) {
432 float *prev_f0) 387 int min_bin, cmax_bin;
433{ 388 int mult;
434 int min_bin, cmax_bin; 389 float thresh, best_f0;
435 int mult; 390 int b, bmin, bmax, lmax_bin;
436 float thresh, best_f0; 391 float lmax;
437 int b, bmin, bmax, lmax_bin; 392 int prev_f0_bin;
438 float lmax; 393
439 int prev_f0_bin; 394 /* post process estimate by searching submultiples */
440 395
441 /* post process estimate by searching submultiples */ 396 mult = 2;
442 397 min_bin = PE_FFT_SIZE * DEC / pmax;
443 mult = 2; 398 cmax_bin = gmax_bin;
444 min_bin = PE_FFT_SIZE*DEC/pmax; 399 prev_f0_bin = *prev_f0 * (PE_FFT_SIZE * DEC) / SAMPLE_RATE;
445 cmax_bin = gmax_bin; 400
446 prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE; 401 while (gmax_bin / mult >= min_bin) {
447 402 b = gmax_bin / mult; /* determine search interval */
448 while(gmax_bin/mult >= min_bin) { 403 bmin = 0.8 * b;
449 404 bmax = 1.2 * b;
450 b = gmax_bin/mult; /* determine search interval */ 405 if (bmin < min_bin) bmin = min_bin;
451 bmin = 0.8*b; 406
452 bmax = 1.2*b; 407 /* lower threshold to favour previous frames pitch estimate,
453 if (bmin < min_bin) 408 this is a form of pitch tracking */
454 bmin = min_bin; 409
455 410 if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax))
456 /* lower threshold to favour previous frames pitch estimate, 411 thresh = CNLP * 0.5 * gmax;
457 this is a form of pitch tracking */ 412 else
458 413 thresh = CNLP * gmax;
459 if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax)) 414
460 thresh = CNLP*0.5*gmax; 415 lmax = 0;
461 else 416 lmax_bin = bmin;
462 thresh = CNLP*gmax; 417 for (b = bmin; b <= bmax; b++) /* look for maximum in interval */
463 418 if (Fw[b].real > lmax) {
464 lmax = 0; 419 lmax = Fw[b].real;
465 lmax_bin = bmin; 420 lmax_bin = b;
466 for (b=bmin; b<=bmax; b++) /* look for maximum in interval */ 421 }
467 if (Fw[b].real > lmax) { 422
468 lmax = Fw[b].real; 423 if (lmax > thresh)
469 lmax_bin = b; 424 if ((lmax > Fw[lmax_bin - 1].real) && (lmax > Fw[lmax_bin + 1].real)) {
470 } 425 cmax_bin = lmax_bin;
471 426 }
472 if (lmax > thresh) 427
473 if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) { 428 mult++;
474 cmax_bin = lmax_bin; 429 }
475 } 430
476 431 best_f0 = (float)cmax_bin * SAMPLE_RATE / (PE_FFT_SIZE * DEC);
477 mult++; 432
478 } 433 return best_f0;
479
480 best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
481
482 return best_f0;
483} 434}
484 435
485/*---------------------------------------------------------------------------*\ 436/*---------------------------------------------------------------------------*\
@@ -502,20 +453,18 @@ float post_process_sub_multiples(COMP Fw[],
502 453
503\*---------------------------------------------------------------------------*/ 454\*---------------------------------------------------------------------------*/
504 455
505static void fdmdv_16_to_8(float out8k[], float in16k[], int n) 456static void fdmdv_16_to_8(float out8k[], float in16k[], int n) {
506{ 457 float acc;
507 float acc; 458 int i, j, k;
508 int i,j,k;
509 459
510 for(i=0, k=0; k<n; i+=FDMDV_OS, k++) { 460 for (i = 0, k = 0; k < n; i += FDMDV_OS, k++) {
511 acc = 0.0; 461 acc = 0.0;
512 for(j=0; j<FDMDV_OS_TAPS_16K; j++) 462 for (j = 0; j < FDMDV_OS_TAPS_16K; j++)
513 acc += fdmdv_os_filter[j]*in16k[i-j]; 463 acc += fdmdv_os_filter[j] * in16k[i - j];
514 out8k[k] = acc; 464 out8k[k] = acc;
515 } 465 }
516 466
517 /* update filter memory */ 467 /* update filter memory */
518 468
519 for(i=-FDMDV_OS_TAPS_16K; i<0; i++) 469 for (i = -FDMDV_OS_TAPS_16K; i < 0; i++) in16k[i] = in16k[i + n * FDMDV_OS];
520 in16k[i] = in16k[i + n*FDMDV_OS];
521} 470}