diff options
Diffstat (limited to 'nlp.c')
-rw-r--r-- | nlp.c | 609 |
1 files changed, 279 insertions, 330 deletions
@@ -25,115 +25,79 @@ | |||
25 | along with this program; if not, see <http://www.gnu.org/licenses/>. | 25 | along with this program; if not, see <http://www.gnu.org/licenses/>. |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #include "defines.h" | ||
29 | #include "nlp.h" | 28 | #include "nlp.h" |
30 | #include "dump.h" | 29 | |
31 | #include "codec2_fft.h" | 30 | #include "codec2_fft.h" |
31 | #include "defines.h" | ||
32 | #include "dump.h" | ||
32 | #undef PROFILE | 33 | #undef PROFILE |
33 | #include "machdep.h" | ||
34 | #include "os.h" | ||
35 | |||
36 | #include <assert.h> | 34 | #include <assert.h> |
37 | #include <math.h> | 35 | #include <math.h> |
38 | #include <stdlib.h> | 36 | #include <stdlib.h> |
39 | 37 | ||
38 | #include "machdep.h" | ||
39 | #include "os.h" | ||
40 | |||
40 | /*---------------------------------------------------------------------------*\ | 41 | /*---------------------------------------------------------------------------*\ |
41 | 42 | ||
42 | DEFINES | 43 | DEFINES |
43 | 44 | ||
44 | \*---------------------------------------------------------------------------*/ | 45 | \*---------------------------------------------------------------------------*/ |
45 | 46 | ||
46 | #define PMAX_M 320 /* maximum NLP analysis window size */ | 47 | #define PMAX_M 320 /* maximum NLP analysis window size */ |
47 | #define COEFF 0.95 /* notch filter parameter */ | 48 | #define COEFF 0.95 /* notch filter parameter */ |
48 | #define PE_FFT_SIZE 512 /* DFT size for pitch estimation */ | 49 | #define PE_FFT_SIZE 512 /* DFT size for pitch estimation */ |
49 | #define DEC 5 /* decimation factor */ | 50 | #define DEC 5 /* decimation factor */ |
50 | #define SAMPLE_RATE 8000 | 51 | #define SAMPLE_RATE 8000 |
51 | #define PI 3.141592654 /* mathematical constant */ | 52 | #define PI 3.141592654 /* mathematical constant */ |
52 | #define T 0.1 /* threshold for local minima candidate */ | 53 | #define T 0.1 /* threshold for local minima candidate */ |
53 | #define F0_MAX 500 | 54 | #define F0_MAX 500 |
54 | #define CNLP 0.3 /* post processor constant */ | 55 | #define CNLP 0.3 /* post processor constant */ |
55 | #define NLP_NTAP 48 /* Decimation LPF order */ | 56 | #define NLP_NTAP 48 /* Decimation LPF order */ |
56 | 57 | ||
57 | /* 8 to 16 kHz sample rate conversion */ | 58 | /* 8 to 16 kHz sample rate conversion */ |
58 | 59 | ||
59 | #define FDMDV_OS 2 /* oversampling rate */ | 60 | #define FDMDV_OS 2 /* oversampling rate */ |
60 | #define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */ | 61 | #define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */ |
61 | #define FDMDV_OS_TAPS_8K (FDMDV_OS_TAPS_16K/FDMDV_OS) /* number of OS filter taps at 8kHz */ | 62 | #define FDMDV_OS_TAPS_8K \ |
63 | (FDMDV_OS_TAPS_16K / FDMDV_OS) /* number of OS filter taps at 8kHz */ | ||
62 | 64 | ||
63 | /*---------------------------------------------------------------------------*\ | 65 | /*---------------------------------------------------------------------------*\ |
64 | 66 | ||
65 | GLOBALS | 67 | GLOBALS |
66 | 68 | ||
67 | \*---------------------------------------------------------------------------*/ | 69 | \*---------------------------------------------------------------------------*/ |
68 | 70 | ||
69 | /* 48 tap 600Hz low pass FIR filter coefficients */ | 71 | /* 48 tap 600Hz low pass FIR filter coefficients */ |
70 | 72 | ||
71 | const float nlp_fir[] = { | 73 | const float nlp_fir[] = { |
72 | -1.0818124e-03, | 74 | -1.0818124e-03, -1.1008344e-03, -9.2768838e-04, -4.2289438e-04, |
73 | -1.1008344e-03, | 75 | 5.5034190e-04, 2.0029849e-03, 3.7058509e-03, 5.1449415e-03, |
74 | -9.2768838e-04, | 76 | 5.5924666e-03, 4.3036754e-03, 8.0284511e-04, -4.8204610e-03, |
75 | -4.2289438e-04, | 77 | -1.1705810e-02, -1.8199275e-02, -2.2065282e-02, -2.0920610e-02, |
76 | 5.5034190e-04, | 78 | -1.2808831e-02, 3.2204775e-03, 2.6683811e-02, 5.5520624e-02, |
77 | 2.0029849e-03, | 79 | 8.6305944e-02, 1.1480192e-01, 1.3674206e-01, 1.4867556e-01, |
78 | 3.7058509e-03, | 80 | 1.4867556e-01, 1.3674206e-01, 1.1480192e-01, 8.6305944e-02, |
79 | 5.1449415e-03, | 81 | 5.5520624e-02, 2.6683811e-02, 3.2204775e-03, -1.2808831e-02, |
80 | 5.5924666e-03, | 82 | -2.0920610e-02, -2.2065282e-02, -1.8199275e-02, -1.1705810e-02, |
81 | 4.3036754e-03, | 83 | -4.8204610e-03, 8.0284511e-04, 4.3036754e-03, 5.5924666e-03, |
82 | 8.0284511e-04, | 84 | 5.1449415e-03, 3.7058509e-03, 2.0029849e-03, 5.5034190e-04, |
83 | -4.8204610e-03, | 85 | -4.2289438e-04, -9.2768838e-04, -1.1008344e-03, -1.0818124e-03}; |
84 | -1.1705810e-02, | ||
85 | -1.8199275e-02, | ||
86 | -2.2065282e-02, | ||
87 | -2.0920610e-02, | ||
88 | -1.2808831e-02, | ||
89 | 3.2204775e-03, | ||
90 | 2.6683811e-02, | ||
91 | 5.5520624e-02, | ||
92 | 8.6305944e-02, | ||
93 | 1.1480192e-01, | ||
94 | 1.3674206e-01, | ||
95 | 1.4867556e-01, | ||
96 | 1.4867556e-01, | ||
97 | 1.3674206e-01, | ||
98 | 1.1480192e-01, | ||
99 | 8.6305944e-02, | ||
100 | 5.5520624e-02, | ||
101 | 2.6683811e-02, | ||
102 | 3.2204775e-03, | ||
103 | -1.2808831e-02, | ||
104 | -2.0920610e-02, | ||
105 | -2.2065282e-02, | ||
106 | -1.8199275e-02, | ||
107 | -1.1705810e-02, | ||
108 | -4.8204610e-03, | ||
109 | 8.0284511e-04, | ||
110 | 4.3036754e-03, | ||
111 | 5.5924666e-03, | ||
112 | 5.1449415e-03, | ||
113 | 3.7058509e-03, | ||
114 | 2.0029849e-03, | ||
115 | 5.5034190e-04, | ||
116 | -4.2289438e-04, | ||
117 | -9.2768838e-04, | ||
118 | -1.1008344e-03, | ||
119 | -1.0818124e-03 | ||
120 | }; | ||
121 | 86 | ||
122 | typedef struct { | 87 | typedef struct { |
123 | int Fs; /* sample rate in Hz */ | 88 | int Fs; /* sample rate in Hz */ |
124 | int m; | 89 | int m; |
125 | float w[PMAX_M/DEC]; /* DFT window */ | 90 | float w[PMAX_M / DEC]; /* DFT window */ |
126 | float sq[PMAX_M]; /* squared speech samples */ | 91 | float sq[PMAX_M]; /* squared speech samples */ |
127 | float mem_x,mem_y; /* memory for notch filter */ | 92 | float mem_x, mem_y; /* memory for notch filter */ |
128 | float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */ | 93 | float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */ |
129 | codec2_fft_cfg fft_cfg; /* kiss FFT config */ | 94 | codec2_fft_cfg fft_cfg; /* kiss FFT config */ |
130 | float *Sn16k; /* Fs=16kHz input speech vector */ | 95 | float *Sn16k; /* Fs=16kHz input speech vector */ |
131 | FILE *f; | 96 | FILE *f; |
132 | } NLP; | 97 | } NLP; |
133 | 98 | ||
134 | float post_process_sub_multiples(COMP Fw[], | 99 | float post_process_sub_multiples(COMP Fw[], int pmin, int pmax, float gmax, |
135 | int pmin, int pmax, float gmax, int gmax_bin, | 100 | int gmax_bin, float *prev_f0); |
136 | float *prev_f0); | ||
137 | static void fdmdv_16_to_8(float out8k[], float in16k[], int n); | 101 | static void fdmdv_16_to_8(float out8k[], float in16k[], int n); |
138 | 102 | ||
139 | /*---------------------------------------------------------------------------*\ | 103 | /*---------------------------------------------------------------------------*\ |
@@ -144,56 +108,53 @@ static void fdmdv_16_to_8(float out8k[], float in16k[], int n); | |||
144 | 108 | ||
145 | \*---------------------------------------------------------------------------*/ | 109 | \*---------------------------------------------------------------------------*/ |
146 | 110 | ||
147 | void *nlp_create(C2CONST *c2const) | 111 | void *nlp_create(C2CONST *c2const) { |
148 | { | 112 | NLP *nlp; |
149 | NLP *nlp; | 113 | int i; |
150 | int i; | 114 | int m = c2const->m_pitch; |
151 | int m = c2const->m_pitch; | 115 | int Fs = c2const->Fs; |
152 | int Fs = c2const->Fs; | 116 | |
117 | nlp = (NLP *)malloc(sizeof(NLP)); | ||
118 | if (nlp == NULL) return NULL; | ||
153 | 119 | ||
154 | nlp = (NLP*)malloc(sizeof(NLP)); | 120 | assert((Fs == 8000) || (Fs == 16000)); |
155 | if (nlp == NULL) | 121 | nlp->Fs = Fs; |
156 | return NULL; | ||
157 | 122 | ||
158 | assert((Fs == 8000) || (Fs == 16000)); | 123 | nlp->m = m; |
159 | nlp->Fs = Fs; | ||
160 | 124 | ||
161 | nlp->m = m; | 125 | /* if running at 16kHz allocate storage for decimating filter memory */ |
162 | 126 | ||
163 | /* if running at 16kHz allocate storage for decimating filter memory */ | 127 | if (Fs == 16000) { |
128 | nlp->Sn16k = | ||
129 | (float *)malloc(sizeof(float) * (FDMDV_OS_TAPS_16K + c2const->n_samp)); | ||
130 | for (i = 0; i < FDMDV_OS_TAPS_16K; i++) { | ||
131 | nlp->Sn16k[i] = 0.0; | ||
132 | } | ||
133 | if (nlp->Sn16k == NULL) { | ||
134 | free(nlp); | ||
135 | return NULL; | ||
136 | } | ||
164 | 137 | ||
165 | if (Fs == 16000) { | 138 | /* most processing occurs at 8 kHz sample rate so halve m */ |
166 | nlp->Sn16k = (float*)malloc(sizeof(float)*(FDMDV_OS_TAPS_16K + c2const->n_samp)); | ||
167 | for(i=0; i<FDMDV_OS_TAPS_16K; i++) { | ||
168 | nlp->Sn16k[i] = 0.0; | ||
169 | } | ||
170 | if (nlp->Sn16k == NULL) { | ||
171 | free(nlp); | ||
172 | return NULL; | ||
173 | } | ||
174 | 139 | ||
175 | /* most processing occurs at 8 kHz sample rate so halve m */ | 140 | m /= 2; |
141 | } | ||
176 | 142 | ||
177 | m /= 2; | 143 | assert(m <= PMAX_M); |
178 | } | ||
179 | 144 | ||
180 | assert(m <= PMAX_M); | 145 | for (i = 0; i < m / DEC; i++) { |
181 | 146 | nlp->w[i] = 0.5 - 0.5 * cosf(2 * PI * i / (m / DEC - 1)); | |
182 | for(i=0; i<m/DEC; i++) { | 147 | } |
183 | nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1)); | ||
184 | } | ||
185 | 148 | ||
186 | for(i=0; i<PMAX_M; i++) | 149 | for (i = 0; i < PMAX_M; i++) nlp->sq[i] = 0.0; |
187 | nlp->sq[i] = 0.0; | 150 | nlp->mem_x = 0.0; |
188 | nlp->mem_x = 0.0; | 151 | nlp->mem_y = 0.0; |
189 | nlp->mem_y = 0.0; | 152 | for (i = 0; i < NLP_NTAP; i++) nlp->mem_fir[i] = 0.0; |
190 | for(i=0; i<NLP_NTAP; i++) | ||
191 | nlp->mem_fir[i] = 0.0; | ||
192 | 153 | ||
193 | nlp->fft_cfg = codec2_fft_alloc (PE_FFT_SIZE, 0, NULL, NULL); | 154 | nlp->fft_cfg = codec2_fft_alloc(PE_FFT_SIZE, 0, NULL, NULL); |
194 | assert(nlp->fft_cfg != NULL); | 155 | assert(nlp->fft_cfg != NULL); |
195 | 156 | ||
196 | return (void*)nlp; | 157 | return (void *)nlp; |
197 | } | 158 | } |
198 | 159 | ||
199 | /*---------------------------------------------------------------------------*\ | 160 | /*---------------------------------------------------------------------------*\ |
@@ -204,17 +165,16 @@ void *nlp_create(C2CONST *c2const) | |||
204 | 165 | ||
205 | \*---------------------------------------------------------------------------*/ | 166 | \*---------------------------------------------------------------------------*/ |
206 | 167 | ||
207 | void nlp_destroy(void *nlp_state) | 168 | void nlp_destroy(void *nlp_state) { |
208 | { | 169 | NLP *nlp; |
209 | NLP *nlp; | 170 | assert(nlp_state != NULL); |
210 | assert(nlp_state != NULL); | 171 | nlp = (NLP *)nlp_state; |
211 | nlp = (NLP*)nlp_state; | ||
212 | 172 | ||
213 | codec2_fft_free(nlp->fft_cfg); | 173 | codec2_fft_free(nlp->fft_cfg); |
214 | if (nlp->Fs == 16000) { | 174 | if (nlp->Fs == 16000) { |
215 | free(nlp->Sn16k); | 175 | free(nlp->Sn16k); |
216 | } | 176 | } |
217 | free(nlp_state); | 177 | free(nlp_state); |
218 | } | 178 | } |
219 | 179 | ||
220 | /*---------------------------------------------------------------------------*\ | 180 | /*---------------------------------------------------------------------------*\ |
@@ -248,162 +208,157 @@ void nlp_destroy(void *nlp_state) | |||
248 | \*---------------------------------------------------------------------------*/ | 208 | \*---------------------------------------------------------------------------*/ |
249 | 209 | ||
250 | float nlp( | 210 | float nlp( |
251 | void *nlp_state, | 211 | void *nlp_state, float Sn[], /* input speech vector */ |
252 | float Sn[], /* input speech vector */ | 212 | int n, /* frames shift (no. new samples in Sn[]) */ |
253 | int n, /* frames shift (no. new samples in Sn[]) */ | 213 | float *pitch, /* estimated pitch period in samples at current Fs */ |
254 | float *pitch, /* estimated pitch period in samples at current Fs */ | 214 | COMP Sw[], /* Freq domain version of Sn[] */ |
255 | COMP Sw[], /* Freq domain version of Sn[] */ | 215 | float W[], /* Freq domain window */ |
256 | float W[], /* Freq domain window */ | 216 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ |
257 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ | 217 | ) { |
258 | ) | 218 | NLP *nlp; |
259 | { | 219 | float notch; /* current notch filter output */ |
260 | NLP *nlp; | 220 | COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */ |
261 | float notch; /* current notch filter output */ | 221 | float gmax; |
262 | COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */ | 222 | int gmax_bin; |
263 | float gmax; | 223 | int m, i, j; |
264 | int gmax_bin; | 224 | float best_f0; |
265 | int m, i, j; | 225 | PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem); |
266 | float best_f0; | 226 | |
267 | PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem); | 227 | assert(nlp_state != NULL); |
268 | 228 | nlp = (NLP *)nlp_state; | |
269 | assert(nlp_state != NULL); | 229 | m = nlp->m; |
270 | nlp = (NLP*)nlp_state; | 230 | |
271 | m = nlp->m; | 231 | /* Square, notch filter at DC, and LP filter vector */ |
272 | 232 | ||
273 | /* Square, notch filter at DC, and LP filter vector */ | 233 | /* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for |
274 | 234 | Fs = 8kHz. The decimating filter introduces about 3ms of delay, | |
275 | /* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for | 235 | that shouldn't be a problem as pitch changes slowly. */ |
276 | Fs = 8kHz. The decimating filter introduces about 3ms of delay, | 236 | |
277 | that shouldn't be a problem as pitch changes slowly. */ | 237 | if (nlp->Fs == 8000) { |
278 | 238 | /* Square latest input samples */ | |
279 | if (nlp->Fs == 8000) { | 239 | |
280 | /* Square latest input samples */ | 240 | for (i = m - n; i < m; i++) { |
281 | 241 | nlp->sq[i] = Sn[i] * Sn[i]; | |
282 | for(i=m-n; i<m; i++) { | ||
283 | nlp->sq[i] = Sn[i]*Sn[i]; | ||
284 | } | ||
285 | } | 242 | } |
286 | else { | 243 | } else { |
287 | assert(nlp->Fs == 16000); | 244 | assert(nlp->Fs == 16000); |
288 | |||
289 | /* re-sample at 8 KHz */ | ||
290 | |||
291 | for(i=0; i<n; i++) { | ||
292 | nlp->Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i]; | ||
293 | } | ||
294 | |||
295 | m /= 2; n /= 2; | ||
296 | 245 | ||
297 | float Sn8k[n]; | 246 | /* re-sample at 8 KHz */ |
298 | fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n); | ||
299 | 247 | ||
300 | /* Square latest input samples */ | 248 | for (i = 0; i < n; i++) { |
301 | 249 | nlp->Sn16k[FDMDV_OS_TAPS_16K + i] = Sn[m - n + i]; | |
302 | for(i=m-n, j=0; i<m; i++, j++) { | ||
303 | nlp->sq[i] = Sn8k[j]*Sn8k[j]; | ||
304 | } | ||
305 | assert(j <= n); | ||
306 | } | ||
307 | //fprintf(stderr, "n: %d m: %d\n", n, m); | ||
308 | |||
309 | PROFILE_SAMPLE(start); | ||
310 | |||
311 | for(i=m-n; i<m; i++) { /* notch filter at DC */ | ||
312 | notch = nlp->sq[i] - nlp->mem_x; | ||
313 | notch += COEFF*nlp->mem_y; | ||
314 | nlp->mem_x = nlp->sq[i]; | ||
315 | nlp->mem_y = notch; | ||
316 | nlp->sq[i] = notch + 1.0; /* With 0 input vectors to codec, | ||
317 | kiss_fft() would take a long | ||
318 | time to execute when running in | ||
319 | real time. Problem was traced | ||
320 | to kiss_fft function call in | ||
321 | this function. Adding this small | ||
322 | constant fixed problem. Not | ||
323 | exactly sure why. */ | ||
324 | } | 250 | } |
325 | 251 | ||
326 | PROFILE_SAMPLE_AND_LOG(tnotch, start, " square and notch"); | 252 | m /= 2; |
253 | n /= 2; | ||
327 | 254 | ||
328 | for(i=m-n; i<m; i++) { /* FIR filter vector */ | 255 | float Sn8k[n]; |
256 | fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n); | ||
329 | 257 | ||
330 | for(j=0; j<NLP_NTAP-1; j++) | 258 | /* Square latest input samples */ |
331 | nlp->mem_fir[j] = nlp->mem_fir[j+1]; | ||
332 | nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i]; | ||
333 | 259 | ||
334 | nlp->sq[i] = 0.0; | 260 | for (i = m - n, j = 0; i < m; i++, j++) { |
335 | for(j=0; j<NLP_NTAP; j++) | 261 | nlp->sq[i] = Sn8k[j] * Sn8k[j]; |
336 | nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j]; | ||
337 | } | 262 | } |
338 | 263 | assert(j <= n); | |
339 | PROFILE_SAMPLE_AND_LOG(filter, tnotch, " filter"); | 264 | } |
340 | 265 | // fprintf(stderr, "n: %d m: %d\n", n, m); | |
341 | /* Decimate and DFT */ | 266 | |
342 | 267 | PROFILE_SAMPLE(start); | |
343 | for(i=0; i<PE_FFT_SIZE; i++) { | 268 | |
344 | Fw[i].real = 0.0; | 269 | for (i = m - n; i < m; i++) { /* notch filter at DC */ |
345 | Fw[i].imag = 0.0; | 270 | notch = nlp->sq[i] - nlp->mem_x; |
346 | } | 271 | notch += COEFF * nlp->mem_y; |
347 | for(i=0; i<m/DEC; i++) { | 272 | nlp->mem_x = nlp->sq[i]; |
348 | Fw[i].real = nlp->sq[i*DEC]*nlp->w[i]; | 273 | nlp->mem_y = notch; |
349 | } | 274 | nlp->sq[i] = notch + 1.0; /* With 0 input vectors to codec, |
350 | PROFILE_SAMPLE_AND_LOG(window, filter, " window"); | 275 | kiss_fft() would take a long |
351 | #ifdef DUMP | 276 | time to execute when running in |
352 | dump_dec(Fw); | 277 | real time. Problem was traced |
353 | #endif | 278 | to kiss_fft function call in |
354 | 279 | this function. Adding this small | |
355 | // FIXME: check if this can be converted to a real fft | 280 | constant fixed problem. Not |
356 | // since all imag inputs are 0 | 281 | exactly sure why. */ |
357 | codec2_fft_inplace(nlp->fft_cfg, Fw); | 282 | } |
358 | PROFILE_SAMPLE_AND_LOG(fft, window, " fft"); | 283 | |
359 | 284 | PROFILE_SAMPLE_AND_LOG(tnotch, start, " square and notch"); | |
360 | for(i=0; i<PE_FFT_SIZE; i++) | 285 | |
361 | Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag; | 286 | for (i = m - n; i < m; i++) { /* FIR filter vector */ |
362 | 287 | ||
363 | PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq"); | 288 | for (j = 0; j < NLP_NTAP - 1; j++) nlp->mem_fir[j] = nlp->mem_fir[j + 1]; |
364 | #ifdef DUMP | 289 | nlp->mem_fir[NLP_NTAP - 1] = nlp->sq[i]; |
365 | dump_sq(m, nlp->sq); | 290 | |
366 | dump_Fw(Fw); | 291 | nlp->sq[i] = 0.0; |
367 | #endif | 292 | for (j = 0; j < NLP_NTAP; j++) nlp->sq[i] += nlp->mem_fir[j] * nlp_fir[j]; |
368 | 293 | } | |
369 | /* todo: express everything in f0, as pitch in samples is dep on Fs */ | 294 | |
370 | 295 | PROFILE_SAMPLE_AND_LOG(filter, tnotch, " filter"); | |
371 | int pmin = floor(SAMPLE_RATE*P_MIN_S); | 296 | |
372 | int pmax = floor(SAMPLE_RATE*P_MAX_S); | 297 | /* Decimate and DFT */ |
373 | 298 | ||
374 | /* find global peak */ | 299 | for (i = 0; i < PE_FFT_SIZE; i++) { |
375 | 300 | Fw[i].real = 0.0; | |
376 | gmax = 0.0; | 301 | Fw[i].imag = 0.0; |
377 | gmax_bin = PE_FFT_SIZE*DEC/pmax; | 302 | } |
378 | for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) { | 303 | for (i = 0; i < m / DEC; i++) { |
379 | if (Fw[i].real > gmax) { | 304 | Fw[i].real = nlp->sq[i * DEC] * nlp->w[i]; |
380 | gmax = Fw[i].real; | 305 | } |
381 | gmax_bin = i; | 306 | PROFILE_SAMPLE_AND_LOG(window, filter, " window"); |
382 | } | 307 | #ifdef DUMP |
308 | dump_dec(Fw); | ||
309 | #endif | ||
310 | |||
311 | // FIXME: check if this can be converted to a real fft | ||
312 | // since all imag inputs are 0 | ||
313 | codec2_fft_inplace(nlp->fft_cfg, Fw); | ||
314 | PROFILE_SAMPLE_AND_LOG(fft, window, " fft"); | ||
315 | |||
316 | for (i = 0; i < PE_FFT_SIZE; i++) | ||
317 | Fw[i].real = Fw[i].real * Fw[i].real + Fw[i].imag * Fw[i].imag; | ||
318 | |||
319 | PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq"); | ||
320 | #ifdef DUMP | ||
321 | dump_sq(m, nlp->sq); | ||
322 | dump_Fw(Fw); | ||
323 | #endif | ||
324 | |||
325 | /* todo: express everything in f0, as pitch in samples is dep on Fs */ | ||
326 | |||
327 | int pmin = floor(SAMPLE_RATE * P_MIN_S); | ||
328 | int pmax = floor(SAMPLE_RATE * P_MAX_S); | ||
329 | |||
330 | /* find global peak */ | ||
331 | |||
332 | gmax = 0.0; | ||
333 | gmax_bin = PE_FFT_SIZE * DEC / pmax; | ||
334 | for (i = PE_FFT_SIZE * DEC / pmax; i <= PE_FFT_SIZE * DEC / pmin; i++) { | ||
335 | if (Fw[i].real > gmax) { | ||
336 | gmax = Fw[i].real; | ||
337 | gmax_bin = i; | ||
383 | } | 338 | } |
339 | } | ||
384 | 340 | ||
385 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); | 341 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); |
386 | 342 | ||
387 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); | 343 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); |
388 | 344 | ||
389 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); | 345 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); |
390 | 346 | ||
391 | /* Shift samples in buffer to make room for new samples */ | 347 | /* Shift samples in buffer to make room for new samples */ |
392 | 348 | ||
393 | for(i=0; i<m-n; i++) | 349 | for (i = 0; i < m - n; i++) nlp->sq[i] = nlp->sq[i + n]; |
394 | nlp->sq[i] = nlp->sq[i+n]; | ||
395 | 350 | ||
396 | /* return pitch period in samples and F0 estimate */ | 351 | /* return pitch period in samples and F0 estimate */ |
397 | 352 | ||
398 | *pitch = (float)nlp->Fs/best_f0; | 353 | *pitch = (float)nlp->Fs / best_f0; |
399 | 354 | ||
400 | PROFILE_SAMPLE_AND_LOG2(shiftmem, " shift mem"); | 355 | PROFILE_SAMPLE_AND_LOG2(shiftmem, " shift mem"); |
401 | 356 | ||
402 | PROFILE_SAMPLE_AND_LOG2(start, " nlp int"); | 357 | PROFILE_SAMPLE_AND_LOG2(start, " nlp int"); |
403 | 358 | ||
404 | *prev_f0 = best_f0; | 359 | *prev_f0 = best_f0; |
405 | 360 | ||
406 | return(best_f0); | 361 | return (best_f0); |
407 | } | 362 | } |
408 | 363 | ||
409 | /*---------------------------------------------------------------------------*\ | 364 | /*---------------------------------------------------------------------------*\ |
@@ -427,59 +382,55 @@ float nlp( | |||
427 | 382 | ||
428 | \*---------------------------------------------------------------------------*/ | 383 | \*---------------------------------------------------------------------------*/ |
429 | 384 | ||
430 | float post_process_sub_multiples(COMP Fw[], | 385 | float post_process_sub_multiples(COMP Fw[], int pmin, int pmax, float gmax, |
431 | int pmin, int pmax, float gmax, int gmax_bin, | 386 | int gmax_bin, float *prev_f0) { |
432 | float *prev_f0) | 387 | int min_bin, cmax_bin; |
433 | { | 388 | int mult; |
434 | int min_bin, cmax_bin; | 389 | float thresh, best_f0; |
435 | int mult; | 390 | int b, bmin, bmax, lmax_bin; |
436 | float thresh, best_f0; | 391 | float lmax; |
437 | int b, bmin, bmax, lmax_bin; | 392 | int prev_f0_bin; |
438 | float lmax; | 393 | |
439 | int prev_f0_bin; | 394 | /* post process estimate by searching submultiples */ |
440 | 395 | ||
441 | /* post process estimate by searching submultiples */ | 396 | mult = 2; |
442 | 397 | min_bin = PE_FFT_SIZE * DEC / pmax; | |
443 | mult = 2; | 398 | cmax_bin = gmax_bin; |
444 | min_bin = PE_FFT_SIZE*DEC/pmax; | 399 | prev_f0_bin = *prev_f0 * (PE_FFT_SIZE * DEC) / SAMPLE_RATE; |
445 | cmax_bin = gmax_bin; | 400 | |
446 | prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE; | 401 | while (gmax_bin / mult >= min_bin) { |
447 | 402 | b = gmax_bin / mult; /* determine search interval */ | |
448 | while(gmax_bin/mult >= min_bin) { | 403 | bmin = 0.8 * b; |
449 | 404 | bmax = 1.2 * b; | |
450 | b = gmax_bin/mult; /* determine search interval */ | 405 | if (bmin < min_bin) bmin = min_bin; |
451 | bmin = 0.8*b; | 406 | |
452 | bmax = 1.2*b; | 407 | /* lower threshold to favour previous frames pitch estimate, |
453 | if (bmin < min_bin) | 408 | this is a form of pitch tracking */ |
454 | bmin = min_bin; | 409 | |
455 | 410 | if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax)) | |
456 | /* lower threshold to favour previous frames pitch estimate, | 411 | thresh = CNLP * 0.5 * gmax; |
457 | this is a form of pitch tracking */ | 412 | else |
458 | 413 | thresh = CNLP * gmax; | |
459 | if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax)) | 414 | |
460 | thresh = CNLP*0.5*gmax; | 415 | lmax = 0; |
461 | else | 416 | lmax_bin = bmin; |
462 | thresh = CNLP*gmax; | 417 | for (b = bmin; b <= bmax; b++) /* look for maximum in interval */ |
463 | 418 | if (Fw[b].real > lmax) { | |
464 | lmax = 0; | 419 | lmax = Fw[b].real; |
465 | lmax_bin = bmin; | 420 | lmax_bin = b; |
466 | for (b=bmin; b<=bmax; b++) /* look for maximum in interval */ | 421 | } |
467 | if (Fw[b].real > lmax) { | 422 | |
468 | lmax = Fw[b].real; | 423 | if (lmax > thresh) |
469 | lmax_bin = b; | 424 | if ((lmax > Fw[lmax_bin - 1].real) && (lmax > Fw[lmax_bin + 1].real)) { |
470 | } | 425 | cmax_bin = lmax_bin; |
471 | 426 | } | |
472 | if (lmax > thresh) | 427 | |
473 | if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) { | 428 | mult++; |
474 | cmax_bin = lmax_bin; | 429 | } |
475 | } | 430 | |
476 | 431 | best_f0 = (float)cmax_bin * SAMPLE_RATE / (PE_FFT_SIZE * DEC); | |
477 | mult++; | 432 | |
478 | } | 433 | return best_f0; |
479 | |||
480 | best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC); | ||
481 | |||
482 | return best_f0; | ||
483 | } | 434 | } |
484 | 435 | ||
485 | /*---------------------------------------------------------------------------*\ | 436 | /*---------------------------------------------------------------------------*\ |
@@ -502,20 +453,18 @@ float post_process_sub_multiples(COMP Fw[], | |||
502 | 453 | ||
503 | \*---------------------------------------------------------------------------*/ | 454 | \*---------------------------------------------------------------------------*/ |
504 | 455 | ||
505 | static void fdmdv_16_to_8(float out8k[], float in16k[], int n) | 456 | static void fdmdv_16_to_8(float out8k[], float in16k[], int n) { |
506 | { | 457 | float acc; |
507 | float acc; | 458 | int i, j, k; |
508 | int i,j,k; | ||
509 | 459 | ||
510 | for(i=0, k=0; k<n; i+=FDMDV_OS, k++) { | 460 | for (i = 0, k = 0; k < n; i += FDMDV_OS, k++) { |
511 | acc = 0.0; | 461 | acc = 0.0; |
512 | for(j=0; j<FDMDV_OS_TAPS_16K; j++) | 462 | for (j = 0; j < FDMDV_OS_TAPS_16K; j++) |
513 | acc += fdmdv_os_filter[j]*in16k[i-j]; | 463 | acc += fdmdv_os_filter[j] * in16k[i - j]; |
514 | out8k[k] = acc; | 464 | out8k[k] = acc; |
515 | } | 465 | } |
516 | 466 | ||
517 | /* update filter memory */ | 467 | /* update filter memory */ |
518 | 468 | ||
519 | for(i=-FDMDV_OS_TAPS_16K; i<0; i++) | 469 | for (i = -FDMDV_OS_TAPS_16K; i < 0; i++) in16k[i] = in16k[i + n * FDMDV_OS]; |
520 | in16k[i] = in16k[i + n*FDMDV_OS]; | ||
521 | } | 470 | } |