diff options
author | erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> | 2019-07-04 23:26:09 +0200 |
---|---|---|
committer | erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> | 2019-07-04 23:26:09 +0200 |
commit | f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch) | |
tree | 45556e6104242d4702689760433d7321ae74ec17 /codec2.c |
stripdown of version 0.9
Diffstat (limited to 'codec2.c')
-rw-r--r-- | codec2.c | 2721 |
1 files changed, 2721 insertions, 0 deletions
diff --git a/codec2.c b/codec2.c new file mode 100644 index 0000000..840fe21 --- /dev/null +++ b/codec2.c | |||
@@ -0,0 +1,2721 @@ | |||
1 | /*---------------------------------------------------------------------------*\ | ||
2 | |||
3 | FILE........: codec2.c | ||
4 | AUTHOR......: David Rowe | ||
5 | DATE CREATED: 21/8/2010 | ||
6 | |||
7 | Codec2 fully quantised encoder and decoder functions. If you want to use | ||
8 | codec2, the codec2_xxx functions are for you. | ||
9 | |||
10 | \*---------------------------------------------------------------------------*/ | ||
11 | |||
12 | /* | ||
13 | Copyright (C) 2010 David Rowe | ||
14 | |||
15 | All rights reserved. | ||
16 | |||
17 | This program is free software; you can redistribute it and/or modify | ||
18 | it under the terms of the GNU Lesser General Public License version 2.1, as | ||
19 | published by the Free Software Foundation. This program is | ||
20 | distributed in the hope that it will be useful, but WITHOUT ANY | ||
21 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
22 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | ||
23 | License for more details. | ||
24 | |||
25 | You should have received a copy of the GNU Lesser General Public License | ||
26 | along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
27 | */ | ||
28 | |||
29 | #include <assert.h> | ||
30 | #include <stdio.h> | ||
31 | #include <stdlib.h> | ||
32 | #include <string.h> | ||
33 | #include <math.h> | ||
34 | |||
35 | #include "defines.h" | ||
36 | #include "codec2_fft.h" | ||
37 | #include "sine.h" | ||
38 | #include "nlp.h" | ||
39 | #include "dump.h" | ||
40 | #include "lpc.h" | ||
41 | #include "quantise.h" | ||
42 | #include "phase.h" | ||
43 | #include "interp.h" | ||
44 | #include "postfilter.h" | ||
45 | #include "codec2.h" | ||
46 | #include "lsp.h" | ||
47 | #include "newamp2.h" | ||
48 | #include "codec2_internal.h" | ||
49 | #include "machdep.h" | ||
50 | #include "bpf.h" | ||
51 | #include "bpfb.h" | ||
52 | #include "c2wideband.h" | ||
53 | |||
54 | #include "debug_alloc.h" | ||
55 | |||
56 | /*---------------------------------------------------------------------------* \ | ||
57 | |||
58 | FUNCTION HEADERS | ||
59 | |||
60 | \*---------------------------------------------------------------------------*/ | ||
61 | |||
62 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]); | ||
63 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, | ||
64 | COMP Aw[], float gain); | ||
65 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
66 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
67 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
68 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
69 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
70 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
71 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
72 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
73 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
74 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est); | ||
75 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
76 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
77 | void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
78 | void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
79 | void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
80 | void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
81 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
82 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
83 | void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]); | ||
84 | void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
85 | void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits); | ||
86 | static void ear_protection(float in_out[], int n); | ||
87 | |||
88 | |||
89 | |||
90 | /*---------------------------------------------------------------------------*\ | ||
91 | |||
92 | FUNCTIONS | ||
93 | |||
94 | \*---------------------------------------------------------------------------*/ | ||
95 | |||
96 | /*---------------------------------------------------------------------------*\ | ||
97 | |||
98 | FUNCTION....: codec2_create | ||
99 | AUTHOR......: David Rowe | ||
100 | DATE CREATED: 21/8/2010 | ||
101 | |||
102 | Create and initialise an instance of the codec. Returns a pointer | ||
103 | to the codec states or NULL on failure. One set of states is | ||
104 | sufficient for a full duplex codec (i.e. an encoder and decoder). | ||
105 | You don't need separate states for encoders and decoders. See | ||
106 | c2enc.c and c2dec.c for examples. | ||
107 | |||
108 | \*---------------------------------------------------------------------------*/ | ||
109 | |||
110 | |||
111 | //Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior ! | ||
112 | struct CODEC2 * codec2_create(int mode) | ||
113 | { | ||
114 | struct CODEC2 *c2; | ||
115 | int i,l; | ||
116 | |||
117 | #ifndef CORTEX_M4 | ||
118 | if (( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode)) || | ||
119 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode)) ) { | ||
120 | return NULL; | ||
121 | } | ||
122 | #endif | ||
123 | |||
124 | c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2)); | ||
125 | if (c2 == NULL) | ||
126 | return NULL; | ||
127 | |||
128 | c2->mode = mode; | ||
129 | |||
130 | /* store constants in a few places for convenience */ | ||
131 | |||
132 | if( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0){ | ||
133 | c2->c2const = c2const_create(8000, N_S); | ||
134 | }else{ | ||
135 | c2->c2const = c2const_create(16000, N_S); | ||
136 | } | ||
137 | c2->Fs = c2->c2const.Fs; | ||
138 | int n_samp = c2->n_samp = c2->c2const.n_samp; | ||
139 | int m_pitch = c2->m_pitch = c2->c2const.m_pitch; | ||
140 | |||
141 | c2->Pn = (float*)MALLOC(2*n_samp*sizeof(float)); | ||
142 | if (c2->Pn == NULL) { | ||
143 | return NULL; | ||
144 | } | ||
145 | c2->Sn_ = (float*)MALLOC(2*n_samp*sizeof(float)); | ||
146 | if (c2->Sn_ == NULL) { | ||
147 | FREE(c2->Pn); | ||
148 | return NULL; | ||
149 | } | ||
150 | c2->w = (float*)MALLOC(m_pitch*sizeof(float)); | ||
151 | if (c2->w == NULL) { | ||
152 | FREE(c2->Pn); | ||
153 | FREE(c2->Sn_); | ||
154 | return NULL; | ||
155 | } | ||
156 | c2->Sn = (float*)MALLOC(m_pitch*sizeof(float)); | ||
157 | if (c2->Sn == NULL) { | ||
158 | FREE(c2->Pn); | ||
159 | FREE(c2->Sn_); | ||
160 | FREE(c2->w); | ||
161 | return NULL; | ||
162 | } | ||
163 | |||
164 | for(i=0; i<m_pitch; i++) | ||
165 | c2->Sn[i] = 1.0; | ||
166 | c2->hpf_states[0] = c2->hpf_states[1] = 0.0; | ||
167 | for(i=0; i<2*n_samp; i++) | ||
168 | c2->Sn_[i] = 0; | ||
169 | c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); | ||
170 | c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); | ||
171 | make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W); | ||
172 | make_synthesis_window(&c2->c2const, c2->Pn); | ||
173 | c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); | ||
174 | quantise_init(); | ||
175 | c2->prev_f0_enc = 1/P_MAX_S; | ||
176 | c2->bg_est = 0.0; | ||
177 | c2->ex_phase = 0.0; | ||
178 | |||
179 | for(l=1; l<=MAX_AMP; l++) | ||
180 | c2->prev_model_dec.A[l] = 0.0; | ||
181 | c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max; | ||
182 | c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo; | ||
183 | c2->prev_model_dec.voiced = 0; | ||
184 | |||
185 | for(i=0; i<LPC_ORD; i++) { | ||
186 | c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1); | ||
187 | } | ||
188 | c2->prev_e_dec = 1; | ||
189 | |||
190 | c2->nlp = nlp_create(&c2->c2const); | ||
191 | if (c2->nlp == NULL) { | ||
192 | return NULL; | ||
193 | } | ||
194 | |||
195 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode)) | ||
196 | c2->gray = 0; // natural binary better for trellis decoding (hopefully added later) | ||
197 | else | ||
198 | c2->gray = 1; | ||
199 | |||
200 | c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA; | ||
201 | |||
202 | c2->xq_enc[0] = c2->xq_enc[1] = 0.0; | ||
203 | c2->xq_dec[0] = c2->xq_dec[1] = 0.0; | ||
204 | |||
205 | c2->smoothing = 0; | ||
206 | c2->se = 0.0; c2->nse = 0; | ||
207 | c2->user_rate_K_vec_no_mean_ = NULL; | ||
208 | c2->post_filter_en = 1; | ||
209 | |||
210 | c2->bpf_buf = (float*)MALLOC(sizeof(float)*(BPF_N+4*c2->n_samp)); | ||
211 | assert(c2->bpf_buf != NULL); | ||
212 | for(i=0; i<BPF_N+4*c2->n_samp; i++) | ||
213 | c2->bpf_buf[i] = 0.0; | ||
214 | |||
215 | c2->softdec = NULL; | ||
216 | |||
217 | /* newamp1 initialisation */ | ||
218 | |||
219 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | ||
220 | mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) ); | ||
221 | int k; | ||
222 | for(k=0; k<NEWAMP1_K; k++) { | ||
223 | c2->prev_rate_K_vec_[k] = 0.0; | ||
224 | } | ||
225 | c2->Wo_left = 0.0; | ||
226 | c2->voicing_left = 0;; | ||
227 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); | ||
228 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); | ||
229 | } | ||
230 | |||
231 | #ifndef CORTEX_M4 | ||
232 | /* newamp2 initialisation */ | ||
233 | |||
234 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | ||
235 | n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K); | ||
236 | int k; | ||
237 | for(k=0; k<NEWAMP2_K; k++) { | ||
238 | c2->n2_prev_rate_K_vec_[k] = 0.0; | ||
239 | } | ||
240 | c2->Wo_left = 0.0; | ||
241 | c2->voicing_left = 0;; | ||
242 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | ||
243 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | ||
244 | } | ||
245 | /* newamp2 PWB initialisation */ | ||
246 | |||
247 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
248 | n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K); | ||
249 | int k; | ||
250 | for(k=0; k<NEWAMP2_16K_K; k++) { | ||
251 | c2->n2_pwb_prev_rate_K_vec_[k] = 0.0; | ||
252 | } | ||
253 | c2->Wo_left = 0.0; | ||
254 | c2->voicing_left = 0;; | ||
255 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | ||
256 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | ||
257 | } | ||
258 | #endif | ||
259 | |||
260 | c2->fmlfeat = NULL; | ||
261 | |||
262 | // make sure that one of the two decode function pointers is empty | ||
263 | // for the encode function pointer this is not required since we always set it | ||
264 | // to a meaningful value | ||
265 | |||
266 | c2->decode = NULL; | ||
267 | c2->decode_ber = NULL; | ||
268 | |||
269 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | ||
270 | { | ||
271 | c2->encode = codec2_encode_3200; | ||
272 | c2->decode = codec2_decode_3200; | ||
273 | } | ||
274 | |||
275 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | ||
276 | { | ||
277 | c2->encode = codec2_encode_2400; | ||
278 | c2->decode = codec2_decode_2400; | ||
279 | } | ||
280 | |||
281 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | ||
282 | { | ||
283 | c2->encode = codec2_encode_1600; | ||
284 | c2->decode = codec2_decode_1600; | ||
285 | } | ||
286 | |||
287 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | ||
288 | { | ||
289 | c2->encode = codec2_encode_1400; | ||
290 | c2->decode = codec2_decode_1400; | ||
291 | } | ||
292 | |||
293 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
294 | { | ||
295 | c2->encode = codec2_encode_1300; | ||
296 | c2->decode_ber = codec2_decode_1300; | ||
297 | } | ||
298 | |||
299 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
300 | { | ||
301 | c2->encode = codec2_encode_1200; | ||
302 | c2->decode = codec2_decode_1200; | ||
303 | } | ||
304 | |||
305 | #ifndef CORTEX_M4 | ||
306 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
307 | { | ||
308 | c2->encode = codec2_encode_700; | ||
309 | c2->decode = codec2_decode_700; | ||
310 | } | ||
311 | |||
312 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
313 | { | ||
314 | c2->encode = codec2_encode_700b; | ||
315 | c2->decode = codec2_decode_700b; | ||
316 | } | ||
317 | #endif | ||
318 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
319 | { | ||
320 | c2->encode = codec2_encode_700c; | ||
321 | c2->decode = codec2_decode_700c; | ||
322 | } | ||
323 | #ifndef CORTEX_M4 | ||
324 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
325 | { | ||
326 | c2->encode = codec2_encode_450; | ||
327 | c2->decode = codec2_decode_450; | ||
328 | } | ||
329 | |||
330 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
331 | { | ||
332 | //Encode PWB doesnt make sense | ||
333 | c2->encode = codec2_encode_450; | ||
334 | c2->decode = codec2_decode_450pwb; | ||
335 | } | ||
336 | |||
337 | #endif | ||
338 | |||
339 | return c2; | ||
340 | } | ||
341 | |||
342 | /*---------------------------------------------------------------------------*\ | ||
343 | |||
344 | FUNCTION....: codec2_destroy | ||
345 | AUTHOR......: David Rowe | ||
346 | DATE CREATED: 21/8/2010 | ||
347 | |||
348 | Destroy an instance of the codec. | ||
349 | |||
350 | \*---------------------------------------------------------------------------*/ | ||
351 | |||
352 | void codec2_destroy(struct CODEC2 *c2) | ||
353 | { | ||
354 | assert(c2 != NULL); | ||
355 | FREE(c2->bpf_buf); | ||
356 | nlp_destroy(c2->nlp); | ||
357 | codec2_fft_free(c2->fft_fwd_cfg); | ||
358 | codec2_fftr_free(c2->fftr_fwd_cfg); | ||
359 | codec2_fftr_free(c2->fftr_inv_cfg); | ||
360 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | ||
361 | codec2_fft_free(c2->phase_fft_fwd_cfg); | ||
362 | codec2_fft_free(c2->phase_fft_inv_cfg); | ||
363 | } | ||
364 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | ||
365 | codec2_fft_free(c2->phase_fft_fwd_cfg); | ||
366 | codec2_fft_free(c2->phase_fft_inv_cfg); | ||
367 | } | ||
368 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
369 | codec2_fft_free(c2->phase_fft_fwd_cfg); | ||
370 | codec2_fft_free(c2->phase_fft_inv_cfg); | ||
371 | } | ||
372 | FREE(c2->Pn); | ||
373 | FREE(c2->Sn); | ||
374 | FREE(c2->w); | ||
375 | FREE(c2->Sn_); | ||
376 | FREE(c2); | ||
377 | } | ||
378 | |||
379 | /*---------------------------------------------------------------------------*\ | ||
380 | |||
381 | FUNCTION....: codec2_bits_per_frame | ||
382 | AUTHOR......: David Rowe | ||
383 | DATE CREATED: Nov 14 2011 | ||
384 | |||
385 | Returns the number of bits per frame. | ||
386 | |||
387 | \*---------------------------------------------------------------------------*/ | ||
388 | |||
389 | int codec2_bits_per_frame(struct CODEC2 *c2) { | ||
390 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | ||
391 | return 64; | ||
392 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | ||
393 | return 48; | ||
394 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | ||
395 | return 64; | ||
396 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | ||
397 | return 56; | ||
398 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
399 | return 52; | ||
400 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
401 | return 48; | ||
402 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
403 | return 28; | ||
404 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
405 | return 28; | ||
406 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
407 | return 28; | ||
408 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
409 | return 18; | ||
410 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
411 | return 18; | ||
412 | |||
413 | return 0; /* shouldn't get here */ | ||
414 | } | ||
415 | |||
416 | |||
417 | /*---------------------------------------------------------------------------*\ | ||
418 | |||
419 | FUNCTION....: codec2_samples_per_frame | ||
420 | AUTHOR......: David Rowe | ||
421 | DATE CREATED: Nov 14 2011 | ||
422 | |||
423 | Returns the number of speech samples per frame. | ||
424 | |||
425 | \*---------------------------------------------------------------------------*/ | ||
426 | |||
427 | int codec2_samples_per_frame(struct CODEC2 *c2) { | ||
428 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | ||
429 | return 160; | ||
430 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | ||
431 | return 160; | ||
432 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | ||
433 | return 320; | ||
434 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | ||
435 | return 320; | ||
436 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
437 | return 320; | ||
438 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
439 | return 320; | ||
440 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
441 | return 320; | ||
442 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
443 | return 320; | ||
444 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
445 | return 320; | ||
446 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
447 | return 320; | ||
448 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
449 | return 640; | ||
450 | return 0; /* shouldnt get here */ | ||
451 | } | ||
452 | |||
453 | void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[]) | ||
454 | { | ||
455 | assert(c2 != NULL); | ||
456 | assert(c2->encode != NULL); | ||
457 | |||
458 | c2->encode(c2, bits, speech); | ||
459 | |||
460 | } | ||
461 | |||
/*
  Decode one frame of bits into speech.  Forwards to codec2_decode_ber()
  with a bit error rate estimate of zero.
*/
void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits)
{
    const float zero_ber_est = 0.0;

    codec2_decode_ber(c2, speech, bits, zero_ber_est);
}
466 | |||
467 | void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est) | ||
468 | { | ||
469 | assert(c2 != NULL); | ||
470 | assert(c2->decode != NULL || c2->decode_ber != NULL); | ||
471 | |||
472 | if (c2->decode != NULL) | ||
473 | { | ||
474 | c2->decode(c2, speech, bits); | ||
475 | } | ||
476 | else | ||
477 | { | ||
478 | c2->decode_ber(c2, speech, bits, ber_est); | ||
479 | } | ||
480 | } | ||
481 | |||
482 | |||
483 | /*---------------------------------------------------------------------------*\ | ||
484 | |||
485 | FUNCTION....: codec2_encode_3200 | ||
486 | AUTHOR......: David Rowe | ||
487 | DATE CREATED: 13 Sep 2012 | ||
488 | |||
489 | Encodes 160 speech samples (20ms of speech) into 64 bits. | ||
490 | |||
491 | The codec2 algorithm actually operates internally on 10ms (80 | ||
492 | sample) frames, so we run the encoding algorithm twice. On the | ||
493 | first frame we just send the voicing bits. On the second frame we | ||
494 | send all model parameters. Compared to 2400 we use a larger number | ||
495 | of bits for the LSPs and non-VQ pitch and energy. | ||
496 | |||
497 | The bit allocation is: | ||
498 | |||
499 | Parameter bits/frame | ||
500 | -------------------------------------- | ||
501 | Harmonic magnitudes (LSPs) 50 | ||
502 | Pitch (Wo) 7 | ||
503 | Energy 5 | ||
504 | Voicing (10ms update) 2 | ||
505 | TOTAL 64 | ||
506 | |||
507 | \*---------------------------------------------------------------------------*/ | ||
508 | |||
509 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
510 | { | ||
511 | MODEL model; | ||
512 | float ak[LPC_ORD+1]; | ||
513 | float lsps[LPC_ORD]; | ||
514 | float e; | ||
515 | int Wo_index, e_index; | ||
516 | int lspd_indexes[LPC_ORD]; | ||
517 | int i; | ||
518 | unsigned int nbit = 0; | ||
519 | |||
520 | assert(c2 != NULL); | ||
521 | |||
522 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
523 | |||
524 | /* first 10ms analysis frame - we just want voicing */ | ||
525 | |||
526 | analyse_one_frame(c2, &model, speech); | ||
527 | pack(bits, &nbit, model.voiced, 1); | ||
528 | |||
529 | /* second 10ms analysis frame */ | ||
530 | |||
531 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | ||
532 | pack(bits, &nbit, model.voiced, 1); | ||
533 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | ||
534 | pack(bits, &nbit, Wo_index, WO_BITS); | ||
535 | |||
536 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
537 | e_index = encode_energy(e, E_BITS); | ||
538 | pack(bits, &nbit, e_index, E_BITS); | ||
539 | |||
540 | encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD); | ||
541 | for(i=0; i<LSPD_SCALAR_INDEXES; i++) { | ||
542 | pack(bits, &nbit, lspd_indexes[i], lspd_bits(i)); | ||
543 | } | ||
544 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
545 | } | ||
546 | |||
547 | |||
548 | /*---------------------------------------------------------------------------*\ | ||
549 | |||
550 | FUNCTION....: codec2_decode_3200 | ||
551 | AUTHOR......: David Rowe | ||
552 | DATE CREATED: 13 Sep 2012 | ||
553 | |||
554 | Decodes a frame of 64 bits into 160 samples (20ms) of speech. | ||
555 | |||
556 | \*---------------------------------------------------------------------------*/ | ||
557 | |||
558 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
559 | { | ||
560 | MODEL model[2]; | ||
561 | int lspd_indexes[LPC_ORD]; | ||
562 | float lsps[2][LPC_ORD]; | ||
563 | int Wo_index, e_index; | ||
564 | float e[2]; | ||
565 | float snr; | ||
566 | float ak[2][LPC_ORD+1]; | ||
567 | int i,j; | ||
568 | unsigned int nbit = 0; | ||
569 | COMP Aw[FFT_ENC]; | ||
570 | |||
571 | assert(c2 != NULL); | ||
572 | |||
573 | /* only need to zero these out due to (unused) snr calculation */ | ||
574 | |||
575 | for(i=0; i<2; i++) | ||
576 | for(j=1; j<=MAX_AMP; j++) | ||
577 | model[i].A[j] = 0.0; | ||
578 | |||
579 | /* unpack bits from channel ------------------------------------*/ | ||
580 | |||
581 | /* this will partially fill the model params for the 2 x 10ms | ||
582 | frames */ | ||
583 | |||
584 | model[0].voiced = unpack(bits, &nbit, 1); | ||
585 | model[1].voiced = unpack(bits, &nbit, 1); | ||
586 | |||
587 | Wo_index = unpack(bits, &nbit, WO_BITS); | ||
588 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
589 | model[1].L = PI/model[1].Wo; | ||
590 | |||
591 | e_index = unpack(bits, &nbit, E_BITS); | ||
592 | e[1] = decode_energy(e_index, E_BITS); | ||
593 | |||
594 | for(i=0; i<LSPD_SCALAR_INDEXES; i++) { | ||
595 | lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i)); | ||
596 | } | ||
597 | decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD); | ||
598 | |||
599 | /* interpolate ------------------------------------------------*/ | ||
600 | |||
601 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | ||
602 | 10ms frame between 20ms samples */ | ||
603 | |||
604 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | ||
605 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | ||
606 | |||
607 | /* LSPs are sampled every 20ms so we interpolate the frame in | ||
608 | between, then recover spectral amplitudes */ | ||
609 | |||
610 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); | ||
611 | |||
612 | for(i=0; i<2; i++) { | ||
613 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
614 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
615 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
616 | apply_lpc_correction(&model[i]); | ||
617 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
618 | } | ||
619 | |||
620 | /* update memories for next frame ----------------------------*/ | ||
621 | |||
622 | c2->prev_model_dec = model[1]; | ||
623 | c2->prev_e_dec = e[1]; | ||
624 | for(i=0; i<LPC_ORD; i++) | ||
625 | c2->prev_lsps_dec[i] = lsps[1][i]; | ||
626 | } | ||
627 | |||
628 | |||
629 | /*---------------------------------------------------------------------------*\ | ||
630 | |||
631 | FUNCTION....: codec2_encode_2400 | ||
632 | AUTHOR......: David Rowe | ||
633 | DATE CREATED: 21/8/2010 | ||
634 | |||
635 | Encodes 160 speech samples (20ms of speech) into 48 bits. | ||
636 | |||
637 | The codec2 algorithm actually operates internally on 10ms (80 | ||
638 | sample) frames, so we run the encoding algorithm twice. On the | ||
639 | first frame we just send the voicing bit. On the second frame we | ||
640 | send all model parameters. | ||
641 | |||
642 | The bit allocation is: | ||
643 | |||
644 | Parameter bits/frame | ||
645 | -------------------------------------- | ||
646 | Harmonic magnitudes (LSPs) 36 | ||
647 | Joint VQ of Energy and Wo 8 | ||
648 | Voicing (10ms update) 2 | ||
649 | Spare 2 | ||
650 | TOTAL 48 | ||
651 | |||
652 | \*---------------------------------------------------------------------------*/ | ||
653 | |||
654 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
655 | { | ||
656 | MODEL model; | ||
657 | float ak[LPC_ORD+1]; | ||
658 | float lsps[LPC_ORD]; | ||
659 | float e; | ||
660 | int WoE_index; | ||
661 | int lsp_indexes[LPC_ORD]; | ||
662 | int i; | ||
663 | int spare = 0; | ||
664 | unsigned int nbit = 0; | ||
665 | |||
666 | assert(c2 != NULL); | ||
667 | |||
668 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
669 | |||
670 | /* first 10ms analysis frame - we just want voicing */ | ||
671 | |||
672 | analyse_one_frame(c2, &model, speech); | ||
673 | pack(bits, &nbit, model.voiced, 1); | ||
674 | |||
675 | /* second 10ms analysis frame */ | ||
676 | |||
677 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | ||
678 | pack(bits, &nbit, model.voiced, 1); | ||
679 | |||
680 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
681 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | ||
682 | pack(bits, &nbit, WoE_index, WO_E_BITS); | ||
683 | |||
684 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | ||
685 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
686 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | ||
687 | } | ||
688 | pack(bits, &nbit, spare, 2); | ||
689 | |||
690 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
691 | } | ||
692 | |||
693 | |||
694 | /*---------------------------------------------------------------------------*\ | ||
695 | |||
696 | FUNCTION....: codec2_decode_2400 | ||
697 | AUTHOR......: David Rowe | ||
698 | DATE CREATED: 21/8/2010 | ||
699 | |||
700 | Decodes frames of 48 bits into 160 samples (20ms) of speech. | ||
701 | |||
702 | \*---------------------------------------------------------------------------*/ | ||
703 | |||
704 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
705 | { | ||
706 | MODEL model[2]; | ||
707 | int lsp_indexes[LPC_ORD]; | ||
708 | float lsps[2][LPC_ORD]; | ||
709 | int WoE_index; | ||
710 | float e[2]; | ||
711 | float snr; | ||
712 | float ak[2][LPC_ORD+1]; | ||
713 | int i,j; | ||
714 | unsigned int nbit = 0; | ||
715 | COMP Aw[FFT_ENC]; | ||
716 | |||
717 | assert(c2 != NULL); | ||
718 | |||
719 | /* only need to zero these out due to (unused) snr calculation */ | ||
720 | |||
721 | for(i=0; i<2; i++) | ||
722 | for(j=1; j<=MAX_AMP; j++) | ||
723 | model[i].A[j] = 0.0; | ||
724 | |||
725 | /* unpack bits from channel ------------------------------------*/ | ||
726 | |||
727 | /* this will partially fill the model params for the 2 x 10ms | ||
728 | frames */ | ||
729 | |||
730 | model[0].voiced = unpack(bits, &nbit, 1); | ||
731 | |||
732 | model[1].voiced = unpack(bits, &nbit, 1); | ||
733 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
734 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | ||
735 | |||
736 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
737 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | ||
738 | } | ||
739 | decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD); | ||
740 | check_lsp_order(&lsps[1][0], LPC_ORD); | ||
741 | bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0); | ||
742 | |||
743 | /* interpolate ------------------------------------------------*/ | ||
744 | |||
745 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | ||
746 | 10ms frame between 20ms samples */ | ||
747 | |||
748 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | ||
749 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | ||
750 | |||
751 | /* LSPs are sampled every 20ms so we interpolate the frame in | ||
752 | between, then recover spectral amplitudes */ | ||
753 | |||
754 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); | ||
755 | for(i=0; i<2; i++) { | ||
756 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
757 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
758 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
759 | apply_lpc_correction(&model[i]); | ||
760 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
761 | |||
762 | /* dump parameters for deep learning experiments */ | ||
763 | |||
764 | if (c2->fmlfeat != NULL) { | ||
765 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ | ||
766 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
767 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); | ||
768 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); | ||
769 | float voiced_float = model[i].voiced; | ||
770 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | ||
771 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
772 | } | ||
773 | } | ||
774 | |||
775 | /* update memories for next frame ----------------------------*/ | ||
776 | |||
777 | c2->prev_model_dec = model[1]; | ||
778 | c2->prev_e_dec = e[1]; | ||
779 | for(i=0; i<LPC_ORD; i++) | ||
780 | c2->prev_lsps_dec[i] = lsps[1][i]; | ||
781 | } | ||
782 | |||
783 | |||
784 | /*---------------------------------------------------------------------------*\ | ||
785 | |||
786 | FUNCTION....: codec2_encode_1600 | ||
787 | AUTHOR......: David Rowe | ||
788 | DATE CREATED: Feb 28 2013 | ||
789 | |||
790 | Encodes 320 speech samples (40ms of speech) into 64 bits. | ||
791 | |||
792 | The codec2 algorithm actually operates internally on 10ms (80 | ||
793 | sample) frames, so we run the encoding algorithm 4 times: | ||
794 | |||
795 | frame 0: voicing bit | ||
796 | frame 1: voicing bit, Wo and E | ||
797 | frame 2: voicing bit | ||
798 | frame 3: voicing bit, Wo and E, scalar LSPs | ||
799 | |||
800 | The bit allocation is: | ||
801 | |||
802 | Parameter frame 2 frame 4 Total | ||
803 | ------------------------------------------------------- | ||
804 | Harmonic magnitudes (LSPs) 0 36 36 | ||
805 | Pitch (Wo) 7 7 14 | ||
806 | Energy 5 5 10 | ||
807 | Voicing (10ms update) 2 2 4 | ||
808 | TOTAL 14 50 64 | ||
809 | |||
810 | \*---------------------------------------------------------------------------*/ | ||
811 | |||
812 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
813 | { | ||
814 | MODEL model; | ||
815 | float lsps[LPC_ORD]; | ||
816 | float ak[LPC_ORD+1]; | ||
817 | float e; | ||
818 | int lsp_indexes[LPC_ORD]; | ||
819 | int Wo_index, e_index; | ||
820 | int i; | ||
821 | unsigned int nbit = 0; | ||
822 | |||
823 | assert(c2 != NULL); | ||
824 | |||
825 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
826 | |||
827 | /* frame 1: - voicing ---------------------------------------------*/ | ||
828 | |||
829 | analyse_one_frame(c2, &model, speech); | ||
830 | pack(bits, &nbit, model.voiced, 1); | ||
831 | |||
832 | /* frame 2: - voicing, scalar Wo & E -------------------------------*/ | ||
833 | |||
834 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | ||
835 | pack(bits, &nbit, model.voiced, 1); | ||
836 | |||
837 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | ||
838 | pack(bits, &nbit, Wo_index, WO_BITS); | ||
839 | |||
840 | /* need to run this just to get LPC energy */ | ||
841 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
842 | e_index = encode_energy(e, E_BITS); | ||
843 | pack(bits, &nbit, e_index, E_BITS); | ||
844 | |||
845 | /* frame 3: - voicing ---------------------------------------------*/ | ||
846 | |||
847 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | ||
848 | pack(bits, &nbit, model.voiced, 1); | ||
849 | |||
850 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ | ||
851 | |||
852 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | ||
853 | pack(bits, &nbit, model.voiced, 1); | ||
854 | |||
855 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | ||
856 | pack(bits, &nbit, Wo_index, WO_BITS); | ||
857 | |||
858 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
859 | e_index = encode_energy(e, E_BITS); | ||
860 | pack(bits, &nbit, e_index, E_BITS); | ||
861 | |||
862 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | ||
863 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
864 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | ||
865 | } | ||
866 | |||
867 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
868 | } | ||
869 | |||
870 | |||
871 | /*---------------------------------------------------------------------------*\ | ||
872 | |||
873 | FUNCTION....: codec2_decode_1600 | ||
874 | AUTHOR......: David Rowe | ||
875 | DATE CREATED: 11 May 2012 | ||
876 | |||
877 | Decodes frames of 64 bits into 320 samples (40ms) of speech. | ||
878 | |||
879 | \*---------------------------------------------------------------------------*/ | ||
880 | |||
881 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
882 | { | ||
883 | MODEL model[4]; | ||
884 | int lsp_indexes[LPC_ORD]; | ||
885 | float lsps[4][LPC_ORD]; | ||
886 | int Wo_index, e_index; | ||
887 | float e[4]; | ||
888 | float snr; | ||
889 | float ak[4][LPC_ORD+1]; | ||
890 | int i,j; | ||
891 | unsigned int nbit = 0; | ||
892 | float weight; | ||
893 | COMP Aw[FFT_ENC]; | ||
894 | |||
895 | assert(c2 != NULL); | ||
896 | |||
897 | /* only need to zero these out due to (unused) snr calculation */ | ||
898 | |||
899 | for(i=0; i<4; i++) | ||
900 | for(j=1; j<=MAX_AMP; j++) | ||
901 | model[i].A[j] = 0.0; | ||
902 | |||
903 | /* unpack bits from channel ------------------------------------*/ | ||
904 | |||
905 | /* this will partially fill the model params for the 4 x 10ms | ||
906 | frames */ | ||
907 | |||
908 | model[0].voiced = unpack(bits, &nbit, 1); | ||
909 | |||
910 | model[1].voiced = unpack(bits, &nbit, 1); | ||
911 | Wo_index = unpack(bits, &nbit, WO_BITS); | ||
912 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
913 | model[1].L = PI/model[1].Wo; | ||
914 | |||
915 | e_index = unpack(bits, &nbit, E_BITS); | ||
916 | e[1] = decode_energy(e_index, E_BITS); | ||
917 | |||
918 | model[2].voiced = unpack(bits, &nbit, 1); | ||
919 | |||
920 | model[3].voiced = unpack(bits, &nbit, 1); | ||
921 | Wo_index = unpack(bits, &nbit, WO_BITS); | ||
922 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
923 | model[3].L = PI/model[3].Wo; | ||
924 | |||
925 | e_index = unpack(bits, &nbit, E_BITS); | ||
926 | e[3] = decode_energy(e_index, E_BITS); | ||
927 | |||
928 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
929 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | ||
930 | } | ||
931 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | ||
932 | check_lsp_order(&lsps[3][0], LPC_ORD); | ||
933 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | ||
934 | |||
935 | /* interpolate ------------------------------------------------*/ | ||
936 | |||
937 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | ||
938 | 10ms frame between 20ms samples */ | ||
939 | |||
940 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | ||
941 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | ||
942 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | ||
943 | e[2] = interp_energy(e[1], e[3]); | ||
944 | |||
945 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | ||
946 | between, then recover spectral amplitudes */ | ||
947 | |||
948 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
949 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | ||
950 | } | ||
951 | for(i=0; i<4; i++) { | ||
952 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
953 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
954 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
955 | apply_lpc_correction(&model[i]); | ||
956 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
957 | } | ||
958 | |||
959 | /* update memories for next frame ----------------------------*/ | ||
960 | |||
961 | c2->prev_model_dec = model[3]; | ||
962 | c2->prev_e_dec = e[3]; | ||
963 | for(i=0; i<LPC_ORD; i++) | ||
964 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
965 | |||
966 | } | ||
967 | |||
968 | /*---------------------------------------------------------------------------*\ | ||
969 | |||
970 | FUNCTION....: codec2_encode_1400 | ||
971 | AUTHOR......: David Rowe | ||
972 | DATE CREATED: May 11 2012 | ||
973 | |||
974 | Encodes 320 speech samples (40ms of speech) into 56 bits. | ||
975 | |||
976 | The codec2 algorithm actually operates internally on 10ms (80 | ||
977 | sample) frames, so we run the encoding algorithm 4 times: | ||
978 | |||
979 | frame 0: voicing bit | ||
980 | frame 1: voicing bit, joint VQ of Wo and E | ||
981 | frame 2: voicing bit | ||
982 | frame 3: voicing bit, joint VQ of Wo and E, scalar LSPs | ||
983 | |||
984 | The bit allocation is: | ||
985 | |||
986 | Parameter frame 2 frame 4 Total | ||
987 | ------------------------------------------------------- | ||
988 | Harmonic magnitudes (LSPs) 0 36 36 | ||
989 | Energy+Wo 8 8 16 | ||
990 | Voicing (10ms update) 2 2 4 | ||
991 | TOTAL 10 46 56 | ||
992 | |||
993 | \*---------------------------------------------------------------------------*/ | ||
994 | |||
995 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
996 | { | ||
997 | MODEL model; | ||
998 | float lsps[LPC_ORD]; | ||
999 | float ak[LPC_ORD+1]; | ||
1000 | float e; | ||
1001 | int lsp_indexes[LPC_ORD]; | ||
1002 | int WoE_index; | ||
1003 | int i; | ||
1004 | unsigned int nbit = 0; | ||
1005 | |||
1006 | assert(c2 != NULL); | ||
1007 | |||
1008 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1009 | |||
1010 | /* frame 1: - voicing ---------------------------------------------*/ | ||
1011 | |||
1012 | analyse_one_frame(c2, &model, speech); | ||
1013 | pack(bits, &nbit, model.voiced, 1); | ||
1014 | |||
1015 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ | ||
1016 | |||
1017 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | ||
1018 | pack(bits, &nbit, model.voiced, 1); | ||
1019 | |||
1020 | /* need to run this just to get LPC energy */ | ||
1021 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
1022 | |||
1023 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | ||
1024 | pack(bits, &nbit, WoE_index, WO_E_BITS); | ||
1025 | |||
1026 | /* frame 3: - voicing ---------------------------------------------*/ | ||
1027 | |||
1028 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | ||
1029 | pack(bits, &nbit, model.voiced, 1); | ||
1030 | |||
1031 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ | ||
1032 | |||
1033 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | ||
1034 | pack(bits, &nbit, model.voiced, 1); | ||
1035 | |||
1036 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
1037 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | ||
1038 | pack(bits, &nbit, WoE_index, WO_E_BITS); | ||
1039 | |||
1040 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | ||
1041 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
1042 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | ||
1043 | } | ||
1044 | |||
1045 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1046 | } | ||
1047 | |||
1048 | |||
1049 | /*---------------------------------------------------------------------------*\ | ||
1050 | |||
1051 | FUNCTION....: codec2_decode_1400 | ||
1052 | AUTHOR......: David Rowe | ||
1053 | DATE CREATED: 11 May 2012 | ||
1054 | |||
1055 | Decodes frames of 56 bits into 320 samples (40ms) of speech. | ||
1056 | |||
1057 | \*---------------------------------------------------------------------------*/ | ||
1058 | |||
1059 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
1060 | { | ||
1061 | MODEL model[4]; | ||
1062 | int lsp_indexes[LPC_ORD]; | ||
1063 | float lsps[4][LPC_ORD]; | ||
1064 | int WoE_index; | ||
1065 | float e[4]; | ||
1066 | float snr; | ||
1067 | float ak[4][LPC_ORD+1]; | ||
1068 | int i,j; | ||
1069 | unsigned int nbit = 0; | ||
1070 | float weight; | ||
1071 | COMP Aw[FFT_ENC]; | ||
1072 | |||
1073 | assert(c2 != NULL); | ||
1074 | |||
1075 | /* only need to zero these out due to (unused) snr calculation */ | ||
1076 | |||
1077 | for(i=0; i<4; i++) | ||
1078 | for(j=1; j<=MAX_AMP; j++) | ||
1079 | model[i].A[j] = 0.0; | ||
1080 | |||
1081 | /* unpack bits from channel ------------------------------------*/ | ||
1082 | |||
1083 | /* this will partially fill the model params for the 4 x 10ms | ||
1084 | frames */ | ||
1085 | |||
1086 | model[0].voiced = unpack(bits, &nbit, 1); | ||
1087 | |||
1088 | model[1].voiced = unpack(bits, &nbit, 1); | ||
1089 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
1090 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | ||
1091 | |||
1092 | model[2].voiced = unpack(bits, &nbit, 1); | ||
1093 | |||
1094 | model[3].voiced = unpack(bits, &nbit, 1); | ||
1095 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
1096 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); | ||
1097 | |||
1098 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
1099 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | ||
1100 | } | ||
1101 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | ||
1102 | check_lsp_order(&lsps[3][0], LPC_ORD); | ||
1103 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | ||
1104 | |||
1105 | /* interpolate ------------------------------------------------*/ | ||
1106 | |||
1107 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | ||
1108 | 10ms frame between 20ms samples */ | ||
1109 | |||
1110 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | ||
1111 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | ||
1112 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | ||
1113 | e[2] = interp_energy(e[1], e[3]); | ||
1114 | |||
1115 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | ||
1116 | between, then recover spectral amplitudes */ | ||
1117 | |||
1118 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1119 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | ||
1120 | } | ||
1121 | for(i=0; i<4; i++) { | ||
1122 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
1123 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
1124 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1125 | apply_lpc_correction(&model[i]); | ||
1126 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1127 | } | ||
1128 | |||
1129 | /* update memories for next frame ----------------------------*/ | ||
1130 | |||
1131 | c2->prev_model_dec = model[3]; | ||
1132 | c2->prev_e_dec = e[3]; | ||
1133 | for(i=0; i<LPC_ORD; i++) | ||
1134 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1135 | |||
1136 | } | ||
1137 | |||
1138 | /*---------------------------------------------------------------------------*\ | ||
1139 | |||
1140 | FUNCTION....: codec2_encode_1300 | ||
1141 | AUTHOR......: David Rowe | ||
1142 | DATE CREATED: March 14 2013 | ||
1143 | |||
1144 | Encodes 320 speech samples (40ms of speech) into 52 bits. | ||
1145 | |||
1146 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1147 | sample) frames, so we run the encoding algorithm 4 times: | ||
1148 | |||
1149 | frame 0: voicing bit | ||
1150 | frame 1: voicing bit | ||
1151 | frame 2: voicing bit | ||
1152 | frame 3: voicing bit, Wo and E, scalar LSPs | ||
1153 | |||
1154 | The bit allocation is: | ||
1155 | |||
1156 | Parameter frame 2 frame 4 Total | ||
1157 | ------------------------------------------------------- | ||
1158 | Harmonic magnitudes (LSPs) 0 36 36 | ||
1159 | Pitch (Wo) 0 7 7 | ||
1160 | Energy 0 5 5 | ||
1161 | Voicing (10ms update) 2 2 4 | ||
1162 | TOTAL 2 50 52 | ||
1163 | |||
1164 | \*---------------------------------------------------------------------------*/ | ||
1165 | |||
1166 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1167 | { | ||
1168 | MODEL model; | ||
1169 | float lsps[LPC_ORD]; | ||
1170 | float ak[LPC_ORD+1]; | ||
1171 | float e; | ||
1172 | int lsp_indexes[LPC_ORD]; | ||
1173 | int Wo_index, e_index; | ||
1174 | int i; | ||
1175 | unsigned int nbit = 0; | ||
1176 | //#ifdef PROFILE | ||
1177 | //unsigned int quant_start; | ||
1178 | //#endif | ||
1179 | |||
1180 | assert(c2 != NULL); | ||
1181 | |||
1182 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1183 | |||
1184 | /* frame 1: - voicing ---------------------------------------------*/ | ||
1185 | |||
1186 | analyse_one_frame(c2, &model, speech); | ||
1187 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | ||
1188 | |||
1189 | /* frame 2: - voicing ---------------------------------------------*/ | ||
1190 | |||
1191 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | ||
1192 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | ||
1193 | |||
1194 | /* frame 3: - voicing ---------------------------------------------*/ | ||
1195 | |||
1196 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | ||
1197 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | ||
1198 | |||
1199 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ | ||
1200 | |||
1201 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | ||
1202 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | ||
1203 | |||
1204 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | ||
1205 | pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); | ||
1206 | |||
1207 | //#ifdef PROFILE | ||
1208 | //quant_start = machdep_profile_sample(); | ||
1209 | //#endif | ||
1210 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
1211 | e_index = encode_energy(e, E_BITS); | ||
1212 | pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray); | ||
1213 | |||
1214 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | ||
1215 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
1216 | pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray); | ||
1217 | } | ||
1218 | //#ifdef PROFILE | ||
1219 | //machdep_profile_sample_and_log(quant_start, " quant/packing"); | ||
1220 | //#endif | ||
1221 | |||
1222 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1223 | } | ||
1224 | |||
1225 | |||
1226 | /*---------------------------------------------------------------------------*\ | ||
1227 | |||
1228 | FUNCTION....: codec2_decode_1300 | ||
1229 | AUTHOR......: David Rowe | ||
1230 | DATE CREATED: 11 May 2012 | ||
1231 | |||
1232 | Decodes frames of 52 bits into 320 samples (40ms) of speech. | ||
1233 | |||
1234 | \*---------------------------------------------------------------------------*/ | ||
1235 | static int frames; | ||
1236 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est) | ||
1237 | { | ||
1238 | MODEL model[4]; | ||
1239 | int lsp_indexes[LPC_ORD]; | ||
1240 | float lsps[4][LPC_ORD]; | ||
1241 | int Wo_index, e_index; | ||
1242 | float e[4]; | ||
1243 | float snr; | ||
1244 | float ak[4][LPC_ORD+1]; | ||
1245 | int i,j; | ||
1246 | unsigned int nbit = 0; | ||
1247 | float weight; | ||
1248 | COMP Aw[FFT_ENC]; | ||
1249 | //PROFILE_VAR(recover_start); | ||
1250 | |||
1251 | assert(c2 != NULL); | ||
1252 | frames+= 4; | ||
1253 | /* only need to zero these out due to (unused) snr calculation */ | ||
1254 | |||
1255 | for(i=0; i<4; i++) | ||
1256 | for(j=1; j<=MAX_AMP; j++) | ||
1257 | model[i].A[j] = 0.0; | ||
1258 | |||
1259 | /* unpack bits from channel ------------------------------------*/ | ||
1260 | |||
1261 | /* this will partially fill the model params for the 4 x 10ms | ||
1262 | frames */ | ||
1263 | |||
1264 | model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1265 | model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1266 | model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1267 | model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1268 | |||
1269 | Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray); | ||
1270 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
1271 | model[3].L = PI/model[3].Wo; | ||
1272 | |||
1273 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | ||
1274 | e[3] = decode_energy(e_index, E_BITS); | ||
1275 | //fprintf(stderr, "%d %f\n", e_index, e[3]); | ||
1276 | |||
1277 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | ||
1278 | lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray); | ||
1279 | } | ||
1280 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | ||
1281 | check_lsp_order(&lsps[3][0], LPC_ORD); | ||
1282 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | ||
1283 | |||
1284 | if (ber_est > 0.15) { | ||
1285 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0; | ||
1286 | e[3] = decode_energy(10, E_BITS); | ||
1287 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0); | ||
1288 | //fprintf(stderr, "soft mute\n"); | ||
1289 | } | ||
1290 | |||
1291 | /* interpolate ------------------------------------------------*/ | ||
1292 | |||
1293 | /* Wo, energy, and LSPs are sampled every 40ms so we interpolate | ||
1294 | the 3 frames in between */ | ||
1295 | |||
1296 | //PROFILE_SAMPLE(recover_start); | ||
1297 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1298 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | ||
1299 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
1300 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
1301 | } | ||
1302 | |||
1303 | /* then recover spectral amplitudes */ | ||
1304 | |||
1305 | for(i=0; i<4; i++) { | ||
1306 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
1307 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
1308 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1309 | apply_lpc_correction(&model[i]); | ||
1310 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1311 | |||
1312 | /* dump parameters for deep learning experiments */ | ||
1313 | |||
1314 | if (c2->fmlfeat != NULL) { | ||
1315 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ | ||
1316 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
1317 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); | ||
1318 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); | ||
1319 | float voiced_float = model[i].voiced; | ||
1320 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | ||
1321 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
1322 | } | ||
1323 | } | ||
1324 | /* | ||
1325 | for(i=0; i<4; i++) { | ||
1326 | printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced); | ||
1327 | } | ||
1328 | if (frames == 4*50) | ||
1329 | exit(0); | ||
1330 | */ | ||
1331 | //PROFILE_SAMPLE_AND_LOG2(recover_start, " recover"); | ||
1332 | #ifdef DUMP | ||
1333 | dump_lsp_(&lsps[3][0]); | ||
1334 | dump_ak_(&ak[3][0], LPC_ORD); | ||
1335 | #endif | ||
1336 | |||
1337 | /* update memories for next frame ----------------------------*/ | ||
1338 | |||
1339 | c2->prev_model_dec = model[3]; | ||
1340 | c2->prev_e_dec = e[3]; | ||
1341 | for(i=0; i<LPC_ORD; i++) | ||
1342 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1343 | |||
1344 | } | ||
1345 | |||
1346 | |||
1347 | /*---------------------------------------------------------------------------*\ | ||
1348 | |||
1349 | FUNCTION....: codec2_encode_1200 | ||
1350 | AUTHOR......: David Rowe | ||
1351 | DATE CREATED: Nov 14 2011 | ||
1352 | |||
1353 | Encodes 320 speech samples (40ms of speech) into 48 bits. | ||
1354 | |||
1355 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1356 | sample) frames, so we run the encoding algorithm four times: | ||
1357 | |||
1358 | frame 0: voicing bit | ||
1359 | frame 1: voicing bit, joint VQ of Wo and E | ||
1360 | frame 2: voicing bit | ||
1361 | frame 3: voicing bit, joint VQ of Wo and E, VQ LSPs | ||
1362 | |||
1363 | The bit allocation is: | ||
1364 | |||
1365 | Parameter frame 2 frame 4 Total | ||
1366 | ------------------------------------------------------- | ||
1367 | Harmonic magnitudes (LSPs) 0 27 27 | ||
1368 | Energy+Wo 8 8 16 | ||
1369 | Voicing (10ms update) 2 2 4 | ||
1370 | Spare 0 1 1 | ||
1371 | TOTAL 10 38 48 | ||
1372 | |||
1373 | \*---------------------------------------------------------------------------*/ | ||
1374 | |||
1375 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1376 | { | ||
1377 | MODEL model; | ||
1378 | float lsps[LPC_ORD]; | ||
1379 | float lsps_[LPC_ORD]; | ||
1380 | float ak[LPC_ORD+1]; | ||
1381 | float e; | ||
1382 | int lsp_indexes[LPC_ORD]; | ||
1383 | int WoE_index; | ||
1384 | int i; | ||
1385 | int spare = 0; | ||
1386 | unsigned int nbit = 0; | ||
1387 | |||
1388 | assert(c2 != NULL); | ||
1389 | |||
1390 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1391 | |||
1392 | /* frame 1: - voicing ---------------------------------------------*/ | ||
1393 | |||
1394 | analyse_one_frame(c2, &model, speech); | ||
1395 | pack(bits, &nbit, model.voiced, 1); | ||
1396 | |||
1397 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ | ||
1398 | |||
1399 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | ||
1400 | pack(bits, &nbit, model.voiced, 1); | ||
1401 | |||
1402 | /* need to run this just to get LPC energy */ | ||
1403 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
1404 | |||
1405 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | ||
1406 | pack(bits, &nbit, WoE_index, WO_E_BITS); | ||
1407 | |||
1408 | /* frame 3: - voicing ---------------------------------------------*/ | ||
1409 | |||
1410 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | ||
1411 | pack(bits, &nbit, model.voiced, 1); | ||
1412 | |||
1413 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ | ||
1414 | |||
1415 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | ||
1416 | pack(bits, &nbit, model.voiced, 1); | ||
1417 | |||
1418 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
1419 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | ||
1420 | pack(bits, &nbit, WoE_index, WO_E_BITS); | ||
1421 | |||
1422 | encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); | ||
1423 | for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { | ||
1424 | pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i)); | ||
1425 | } | ||
1426 | pack(bits, &nbit, spare, 1); | ||
1427 | |||
1428 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1429 | } | ||
1430 | |||
1431 | |||
1432 | /*---------------------------------------------------------------------------*\ | ||
1433 | |||
1434 | FUNCTION....: codec2_decode_1200 | ||
1435 | AUTHOR......: David Rowe | ||
1436 | DATE CREATED: 14 Feb 2012 | ||
1437 | |||
1438 | Decodes frames of 48 bits into 320 samples (40ms) of speech. | ||
1439 | |||
1440 | \*---------------------------------------------------------------------------*/ | ||
1441 | |||
1442 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
1443 | { | ||
1444 | MODEL model[4]; | ||
1445 | int lsp_indexes[LPC_ORD]; | ||
1446 | float lsps[4][LPC_ORD]; | ||
1447 | int WoE_index; | ||
1448 | float e[4]; | ||
1449 | float snr; | ||
1450 | float ak[4][LPC_ORD+1]; | ||
1451 | int i,j; | ||
1452 | unsigned int nbit = 0; | ||
1453 | float weight; | ||
1454 | COMP Aw[FFT_ENC]; | ||
1455 | |||
1456 | assert(c2 != NULL); | ||
1457 | |||
1458 | /* only need to zero these out due to (unused) snr calculation */ | ||
1459 | |||
1460 | for(i=0; i<4; i++) | ||
1461 | for(j=1; j<=MAX_AMP; j++) | ||
1462 | model[i].A[j] = 0.0; | ||
1463 | |||
1464 | /* unpack bits from channel ------------------------------------*/ | ||
1465 | |||
1466 | /* this will partially fill the model params for the 4 x 10ms | ||
1467 | frames */ | ||
1468 | |||
1469 | model[0].voiced = unpack(bits, &nbit, 1); | ||
1470 | |||
1471 | model[1].voiced = unpack(bits, &nbit, 1); | ||
1472 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
1473 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | ||
1474 | |||
1475 | model[2].voiced = unpack(bits, &nbit, 1); | ||
1476 | |||
1477 | model[3].voiced = unpack(bits, &nbit, 1); | ||
1478 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
1479 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); | ||
1480 | |||
1481 | for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { | ||
1482 | lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i)); | ||
1483 | } | ||
1484 | decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD , 0); | ||
1485 | check_lsp_order(&lsps[3][0], LPC_ORD); | ||
1486 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | ||
1487 | |||
1488 | /* interpolate ------------------------------------------------*/ | ||
1489 | |||
1490 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | ||
1491 | 10ms frame between 20ms samples */ | ||
1492 | |||
1493 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | ||
1494 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | ||
1495 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | ||
1496 | e[2] = interp_energy(e[1], e[3]); | ||
1497 | |||
1498 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | ||
1499 | between, then recover spectral amplitudes */ | ||
1500 | |||
1501 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1502 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | ||
1503 | } | ||
1504 | for(i=0; i<4; i++) { | ||
1505 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
1506 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
1507 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1508 | apply_lpc_correction(&model[i]); | ||
1509 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1510 | } | ||
1511 | |||
1512 | /* update memories for next frame ----------------------------*/ | ||
1513 | |||
1514 | c2->prev_model_dec = model[3]; | ||
1515 | c2->prev_e_dec = e[3]; | ||
1516 | for(i=0; i<LPC_ORD; i++) | ||
1517 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1518 | } | ||
1519 | |||
1520 | |||
1521 | #ifndef CORTEX_M4 | ||
1522 | /*---------------------------------------------------------------------------*\ | ||
1523 | |||
1524 | FUNCTION....: codec2_encode_700 | ||
1525 | AUTHOR......: David Rowe | ||
1526 | DATE CREATED: April 2015 | ||
1527 | |||
1528 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
1529 | |||
1530 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1531 | sample) frames, so we run the encoding algorithm four times: | ||
1532 | |||
1533 | frame 0: nothing | ||
1534 | frame 1: nothing | ||
1535 | frame 2: nothing | ||
1536 | frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar, 2 spare | ||
1537 | |||
1538 | The bit allocation is: | ||
1539 | |||
1540 | Parameter frames 1-3 frame 4 Total | ||
1541 | ----------------------------------------------------------- | ||
1542 | Harmonic magnitudes (LSPs) 0 17 17 | ||
1543 | Energy 0 3 3 | ||
1544 | log Wo 0 5 5 | ||
1545 | Voicing 0 1 1 | ||
1546 | spare 0 2 2 | ||
1547 | TOTAL 0 28 28 | ||
1548 | |||
1549 | \*---------------------------------------------------------------------------*/ | ||
1550 | |||
1551 | void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1552 | { | ||
1553 | MODEL model; | ||
1554 | float lsps[LPC_ORD_LOW]; | ||
1555 | float mel[LPC_ORD_LOW]; | ||
1556 | float ak[LPC_ORD_LOW+1]; | ||
1557 | float e, f; | ||
1558 | int indexes[LPC_ORD_LOW]; | ||
1559 | int Wo_index, e_index, i; | ||
1560 | unsigned int nbit = 0; | ||
1561 | float bpf_out[4*c2->n_samp]; | ||
1562 | short bpf_speech[4*c2->n_samp]; | ||
1563 | int spare = 0; | ||
1564 | |||
1565 | assert(c2 != NULL); | ||
1566 | |||
1567 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1568 | |||
1569 | /* band pass filter */ | ||
1570 | |||
1571 | for(i=0; i<BPF_N; i++) | ||
1572 | c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i]; | ||
1573 | for(i=0; i<4*c2->n_samp; i++) | ||
1574 | c2->bpf_buf[BPF_N+i] = speech[i]; | ||
1575 | inverse_filter(&c2->bpf_buf[BPF_N], bpf, 4*c2->n_samp, bpf_out, BPF_N-1); | ||
1576 | for(i=0; i<4*c2->n_samp; i++) | ||
1577 | bpf_speech[i] = bpf_out[i]; | ||
1578 | |||
1579 | /* frame 1 --------------------------------------------------------*/ | ||
1580 | |||
1581 | analyse_one_frame(c2, &model, bpf_speech); | ||
1582 | |||
1583 | /* frame 2 --------------------------------------------------------*/ | ||
1584 | |||
1585 | analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]); | ||
1586 | |||
1587 | /* frame 3 --------------------------------------------------------*/ | ||
1588 | |||
1589 | analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]); | ||
1590 | |||
1591 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs -----------------*/ | ||
1592 | |||
1593 | analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); | ||
1594 | pack(bits, &nbit, model.voiced, 1); | ||
1595 | Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); | ||
1596 | pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); | ||
1597 | |||
1598 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); | ||
1599 | e_index = encode_energy(e, 3); | ||
1600 | pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); | ||
1601 | |||
1602 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1603 | f = (4000.0/PI)*lsps[i]; | ||
1604 | mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); | ||
1605 | } | ||
1606 | encode_mels_scalar(indexes, mel, LPC_ORD_LOW); | ||
1607 | |||
1608 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1609 | pack_natural_or_gray(bits, &nbit, indexes[i], mel_bits(i), c2->gray); | ||
1610 | } | ||
1611 | |||
1612 | pack_natural_or_gray(bits, &nbit, spare, 2, c2->gray); | ||
1613 | |||
1614 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1615 | } | ||
1616 | |||
1617 | |||
1618 | /*---------------------------------------------------------------------------*\ | ||
1619 | |||
1620 | FUNCTION....: codec2_decode_700 | ||
1621 | AUTHOR......: David Rowe | ||
1622 | DATE CREATED: April 2015 | ||
1623 | |||
1624 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
1625 | |||
1626 | \*---------------------------------------------------------------------------*/ | ||
1627 | |||
1628 | void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
1629 | { | ||
1630 | MODEL model[4]; | ||
1631 | int indexes[LPC_ORD_LOW]; | ||
1632 | float mel[LPC_ORD_LOW]; | ||
1633 | float lsps[4][LPC_ORD_LOW]; | ||
1634 | int Wo_index, e_index; | ||
1635 | float e[4]; | ||
1636 | float snr, f_; | ||
1637 | float ak[4][LPC_ORD_LOW+1]; | ||
1638 | int i,j; | ||
1639 | unsigned int nbit = 0; | ||
1640 | float weight; | ||
1641 | COMP Aw[FFT_ENC]; | ||
1642 | |||
1643 | assert(c2 != NULL); | ||
1644 | |||
1645 | /* only need to zero these out due to (unused) snr calculation */ | ||
1646 | |||
1647 | for(i=0; i<4; i++) | ||
1648 | for(j=1; j<=MAX_AMP; j++) | ||
1649 | model[i].A[j] = 0.0; | ||
1650 | |||
1651 | /* unpack bits from channel ------------------------------------*/ | ||
1652 | |||
1653 | model[3].voiced = unpack(bits, &nbit, 1); | ||
1654 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; | ||
1655 | |||
1656 | Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); | ||
1657 | model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); | ||
1658 | model[3].L = PI/model[3].Wo; | ||
1659 | |||
1660 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
1661 | e[3] = decode_energy(e_index, 3); | ||
1662 | |||
1663 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1664 | indexes[i] = unpack_natural_or_gray(bits, &nbit, mel_bits(i), c2->gray); | ||
1665 | } | ||
1666 | |||
1667 | decode_mels_scalar(mel, indexes, LPC_ORD_LOW); | ||
1668 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1669 | f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); | ||
1670 | lsps[3][i] = f_*(PI/4000.0); | ||
1671 | //printf("lsps[3][%d] %f\n", i, lsps[3][i]); | ||
1672 | } | ||
1673 | |||
1674 | check_lsp_order(&lsps[3][0], LPC_ORD_LOW); | ||
1675 | bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0); | ||
1676 | |||
1677 | #ifdef MASK_NOT_FOR_NOW | ||
1678 | /* first pass at soft decn error masking, needs further work */ | ||
1679 | /* If soft dec info available expand further for low power frames */ | ||
1680 | |||
1681 | if (c2->softdec) { | ||
1682 | float e = 0.0; | ||
1683 | for(i=9; i<9+17; i++) | ||
1684 | e += c2->softdec[i]*c2->softdec[i]; | ||
1685 | e /= 6.0; | ||
1686 | //fprintf(stderr, "e: %f\n", e); | ||
1687 | //if (e < 0.3) | ||
1688 | // bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 150.0, 300.0); | ||
1689 | } | ||
1690 | #endif | ||
1691 | |||
1692 | /* interpolate ------------------------------------------------*/ | ||
1693 | |||
1694 | /* LSPs, Wo, and energy are sampled every 40ms so we interpolate | ||
1695 | the 3 frames in between, then recover spectral amplitudes */ | ||
1696 | |||
1697 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1698 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); | ||
1699 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
1700 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
1701 | } | ||
1702 | for(i=0; i<4; i++) { | ||
1703 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); | ||
1704 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, | ||
1705 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1706 | apply_lpc_correction(&model[i]); | ||
1707 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1708 | } | ||
1709 | |||
1710 | #ifdef DUMP | ||
1711 | dump_lsp_(&lsps[3][0]); | ||
1712 | dump_ak_(&ak[3][0], LPC_ORD_LOW); | ||
1713 | dump_model(&model[3]); | ||
1714 | if (c2->softdec) | ||
1715 | dump_softdec(c2->softdec, nbit); | ||
1716 | #endif | ||
1717 | |||
1718 | /* update memories for next frame ----------------------------*/ | ||
1719 | |||
1720 | c2->prev_model_dec = model[3]; | ||
1721 | c2->prev_e_dec = e[3]; | ||
1722 | for(i=0; i<LPC_ORD_LOW; i++) | ||
1723 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1724 | } | ||
1725 | |||
1726 | |||
1727 | /*---------------------------------------------------------------------------*\ | ||
1728 | |||
1729 | FUNCTION....: codec2_encode_700b | ||
1730 | AUTHOR......: David Rowe | ||
1731 | DATE CREATED: August 2015 | ||
1732 | |||
1733 | Version b of 700 bit/s codec. After some experiments over the air I | ||
1734 | was unhappy with the rate 700 codec, so I spent a few weeks | ||
1735 | trying to improve the speech quality. This version uses a wider BPF | ||
1736 | and vector quantised mel-lsps. | ||
1737 | |||
1738 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
1739 | |||
1740 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1741 | sample) frames, so we run the encoding algorithm four times: | ||
1742 | |||
1743 | frame 0: nothing | ||
1744 | frame 1: nothing | ||
1745 | frame 2: nothing | ||
1746 | frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ, | ||
1747 | 1 spare | ||
1748 | |||
1749 | The bit allocation is: | ||
1750 | |||
1751 | Parameter frames 1-3 frame 4 Total | ||
1752 | ----------------------------------------------------------- | ||
1753 | Harmonic magnitudes (LSPs) 0 18 18 | ||
1754 | Energy 0 3 3 | ||
1755 | log Wo 0 5 5 | ||
1756 | Voicing 0 1 1 | ||
1757 | spare 0 1 1 | ||
1758 | TOTAL 0 28 28 | ||
1759 | |||
1760 | \*---------------------------------------------------------------------------*/ | ||
1761 | |||
1762 | void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1763 | { | ||
1764 | MODEL model; | ||
1765 | float lsps[LPC_ORD_LOW]; | ||
1766 | float mel[LPC_ORD_LOW]; | ||
1767 | float mel_[LPC_ORD_LOW]; | ||
1768 | float ak[LPC_ORD_LOW+1]; | ||
1769 | float e, f; | ||
1770 | int indexes[3]; | ||
1771 | int Wo_index, e_index, i; | ||
1772 | unsigned int nbit = 0; | ||
1773 | float bpf_out[4*c2->n_samp]; | ||
1774 | short bpf_speech[4*c2->n_samp]; | ||
1775 | int spare = 0; | ||
1776 | |||
1777 | assert(c2 != NULL); | ||
1778 | |||
1779 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1780 | |||
1781 | /* band pass filter */ | ||
1782 | |||
1783 | for(i=0; i<BPF_N; i++) | ||
1784 | c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i]; | ||
1785 | for(i=0; i<4*c2->n_samp; i++) | ||
1786 | c2->bpf_buf[BPF_N+i] = speech[i]; | ||
1787 | inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*c2->n_samp, bpf_out, BPF_N-1); | ||
1788 | for(i=0; i<4*c2->n_samp; i++) | ||
1789 | bpf_speech[i] = bpf_out[i]; | ||
1790 | |||
1791 | /* frame 1 --------------------------------------------------------*/ | ||
1792 | |||
1793 | analyse_one_frame(c2, &model, bpf_speech); | ||
1794 | |||
1795 | /* frame 2 --------------------------------------------------------*/ | ||
1796 | |||
1797 | analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]); | ||
1798 | |||
1799 | /* frame 3 --------------------------------------------------------*/ | ||
1800 | |||
1801 | analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]); | ||
1802 | |||
1803 | /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/ | ||
1804 | |||
1805 | analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); | ||
1806 | pack(bits, &nbit, model.voiced, 1); | ||
1807 | Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); | ||
1808 | pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); | ||
1809 | |||
1810 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); | ||
1811 | e_index = encode_energy(e, 3); | ||
1812 | pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); | ||
1813 | |||
1814 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1815 | f = (4000.0/PI)*lsps[i]; | ||
1816 | mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); | ||
1817 | } | ||
1818 | lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5); | ||
1819 | |||
1820 | for(i=0; i<3; i++) { | ||
1821 | pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray); | ||
1822 | } | ||
1823 | |||
1824 | pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray); | ||
1825 | |||
1826 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1827 | } | ||
1828 | |||
1829 | |||
1830 | /*---------------------------------------------------------------------------*\ | ||
1831 | |||
1832 | FUNCTION....: codec2_decode_700b | ||
1833 | AUTHOR......: David Rowe | ||
1834 | DATE CREATED: August 2015 | ||
1835 | |||
1836 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
1837 | |||
1838 | \*---------------------------------------------------------------------------*/ | ||
1839 | |||
1840 | void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
1841 | { | ||
1842 | MODEL model[4]; | ||
1843 | int indexes[3]; | ||
1844 | float mel[LPC_ORD_LOW]; | ||
1845 | float lsps[4][LPC_ORD_LOW]; | ||
1846 | int Wo_index, e_index; | ||
1847 | float e[4]; | ||
1848 | float snr, f_; | ||
1849 | float ak[4][LPC_ORD_LOW+1]; | ||
1850 | int i,j; | ||
1851 | unsigned int nbit = 0; | ||
1852 | float weight; | ||
1853 | COMP Aw[FFT_ENC]; | ||
1854 | |||
1855 | assert(c2 != NULL); | ||
1856 | |||
1857 | /* only need to zero these out due to (unused) snr calculation */ | ||
1858 | |||
1859 | for(i=0; i<4; i++) | ||
1860 | for(j=1; j<=MAX_AMP; j++) | ||
1861 | model[i].A[j] = 0.0; | ||
1862 | |||
1863 | /* unpack bits from channel ------------------------------------*/ | ||
1864 | |||
1865 | model[3].voiced = unpack(bits, &nbit, 1); | ||
1866 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; | ||
1867 | |||
1868 | Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); | ||
1869 | model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); | ||
1870 | model[3].L = PI/model[3].Wo; | ||
1871 | |||
1872 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
1873 | e[3] = decode_energy(e_index, 3); | ||
1874 | |||
1875 | for(i=0; i<3; i++) { | ||
1876 | indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray); | ||
1877 | } | ||
1878 | |||
1879 | lspmelvq_decode(indexes, mel, LPC_ORD_LOW); | ||
1880 | |||
1881 | #define MEL_ROUND 10 | ||
1882 | for(i=1; i<LPC_ORD_LOW; i++) { | ||
1883 | if (mel[i] <= mel[i-1]+MEL_ROUND) { | ||
1884 | mel[i]+=MEL_ROUND/2; | ||
1885 | mel[i-1]-=MEL_ROUND/2; | ||
1886 | i = 1; | ||
1887 | } | ||
1888 | } | ||
1889 | |||
1890 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1891 | f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); | ||
1892 | lsps[3][i] = f_*(PI/4000.0); | ||
1893 | //printf("lsps[3][%d] %f\n", i, lsps[3][i]); | ||
1894 | } | ||
1895 | |||
1896 | /* interpolate ------------------------------------------------*/ | ||
1897 | |||
1898 | /* LSPs, Wo, and energy are sampled every 40ms so we interpolate | ||
1899 | the 3 frames in between, then recover spectral amplitudes */ | ||
1900 | |||
1901 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1902 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); | ||
1903 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
1904 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
1905 | } | ||
1906 | for(i=0; i<4; i++) { | ||
1907 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); | ||
1908 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, | ||
1909 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1910 | apply_lpc_correction(&model[i]); | ||
1911 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1912 | } | ||
1913 | |||
1914 | #ifdef DUMP | ||
1915 | dump_lsp_(&lsps[3][0]); | ||
1916 | dump_ak_(&ak[3][0], LPC_ORD_LOW); | ||
1917 | dump_model(&model[3]); | ||
1918 | if (c2->softdec) | ||
1919 | dump_softdec(c2->softdec, nbit); | ||
1920 | #endif | ||
1921 | |||
1922 | /* update memories for next frame ----------------------------*/ | ||
1923 | |||
1924 | c2->prev_model_dec = model[3]; | ||
1925 | c2->prev_e_dec = e[3]; | ||
1926 | for(i=0; i<LPC_ORD_LOW; i++) | ||
1927 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1928 | } | ||
1929 | #endif | ||
1930 | |||
1931 | |||
1932 | /*---------------------------------------------------------------------------*\ | ||
1933 | |||
1934 | FUNCTION....: codec2_encode_700c | ||
1935 | AUTHOR......: David Rowe | ||
1936 | DATE CREATED: Jan 2017 | ||
1937 | |||
1938 | Version c of 700 bit/s codec that uses newamp1 fixed rate VQ of amplitudes. | ||
1939 | |||
1940 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
1941 | |||
1942 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1943 | sample) frames, so we run the encoding algorithm four times: | ||
1944 | |||
1945 | frame 0: nothing | ||
1946 | frame 1: nothing | ||
1947 | frame 2: nothing | ||
1948 | frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy, | ||
1949 | 6 bit scalar Wo/voicing. No spare bits. | ||
1950 | |||
1951 | Voicing is encoded using the 0 index of the Wo quantiser. | ||
1952 | |||
1953 | The bit allocation is: | ||
1954 | |||
1955 | Parameter frames 1-3 frame 4 Total | ||
1956 | ----------------------------------------------------------- | ||
1957 | Harmonic magnitudes (rate k VQ) 0 18 18 | ||
1958 | Energy 0 4 4 | ||
1959 | log Wo/voicing 0 6 6 | ||
1960 | TOTAL 0 28 28 | ||
1961 | |||
1962 | \*---------------------------------------------------------------------------*/ | ||
1963 | |||
1964 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1965 | { | ||
1966 | MODEL model; | ||
1967 | int indexes[4], i, M=4; | ||
1968 | unsigned int nbit = 0; | ||
1969 | |||
1970 | assert(c2 != NULL); | ||
1971 | |||
1972 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1973 | |||
1974 | for(i=0; i<M; i++) { | ||
1975 | analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); | ||
1976 | } | ||
1977 | |||
1978 | int K = 20; | ||
1979 | float rate_K_vec[K], mean; | ||
1980 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; | ||
1981 | |||
1982 | newamp1_model_to_indexes(&c2->c2const, | ||
1983 | indexes, | ||
1984 | &model, | ||
1985 | rate_K_vec, | ||
1986 | c2->rate_K_sample_freqs_kHz, | ||
1987 | K, | ||
1988 | &mean, | ||
1989 | rate_K_vec_no_mean, | ||
1990 | rate_K_vec_no_mean_, &c2->se); | ||
1991 | c2->nse += K; | ||
1992 | |||
1993 | #ifndef CORTEX_M4 | ||
1994 | /* dump features for deep learning experiments */ | ||
1995 | if (c2->fmlfeat != NULL) { | ||
1996 | fwrite(&mean, 1, sizeof(float), c2->fmlfeat); | ||
1997 | fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat); | ||
1998 | fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat); | ||
1999 | } | ||
2000 | #endif | ||
2001 | |||
2002 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); | ||
2003 | pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); | ||
2004 | pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0); | ||
2005 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
2006 | |||
2007 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
2008 | } | ||
2009 | |||
2010 | |||
2011 | /*---------------------------------------------------------------------------*\ | ||
2012 | |||
2013 | FUNCTION....: codec2_decode_700c | ||
2014 | AUTHOR......: David Rowe | ||
2015 | DATE CREATED: August 2015 | ||
2016 | |||
2017 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
2018 | |||
2019 | \*---------------------------------------------------------------------------*/ | ||
2020 | |||
2021 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
2022 | { | ||
2023 | MODEL model[4]; | ||
2024 | int indexes[4]; | ||
2025 | int i; | ||
2026 | unsigned int nbit = 0; | ||
2027 | |||
2028 | assert(c2 != NULL); | ||
2029 | |||
2030 | /* unpack bits from channel ------------------------------------*/ | ||
2031 | |||
2032 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2033 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2034 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); | ||
2035 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2036 | |||
2037 | int M = 4; | ||
2038 | COMP HH[M][MAX_AMP+1]; | ||
2039 | float interpolated_surface_[M][NEWAMP1_K]; | ||
2040 | |||
2041 | newamp1_indexes_to_model(&c2->c2const, | ||
2042 | model, | ||
2043 | (COMP*)HH, | ||
2044 | (float*)interpolated_surface_, | ||
2045 | c2->prev_rate_K_vec_, | ||
2046 | &c2->Wo_left, | ||
2047 | &c2->voicing_left, | ||
2048 | c2->rate_K_sample_freqs_kHz, | ||
2049 | NEWAMP1_K, | ||
2050 | c2->phase_fft_fwd_cfg, | ||
2051 | c2->phase_fft_inv_cfg, | ||
2052 | indexes, | ||
2053 | c2->user_rate_K_vec_no_mean_, | ||
2054 | c2->post_filter_en); | ||
2055 | |||
2056 | |||
2057 | for(i=0; i<M; i++) { | ||
2058 | /* 700C is a little quiter so lets apply some experimentally derived audio gain */ | ||
2059 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
2060 | } | ||
2061 | } | ||
2062 | |||
2063 | /*---------------------------------------------------------------------------*\ | ||
2064 | |||
2065 | FUNCTION....: codec2_energy_700c | ||
2066 | AUTHOR......: Jeroen Vreeken | ||
2067 | DATE CREATED: Jan 2017 | ||
2068 | |||
2069 | Decodes energy value from encoded bits. | ||
2070 | |||
2071 | \*---------------------------------------------------------------------------*/ | ||
2072 | |||
2073 | float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits) | ||
2074 | { | ||
2075 | int indexes[4]; | ||
2076 | unsigned int nbit = 0; | ||
2077 | |||
2078 | assert(c2 != NULL); | ||
2079 | |||
2080 | /* unpack bits from channel ------------------------------------*/ | ||
2081 | |||
2082 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2083 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2084 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); | ||
2085 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2086 | |||
2087 | float mean = newamp1_energy_cb[0].cb[indexes[2]]; | ||
2088 | mean -= 10; | ||
2089 | if (indexes[3] == 0) | ||
2090 | mean -= 10; | ||
2091 | |||
2092 | return POW10F(mean/10.0); | ||
2093 | } | ||
2094 | |||
2095 | #ifndef CORTEX_M4 | ||
2096 | float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) | ||
2097 | { | ||
2098 | int indexes[4]; | ||
2099 | unsigned int nbit = 0; | ||
2100 | |||
2101 | assert(c2 != NULL); | ||
2102 | |||
2103 | /* unpack bits from channel ------------------------------------*/ | ||
2104 | |||
2105 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2106 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2107 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
2108 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2109 | |||
2110 | float mean = newamp2_energy_cb[0].cb[indexes[2]]; | ||
2111 | mean -= 10; | ||
2112 | if (indexes[3] == 0) | ||
2113 | mean -= 10; | ||
2114 | |||
2115 | return POW10F(mean/10.0); | ||
2116 | } | ||
2117 | |||
2118 | /*---------------------------------------------------------------------------*\ | ||
2119 | |||
2120 | FUNCTION....: codec2_get_energy() | ||
2121 | AUTHOR......: Jeroen Vreeken | ||
2122 | DATE CREATED: 08/03/2016 | ||
2123 | |||
2124 | Extract energy value from an encoded frame. | ||
2125 | |||
2126 | \*---------------------------------------------------------------------------*/ | ||
2127 | |||
2128 | float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) | ||
2129 | { | ||
2130 | assert(c2 != NULL); | ||
2131 | assert( | ||
2132 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) || | ||
2133 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) || | ||
2134 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) || | ||
2135 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || | ||
2136 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || | ||
2137 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || | ||
2138 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) || | ||
2139 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) || | ||
2140 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) || | ||
2141 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) || | ||
2142 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
2143 | ); | ||
2144 | MODEL model; | ||
2145 | float xq_dec[2] = {}; | ||
2146 | int e_index, WoE_index; | ||
2147 | float e; | ||
2148 | unsigned int nbit; | ||
2149 | |||
2150 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { | ||
2151 | nbit = 1 + 1 + WO_BITS; | ||
2152 | e_index = unpack(bits, &nbit, E_BITS); | ||
2153 | e = decode_energy(e_index, E_BITS); | ||
2154 | } | ||
2155 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { | ||
2156 | nbit = 1 + 1; | ||
2157 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
2158 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | ||
2159 | } | ||
2160 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { | ||
2161 | nbit = 1 + 1 + WO_BITS; | ||
2162 | e_index = unpack(bits, &nbit, E_BITS); | ||
2163 | e = decode_energy(e_index, E_BITS); | ||
2164 | } | ||
2165 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { | ||
2166 | nbit = 1 + 1; | ||
2167 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
2168 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | ||
2169 | } | ||
2170 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { | ||
2171 | nbit = 1 + 1 + 1 + 1 + WO_BITS; | ||
2172 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | ||
2173 | e = decode_energy(e_index, E_BITS); | ||
2174 | } | ||
2175 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { | ||
2176 | nbit = 1 + 1; | ||
2177 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
2178 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | ||
2179 | } | ||
2180 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) { | ||
2181 | nbit = 1 + 5; | ||
2182 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
2183 | e = decode_energy(e_index, 3); | ||
2184 | } | ||
2185 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) { | ||
2186 | nbit = 1 + 5; | ||
2187 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
2188 | e = decode_energy(e_index, 3); | ||
2189 | } | ||
2190 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | ||
2191 | e = codec2_energy_700c(c2, bits); | ||
2192 | } | ||
2193 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
2194 | e = codec2_energy_450(c2, bits); | ||
2195 | } | ||
2196 | |||
2197 | return e; | ||
2198 | } | ||
2199 | |||
2200 | |||
2201 | /*---------------------------------------------------------------------------*\ | ||
2202 | |||
2203 | FUNCTION....: codec2_encode_450 | ||
2204 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
2205 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
2206 | DATE CREATED: July 2018 | ||
2207 | |||
2208 | 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes. | ||
2209 | |||
2210 | Encodes 320 speech samples (40ms of speech) into 18 bits. | ||
2211 | |||
2212 | The codec2 algorithm actually operates internally on 10ms (80 | ||
2213 | sample) frames, so we run the encoding algorithm four times: | ||
2214 | |||
2215 | frame 0: nothing | ||
2216 | frame 1: nothing | ||
2217 | frame 2: nothing | ||
2218 | frame 3: 9 bit 1 stage VQ, 3 bits energy, | ||
2219 | 6 bit scalar Wo/voicing/plosive. No spare bits. | ||
2220 | |||
2221 | If a plosive is detected the frame at the energy-step is encoded. | ||
2222 | |||
2223 | Voicing is encoded using the 000000 index of the Wo quantiser. | ||
2224 | Plosive is encoded using the 111111 index of the Wo quantiser. | ||
2225 | |||
2226 | The bit allocation is: | ||
2227 | |||
2228 | Parameter frames 1-3 frame 4 Total | ||
2229 | ----------------------------------------------------------- | ||
2230 | Harmonic magnitudes (rate k VQ) 0 9 9 | ||
2231 | Energy 0 3 3 | ||
2232 | log Wo/voicing/plosive 0 6 6 | ||
2233 | TOTAL 0 18 18 | ||
2234 | |||
2235 | |||
2236 | \*---------------------------------------------------------------------------*/ | ||
2237 | |||
2238 | void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
2239 | { | ||
2240 | MODEL model; | ||
2241 | int indexes[4], i,h, M=4; | ||
2242 | unsigned int nbit = 0; | ||
2243 | int plosiv = 0; | ||
2244 | float energydelta[M]; | ||
2245 | int spectralCounter; | ||
2246 | |||
2247 | assert(c2 != NULL); | ||
2248 | |||
2249 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
2250 | for(i=0; i<M; i++){ | ||
2251 | analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); | ||
2252 | energydelta[i] = 0; | ||
2253 | spectralCounter = 0; | ||
2254 | for(h = 0;h<(model.L);h++){ | ||
2255 | //only detect above 300 Hz | ||
2256 | if(h*model.Wo*(c2->c2const.Fs/2000.0)/M_PI > 0.3){ | ||
2257 | energydelta[i] = energydelta[i] + 20.0*log10(model.A[10]+1E-16); | ||
2258 | spectralCounter = spectralCounter+1; | ||
2259 | } | ||
2260 | |||
2261 | } | ||
2262 | energydelta[i] = energydelta[i] / spectralCounter ; | ||
2263 | } | ||
2264 | //Constants for plosive Detection tdB = threshold; minPwr = from below this level plosives have to rise | ||
2265 | float tdB = 15; //not fixed can be changed | ||
2266 | float minPwr = 15; //not fixed can be changed | ||
2267 | if((c2->energy_prev)<minPwr && energydelta[0]>((c2->energy_prev)+tdB)){ | ||
2268 | |||
2269 | plosiv = 1; | ||
2270 | } | ||
2271 | if(energydelta[0]<minPwr && energydelta[1]>(energydelta[0]+tdB)){ | ||
2272 | |||
2273 | plosiv = 2; | ||
2274 | } | ||
2275 | if(energydelta[1]<minPwr &&energydelta[2]>(energydelta[1]+tdB)){ | ||
2276 | |||
2277 | plosiv = 3; | ||
2278 | } | ||
2279 | if(energydelta[2]<minPwr &&energydelta[3]>(energydelta[2]+tdB)){ | ||
2280 | |||
2281 | plosiv = 4; | ||
2282 | } | ||
2283 | if(plosiv != 0 && plosiv != 4){ | ||
2284 | analyse_one_frame(c2, &model, &speech[(plosiv-1)*c2->n_samp]); | ||
2285 | } | ||
2286 | |||
2287 | c2->energy_prev = energydelta[3]; | ||
2288 | |||
2289 | |||
2290 | int K = 29; | ||
2291 | float rate_K_vec[K], mean; | ||
2292 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; | ||
2293 | if(plosiv > 0){ | ||
2294 | plosiv = 1; | ||
2295 | } | ||
2296 | newamp2_model_to_indexes(&c2->c2const, | ||
2297 | indexes, | ||
2298 | &model, | ||
2299 | rate_K_vec, | ||
2300 | c2->n2_rate_K_sample_freqs_kHz, | ||
2301 | K, | ||
2302 | &mean, | ||
2303 | rate_K_vec_no_mean, | ||
2304 | rate_K_vec_no_mean_, | ||
2305 | plosiv); | ||
2306 | |||
2307 | |||
2308 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); | ||
2309 | //pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); | ||
2310 | pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0); | ||
2311 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
2312 | |||
2313 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
2314 | } | ||
2315 | |||
2316 | |||
2317 | /*---------------------------------------------------------------------------*\ | ||
2318 | |||
2319 | FUNCTION....: codec2_decode_450 | ||
2320 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
2321 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
2322 | DATE CREATED: July 2018 | ||
2323 | |||
2324 | \*---------------------------------------------------------------------------*/ | ||
2325 | |||
2326 | void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
2327 | { | ||
2328 | MODEL model[4]; | ||
2329 | int indexes[4]; | ||
2330 | int i; | ||
2331 | unsigned int nbit = 0; | ||
2332 | |||
2333 | assert(c2 != NULL); | ||
2334 | |||
2335 | /* unpack bits from channel ------------------------------------*/ | ||
2336 | |||
2337 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2338 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2339 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
2340 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2341 | |||
2342 | int M = 4; | ||
2343 | COMP HH[M][MAX_AMP+1]; | ||
2344 | float interpolated_surface_[M][NEWAMP2_K]; | ||
2345 | int pwbFlag = 0; | ||
2346 | |||
2347 | newamp2_indexes_to_model(&c2->c2const, | ||
2348 | model, | ||
2349 | (COMP*)HH, | ||
2350 | (float*)interpolated_surface_, | ||
2351 | c2->n2_prev_rate_K_vec_, | ||
2352 | &c2->Wo_left, | ||
2353 | &c2->voicing_left, | ||
2354 | c2->n2_rate_K_sample_freqs_kHz, | ||
2355 | NEWAMP2_K, | ||
2356 | c2->phase_fft_fwd_cfg, | ||
2357 | c2->phase_fft_inv_cfg, | ||
2358 | indexes, | ||
2359 | 1.5, | ||
2360 | pwbFlag); | ||
2361 | |||
2362 | |||
2363 | for(i=0; i<M; i++) { | ||
2364 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
2365 | } | ||
2366 | } | ||
2367 | |||
2368 | /*---------------------------------------------------------------------------*\ | ||
2369 | |||
2370 | FUNCTION....: codec2_decode_450pwb | ||
2371 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
2372 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
2373 | DATE CREATED: July 2018 | ||
2374 | |||
2375 | Decodes the 450 codec data in pseudo wideband at 16kHz samplerate. | ||
2376 | |||
2377 | \*---------------------------------------------------------------------------*/ | ||
2378 | |||
2379 | void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
2380 | { | ||
2381 | MODEL model[4]; | ||
2382 | int indexes[4]; | ||
2383 | int i; | ||
2384 | unsigned int nbit = 0; | ||
2385 | |||
2386 | assert(c2 != NULL); | ||
2387 | |||
2388 | /* unpack bits from channel ------------------------------------*/ | ||
2389 | |||
2390 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2391 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2392 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
2393 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2394 | |||
2395 | int M = 4; | ||
2396 | COMP HH[M][MAX_AMP+1]; | ||
2397 | float interpolated_surface_[M][NEWAMP2_16K_K]; | ||
2398 | int pwbFlag = 1; | ||
2399 | |||
2400 | newamp2_indexes_to_model(&c2->c2const, | ||
2401 | model, | ||
2402 | (COMP*)HH, | ||
2403 | (float*)interpolated_surface_, | ||
2404 | c2->n2_pwb_prev_rate_K_vec_, | ||
2405 | &c2->Wo_left, | ||
2406 | &c2->voicing_left, | ||
2407 | c2->n2_pwb_rate_K_sample_freqs_kHz, | ||
2408 | NEWAMP2_16K_K, | ||
2409 | c2->phase_fft_fwd_cfg, | ||
2410 | c2->phase_fft_inv_cfg, | ||
2411 | indexes, | ||
2412 | 1.5, | ||
2413 | pwbFlag); | ||
2414 | |||
2415 | |||
2416 | for(i=0; i<M; i++) { | ||
2417 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
2418 | } | ||
2419 | } | ||
2420 | |||
2421 | #endif | ||
2422 | |||
2423 | /*---------------------------------------------------------------------------* \ | ||
2424 | |||
2425 | FUNCTION....: synthesise_one_frame() | ||
2426 | AUTHOR......: David Rowe | ||
2427 | DATE CREATED: 23/8/2010 | ||
2428 | |||
2429 | Synthesise 80 speech samples (10ms) from model parameters. | ||
2430 | |||
2431 | \*---------------------------------------------------------------------------*/ | ||
2432 | |||
2433 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain) | ||
2434 | { | ||
2435 | int i; | ||
2436 | //PROFILE_VAR(phase_start, pf_start, synth_start); | ||
2437 | |||
2438 | //#ifdef DUMP | ||
2439 | //dump_quantised_model(model); | ||
2440 | //#endif | ||
2441 | |||
2442 | //PROFILE_SAMPLE(phase_start); | ||
2443 | |||
2444 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode) ) { | ||
2445 | /* newamp1/2, we've already worked out rate L phase */ | ||
2446 | COMP *H = Aw; | ||
2447 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); | ||
2448 | } else { | ||
2449 | /* LPC based phase synthesis */ | ||
2450 | COMP H[MAX_AMP+1]; | ||
2451 | sample_phase(model, H, Aw); | ||
2452 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); | ||
2453 | } | ||
2454 | |||
2455 | //PROFILE_SAMPLE_AND_LOG(pf_start, phase_start, " phase_synth"); | ||
2456 | |||
2457 | postfilter(model, &c2->bg_est); | ||
2458 | |||
2459 | //PROFILE_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter"); | ||
2460 | |||
2461 | synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1); | ||
2462 | |||
2463 | for(i=0; i<c2->n_samp; i++) { | ||
2464 | c2->Sn_[i] *= gain; | ||
2465 | } | ||
2466 | |||
2467 | //PROFILE_SAMPLE_AND_LOG2(synth_start, " synth"); | ||
2468 | |||
2469 | ear_protection(c2->Sn_, c2->n_samp); | ||
2470 | |||
2471 | for(i=0; i<c2->n_samp; i++) { | ||
2472 | if (c2->Sn_[i] > 32767.0) | ||
2473 | speech[i] = 32767; | ||
2474 | else if (c2->Sn_[i] < -32767.0) | ||
2475 | speech[i] = -32767; | ||
2476 | else | ||
2477 | speech[i] = c2->Sn_[i]; | ||
2478 | } | ||
2479 | |||
2480 | } | ||
2481 | |||
2482 | /*---------------------------------------------------------------------------*\ | ||
2483 | |||
2484 | FUNCTION....: analyse_one_frame() | ||
2485 | AUTHOR......: David Rowe | ||
2486 | DATE CREATED: 23/8/2010 | ||
2487 | |||
2488 | Extract sinusoidal model parameters from 80 speech samples (10ms of | ||
2489 | speech). | ||
2490 | |||
2491 | \*---------------------------------------------------------------------------*/ | ||
2492 | |||
2493 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) | ||
2494 | { | ||
2495 | COMP Sw[FFT_ENC]; | ||
2496 | float pitch; | ||
2497 | int i; | ||
2498 | //PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps); | ||
2499 | int n_samp = c2->n_samp; | ||
2500 | int m_pitch = c2->m_pitch; | ||
2501 | |||
2502 | /* Read input speech */ | ||
2503 | |||
2504 | for(i=0; i<m_pitch-n_samp; i++) | ||
2505 | c2->Sn[i] = c2->Sn[i+n_samp]; | ||
2506 | for(i=0; i<n_samp; i++) | ||
2507 | c2->Sn[i+m_pitch-n_samp] = speech[i]; | ||
2508 | |||
2509 | //PROFILE_SAMPLE(dft_start); | ||
2510 | dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); | ||
2511 | //PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech"); | ||
2512 | |||
2513 | /* Estimate pitch */ | ||
2514 | |||
2515 | nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc); | ||
2516 | //PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp"); | ||
2517 | |||
2518 | model->Wo = TWO_PI/pitch; | ||
2519 | model->L = PI/model->Wo; | ||
2520 | |||
2521 | /* estimate model parameters */ | ||
2522 | |||
2523 | two_stage_pitch_refinement(&c2->c2const, model, Sw); | ||
2524 | //PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage"); | ||
2525 | estimate_amplitudes(model, Sw, c2->W, 0); | ||
2526 | //PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps"); | ||
2527 | est_voicing_mbe(&c2->c2const, model, Sw, c2->W); | ||
2528 | //PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing"); | ||
2529 | #ifdef DUMP | ||
2530 | dump_model(model); | ||
2531 | #endif | ||
2532 | } | ||
2533 | |||
/*---------------------------------------------------------------------------*\

  FUNCTION....: ear_protection()
  AUTHOR......: David Rowe
  DATE CREATED: Nov 7 2012

  Limits output level to protect ears when there are bit errors or the input
  is overdriven.  This doesn't correct or mask bit errors, just reduces the
  worst of their damage.

  in_out - frame of n samples, attenuated in place when required.
  n      - number of samples in in_out; n <= 0 leaves the buffer untouched.

  The peak is measured as a magnitude, so a frame whose largest excursion
  is negative is limited exactly like one whose largest excursion is
  positive.

\*---------------------------------------------------------------------------*/

static void ear_protection(float in_out[], int n) {
    float peak = 0.0f;
    float over, gain;
    int   i;

    /* find the largest sample magnitude in the frame */

    for (i = 0; i < n; i++) {
        const float mag = (in_out[i] < 0.0f) ? -in_out[i] : in_out[i];

        if (mag > peak) {
            peak = mag;
        }
    }

    /* determine how far above set point */

    over = peak/30000.0;

    /* If we are x dB over set point we reduce level by 2x dB, this
       attenuates major excursions in amplitude (likely to be caused
       by bit errors) more than smaller ones */

    if (over > 1.0) {
        gain = 1.0/(over*over);
        for (i = 0; i < n; i++) {
            in_out[i] *= gain;
        }
    }
}
2572 | |||
2573 | void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma) | ||
2574 | { | ||
2575 | assert((beta >= 0.0) && (beta <= 1.0)); | ||
2576 | assert((gamma >= 0.0) && (gamma <= 1.0)); | ||
2577 | c2->lpc_pf = enable; | ||
2578 | c2->bass_boost = bass_boost; | ||
2579 | c2->beta = beta; | ||
2580 | c2->gamma = gamma; | ||
2581 | } | ||
2582 | |||
2583 | /* | ||
2584 | Allows optional stealing of one of the voicing bits for use as a | ||
2585 | spare bit, only 1300 & 1400 & 1600 bit/s supported for now. | ||
2586 | Experimental method of sending voice/data frames for FreeDV. | ||
2587 | */ | ||
2588 | |||
2589 | int codec2_get_spare_bit_index(struct CODEC2 *c2) | ||
2590 | { | ||
2591 | assert(c2 != NULL); | ||
2592 | |||
2593 | switch(c2->mode) { | ||
2594 | case CODEC2_MODE_1300: | ||
2595 | return 2; // bit 2 (3th bit) is v2 (third voicing bit) | ||
2596 | break; | ||
2597 | case CODEC2_MODE_1400: | ||
2598 | return 10; // bit 10 (11th bit) is v2 (third voicing bit) | ||
2599 | break; | ||
2600 | case CODEC2_MODE_1600: | ||
2601 | return 15; // bit 15 (16th bit) is v2 (third voicing bit) | ||
2602 | break; | ||
2603 | case CODEC2_MODE_700: | ||
2604 | return 26; // bits 26 and 27 are spare | ||
2605 | break; | ||
2606 | case CODEC2_MODE_700B: | ||
2607 | return 27; // bit 27 is spare | ||
2608 | break; | ||
2609 | } | ||
2610 | |||
2611 | return -1; | ||
2612 | } | ||
2613 | |||
2614 | /* | ||
2615 | Reconstructs the spare voicing bit. Note works on unpacked bits | ||
2616 | for convenience. | ||
2617 | */ | ||
2618 | |||
2619 | int codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[]) | ||
2620 | { | ||
2621 | int v1,v3; | ||
2622 | |||
2623 | assert(c2 != NULL); | ||
2624 | |||
2625 | v1 = unpacked_bits[1]; | ||
2626 | |||
2627 | switch(c2->mode) { | ||
2628 | case CODEC2_MODE_1300: | ||
2629 | |||
2630 | v3 = unpacked_bits[1+1+1]; | ||
2631 | |||
2632 | /* if either adjacent frame is voiced, make this one voiced */ | ||
2633 | |||
2634 | unpacked_bits[2] = (v1 || v3); | ||
2635 | |||
2636 | return 0; | ||
2637 | |||
2638 | break; | ||
2639 | |||
2640 | case CODEC2_MODE_1400: | ||
2641 | |||
2642 | v3 = unpacked_bits[1+1+8+1]; | ||
2643 | |||
2644 | /* if either adjacent frame is voiced, make this one voiced */ | ||
2645 | |||
2646 | unpacked_bits[10] = (v1 || v3); | ||
2647 | |||
2648 | return 0; | ||
2649 | |||
2650 | break; | ||
2651 | |||
2652 | case CODEC2_MODE_1600: | ||
2653 | v3 = unpacked_bits[1+1+8+5+1]; | ||
2654 | |||
2655 | /* if either adjacent frame is voiced, make this one voiced */ | ||
2656 | |||
2657 | unpacked_bits[15] = (v1 || v3); | ||
2658 | |||
2659 | return 0; | ||
2660 | |||
2661 | break; | ||
2662 | } | ||
2663 | |||
2664 | return -1; | ||
2665 | } | ||
2666 | |||
2667 | void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) | ||
2668 | { | ||
2669 | assert(c2 != NULL); | ||
2670 | c2->gray = gray; | ||
2671 | } | ||
2672 | |||
2673 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec) | ||
2674 | { | ||
2675 | assert(c2 != NULL); | ||
2676 | c2->softdec = softdec; | ||
2677 | } | ||
2678 | |||
2679 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename) { | ||
2680 | if ((codec2_state->fmlfeat = fopen(filename, "wb")) == NULL) { | ||
2681 | fprintf(stderr, "error opening machine learning feature file: %s\n", filename); | ||
2682 | exit(1); | ||
2683 | } | ||
2684 | } | ||
2685 | |||
2686 | #ifndef __EMBEDDED__ | ||
2687 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename) { | ||
2688 | FILE *f; | ||
2689 | |||
2690 | if ((f = fopen(filename, "rb")) == NULL) { | ||
2691 | fprintf(stderr, "error opening codebook file: %s\n", filename); | ||
2692 | exit(1); | ||
2693 | } | ||
2694 | //fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, newamp1vq_cb[num].k, newamp1vq_cb[num].m); | ||
2695 | float tmp[newamp1vq_cb[num].k*newamp1vq_cb[num].m]; | ||
2696 | int nread = fread(tmp, sizeof(float), newamp1vq_cb[num].k*newamp1vq_cb[num].m, f); | ||
2697 | float *p = (float*)newamp1vq_cb[num].cb; | ||
2698 | for(int i=0; i<newamp1vq_cb[num].k*newamp1vq_cb[num].m; i++) | ||
2699 | p[i] = tmp[i]; | ||
2700 | // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], newamp1vq_cb[num].cb[1]); | ||
2701 | assert(nread == newamp1vq_cb[num].k*newamp1vq_cb[num].m); | ||
2702 | fclose(f); | ||
2703 | } | ||
2704 | #endif | ||
2705 | |||
2706 | float codec2_get_var(struct CODEC2 *codec2_state) { | ||
2707 | if (codec2_state->nse) | ||
2708 | return codec2_state->se/codec2_state->nse; | ||
2709 | else | ||
2710 | return 0; | ||
2711 | } | ||
2712 | |||
2713 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { | ||
2714 | codec2_state->user_rate_K_vec_no_mean_ = (float*)malloc(sizeof(float)*NEWAMP1_K); | ||
2715 | *K = NEWAMP1_K; | ||
2716 | return codec2_state->user_rate_K_vec_no_mean_; | ||
2717 | } | ||
2718 | |||
2719 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) { | ||
2720 | codec2_state->post_filter_en = en; | ||
2721 | } | ||