summaryrefslogtreecommitdiff
path: root/codec2.c
diff options
context:
space:
mode:
authorerdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
committererdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
commitf02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree45556e6104242d4702689760433d7321ae74ec17 /codec2.c
stripdown of version 0.9
Diffstat (limited to 'codec2.c')
-rw-r--r--codec2.c2721
1 files changed, 2721 insertions, 0 deletions
diff --git a/codec2.c b/codec2.c
new file mode 100644
index 0000000..840fe21
--- /dev/null
+++ b/codec2.c
@@ -0,0 +1,2721 @@
1/*---------------------------------------------------------------------------*\
2
3 FILE........: codec2.c
4 AUTHOR......: David Rowe
5 DATE CREATED: 21/8/2010
6
7 Codec2 fully quantised encoder and decoder functions. If you want to use
8 codec2, the codec2_xxx functions are for you.
9
10\*---------------------------------------------------------------------------*/
11
12/*
13 Copyright (C) 2010 David Rowe
14
15 All rights reserved.
16
17 This program is free software; you can redistribute it and/or modify
18 it under the terms of the GNU Lesser General Public License version 2.1, as
19 published by the Free Software Foundation. This program is
20 distributed in the hope that it will be useful, but WITHOUT ANY
21 WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
23 License for more details.
24
25 You should have received a copy of the GNU Lesser General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
27*/
28
29#include <assert.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <math.h>
34
35#include "defines.h"
36#include "codec2_fft.h"
37#include "sine.h"
38#include "nlp.h"
39#include "dump.h"
40#include "lpc.h"
41#include "quantise.h"
42#include "phase.h"
43#include "interp.h"
44#include "postfilter.h"
45#include "codec2.h"
46#include "lsp.h"
47#include "newamp2.h"
48#include "codec2_internal.h"
49#include "machdep.h"
50#include "bpf.h"
51#include "bpfb.h"
52#include "c2wideband.h"
53
54#include "debug_alloc.h"
55
56/*---------------------------------------------------------------------------* \
57
58 FUNCTION HEADERS
59
60\*---------------------------------------------------------------------------*/
61
62void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]);
63void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model,
64 COMP Aw[], float gain);
65void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
66void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
67void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]);
68void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits);
69void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]);
70void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits);
71void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]);
72void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits);
73void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]);
74void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est);
75void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
76void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
77void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]);
78void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits);
79void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]);
80void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits);
81void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]);
82void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits);
83void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]);
84void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits);
85void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits);
86static void ear_protection(float in_out[], int n);
87
88
89
90/*---------------------------------------------------------------------------*\
91
92 FUNCTIONS
93
94\*---------------------------------------------------------------------------*/
95
96/*---------------------------------------------------------------------------*\
97
98 FUNCTION....: codec2_create
99 AUTHOR......: David Rowe
100 DATE CREATED: 21/8/2010
101
102 Create and initialise an instance of the codec. Returns a pointer
103 to the codec states or NULL on failure. One set of states is
104 sufficient for a full duplex codec (i.e. an encoder and decoder).
105 You don't need separate states for encoders and decoders. See
106 c2enc.c and c2dec.c for examples.
107
108\*---------------------------------------------------------------------------*/
109
110
111//Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior !
112struct CODEC2 * codec2_create(int mode)
113{
114 struct CODEC2 *c2;
115 int i,l;
116
117#ifndef CORTEX_M4
118 if (( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode)) ||
119 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode)) ) {
120 return NULL;
121 }
122#endif
123
124 c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2));
125 if (c2 == NULL)
126 return NULL;
127
128 c2->mode = mode;
129
130 /* store constants in a few places for convenience */
131
132 if( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0){
133 c2->c2const = c2const_create(8000, N_S);
134 }else{
135 c2->c2const = c2const_create(16000, N_S);
136 }
137 c2->Fs = c2->c2const.Fs;
138 int n_samp = c2->n_samp = c2->c2const.n_samp;
139 int m_pitch = c2->m_pitch = c2->c2const.m_pitch;
140
141 c2->Pn = (float*)MALLOC(2*n_samp*sizeof(float));
142 if (c2->Pn == NULL) {
143 return NULL;
144 }
145 c2->Sn_ = (float*)MALLOC(2*n_samp*sizeof(float));
146 if (c2->Sn_ == NULL) {
147 FREE(c2->Pn);
148 return NULL;
149 }
150 c2->w = (float*)MALLOC(m_pitch*sizeof(float));
151 if (c2->w == NULL) {
152 FREE(c2->Pn);
153 FREE(c2->Sn_);
154 return NULL;
155 }
156 c2->Sn = (float*)MALLOC(m_pitch*sizeof(float));
157 if (c2->Sn == NULL) {
158 FREE(c2->Pn);
159 FREE(c2->Sn_);
160 FREE(c2->w);
161 return NULL;
162 }
163
164 for(i=0; i<m_pitch; i++)
165 c2->Sn[i] = 1.0;
166 c2->hpf_states[0] = c2->hpf_states[1] = 0.0;
167 for(i=0; i<2*n_samp; i++)
168 c2->Sn_[i] = 0;
169 c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL);
170 c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL);
171 make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W);
172 make_synthesis_window(&c2->c2const, c2->Pn);
173 c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL);
174 quantise_init();
175 c2->prev_f0_enc = 1/P_MAX_S;
176 c2->bg_est = 0.0;
177 c2->ex_phase = 0.0;
178
179 for(l=1; l<=MAX_AMP; l++)
180 c2->prev_model_dec.A[l] = 0.0;
181 c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max;
182 c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo;
183 c2->prev_model_dec.voiced = 0;
184
185 for(i=0; i<LPC_ORD; i++) {
186 c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1);
187 }
188 c2->prev_e_dec = 1;
189
190 c2->nlp = nlp_create(&c2->c2const);
191 if (c2->nlp == NULL) {
192 return NULL;
193 }
194
195 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode))
196 c2->gray = 0; // natural binary better for trellis decoding (hopefully added later)
197 else
198 c2->gray = 1;
199
200 c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA;
201
202 c2->xq_enc[0] = c2->xq_enc[1] = 0.0;
203 c2->xq_dec[0] = c2->xq_dec[1] = 0.0;
204
205 c2->smoothing = 0;
206 c2->se = 0.0; c2->nse = 0;
207 c2->user_rate_K_vec_no_mean_ = NULL;
208 c2->post_filter_en = 1;
209
210 c2->bpf_buf = (float*)MALLOC(sizeof(float)*(BPF_N+4*c2->n_samp));
211 assert(c2->bpf_buf != NULL);
212 for(i=0; i<BPF_N+4*c2->n_samp; i++)
213 c2->bpf_buf[i] = 0.0;
214
215 c2->softdec = NULL;
216
217 /* newamp1 initialisation */
218
219 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
220 mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) );
221 int k;
222 for(k=0; k<NEWAMP1_K; k++) {
223 c2->prev_rate_K_vec_[k] = 0.0;
224 }
225 c2->Wo_left = 0.0;
226 c2->voicing_left = 0;;
227 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL);
228 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL);
229 }
230
231#ifndef CORTEX_M4
232 /* newamp2 initialisation */
233
234 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
235 n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K);
236 int k;
237 for(k=0; k<NEWAMP2_K; k++) {
238 c2->n2_prev_rate_K_vec_[k] = 0.0;
239 }
240 c2->Wo_left = 0.0;
241 c2->voicing_left = 0;;
242 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL);
243 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL);
244 }
245 /* newamp2 PWB initialisation */
246
247 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
248 n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K);
249 int k;
250 for(k=0; k<NEWAMP2_16K_K; k++) {
251 c2->n2_pwb_prev_rate_K_vec_[k] = 0.0;
252 }
253 c2->Wo_left = 0.0;
254 c2->voicing_left = 0;;
255 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL);
256 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL);
257 }
258#endif
259
260 c2->fmlfeat = NULL;
261
262 // make sure that one of the two decode function pointers is empty
263 // for the encode function pointer this is not required since we always set it
264 // to a meaningful value
265
266 c2->decode = NULL;
267 c2->decode_ber = NULL;
268
269 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode))
270 {
271 c2->encode = codec2_encode_3200;
272 c2->decode = codec2_decode_3200;
273 }
274
275 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode))
276 {
277 c2->encode = codec2_encode_2400;
278 c2->decode = codec2_decode_2400;
279 }
280
281 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode))
282 {
283 c2->encode = codec2_encode_1600;
284 c2->decode = codec2_decode_1600;
285 }
286
287 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode))
288 {
289 c2->encode = codec2_encode_1400;
290 c2->decode = codec2_decode_1400;
291 }
292
293 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode))
294 {
295 c2->encode = codec2_encode_1300;
296 c2->decode_ber = codec2_decode_1300;
297 }
298
299 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
300 {
301 c2->encode = codec2_encode_1200;
302 c2->decode = codec2_decode_1200;
303 }
304
305#ifndef CORTEX_M4
306 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
307 {
308 c2->encode = codec2_encode_700;
309 c2->decode = codec2_decode_700;
310 }
311
312 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
313 {
314 c2->encode = codec2_encode_700b;
315 c2->decode = codec2_decode_700b;
316 }
317#endif
318 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
319 {
320 c2->encode = codec2_encode_700c;
321 c2->decode = codec2_decode_700c;
322 }
323#ifndef CORTEX_M4
324 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
325 {
326 c2->encode = codec2_encode_450;
327 c2->decode = codec2_decode_450;
328 }
329
330 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
331 {
332 //Encode PWB doesnt make sense
333 c2->encode = codec2_encode_450;
334 c2->decode = codec2_decode_450pwb;
335 }
336
337#endif
338
339 return c2;
340}
341
342/*---------------------------------------------------------------------------*\
343
344 FUNCTION....: codec2_destroy
345 AUTHOR......: David Rowe
346 DATE CREATED: 21/8/2010
347
348 Destroy an instance of the codec.
349
350\*---------------------------------------------------------------------------*/
351
352void codec2_destroy(struct CODEC2 *c2)
353{
354 assert(c2 != NULL);
355 FREE(c2->bpf_buf);
356 nlp_destroy(c2->nlp);
357 codec2_fft_free(c2->fft_fwd_cfg);
358 codec2_fftr_free(c2->fftr_fwd_cfg);
359 codec2_fftr_free(c2->fftr_inv_cfg);
360 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
361 codec2_fft_free(c2->phase_fft_fwd_cfg);
362 codec2_fft_free(c2->phase_fft_inv_cfg);
363 }
364 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
365 codec2_fft_free(c2->phase_fft_fwd_cfg);
366 codec2_fft_free(c2->phase_fft_inv_cfg);
367 }
368 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
369 codec2_fft_free(c2->phase_fft_fwd_cfg);
370 codec2_fft_free(c2->phase_fft_inv_cfg);
371 }
372 FREE(c2->Pn);
373 FREE(c2->Sn);
374 FREE(c2->w);
375 FREE(c2->Sn_);
376 FREE(c2);
377}
378
379/*---------------------------------------------------------------------------*\
380
381 FUNCTION....: codec2_bits_per_frame
382 AUTHOR......: David Rowe
383 DATE CREATED: Nov 14 2011
384
385 Returns the number of bits per frame.
386
387\*---------------------------------------------------------------------------*/
388
389int codec2_bits_per_frame(struct CODEC2 *c2) {
390 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode))
391 return 64;
392 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode))
393 return 48;
394 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode))
395 return 64;
396 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode))
397 return 56;
398 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode))
399 return 52;
400 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
401 return 48;
402 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
403 return 28;
404 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
405 return 28;
406 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
407 return 28;
408 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
409 return 18;
410 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
411 return 18;
412
413 return 0; /* shouldn't get here */
414}
415
416
417/*---------------------------------------------------------------------------*\
418
419 FUNCTION....: codec2_samples_per_frame
420 AUTHOR......: David Rowe
421 DATE CREATED: Nov 14 2011
422
423 Returns the number of speech samples per frame.
424
425\*---------------------------------------------------------------------------*/
426
427int codec2_samples_per_frame(struct CODEC2 *c2) {
428 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode))
429 return 160;
430 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode))
431 return 160;
432 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode))
433 return 320;
434 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode))
435 return 320;
436 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode))
437 return 320;
438 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
439 return 320;
440 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
441 return 320;
442 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
443 return 320;
444 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
445 return 320;
446 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
447 return 320;
448 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
449 return 640;
450 return 0; /* shouldnt get here */
451}
452
453void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[])
454{
455 assert(c2 != NULL);
456 assert(c2->encode != NULL);
457
458 c2->encode(c2, bits, speech);
459
460}
461
/* Decode one frame of bits into speech; same as codec2_decode_ber()
   with a bit error rate estimate of zero. */
void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits)
{
    const float no_ber_est = 0.0;

    codec2_decode_ber(c2, speech, bits, no_ber_est);
}
466
467void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est)
468{
469 assert(c2 != NULL);
470 assert(c2->decode != NULL || c2->decode_ber != NULL);
471
472 if (c2->decode != NULL)
473 {
474 c2->decode(c2, speech, bits);
475 }
476 else
477 {
478 c2->decode_ber(c2, speech, bits, ber_est);
479 }
480}
481
482
483/*---------------------------------------------------------------------------*\
484
485 FUNCTION....: codec2_encode_3200
486 AUTHOR......: David Rowe
487 DATE CREATED: 13 Sep 2012
488
489 Encodes 160 speech samples (20ms of speech) into 64 bits.
490
491 The codec2 algorithm actually operates internally on 10ms (80
492 sample) frames, so we run the encoding algorithm twice. On the
493 first frame we just send the voicing bits. On the second frame we
494 send all model parameters. Compared to 2400 we use a larger number
495 of bits for the LSPs and non-VQ pitch and energy.
496
497 The bit allocation is:
498
499 Parameter bits/frame
500 --------------------------------------
501 Harmonic magnitudes (LSPs) 50
502 Pitch (Wo) 7
503 Energy 5
504 Voicing (10ms update) 2
505 TOTAL 64
506
507\*---------------------------------------------------------------------------*/
508
509void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[])
510{
511 MODEL model;
512 float ak[LPC_ORD+1];
513 float lsps[LPC_ORD];
514 float e;
515 int Wo_index, e_index;
516 int lspd_indexes[LPC_ORD];
517 int i;
518 unsigned int nbit = 0;
519
520 assert(c2 != NULL);
521
522 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
523
524 /* first 10ms analysis frame - we just want voicing */
525
526 analyse_one_frame(c2, &model, speech);
527 pack(bits, &nbit, model.voiced, 1);
528
529 /* second 10ms analysis frame */
530
531 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
532 pack(bits, &nbit, model.voiced, 1);
533 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
534 pack(bits, &nbit, Wo_index, WO_BITS);
535
536 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
537 e_index = encode_energy(e, E_BITS);
538 pack(bits, &nbit, e_index, E_BITS);
539
540 encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD);
541 for(i=0; i<LSPD_SCALAR_INDEXES; i++) {
542 pack(bits, &nbit, lspd_indexes[i], lspd_bits(i));
543 }
544 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
545}
546
547
548/*---------------------------------------------------------------------------*\
549
550 FUNCTION....: codec2_decode_3200
551 AUTHOR......: David Rowe
552 DATE CREATED: 13 Sep 2012
553
554 Decodes a frame of 64 bits into 160 samples (20ms) of speech.
555
556\*---------------------------------------------------------------------------*/
557
558void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits)
559{
560 MODEL model[2];
561 int lspd_indexes[LPC_ORD];
562 float lsps[2][LPC_ORD];
563 int Wo_index, e_index;
564 float e[2];
565 float snr;
566 float ak[2][LPC_ORD+1];
567 int i,j;
568 unsigned int nbit = 0;
569 COMP Aw[FFT_ENC];
570
571 assert(c2 != NULL);
572
573 /* only need to zero these out due to (unused) snr calculation */
574
575 for(i=0; i<2; i++)
576 for(j=1; j<=MAX_AMP; j++)
577 model[i].A[j] = 0.0;
578
579 /* unpack bits from channel ------------------------------------*/
580
581 /* this will partially fill the model params for the 2 x 10ms
582 frames */
583
584 model[0].voiced = unpack(bits, &nbit, 1);
585 model[1].voiced = unpack(bits, &nbit, 1);
586
587 Wo_index = unpack(bits, &nbit, WO_BITS);
588 model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
589 model[1].L = PI/model[1].Wo;
590
591 e_index = unpack(bits, &nbit, E_BITS);
592 e[1] = decode_energy(e_index, E_BITS);
593
594 for(i=0; i<LSPD_SCALAR_INDEXES; i++) {
595 lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i));
596 }
597 decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD);
598
599 /* interpolate ------------------------------------------------*/
600
601 /* Wo and energy are sampled every 20ms, so we interpolate just 1
602 10ms frame between 20ms samples */
603
604 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
605 e[0] = interp_energy(c2->prev_e_dec, e[1]);
606
607 /* LSPs are sampled every 20ms so we interpolate the frame in
608 between, then recover spectral amplitudes */
609
610 interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
611
612 for(i=0; i<2; i++) {
613 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
614 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
615 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
616 apply_lpc_correction(&model[i]);
617 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
618 }
619
620 /* update memories for next frame ----------------------------*/
621
622 c2->prev_model_dec = model[1];
623 c2->prev_e_dec = e[1];
624 for(i=0; i<LPC_ORD; i++)
625 c2->prev_lsps_dec[i] = lsps[1][i];
626}
627
628
629/*---------------------------------------------------------------------------*\
630
631 FUNCTION....: codec2_encode_2400
632 AUTHOR......: David Rowe
633 DATE CREATED: 21/8/2010
634
635 Encodes 160 speech samples (20ms of speech) into 48 bits.
636
637 The codec2 algorithm actually operates internally on 10ms (80
638 sample) frames, so we run the encoding algorithm twice. On the
639 first frame we just send the voicing bit. On the second frame we
640 send all model parameters.
641
642 The bit allocation is:
643
644 Parameter bits/frame
645 --------------------------------------
646 Harmonic magnitudes (LSPs) 36
647 Joint VQ of Energy and Wo 8
648 Voicing (10ms update) 2
649 Spare 2
650 TOTAL 48
651
652\*---------------------------------------------------------------------------*/
653
654void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[])
655{
656 MODEL model;
657 float ak[LPC_ORD+1];
658 float lsps[LPC_ORD];
659 float e;
660 int WoE_index;
661 int lsp_indexes[LPC_ORD];
662 int i;
663 int spare = 0;
664 unsigned int nbit = 0;
665
666 assert(c2 != NULL);
667
668 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
669
670 /* first 10ms analysis frame - we just want voicing */
671
672 analyse_one_frame(c2, &model, speech);
673 pack(bits, &nbit, model.voiced, 1);
674
675 /* second 10ms analysis frame */
676
677 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
678 pack(bits, &nbit, model.voiced, 1);
679
680 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
681 WoE_index = encode_WoE(&model, e, c2->xq_enc);
682 pack(bits, &nbit, WoE_index, WO_E_BITS);
683
684 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
685 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
686 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
687 }
688 pack(bits, &nbit, spare, 2);
689
690 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
691}
692
693
694/*---------------------------------------------------------------------------*\
695
696 FUNCTION....: codec2_decode_2400
697 AUTHOR......: David Rowe
698 DATE CREATED: 21/8/2010
699
700 Decodes frames of 48 bits into 160 samples (20ms) of speech.
701
702\*---------------------------------------------------------------------------*/
703
704void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits)
705{
706 MODEL model[2];
707 int lsp_indexes[LPC_ORD];
708 float lsps[2][LPC_ORD];
709 int WoE_index;
710 float e[2];
711 float snr;
712 float ak[2][LPC_ORD+1];
713 int i,j;
714 unsigned int nbit = 0;
715 COMP Aw[FFT_ENC];
716
717 assert(c2 != NULL);
718
719 /* only need to zero these out due to (unused) snr calculation */
720
721 for(i=0; i<2; i++)
722 for(j=1; j<=MAX_AMP; j++)
723 model[i].A[j] = 0.0;
724
725 /* unpack bits from channel ------------------------------------*/
726
727 /* this will partially fill the model params for the 2 x 10ms
728 frames */
729
730 model[0].voiced = unpack(bits, &nbit, 1);
731
732 model[1].voiced = unpack(bits, &nbit, 1);
733 WoE_index = unpack(bits, &nbit, WO_E_BITS);
734 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
735
736 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
737 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
738 }
739 decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD);
740 check_lsp_order(&lsps[1][0], LPC_ORD);
741 bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0);
742
743 /* interpolate ------------------------------------------------*/
744
745 /* Wo and energy are sampled every 20ms, so we interpolate just 1
746 10ms frame between 20ms samples */
747
748 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
749 e[0] = interp_energy(c2->prev_e_dec, e[1]);
750
751 /* LSPs are sampled every 20ms so we interpolate the frame in
752 between, then recover spectral amplitudes */
753
754 interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
755 for(i=0; i<2; i++) {
756 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
757 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
758 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
759 apply_lpc_correction(&model[i]);
760 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
761
762 /* dump parameters for deep learning experiments */
763
764 if (c2->fmlfeat != NULL) {
765 /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
766 fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat);
767 fwrite(&e[i], 1, sizeof(float), c2->fmlfeat);
768 fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat);
769 float voiced_float = model[i].voiced;
770 fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat);
771 fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat);
772 }
773 }
774
775 /* update memories for next frame ----------------------------*/
776
777 c2->prev_model_dec = model[1];
778 c2->prev_e_dec = e[1];
779 for(i=0; i<LPC_ORD; i++)
780 c2->prev_lsps_dec[i] = lsps[1][i];
781}
782
783
784/*---------------------------------------------------------------------------*\
785
786 FUNCTION....: codec2_encode_1600
787 AUTHOR......: David Rowe
788 DATE CREATED: Feb 28 2013
789
790 Encodes 320 speech samples (40ms of speech) into 64 bits.
791
792 The codec2 algorithm actually operates internally on 10ms (80
793 sample) frames, so we run the encoding algorithm 4 times:
794
795 frame 0: voicing bit
796 frame 1: voicing bit, Wo and E
797 frame 2: voicing bit
798 frame 3: voicing bit, Wo and E, scalar LSPs
799
800 The bit allocation is:
801
802 Parameter frame 2 frame 4 Total
803 -------------------------------------------------------
804 Harmonic magnitudes (LSPs) 0 36 36
805 Pitch (Wo) 7 7 14
806 Energy 5 5 10
807 Voicing (10ms update) 2 2 4
808 TOTAL 14 50 64
809
810\*---------------------------------------------------------------------------*/
811
812void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[])
813{
814 MODEL model;
815 float lsps[LPC_ORD];
816 float ak[LPC_ORD+1];
817 float e;
818 int lsp_indexes[LPC_ORD];
819 int Wo_index, e_index;
820 int i;
821 unsigned int nbit = 0;
822
823 assert(c2 != NULL);
824
825 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
826
827 /* frame 1: - voicing ---------------------------------------------*/
828
829 analyse_one_frame(c2, &model, speech);
830 pack(bits, &nbit, model.voiced, 1);
831
832 /* frame 2: - voicing, scalar Wo & E -------------------------------*/
833
834 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
835 pack(bits, &nbit, model.voiced, 1);
836
837 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
838 pack(bits, &nbit, Wo_index, WO_BITS);
839
840 /* need to run this just to get LPC energy */
841 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
842 e_index = encode_energy(e, E_BITS);
843 pack(bits, &nbit, e_index, E_BITS);
844
845 /* frame 3: - voicing ---------------------------------------------*/
846
847 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]);
848 pack(bits, &nbit, model.voiced, 1);
849
850 /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
851
852 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
853 pack(bits, &nbit, model.voiced, 1);
854
855 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
856 pack(bits, &nbit, Wo_index, WO_BITS);
857
858 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
859 e_index = encode_energy(e, E_BITS);
860 pack(bits, &nbit, e_index, E_BITS);
861
862 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
863 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
864 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
865 }
866
867 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
868}
869
870
871/*---------------------------------------------------------------------------*\
872
873 FUNCTION....: codec2_decode_1600
874 AUTHOR......: David Rowe
875 DATE CREATED: 11 May 2012
876
877 Decodes frames of 64 bits into 320 samples (40ms) of speech.
878
879\*---------------------------------------------------------------------------*/
880
881void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits)
882{
883 MODEL model[4];
884 int lsp_indexes[LPC_ORD];
885 float lsps[4][LPC_ORD];
886 int Wo_index, e_index;
887 float e[4];
888 float snr;
889 float ak[4][LPC_ORD+1];
890 int i,j;
891 unsigned int nbit = 0;
892 float weight;
893 COMP Aw[FFT_ENC];
894
895 assert(c2 != NULL);
896
897 /* only need to zero these out due to (unused) snr calculation */
898
899 for(i=0; i<4; i++)
900 for(j=1; j<=MAX_AMP; j++)
901 model[i].A[j] = 0.0;
902
903 /* unpack bits from channel ------------------------------------*/
904
905 /* this will partially fill the model params for the 4 x 10ms
906 frames */
907
908 model[0].voiced = unpack(bits, &nbit, 1);
909
910 model[1].voiced = unpack(bits, &nbit, 1);
911 Wo_index = unpack(bits, &nbit, WO_BITS);
912 model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
913 model[1].L = PI/model[1].Wo;
914
915 e_index = unpack(bits, &nbit, E_BITS);
916 e[1] = decode_energy(e_index, E_BITS);
917
918 model[2].voiced = unpack(bits, &nbit, 1);
919
920 model[3].voiced = unpack(bits, &nbit, 1);
921 Wo_index = unpack(bits, &nbit, WO_BITS);
922 model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
923 model[3].L = PI/model[3].Wo;
924
925 e_index = unpack(bits, &nbit, E_BITS);
926 e[3] = decode_energy(e_index, E_BITS);
927
928 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
929 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
930 }
931 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
932 check_lsp_order(&lsps[3][0], LPC_ORD);
933 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
934
935 /* interpolate ------------------------------------------------*/
936
937 /* Wo and energy are sampled every 20ms, so we interpolate just 1
938 10ms frame between 20ms samples */
939
940 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
941 e[0] = interp_energy(c2->prev_e_dec, e[1]);
942 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
943 e[2] = interp_energy(e[1], e[3]);
944
945 /* LSPs are sampled every 40ms so we interpolate the 3 frames in
946 between, then recover spectral amplitudes */
947
948 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
949 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
950 }
951 for(i=0; i<4; i++) {
952 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
953 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
954 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
955 apply_lpc_correction(&model[i]);
956 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
957 }
958
959 /* update memories for next frame ----------------------------*/
960
961 c2->prev_model_dec = model[3];
962 c2->prev_e_dec = e[3];
963 for(i=0; i<LPC_ORD; i++)
964 c2->prev_lsps_dec[i] = lsps[3][i];
965
966}
967
968/*---------------------------------------------------------------------------*\
969
970 FUNCTION....: codec2_encode_1400
971 AUTHOR......: David Rowe
972 DATE CREATED: May 11 2012
973
974 Encodes 320 speech samples (40ms of speech) into 56 bits.
975
976 The codec2 algorithm actually operates internally on 10ms (80
977 sample) frames, so we run the encoding algorithm 4 times:
978
979 frame 0: voicing bit
980 frame 1: voicing bit, joint VQ of Wo and E
981 frame 2: voicing bit
982 frame 3: voicing bit, joint VQ of Wo and E, scalar LSPs
983
984 The bit allocation is:
985
986 Parameter frame 2 frame 4 Total
987 -------------------------------------------------------
988 Harmonic magnitudes (LSPs) 0 36 36
989 Energy+Wo 8 8 16
990 Voicing (10ms update) 2 2 4
991 TOTAL 10 46 56
992
993\*---------------------------------------------------------------------------*/
994
995void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[])
996{
997 MODEL model;
998 float lsps[LPC_ORD];
999 float ak[LPC_ORD+1];
1000 float e;
1001 int lsp_indexes[LPC_ORD];
1002 int WoE_index;
1003 int i;
1004 unsigned int nbit = 0;
1005
1006 assert(c2 != NULL);
1007
1008 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1009
1010 /* frame 1: - voicing ---------------------------------------------*/
1011
1012 analyse_one_frame(c2, &model, speech);
1013 pack(bits, &nbit, model.voiced, 1);
1014
1015 /* frame 2: - voicing, joint Wo & E -------------------------------*/
1016
1017 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
1018 pack(bits, &nbit, model.voiced, 1);
1019
1020 /* need to run this just to get LPC energy */
1021 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1022
1023 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1024 pack(bits, &nbit, WoE_index, WO_E_BITS);
1025
1026 /* frame 3: - voicing ---------------------------------------------*/
1027
1028 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]);
1029 pack(bits, &nbit, model.voiced, 1);
1030
1031 /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/
1032
1033 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
1034 pack(bits, &nbit, model.voiced, 1);
1035
1036 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1037 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1038 pack(bits, &nbit, WoE_index, WO_E_BITS);
1039
1040 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
1041 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
1042 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
1043 }
1044
1045 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1046}
1047
1048
1049/*---------------------------------------------------------------------------*\
1050
1051 FUNCTION....: codec2_decode_1400
1052 AUTHOR......: David Rowe
1053 DATE CREATED: 11 May 2012
1054
1055 Decodes frames of 56 bits into 320 samples (40ms) of speech.
1056
1057\*---------------------------------------------------------------------------*/
1058
1059void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits)
1060{
1061 MODEL model[4];
1062 int lsp_indexes[LPC_ORD];
1063 float lsps[4][LPC_ORD];
1064 int WoE_index;
1065 float e[4];
1066 float snr;
1067 float ak[4][LPC_ORD+1];
1068 int i,j;
1069 unsigned int nbit = 0;
1070 float weight;
1071 COMP Aw[FFT_ENC];
1072
1073 assert(c2 != NULL);
1074
1075 /* only need to zero these out due to (unused) snr calculation */
1076
1077 for(i=0; i<4; i++)
1078 for(j=1; j<=MAX_AMP; j++)
1079 model[i].A[j] = 0.0;
1080
1081 /* unpack bits from channel ------------------------------------*/
1082
1083 /* this will partially fill the model params for the 4 x 10ms
1084 frames */
1085
1086 model[0].voiced = unpack(bits, &nbit, 1);
1087
1088 model[1].voiced = unpack(bits, &nbit, 1);
1089 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1090 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
1091
1092 model[2].voiced = unpack(bits, &nbit, 1);
1093
1094 model[3].voiced = unpack(bits, &nbit, 1);
1095 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1096 decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
1097
1098 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
1099 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
1100 }
1101 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
1102 check_lsp_order(&lsps[3][0], LPC_ORD);
1103 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
1104
1105 /* interpolate ------------------------------------------------*/
1106
1107 /* Wo and energy are sampled every 20ms, so we interpolate just 1
1108 10ms frame between 20ms samples */
1109
1110 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
1111 e[0] = interp_energy(c2->prev_e_dec, e[1]);
1112 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
1113 e[2] = interp_energy(e[1], e[3]);
1114
1115 /* LSPs are sampled every 40ms so we interpolate the 3 frames in
1116 between, then recover spectral amplitudes */
1117
1118 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1119 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
1120 }
1121 for(i=0; i<4; i++) {
1122 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1123 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1124 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1125 apply_lpc_correction(&model[i]);
1126 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1127 }
1128
1129 /* update memories for next frame ----------------------------*/
1130
1131 c2->prev_model_dec = model[3];
1132 c2->prev_e_dec = e[3];
1133 for(i=0; i<LPC_ORD; i++)
1134 c2->prev_lsps_dec[i] = lsps[3][i];
1135
1136}
1137
1138/*---------------------------------------------------------------------------*\
1139
1140 FUNCTION....: codec2_encode_1300
1141 AUTHOR......: David Rowe
1142 DATE CREATED: March 14 2013
1143
1144 Encodes 320 speech samples (40ms of speech) into 52 bits.
1145
1146 The codec2 algorithm actually operates internally on 10ms (80
1147 sample) frames, so we run the encoding algorithm 4 times:
1148
1149 frame 0: voicing bit
1150 frame 1: voicing bit,
1151 frame 2: voicing bit
1152 frame 3: voicing bit, Wo and E, scalar LSPs
1153
1154 The bit allocation is:
1155
1156 Parameter frame 2 frame 4 Total
1157 -------------------------------------------------------
1158 Harmonic magnitudes (LSPs) 0 36 36
1159 Pitch (Wo) 0 7 7
1160 Energy 0 5 5
1161 Voicing (10ms update) 2 2 4
1162 TOTAL 2 50 52
1163
1164\*---------------------------------------------------------------------------*/
1165
1166void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
1167{
1168 MODEL model;
1169 float lsps[LPC_ORD];
1170 float ak[LPC_ORD+1];
1171 float e;
1172 int lsp_indexes[LPC_ORD];
1173 int Wo_index, e_index;
1174 int i;
1175 unsigned int nbit = 0;
1176 //#ifdef PROFILE
1177 //unsigned int quant_start;
1178 //#endif
1179
1180 assert(c2 != NULL);
1181
1182 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1183
1184 /* frame 1: - voicing ---------------------------------------------*/
1185
1186 analyse_one_frame(c2, &model, speech);
1187 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1188
1189 /* frame 2: - voicing ---------------------------------------------*/
1190
1191 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
1192 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1193
1194 /* frame 3: - voicing ---------------------------------------------*/
1195
1196 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]);
1197 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1198
1199 /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
1200
1201 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
1202 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1203
1204 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
1205 pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray);
1206
1207 //#ifdef PROFILE
1208 //quant_start = machdep_profile_sample();
1209 //#endif
1210 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1211 e_index = encode_energy(e, E_BITS);
1212 pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray);
1213
1214 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
1215 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
1216 pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray);
1217 }
1218 //#ifdef PROFILE
1219 //machdep_profile_sample_and_log(quant_start, " quant/packing");
1220 //#endif
1221
1222 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1223}
1224
1225
1226/*---------------------------------------------------------------------------*\
1227
1228 FUNCTION....: codec2_decode_1300
1229 AUTHOR......: David Rowe
1230 DATE CREATED: 11 May 2012
1231
1232 Decodes frames of 52 bits into 320 samples (40ms) of speech.
1233
1234\*---------------------------------------------------------------------------*/
1235static int frames;
1236void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est)
1237{
1238 MODEL model[4];
1239 int lsp_indexes[LPC_ORD];
1240 float lsps[4][LPC_ORD];
1241 int Wo_index, e_index;
1242 float e[4];
1243 float snr;
1244 float ak[4][LPC_ORD+1];
1245 int i,j;
1246 unsigned int nbit = 0;
1247 float weight;
1248 COMP Aw[FFT_ENC];
1249 //PROFILE_VAR(recover_start);
1250
1251 assert(c2 != NULL);
1252 frames+= 4;
1253 /* only need to zero these out due to (unused) snr calculation */
1254
1255 for(i=0; i<4; i++)
1256 for(j=1; j<=MAX_AMP; j++)
1257 model[i].A[j] = 0.0;
1258
1259 /* unpack bits from channel ------------------------------------*/
1260
1261 /* this will partially fill the model params for the 4 x 10ms
1262 frames */
1263
1264 model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1265 model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1266 model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1267 model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1268
1269 Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray);
1270 model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
1271 model[3].L = PI/model[3].Wo;
1272
1273 e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
1274 e[3] = decode_energy(e_index, E_BITS);
1275 //fprintf(stderr, "%d %f\n", e_index, e[3]);
1276
1277 for(i=0; i<LSP_SCALAR_INDEXES; i++) {
1278 lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray);
1279 }
1280 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
1281 check_lsp_order(&lsps[3][0], LPC_ORD);
1282 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
1283
1284 if (ber_est > 0.15) {
1285 model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0;
1286 e[3] = decode_energy(10, E_BITS);
1287 bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0);
1288 //fprintf(stderr, "soft mute\n");
1289 }
1290
1291 /* interpolate ------------------------------------------------*/
1292
1293 /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
1294 the 3 frames in between */
1295
1296 //PROFILE_SAMPLE(recover_start);
1297 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1298 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
1299 interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
1300 e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
1301 }
1302
1303 /* then recover spectral amplitudes */
1304
1305 for(i=0; i<4; i++) {
1306 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1307 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1308 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1309 apply_lpc_correction(&model[i]);
1310 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1311
1312 /* dump parameters for deep learning experiments */
1313
1314 if (c2->fmlfeat != NULL) {
1315 /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
1316 fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat);
1317 fwrite(&e[i], 1, sizeof(float), c2->fmlfeat);
1318 fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat);
1319 float voiced_float = model[i].voiced;
1320 fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat);
1321 fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat);
1322 }
1323 }
1324 /*
1325 for(i=0; i<4; i++) {
1326 printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced);
1327 }
1328 if (frames == 4*50)
1329 exit(0);
1330 */
1331 //PROFILE_SAMPLE_AND_LOG2(recover_start, " recover");
1332 #ifdef DUMP
1333 dump_lsp_(&lsps[3][0]);
1334 dump_ak_(&ak[3][0], LPC_ORD);
1335 #endif
1336
1337 /* update memories for next frame ----------------------------*/
1338
1339 c2->prev_model_dec = model[3];
1340 c2->prev_e_dec = e[3];
1341 for(i=0; i<LPC_ORD; i++)
1342 c2->prev_lsps_dec[i] = lsps[3][i];
1343
1344}
1345
1346
1347/*---------------------------------------------------------------------------*\
1348
1349 FUNCTION....: codec2_encode_1200
1350 AUTHOR......: David Rowe
1351 DATE CREATED: Nov 14 2011
1352
1353 Encodes 320 speech samples (40ms of speech) into 48 bits.
1354
1355 The codec2 algorithm actually operates internally on 10ms (80
1356 sample) frames, so we run the encoding algorithm four times:
1357
1358 frame 0: voicing bit
1359 frame 1: voicing bit, joint VQ of Wo and E
1360 frame 2: voicing bit
1361 frame 3: voicing bit, joint VQ of Wo and E, VQ LSPs
1362
1363 The bit allocation is:
1364
1365 Parameter frame 2 frame 4 Total
1366 -------------------------------------------------------
1367 Harmonic magnitudes (LSPs) 0 27 27
1368 Energy+Wo 8 8 16
1369 Voicing (10ms update) 2 2 4
1370 Spare 0 1 1
1371 TOTAL 10 38 48
1372
1373\*---------------------------------------------------------------------------*/
1374
1375void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[])
1376{
1377 MODEL model;
1378 float lsps[LPC_ORD];
1379 float lsps_[LPC_ORD];
1380 float ak[LPC_ORD+1];
1381 float e;
1382 int lsp_indexes[LPC_ORD];
1383 int WoE_index;
1384 int i;
1385 int spare = 0;
1386 unsigned int nbit = 0;
1387
1388 assert(c2 != NULL);
1389
1390 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1391
1392 /* frame 1: - voicing ---------------------------------------------*/
1393
1394 analyse_one_frame(c2, &model, speech);
1395 pack(bits, &nbit, model.voiced, 1);
1396
1397 /* frame 2: - voicing, joint Wo & E -------------------------------*/
1398
1399 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
1400 pack(bits, &nbit, model.voiced, 1);
1401
1402 /* need to run this just to get LPC energy */
1403 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1404
1405 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1406 pack(bits, &nbit, WoE_index, WO_E_BITS);
1407
1408 /* frame 3: - voicing ---------------------------------------------*/
1409
1410 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]);
1411 pack(bits, &nbit, model.voiced, 1);
1412
1413 /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/
1414
1415 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
1416 pack(bits, &nbit, model.voiced, 1);
1417
1418 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1419 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1420 pack(bits, &nbit, WoE_index, WO_E_BITS);
1421
1422 encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD);
1423 for(i=0; i<LSP_PRED_VQ_INDEXES; i++) {
1424 pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i));
1425 }
1426 pack(bits, &nbit, spare, 1);
1427
1428 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1429}
1430
1431
1432/*---------------------------------------------------------------------------*\
1433
1434 FUNCTION....: codec2_decode_1200
1435 AUTHOR......: David Rowe
1436 DATE CREATED: 14 Feb 2012
1437
1438 Decodes frames of 48 bits into 320 samples (40ms) of speech.
1439
1440\*---------------------------------------------------------------------------*/
1441
1442void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits)
1443{
1444 MODEL model[4];
1445 int lsp_indexes[LPC_ORD];
1446 float lsps[4][LPC_ORD];
1447 int WoE_index;
1448 float e[4];
1449 float snr;
1450 float ak[4][LPC_ORD+1];
1451 int i,j;
1452 unsigned int nbit = 0;
1453 float weight;
1454 COMP Aw[FFT_ENC];
1455
1456 assert(c2 != NULL);
1457
1458 /* only need to zero these out due to (unused) snr calculation */
1459
1460 for(i=0; i<4; i++)
1461 for(j=1; j<=MAX_AMP; j++)
1462 model[i].A[j] = 0.0;
1463
1464 /* unpack bits from channel ------------------------------------*/
1465
1466 /* this will partially fill the model params for the 4 x 10ms
1467 frames */
1468
1469 model[0].voiced = unpack(bits, &nbit, 1);
1470
1471 model[1].voiced = unpack(bits, &nbit, 1);
1472 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1473 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
1474
1475 model[2].voiced = unpack(bits, &nbit, 1);
1476
1477 model[3].voiced = unpack(bits, &nbit, 1);
1478 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1479 decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
1480
1481 for(i=0; i<LSP_PRED_VQ_INDEXES; i++) {
1482 lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i));
1483 }
1484 decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD , 0);
1485 check_lsp_order(&lsps[3][0], LPC_ORD);
1486 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
1487
1488 /* interpolate ------------------------------------------------*/
1489
1490 /* Wo and energy are sampled every 20ms, so we interpolate just 1
1491 10ms frame between 20ms samples */
1492
1493 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
1494 e[0] = interp_energy(c2->prev_e_dec, e[1]);
1495 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
1496 e[2] = interp_energy(e[1], e[3]);
1497
1498 /* LSPs are sampled every 40ms so we interpolate the 3 frames in
1499 between, then recover spectral amplitudes */
1500
1501 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1502 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
1503 }
1504 for(i=0; i<4; i++) {
1505 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1506 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1507 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1508 apply_lpc_correction(&model[i]);
1509 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1510 }
1511
1512 /* update memories for next frame ----------------------------*/
1513
1514 c2->prev_model_dec = model[3];
1515 c2->prev_e_dec = e[3];
1516 for(i=0; i<LPC_ORD; i++)
1517 c2->prev_lsps_dec[i] = lsps[3][i];
1518}
1519
1520
1521#ifndef CORTEX_M4
1522/*---------------------------------------------------------------------------*\
1523
1524 FUNCTION....: codec2_encode_700
1525 AUTHOR......: David Rowe
1526 DATE CREATED: April 2015
1527
1528 Encodes 320 speech samples (40ms of speech) into 28 bits.
1529
1530 The codec2 algorithm actually operates internally on 10ms (80
1531 sample) frames, so we run the encoding algorithm four times:
1532
1533 frame 0: nothing
1534 frame 1: nothing
1535 frame 2: nothing
1536 frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar, 2 spare
1537
1538 The bit allocation is:
1539
1540 Parameter frames 1-3 frame 4 Total
1541 -----------------------------------------------------------
1542 Harmonic magnitudes (LSPs) 0 17 17
1543 Energy 0 3 3
1544 log Wo 0 5 5
1545 Voicing 0 1 1
1546 spare 0 2 2
1547 TOTAL 0 28 28
1548
1549\*---------------------------------------------------------------------------*/
1550
1551void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[])
1552{
1553 MODEL model;
1554 float lsps[LPC_ORD_LOW];
1555 float mel[LPC_ORD_LOW];
1556 float ak[LPC_ORD_LOW+1];
1557 float e, f;
1558 int indexes[LPC_ORD_LOW];
1559 int Wo_index, e_index, i;
1560 unsigned int nbit = 0;
1561 float bpf_out[4*c2->n_samp];
1562 short bpf_speech[4*c2->n_samp];
1563 int spare = 0;
1564
1565 assert(c2 != NULL);
1566
1567 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1568
1569 /* band pass filter */
1570
1571 for(i=0; i<BPF_N; i++)
1572 c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i];
1573 for(i=0; i<4*c2->n_samp; i++)
1574 c2->bpf_buf[BPF_N+i] = speech[i];
1575 inverse_filter(&c2->bpf_buf[BPF_N], bpf, 4*c2->n_samp, bpf_out, BPF_N-1);
1576 for(i=0; i<4*c2->n_samp; i++)
1577 bpf_speech[i] = bpf_out[i];
1578
1579 /* frame 1 --------------------------------------------------------*/
1580
1581 analyse_one_frame(c2, &model, bpf_speech);
1582
1583 /* frame 2 --------------------------------------------------------*/
1584
1585 analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]);
1586
1587 /* frame 3 --------------------------------------------------------*/
1588
1589 analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]);
1590
1591 /* frame 4: - voicing, scalar Wo & E, scalar LSPs -----------------*/
1592
1593 analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
1594 pack(bits, &nbit, model.voiced, 1);
1595 Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
1596 pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
1597
1598 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
1599 e_index = encode_energy(e, 3);
1600 pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
1601
1602 for(i=0; i<LPC_ORD_LOW; i++) {
1603 f = (4000.0/PI)*lsps[i];
1604 mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
1605 }
1606 encode_mels_scalar(indexes, mel, LPC_ORD_LOW);
1607
1608 for(i=0; i<LPC_ORD_LOW; i++) {
1609 pack_natural_or_gray(bits, &nbit, indexes[i], mel_bits(i), c2->gray);
1610 }
1611
1612 pack_natural_or_gray(bits, &nbit, spare, 2, c2->gray);
1613
1614 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1615}
1616
1617
1618/*---------------------------------------------------------------------------*\
1619
1620 FUNCTION....: codec2_decode_700
1621 AUTHOR......: David Rowe
1622 DATE CREATED: April 2015
1623
1624 Decodes frames of 28 bits into 320 samples (40ms) of speech.
1625
1626\*---------------------------------------------------------------------------*/
1627
1628void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits)
1629{
1630 MODEL model[4];
1631 int indexes[LPC_ORD_LOW];
1632 float mel[LPC_ORD_LOW];
1633 float lsps[4][LPC_ORD_LOW];
1634 int Wo_index, e_index;
1635 float e[4];
1636 float snr, f_;
1637 float ak[4][LPC_ORD_LOW+1];
1638 int i,j;
1639 unsigned int nbit = 0;
1640 float weight;
1641 COMP Aw[FFT_ENC];
1642
1643 assert(c2 != NULL);
1644
1645 /* only need to zero these out due to (unused) snr calculation */
1646
1647 for(i=0; i<4; i++)
1648 for(j=1; j<=MAX_AMP; j++)
1649 model[i].A[j] = 0.0;
1650
1651 /* unpack bits from channel ------------------------------------*/
1652
1653 model[3].voiced = unpack(bits, &nbit, 1);
1654 model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced;
1655
1656 Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
1657 model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
1658 model[3].L = PI/model[3].Wo;
1659
1660 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
1661 e[3] = decode_energy(e_index, 3);
1662
1663 for(i=0; i<LPC_ORD_LOW; i++) {
1664 indexes[i] = unpack_natural_or_gray(bits, &nbit, mel_bits(i), c2->gray);
1665 }
1666
1667 decode_mels_scalar(mel, indexes, LPC_ORD_LOW);
1668 for(i=0; i<LPC_ORD_LOW; i++) {
1669 f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
1670 lsps[3][i] = f_*(PI/4000.0);
1671 //printf("lsps[3][%d] %f\n", i, lsps[3][i]);
1672 }
1673
1674 check_lsp_order(&lsps[3][0], LPC_ORD_LOW);
1675 bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0);
1676
1677 #ifdef MASK_NOT_FOR_NOW
1678 /* first pass at soft decn error masking, needs further work */
1679 /* If soft dec info available expand further for low power frames */
1680
1681 if (c2->softdec) {
1682 float e = 0.0;
1683 for(i=9; i<9+17; i++)
1684 e += c2->softdec[i]*c2->softdec[i];
1685 e /= 6.0;
1686 //fprintf(stderr, "e: %f\n", e);
1687 //if (e < 0.3)
1688 // bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 150.0, 300.0);
1689 }
1690 #endif
1691
1692 /* interpolate ------------------------------------------------*/
1693
1694 /* LSPs, Wo, and energy are sampled every 40ms so we interpolate
1695 the 3 frames in between, then recover spectral amplitudes */
1696
1697 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1698 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
1699 interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
1700 e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
1701 }
1702 for(i=0; i<4; i++) {
1703 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
1704 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
1705 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1706 apply_lpc_correction(&model[i]);
1707 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1708 }
1709
1710 #ifdef DUMP
1711 dump_lsp_(&lsps[3][0]);
1712 dump_ak_(&ak[3][0], LPC_ORD_LOW);
1713 dump_model(&model[3]);
1714 if (c2->softdec)
1715 dump_softdec(c2->softdec, nbit);
1716 #endif
1717
1718 /* update memories for next frame ----------------------------*/
1719
1720 c2->prev_model_dec = model[3];
1721 c2->prev_e_dec = e[3];
1722 for(i=0; i<LPC_ORD_LOW; i++)
1723 c2->prev_lsps_dec[i] = lsps[3][i];
1724}
1725
1726
1727/*---------------------------------------------------------------------------*\
1728
1729 FUNCTION....: codec2_encode_700b
1730 AUTHOR......: David Rowe
1731 DATE CREATED: August 2015
1732
1733 Version b of 700 bit/s codec. After some experiments over the air I
1734 was unhappy with the rate 700 codec so I spent a few weeks
1735 trying to improve the speech quality. This version uses a wider BPF
1736 and vector quantised mel-lsps.
1737
1738 Encodes 320 speech samples (40ms of speech) into 28 bits.
1739
1740 The codec2 algorithm actually operates internally on 10ms (80
1741 sample) frames, so we run the encoding algorithm four times:
1742
1743 frame 0: nothing
1744 frame 1: nothing
1745 frame 2: nothing
1746 frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ,
1747 1 spare
1748
1749 The bit allocation is:
1750
1751 Parameter frames 1-3 frame 4 Total
1752 -----------------------------------------------------------
1753 Harmonic magnitudes (LSPs) 0 18 18
1754 Energy 0 3 3
1755 log Wo 0 5 5
1756 Voicing 0 1 1
1757 spare 0 1 1
1758 TOTAL 0 28 28
1759
1760\*---------------------------------------------------------------------------*/
1761
1762void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[])
1763{
1764 MODEL model;
1765 float lsps[LPC_ORD_LOW];
1766 float mel[LPC_ORD_LOW];
1767 float mel_[LPC_ORD_LOW];
1768 float ak[LPC_ORD_LOW+1];
1769 float e, f;
1770 int indexes[3];
1771 int Wo_index, e_index, i;
1772 unsigned int nbit = 0;
1773 float bpf_out[4*c2->n_samp];
1774 short bpf_speech[4*c2->n_samp];
1775 int spare = 0;
1776
1777 assert(c2 != NULL);
1778
1779 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1780
1781 /* band pass filter */
1782
1783 for(i=0; i<BPF_N; i++)
1784 c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i];
1785 for(i=0; i<4*c2->n_samp; i++)
1786 c2->bpf_buf[BPF_N+i] = speech[i];
1787 inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*c2->n_samp, bpf_out, BPF_N-1);
1788 for(i=0; i<4*c2->n_samp; i++)
1789 bpf_speech[i] = bpf_out[i];
1790
1791 /* frame 1 --------------------------------------------------------*/
1792
1793 analyse_one_frame(c2, &model, bpf_speech);
1794
1795 /* frame 2 --------------------------------------------------------*/
1796
1797 analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]);
1798
1799 /* frame 3 --------------------------------------------------------*/
1800
1801 analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]);
1802
1803 /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/
1804
1805 analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
1806 pack(bits, &nbit, model.voiced, 1);
1807 Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
1808 pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
1809
1810 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
1811 e_index = encode_energy(e, 3);
1812 pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
1813
1814 for(i=0; i<LPC_ORD_LOW; i++) {
1815 f = (4000.0/PI)*lsps[i];
1816 mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
1817 }
1818 lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5);
1819
1820 for(i=0; i<3; i++) {
1821 pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray);
1822 }
1823
1824 pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray);
1825
1826 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1827}
1828
1829
1830/*---------------------------------------------------------------------------*\
1831
1832 FUNCTION....: codec2_decode_700b
1833 AUTHOR......: David Rowe
1834 DATE CREATED: August 2015
1835
1836 Decodes frames of 28 bits into 320 samples (40ms) of speech.
1837
1838\*---------------------------------------------------------------------------*/
1839
1840void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits)
1841{
1842 MODEL model[4];
1843 int indexes[3];
1844 float mel[LPC_ORD_LOW];
1845 float lsps[4][LPC_ORD_LOW];
1846 int Wo_index, e_index;
1847 float e[4];
1848 float snr, f_;
1849 float ak[4][LPC_ORD_LOW+1];
1850 int i,j;
1851 unsigned int nbit = 0;
1852 float weight;
1853 COMP Aw[FFT_ENC];
1854
1855 assert(c2 != NULL);
1856
1857 /* only need to zero these out due to (unused) snr calculation */
1858
1859 for(i=0; i<4; i++)
1860 for(j=1; j<=MAX_AMP; j++)
1861 model[i].A[j] = 0.0;
1862
1863 /* unpack bits from channel ------------------------------------*/
1864
1865 model[3].voiced = unpack(bits, &nbit, 1);
1866 model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced;
1867
1868 Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
1869 model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
1870 model[3].L = PI/model[3].Wo;
1871
1872 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
1873 e[3] = decode_energy(e_index, 3);
1874
1875 for(i=0; i<3; i++) {
1876 indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray);
1877 }
1878
1879 lspmelvq_decode(indexes, mel, LPC_ORD_LOW);
1880
1881 #define MEL_ROUND 10
1882 for(i=1; i<LPC_ORD_LOW; i++) {
1883 if (mel[i] <= mel[i-1]+MEL_ROUND) {
1884 mel[i]+=MEL_ROUND/2;
1885 mel[i-1]-=MEL_ROUND/2;
1886 i = 1;
1887 }
1888 }
1889
1890 for(i=0; i<LPC_ORD_LOW; i++) {
1891 f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
1892 lsps[3][i] = f_*(PI/4000.0);
1893 //printf("lsps[3][%d] %f\n", i, lsps[3][i]);
1894 }
1895
1896 /* interpolate ------------------------------------------------*/
1897
1898 /* LSPs, Wo, and energy are sampled every 40ms so we interpolate
1899 the 3 frames in between, then recover spectral amplitudes */
1900
1901 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1902 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
1903 interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
1904 e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
1905 }
1906 for(i=0; i<4; i++) {
1907 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
1908 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
1909 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1910 apply_lpc_correction(&model[i]);
1911 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1912 }
1913
1914 #ifdef DUMP
1915 dump_lsp_(&lsps[3][0]);
1916 dump_ak_(&ak[3][0], LPC_ORD_LOW);
1917 dump_model(&model[3]);
1918 if (c2->softdec)
1919 dump_softdec(c2->softdec, nbit);
1920 #endif
1921
1922 /* update memories for next frame ----------------------------*/
1923
1924 c2->prev_model_dec = model[3];
1925 c2->prev_e_dec = e[3];
1926 for(i=0; i<LPC_ORD_LOW; i++)
1927 c2->prev_lsps_dec[i] = lsps[3][i];
1928}
1929#endif
1930
1931
1932/*---------------------------------------------------------------------------*\
1933
1934 FUNCTION....: codec2_encode_700c
1935 AUTHOR......: David Rowe
1936 DATE CREATED: Jan 2017
1937
1938 Version c of 700 bit/s codec that uses newamp1 fixed rate VQ of amplitudes.
1939
1940 Encodes 320 speech samples (40ms of speech) into 28 bits.
1941
1942 The codec2 algorithm actually operates internally on 10ms (80
1943 sample) frames, so we run the encoding algorithm four times:
1944
1945 frame 0: nothing
1946 frame 1: nothing
1947 frame 2: nothing
1948 frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy,
1949 6 bit scalar Wo/voicing. No spare bits.
1950
1951 Voicing is encoded using the 0 index of the Wo quantiser.
1952
1953 The bit allocation is:
1954
1955 Parameter frames 1-3 frame 4 Total
1956 -----------------------------------------------------------
1957 Harmonic magnitudes (rate k VQ) 0 18 18
1958 Energy 0 4 4
1959 log Wo/voicing 0 6 6
1960 TOTAL 0 28 28
1961
1962\*---------------------------------------------------------------------------*/
1963
1964void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[])
1965{
1966 MODEL model;
1967 int indexes[4], i, M=4;
1968 unsigned int nbit = 0;
1969
1970 assert(c2 != NULL);
1971
1972 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1973
1974 for(i=0; i<M; i++) {
1975 analyse_one_frame(c2, &model, &speech[i*c2->n_samp]);
1976 }
1977
1978 int K = 20;
1979 float rate_K_vec[K], mean;
1980 float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K];
1981
1982 newamp1_model_to_indexes(&c2->c2const,
1983 indexes,
1984 &model,
1985 rate_K_vec,
1986 c2->rate_K_sample_freqs_kHz,
1987 K,
1988 &mean,
1989 rate_K_vec_no_mean,
1990 rate_K_vec_no_mean_, &c2->se);
1991 c2->nse += K;
1992
1993#ifndef CORTEX_M4
1994 /* dump features for deep learning experiments */
1995 if (c2->fmlfeat != NULL) {
1996 fwrite(&mean, 1, sizeof(float), c2->fmlfeat);
1997 fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat);
1998 fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat);
1999 }
2000#endif
2001
2002 pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
2003 pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0);
2004 pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0);
2005 pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);
2006
2007 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
2008}
2009
2010
2011/*---------------------------------------------------------------------------*\
2012
2013 FUNCTION....: codec2_decode_700c
2014 AUTHOR......: David Rowe
2015 DATE CREATED: August 2015
2016
2017 Decodes frames of 28 bits into 320 samples (40ms) of speech.
2018
2019\*---------------------------------------------------------------------------*/
2020
2021void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits)
2022{
2023 MODEL model[4];
2024 int indexes[4];
2025 int i;
2026 unsigned int nbit = 0;
2027
2028 assert(c2 != NULL);
2029
2030 /* unpack bits from channel ------------------------------------*/
2031
2032 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2033 indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2034 indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0);
2035 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2036
2037 int M = 4;
2038 COMP HH[M][MAX_AMP+1];
2039 float interpolated_surface_[M][NEWAMP1_K];
2040
2041 newamp1_indexes_to_model(&c2->c2const,
2042 model,
2043 (COMP*)HH,
2044 (float*)interpolated_surface_,
2045 c2->prev_rate_K_vec_,
2046 &c2->Wo_left,
2047 &c2->voicing_left,
2048 c2->rate_K_sample_freqs_kHz,
2049 NEWAMP1_K,
2050 c2->phase_fft_fwd_cfg,
2051 c2->phase_fft_inv_cfg,
2052 indexes,
2053 c2->user_rate_K_vec_no_mean_,
2054 c2->post_filter_en);
2055
2056
2057 for(i=0; i<M; i++) {
2058 /* 700C is a little quiter so lets apply some experimentally derived audio gain */
2059 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
2060 }
2061}
2062
2063/*---------------------------------------------------------------------------*\
2064
2065 FUNCTION....: codec2_energy_700c
2066 AUTHOR......: Jeroen Vreeken
2067 DATE CREATED: Jan 2017
2068
2069 Decodes energy value from encoded bits.
2070
2071\*---------------------------------------------------------------------------*/
2072
2073float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits)
2074{
2075 int indexes[4];
2076 unsigned int nbit = 0;
2077
2078 assert(c2 != NULL);
2079
2080 /* unpack bits from channel ------------------------------------*/
2081
2082 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2083 indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2084 indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0);
2085 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2086
2087 float mean = newamp1_energy_cb[0].cb[indexes[2]];
2088 mean -= 10;
2089 if (indexes[3] == 0)
2090 mean -= 10;
2091
2092 return POW10F(mean/10.0);
2093}
2094
2095#ifndef CORTEX_M4
2096float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits)
2097{
2098 int indexes[4];
2099 unsigned int nbit = 0;
2100
2101 assert(c2 != NULL);
2102
2103 /* unpack bits from channel ------------------------------------*/
2104
2105 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2106 //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2107 indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
2108 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2109
2110 float mean = newamp2_energy_cb[0].cb[indexes[2]];
2111 mean -= 10;
2112 if (indexes[3] == 0)
2113 mean -= 10;
2114
2115 return POW10F(mean/10.0);
2116}
2117
2118/*---------------------------------------------------------------------------*\
2119
2120 FUNCTION....: codec2_get_energy()
2121 AUTHOR......: Jeroen Vreeken
2122 DATE CREATED: 08/03/2016
2123
2124 Extract energy value from an encoded frame.
2125
2126\*---------------------------------------------------------------------------*/
2127
2128float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits)
2129{
2130 assert(c2 != NULL);
2131 assert(
2132 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) ||
2133 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) ||
2134 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) ||
2135 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) ||
2136 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) ||
2137 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) ||
2138 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) ||
2139 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) ||
2140 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) ||
2141 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) ||
2142 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
2143 );
2144 MODEL model;
2145 float xq_dec[2] = {};
2146 int e_index, WoE_index;
2147 float e;
2148 unsigned int nbit;
2149
2150 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) {
2151 nbit = 1 + 1 + WO_BITS;
2152 e_index = unpack(bits, &nbit, E_BITS);
2153 e = decode_energy(e_index, E_BITS);
2154 }
2155 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) {
2156 nbit = 1 + 1;
2157 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2158 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2159 }
2160 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) {
2161 nbit = 1 + 1 + WO_BITS;
2162 e_index = unpack(bits, &nbit, E_BITS);
2163 e = decode_energy(e_index, E_BITS);
2164 }
2165 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) {
2166 nbit = 1 + 1;
2167 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2168 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2169 }
2170 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) {
2171 nbit = 1 + 1 + 1 + 1 + WO_BITS;
2172 e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
2173 e = decode_energy(e_index, E_BITS);
2174 }
2175 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) {
2176 nbit = 1 + 1;
2177 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2178 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2179 }
2180 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) {
2181 nbit = 1 + 5;
2182 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
2183 e = decode_energy(e_index, 3);
2184 }
2185 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) {
2186 nbit = 1 + 5;
2187 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
2188 e = decode_energy(e_index, 3);
2189 }
2190 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
2191 e = codec2_energy_700c(c2, bits);
2192 }
2193 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
2194 e = codec2_energy_450(c2, bits);
2195 }
2196
2197 return e;
2198}
2199
2200
2201/*---------------------------------------------------------------------------*\
2202
2203 FUNCTION....: codec2_encode_450
2204 AUTHOR......: Thomas Kurin and Stefan Erhardt
2205 INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
2206 DATE CREATED: July 2018
2207
2208 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes.
2209
2210 Encodes 320 speech samples (40ms of speech) into 18 bits.
2211
2212 The codec2 algorithm actually operates internally on 10ms (80
2213 sample) frames, so we run the encoding algorithm four times:
2214
2215 frame 0: nothing
2216 frame 1: nothing
2217 frame 2: nothing
2218 frame 3: 9 bit 1 stage VQ, 3 bits energy,
2219 6 bit scalar Wo/voicing/plosive. No spare bits.
2220
2221 If a plosive is detected the frame at the energy-step is encoded.
2222
2223 Voicing is encoded using the 000000 index of the Wo quantiser.
2224 Plosive is encoded using the 111111 index of the Wo quantiser.
2225
2226 The bit allocation is:
2227
2228 Parameter frames 1-3 frame 4 Total
2229 -----------------------------------------------------------
2230 Harmonic magnitudes (rate k VQ) 0 9 9
2231 Energy 0 3 3
2232 log Wo/voicing/plosive 0 6 6
2233 TOTAL 0 18 18
2234
2235
2236\*---------------------------------------------------------------------------*/
2237
2238void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[])
2239{
2240 MODEL model;
2241 int indexes[4], i,h, M=4;
2242 unsigned int nbit = 0;
2243 int plosiv = 0;
2244 float energydelta[M];
2245 int spectralCounter;
2246
2247 assert(c2 != NULL);
2248
2249 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
2250 for(i=0; i<M; i++){
2251 analyse_one_frame(c2, &model, &speech[i*c2->n_samp]);
2252 energydelta[i] = 0;
2253 spectralCounter = 0;
2254 for(h = 0;h<(model.L);h++){
2255 //only detect above 300 Hz
2256 if(h*model.Wo*(c2->c2const.Fs/2000.0)/M_PI > 0.3){
2257 energydelta[i] = energydelta[i] + 20.0*log10(model.A[10]+1E-16);
2258 spectralCounter = spectralCounter+1;
2259 }
2260
2261 }
2262 energydelta[i] = energydelta[i] / spectralCounter ;
2263 }
2264 //Constants for plosive Detection tdB = threshold; minPwr = from below this level plosives have to rise
2265 float tdB = 15; //not fixed can be changed
2266 float minPwr = 15; //not fixed can be changed
2267 if((c2->energy_prev)<minPwr && energydelta[0]>((c2->energy_prev)+tdB)){
2268
2269 plosiv = 1;
2270 }
2271 if(energydelta[0]<minPwr && energydelta[1]>(energydelta[0]+tdB)){
2272
2273 plosiv = 2;
2274 }
2275 if(energydelta[1]<minPwr &&energydelta[2]>(energydelta[1]+tdB)){
2276
2277 plosiv = 3;
2278 }
2279 if(energydelta[2]<minPwr &&energydelta[3]>(energydelta[2]+tdB)){
2280
2281 plosiv = 4;
2282 }
2283 if(plosiv != 0 && plosiv != 4){
2284 analyse_one_frame(c2, &model, &speech[(plosiv-1)*c2->n_samp]);
2285 }
2286
2287 c2->energy_prev = energydelta[3];
2288
2289
2290 int K = 29;
2291 float rate_K_vec[K], mean;
2292 float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K];
2293 if(plosiv > 0){
2294 plosiv = 1;
2295 }
2296 newamp2_model_to_indexes(&c2->c2const,
2297 indexes,
2298 &model,
2299 rate_K_vec,
2300 c2->n2_rate_K_sample_freqs_kHz,
2301 K,
2302 &mean,
2303 rate_K_vec_no_mean,
2304 rate_K_vec_no_mean_,
2305 plosiv);
2306
2307
2308 pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
2309 //pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0);
2310 pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0);
2311 pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);
2312
2313 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
2314}
2315
2316
2317/*---------------------------------------------------------------------------*\
2318
2319 FUNCTION....: codec2_decode_450
2320 AUTHOR......: Thomas Kurin and Stefan Erhardt
2321 INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
2322 DATE CREATED: July 2018
2323
2324\*---------------------------------------------------------------------------*/
2325
2326void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits)
2327{
2328 MODEL model[4];
2329 int indexes[4];
2330 int i;
2331 unsigned int nbit = 0;
2332
2333 assert(c2 != NULL);
2334
2335 /* unpack bits from channel ------------------------------------*/
2336
2337 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2338 //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2339 indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
2340 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2341
2342 int M = 4;
2343 COMP HH[M][MAX_AMP+1];
2344 float interpolated_surface_[M][NEWAMP2_K];
2345 int pwbFlag = 0;
2346
2347 newamp2_indexes_to_model(&c2->c2const,
2348 model,
2349 (COMP*)HH,
2350 (float*)interpolated_surface_,
2351 c2->n2_prev_rate_K_vec_,
2352 &c2->Wo_left,
2353 &c2->voicing_left,
2354 c2->n2_rate_K_sample_freqs_kHz,
2355 NEWAMP2_K,
2356 c2->phase_fft_fwd_cfg,
2357 c2->phase_fft_inv_cfg,
2358 indexes,
2359 1.5,
2360 pwbFlag);
2361
2362
2363 for(i=0; i<M; i++) {
2364 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
2365 }
2366}
2367
2368/*---------------------------------------------------------------------------*\
2369
2370 FUNCTION....: codec2_decode_450pwb
2371 AUTHOR......: Thomas Kurin and Stefan Erhardt
2372 INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
2373 DATE CREATED: July 2018
2374
2375 Decodes the 450 codec data in pseudo wideband at 16kHz samplerate.
2376
2377\*---------------------------------------------------------------------------*/
2378
2379void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits)
2380{
2381 MODEL model[4];
2382 int indexes[4];
2383 int i;
2384 unsigned int nbit = 0;
2385
2386 assert(c2 != NULL);
2387
2388 /* unpack bits from channel ------------------------------------*/
2389
2390 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2391 //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2392 indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
2393 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2394
2395 int M = 4;
2396 COMP HH[M][MAX_AMP+1];
2397 float interpolated_surface_[M][NEWAMP2_16K_K];
2398 int pwbFlag = 1;
2399
2400 newamp2_indexes_to_model(&c2->c2const,
2401 model,
2402 (COMP*)HH,
2403 (float*)interpolated_surface_,
2404 c2->n2_pwb_prev_rate_K_vec_,
2405 &c2->Wo_left,
2406 &c2->voicing_left,
2407 c2->n2_pwb_rate_K_sample_freqs_kHz,
2408 NEWAMP2_16K_K,
2409 c2->phase_fft_fwd_cfg,
2410 c2->phase_fft_inv_cfg,
2411 indexes,
2412 1.5,
2413 pwbFlag);
2414
2415
2416 for(i=0; i<M; i++) {
2417 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
2418 }
2419}
2420
2421#endif
2422
2423/*---------------------------------------------------------------------------* \
2424
2425 FUNCTION....: synthesise_one_frame()
2426 AUTHOR......: David Rowe
2427 DATE CREATED: 23/8/2010
2428
2429 Synthesise 80 speech samples (10ms) from model parameters.
2430
2431\*---------------------------------------------------------------------------*/
2432
2433void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain)
2434{
2435 int i;
2436 //PROFILE_VAR(phase_start, pf_start, synth_start);
2437
2438 //#ifdef DUMP
2439 //dump_quantised_model(model);
2440 //#endif
2441
2442 //PROFILE_SAMPLE(phase_start);
2443
2444 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode) ) {
2445 /* newamp1/2, we've already worked out rate L phase */
2446 COMP *H = Aw;
2447 phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H);
2448 } else {
2449 /* LPC based phase synthesis */
2450 COMP H[MAX_AMP+1];
2451 sample_phase(model, H, Aw);
2452 phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H);
2453 }
2454
2455 //PROFILE_SAMPLE_AND_LOG(pf_start, phase_start, " phase_synth");
2456
2457 postfilter(model, &c2->bg_est);
2458
2459 //PROFILE_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter");
2460
2461 synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1);
2462
2463 for(i=0; i<c2->n_samp; i++) {
2464 c2->Sn_[i] *= gain;
2465 }
2466
2467 //PROFILE_SAMPLE_AND_LOG2(synth_start, " synth");
2468
2469 ear_protection(c2->Sn_, c2->n_samp);
2470
2471 for(i=0; i<c2->n_samp; i++) {
2472 if (c2->Sn_[i] > 32767.0)
2473 speech[i] = 32767;
2474 else if (c2->Sn_[i] < -32767.0)
2475 speech[i] = -32767;
2476 else
2477 speech[i] = c2->Sn_[i];
2478 }
2479
2480}
2481
2482/*---------------------------------------------------------------------------*\
2483
2484 FUNCTION....: analyse_one_frame()
2485 AUTHOR......: David Rowe
2486 DATE CREATED: 23/8/2010
2487
2488 Extract sinusoidal model parameters from 80 speech samples (10ms of
2489 speech).
2490
2491\*---------------------------------------------------------------------------*/
2492
2493void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
2494{
2495 COMP Sw[FFT_ENC];
2496 float pitch;
2497 int i;
2498 //PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps);
2499 int n_samp = c2->n_samp;
2500 int m_pitch = c2->m_pitch;
2501
2502 /* Read input speech */
2503
2504 for(i=0; i<m_pitch-n_samp; i++)
2505 c2->Sn[i] = c2->Sn[i+n_samp];
2506 for(i=0; i<n_samp; i++)
2507 c2->Sn[i+m_pitch-n_samp] = speech[i];
2508
2509 //PROFILE_SAMPLE(dft_start);
2510 dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
2511 //PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech");
2512
2513 /* Estimate pitch */
2514
2515 nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc);
2516 //PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp");
2517
2518 model->Wo = TWO_PI/pitch;
2519 model->L = PI/model->Wo;
2520
2521 /* estimate model parameters */
2522
2523 two_stage_pitch_refinement(&c2->c2const, model, Sw);
2524 //PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage");
2525 estimate_amplitudes(model, Sw, c2->W, 0);
2526 //PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps");
2527 est_voicing_mbe(&c2->c2const, model, Sw, c2->W);
2528 //PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing");
2529 #ifdef DUMP
2530 dump_model(model);
2531 #endif
2532}
2533
2534/*---------------------------------------------------------------------------*\
2535
2536 FUNCTION....: ear_protection()
2537 AUTHOR......: David Rowe
2538 DATE CREATED: Nov 7 2012
2539
2540 Limits output level to protect ears when there are bit errors or the input
2541 is overdriven. This doesn't correct or mask bit errors, just reduces the
2542 worst of their damage.
2543
2544\*---------------------------------------------------------------------------*/
2545
/*
  in_out[] : frame of samples, attenuated in place when it is too loud
  n        : number of samples in in_out[]

  The peak is measured on the absolute value of the samples, so an
  excursion is limited whichever polarity it has.  Frames whose peak is
  at or below the set point (30000), and empty frames, are left untouched.
*/
static void ear_protection(float in_out[], int n) {
    float max_sample, over, gain;
    int   i;

    /* find maximum sample magnitude in frame */

    max_sample = 0.0;
    for(i=0; i<n; i++) {
        float mag = (in_out[i] < 0.0f) ? -in_out[i] : in_out[i];
        if (mag > max_sample)
            max_sample = mag;
    }

    /* determine how far above set point */

    over = max_sample/30000.0;

    /* If we are x dB over set point we reduce level by 2x dB, this
       attenuates major excursions in amplitude (likely to be caused
       by bit errors) more than smaller ones */

    if (over > 1.0) {
        gain = 1.0/(over*over);
        for(i=0; i<n; i++)
            in_out[i] *= gain;
    }
}
2572
2573void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma)
2574{
2575 assert((beta >= 0.0) && (beta <= 1.0));
2576 assert((gamma >= 0.0) && (gamma <= 1.0));
2577 c2->lpc_pf = enable;
2578 c2->bass_boost = bass_boost;
2579 c2->beta = beta;
2580 c2->gamma = gamma;
2581}
2582
2583/*
2584 Allows optional stealing of one of the voicing bits for use as a
2585 spare bit (1300, 1400 and 1600 bit/s); 700 and 700B report an already spare bit.
2586 Experimental method of sending voice/data frames for FreeDV.
2587*/
2588
2589int codec2_get_spare_bit_index(struct CODEC2 *c2)
2590{
2591 assert(c2 != NULL);
2592
2593 switch(c2->mode) {
2594 case CODEC2_MODE_1300:
2595 return 2; // bit 2 (3th bit) is v2 (third voicing bit)
2596 break;
2597 case CODEC2_MODE_1400:
2598 return 10; // bit 10 (11th bit) is v2 (third voicing bit)
2599 break;
2600 case CODEC2_MODE_1600:
2601 return 15; // bit 15 (16th bit) is v2 (third voicing bit)
2602 break;
2603 case CODEC2_MODE_700:
2604 return 26; // bits 26 and 27 are spare
2605 break;
2606 case CODEC2_MODE_700B:
2607 return 27; // bit 27 is spare
2608 break;
2609 }
2610
2611 return -1;
2612}
2613
2614/*
2615 Reconstructs the spare voicing bit. Note works on unpacked bits
2616 for convenience.
2617*/
2618
2619int codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[])
2620{
2621 int v1,v3;
2622
2623 assert(c2 != NULL);
2624
2625 v1 = unpacked_bits[1];
2626
2627 switch(c2->mode) {
2628 case CODEC2_MODE_1300:
2629
2630 v3 = unpacked_bits[1+1+1];
2631
2632 /* if either adjacent frame is voiced, make this one voiced */
2633
2634 unpacked_bits[2] = (v1 || v3);
2635
2636 return 0;
2637
2638 break;
2639
2640 case CODEC2_MODE_1400:
2641
2642 v3 = unpacked_bits[1+1+8+1];
2643
2644 /* if either adjacent frame is voiced, make this one voiced */
2645
2646 unpacked_bits[10] = (v1 || v3);
2647
2648 return 0;
2649
2650 break;
2651
2652 case CODEC2_MODE_1600:
2653 v3 = unpacked_bits[1+1+8+5+1];
2654
2655 /* if either adjacent frame is voiced, make this one voiced */
2656
2657 unpacked_bits[15] = (v1 || v3);
2658
2659 return 0;
2660
2661 break;
2662 }
2663
2664 return -1;
2665}
2666
2667void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray)
2668{
2669 assert(c2 != NULL);
2670 c2->gray = gray;
2671}
2672
2673void codec2_set_softdec(struct CODEC2 *c2, float *softdec)
2674{
2675 assert(c2 != NULL);
2676 c2->softdec = softdec;
2677}
2678
2679void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename) {
2680 if ((codec2_state->fmlfeat = fopen(filename, "wb")) == NULL) {
2681 fprintf(stderr, "error opening machine learning feature file: %s\n", filename);
2682 exit(1);
2683 }
2684}
2685
2686#ifndef __EMBEDDED__
2687void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename) {
2688 FILE *f;
2689
2690 if ((f = fopen(filename, "rb")) == NULL) {
2691 fprintf(stderr, "error opening codebook file: %s\n", filename);
2692 exit(1);
2693 }
2694 //fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, newamp1vq_cb[num].k, newamp1vq_cb[num].m);
2695 float tmp[newamp1vq_cb[num].k*newamp1vq_cb[num].m];
2696 int nread = fread(tmp, sizeof(float), newamp1vq_cb[num].k*newamp1vq_cb[num].m, f);
2697 float *p = (float*)newamp1vq_cb[num].cb;
2698 for(int i=0; i<newamp1vq_cb[num].k*newamp1vq_cb[num].m; i++)
2699 p[i] = tmp[i];
2700 // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], newamp1vq_cb[num].cb[1]);
2701 assert(nread == newamp1vq_cb[num].k*newamp1vq_cb[num].m);
2702 fclose(f);
2703}
2704#endif
2705
2706float codec2_get_var(struct CODEC2 *codec2_state) {
2707 if (codec2_state->nse)
2708 return codec2_state->se/codec2_state->nse;
2709 else
2710 return 0;
2711}
2712
2713float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) {
2714 codec2_state->user_rate_K_vec_no_mean_ = (float*)malloc(sizeof(float)*NEWAMP1_K);
2715 *K = NEWAMP1_K;
2716 return codec2_state->user_rate_K_vec_no_mean_;
2717}
2718
2719void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) {
2720 codec2_state->post_filter_en = en;
2721}