summaryrefslogtreecommitdiff
path: root/codec2.c
diff options
context:
space:
mode:
authorerdgeist <erdgeist@erdgeist.org>2025-08-15 12:42:40 +0200
committererdgeist <erdgeist@erdgeist.org>2025-08-15 12:42:40 +0200
commit30325d24d107dbf133da39f7c96d1510fd1c9449 (patch)
tree932baa5b2a4475821f16dccf9e3e05011daa6d92 /codec2.c
parent9022d768021bbe15c7815cc6f8b64218b46f0e10 (diff)
Bump to codec2 version 1.2.0erdgeist-bump-to-1.2.0
Diffstat (limited to 'codec2.c')
-rw-r--r--codec2.c3331
1 files changed, 1259 insertions, 2072 deletions
diff --git a/codec2.c b/codec2.c
index 2697356..b27626a 100644
--- a/codec2.c
+++ b/codec2.c
@@ -26,33 +26,31 @@
26 along with this program; if not, see <http://www.gnu.org/licenses/>. 26 along with this program; if not, see <http://www.gnu.org/licenses/>.
27*/ 27*/
28 28
29#include "codec2.h"
30
29#include <assert.h> 31#include <assert.h>
32#include <math.h>
33#include <stdbool.h>
30#include <stdio.h> 34#include <stdio.h>
31#include <stdlib.h> 35#include <stdlib.h>
32#include <stdbool.h>
33#include <string.h> 36#include <string.h>
34#include <math.h>
35 37
36#include "defines.h" 38#include "bpf.h"
39#include "bpfb.h"
37#include "codec2_fft.h" 40#include "codec2_fft.h"
38#include "sine.h" 41#include "codec2_internal.h"
39#include "nlp.h" 42#include "debug_alloc.h"
43#include "defines.h"
40#include "dump.h" 44#include "dump.h"
41#include "lpc.h"
42#include "quantise.h"
43#include "phase.h"
44#include "interp.h" 45#include "interp.h"
45#include "postfilter.h" 46#include "lpc.h"
46#include "codec2.h"
47#include "lsp.h" 47#include "lsp.h"
48#include "newamp2.h"
49#include "codec2_internal.h"
50#include "machdep.h" 48#include "machdep.h"
51#include "bpf.h" 49#include "nlp.h"
52#include "bpfb.h" 50#include "phase.h"
53#include "c2wideband.h" 51#include "postfilter.h"
54 52#include "quantise.h"
55#include "debug_alloc.h" 53#include "sine.h"
56 54
57/*---------------------------------------------------------------------------* \ 55/*---------------------------------------------------------------------------* \
58 56
@@ -62,32 +60,30 @@
62 60
63void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]); 61void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]);
64void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, 62void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model,
65 COMP Aw[], float gain); 63 COMP Aw[], float gain);
66void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]); 64void codec2_encode_3200(struct CODEC2 *c2, unsigned char *bits, short speech[]);
67void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits); 65void codec2_decode_3200(struct CODEC2 *c2, short speech[],
68void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]); 66 const unsigned char *bits);
69void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits); 67void codec2_encode_2400(struct CODEC2 *c2, unsigned char *bits, short speech[]);
70void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]); 68void codec2_decode_2400(struct CODEC2 *c2, short speech[],
71void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits); 69 const unsigned char *bits);
72void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]); 70void codec2_encode_1600(struct CODEC2 *c2, unsigned char *bits, short speech[]);
73void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits); 71void codec2_decode_1600(struct CODEC2 *c2, short speech[],
74void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]); 72 const unsigned char *bits);
75void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est); 73void codec2_encode_1400(struct CODEC2 *c2, unsigned char *bits, short speech[]);
76void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]); 74void codec2_decode_1400(struct CODEC2 *c2, short speech[],
77void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits); 75 const unsigned char *bits);
78void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]); 76void codec2_encode_1300(struct CODEC2 *c2, unsigned char *bits, short speech[]);
79void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits); 77void codec2_decode_1300(struct CODEC2 *c2, short speech[],
80void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]); 78 const unsigned char *bits, float ber_est);
81void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits); 79void codec2_encode_1200(struct CODEC2 *c2, unsigned char *bits, short speech[]);
82void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]); 80void codec2_decode_1200(struct CODEC2 *c2, short speech[],
83void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits); 81 const unsigned char *bits);
84void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]); 82void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, short speech[]);
85void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits); 83void codec2_decode_700c(struct CODEC2 *c2, short speech[],
86void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits); 84 const unsigned char *bits);
87static void ear_protection(float in_out[], int n); 85static void ear_protection(float in_out[], int n);
88 86
89
90
91/*---------------------------------------------------------------------------*\ 87/*---------------------------------------------------------------------------*\
92 88
93 FUNCTIONS 89 FUNCTIONS
@@ -108,247 +104,170 @@ static void ear_protection(float in_out[], int n);
108 104
109\*---------------------------------------------------------------------------*/ 105\*---------------------------------------------------------------------------*/
110 106
111 107struct CODEC2 *codec2_create(int mode) {
112//Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior ! 108 struct CODEC2 *c2;
113struct CODEC2 * codec2_create(int mode) 109 int i, l;
114{ 110
115 struct CODEC2 *c2; 111 // ALL POSSIBLE MODES MUST BE CHECKED HERE!
116 int i,l; 112 // we test if the desired mode is enabled at compile time
117 113 // and return NULL if not
118 // ALL POSSIBLE MODES MUST BE CHECKED HERE! 114
119 // we test if the desired mode is enabled at compile time 115 if (false == (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) ||
120 // and return NULL if not 116 CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) ||
121 117 CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) ||
122 if (false == ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) 118 CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) ||
123 || CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) 119 CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) ||
124 || CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) 120 CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) ||
125 || CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) 121 CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode))) {
126 || CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) 122 return NULL;
127 || CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) 123 }
128 || CODEC2_MODE_ACTIVE(CODEC2_MODE_700, mode) 124
129 || CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode) 125 c2 = (struct CODEC2 *)MALLOC(sizeof(struct CODEC2));
130 || CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode) 126 if (c2 == NULL) return NULL;
131 || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode) 127
132 || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) 128 c2->mode = mode;
133 ) ) 129
134 { 130 /* store constants in a few places for convenience */
135 return NULL; 131
136 } 132 c2->c2const = c2const_create(8000, N_S);
137 133 c2->Fs = c2->c2const.Fs;
138 c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2)); 134 int n_samp = c2->n_samp = c2->c2const.n_samp;
139 if (c2 == NULL) 135 int m_pitch = c2->m_pitch = c2->c2const.m_pitch;
140 return NULL; 136
141 137 c2->Pn = (float *)MALLOC(2 * n_samp * sizeof(float));
142 c2->mode = mode; 138 if (c2->Pn == NULL) {
143 139 return NULL;
144 /* store constants in a few places for convenience */ 140 }
145 141 c2->Sn_ = (float *)MALLOC(2 * n_samp * sizeof(float));
146 if( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0){ 142 if (c2->Sn_ == NULL) {
147 c2->c2const = c2const_create(8000, N_S); 143 FREE(c2->Pn);
148 }else{ 144 return NULL;
149 c2->c2const = c2const_create(16000, N_S); 145 }
150 } 146 c2->w = (float *)MALLOC(m_pitch * sizeof(float));
151 c2->Fs = c2->c2const.Fs; 147 if (c2->w == NULL) {
152 int n_samp = c2->n_samp = c2->c2const.n_samp; 148 FREE(c2->Pn);
153 int m_pitch = c2->m_pitch = c2->c2const.m_pitch; 149 FREE(c2->Sn_);
154 150 return NULL;
155 c2->Pn = (float*)MALLOC(2*n_samp*sizeof(float)); 151 }
156 if (c2->Pn == NULL) { 152 c2->Sn = (float *)MALLOC(m_pitch * sizeof(float));
157 return NULL; 153 if (c2->Sn == NULL) {
158 } 154 FREE(c2->Pn);
159 c2->Sn_ = (float*)MALLOC(2*n_samp*sizeof(float)); 155 FREE(c2->Sn_);
160 if (c2->Sn_ == NULL) { 156 FREE(c2->w);
161 FREE(c2->Pn); 157 return NULL;
162 return NULL; 158 }
163 } 159
164 c2->w = (float*)MALLOC(m_pitch*sizeof(float)); 160 for (i = 0; i < m_pitch; i++) c2->Sn[i] = 1.0;
165 if (c2->w == NULL) { 161 c2->hpf_states[0] = c2->hpf_states[1] = 0.0;
166 FREE(c2->Pn); 162 for (i = 0; i < 2 * n_samp; i++) c2->Sn_[i] = 0;
167 FREE(c2->Sn_); 163 c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL);
168 return NULL; 164 c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL);
169 } 165 make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w, c2->W);
170 c2->Sn = (float*)MALLOC(m_pitch*sizeof(float)); 166 make_synthesis_window(&c2->c2const, c2->Pn);
171 if (c2->Sn == NULL) { 167 c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL);
172 FREE(c2->Pn); 168 c2->prev_f0_enc = 1 / P_MAX_S;
173 FREE(c2->Sn_); 169 c2->bg_est = 0.0;
174 FREE(c2->w); 170 c2->ex_phase = 0.0;
175 return NULL; 171
176 } 172 for (l = 1; l <= MAX_AMP; l++) c2->prev_model_dec.A[l] = 0.0;
177 173 c2->prev_model_dec.Wo = TWO_PI / c2->c2const.p_max;
178 for(i=0; i<m_pitch; i++) 174 c2->prev_model_dec.L = PI / c2->prev_model_dec.Wo;
179 c2->Sn[i] = 1.0; 175 c2->prev_model_dec.voiced = 0;
180 c2->hpf_states[0] = c2->hpf_states[1] = 0.0; 176
181 for(i=0; i<2*n_samp; i++) 177 for (i = 0; i < LPC_ORD; i++) {
182 c2->Sn_[i] = 0; 178 c2->prev_lsps_dec[i] = i * PI / (LPC_ORD + 1);
183 c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); 179 }
184 c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); 180 c2->prev_e_dec = 1;
185 make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W); 181
186 make_synthesis_window(&c2->c2const, c2->Pn); 182 c2->nlp = nlp_create(&c2->c2const);
187 c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); 183 if (c2->nlp == NULL) {
188 quantise_init(); 184 return NULL;
189 c2->prev_f0_enc = 1/P_MAX_S; 185 }
190 c2->bg_est = 0.0; 186
191 c2->ex_phase = 0.0; 187 c2->lpc_pf = 1;
192 188 c2->bass_boost = 1;
193 for(l=1; l<=MAX_AMP; l++) 189 c2->beta = LPCPF_BETA;
194 c2->prev_model_dec.A[l] = 0.0; 190 c2->gamma = LPCPF_GAMMA;
195 c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max; 191
196 c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo; 192 c2->xq_enc[0] = c2->xq_enc[1] = 0.0;
197 c2->prev_model_dec.voiced = 0; 193 c2->xq_dec[0] = c2->xq_dec[1] = 0.0;
198 194
199 for(i=0; i<LPC_ORD; i++) { 195 c2->smoothing = 0;
200 c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1); 196 c2->se = 0.0;
201 } 197 c2->nse = 0;
202 c2->prev_e_dec = 1; 198 c2->user_rate_K_vec_no_mean_ = NULL;
203 199 c2->post_filter_en = true;
204 c2->nlp = nlp_create(&c2->c2const); 200
205 if (c2->nlp == NULL) { 201 c2->bpf_buf = (float *)MALLOC(sizeof(float) * (BPF_N + 4 * c2->n_samp));
206 return NULL; 202 assert(c2->bpf_buf != NULL);
207 } 203 for (i = 0; i < BPF_N + 4 * c2->n_samp; i++) c2->bpf_buf[i] = 0.0;
208 204
209 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode)) 205 c2->softdec = NULL;
210 c2->gray = 0; // natural binary better for trellis decoding (hopefully added later) 206 c2->gray = 1;
211 else 207
212 c2->gray = 1; 208 /* newamp1 initialisation */
213 209
214 c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA; 210 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
215 211 mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0),
216 c2->xq_enc[0] = c2->xq_enc[1] = 0.0; 212 ftomel(3700.0));
217 c2->xq_dec[0] = c2->xq_dec[1] = 0.0; 213 int k;
218 214 for (k = 0; k < NEWAMP1_K; k++) {
219 c2->smoothing = 0; 215 c2->prev_rate_K_vec_[k] = 0.0;
220 c2->se = 0.0; c2->nse = 0; 216 c2->eq[k] = 0.0;
221 c2->user_rate_K_vec_no_mean_ = NULL; 217 }
222 c2->post_filter_en = 1; 218 c2->eq_en = false;
223 219 c2->Wo_left = 0.0;
224 c2->bpf_buf = (float*)MALLOC(sizeof(float)*(BPF_N+4*c2->n_samp)); 220 c2->voicing_left = 0;
225 assert(c2->bpf_buf != NULL); 221 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL);
226 for(i=0; i<BPF_N+4*c2->n_samp; i++) 222 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL);
227 c2->bpf_buf[i] = 0.0; 223 }
228 224
229 c2->softdec = NULL; 225 c2->fmlfeat = NULL;
230 226 c2->fmlmodel = NULL;
231 /* newamp1 initialisation */ 227
232 228 // make sure that one of the two decode function pointers is empty
233 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { 229 // for the encode function pointer this is not required since we always set it
234 mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) ); 230 // to a meaningful value
235 int k; 231
236 for(k=0; k<NEWAMP1_K; k++) { 232 c2->decode = NULL;
237 c2->prev_rate_K_vec_[k] = 0.0; 233 c2->decode_ber = NULL;
238 c2->eq[k] = 0.0; 234
239 } 235 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) {
240 c2->eq_en = 0; 236 c2->encode = codec2_encode_3200;
241 c2->Wo_left = 0.0; 237 c2->decode = codec2_decode_3200;
242 c2->voicing_left = 0;; 238 }
243 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); 239
244 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); 240 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) {
245 } 241 c2->encode = codec2_encode_2400;
246 242 c2->decode = codec2_decode_2400;
247 /* newamp2 initialisation */ 243 }
248 244
249 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { 245 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) {
250 n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K); 246 c2->encode = codec2_encode_1600;
251 int k; 247 c2->decode = codec2_decode_1600;
252 for(k=0; k<NEWAMP2_K; k++) { 248 }
253 c2->n2_prev_rate_K_vec_[k] = 0.0; 249
254 } 250 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) {
255 c2->Wo_left = 0.0; 251 c2->encode = codec2_encode_1400;
256 c2->voicing_left = 0;; 252 c2->decode = codec2_decode_1400;
257 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); 253 }
258 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); 254
259 } 255 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) {
260 /* newamp2 PWB initialisation */ 256 c2->encode = codec2_encode_1300;
261 257 c2->decode_ber = codec2_decode_1300;
262 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { 258 }
263 n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K); 259
264 int k; 260 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) {
265 for(k=0; k<NEWAMP2_16K_K; k++) { 261 c2->encode = codec2_encode_1200;
266 c2->n2_pwb_prev_rate_K_vec_[k] = 0.0; 262 c2->decode = codec2_decode_1200;
267 } 263 }
268 c2->Wo_left = 0.0; 264
269 c2->voicing_left = 0;; 265 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
270 c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); 266 c2->encode = codec2_encode_700c;
271 c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); 267 c2->decode = codec2_decode_700c;
272 } 268 }
273 269
274 c2->fmlfeat = NULL; 270 return c2;
275
276 // make sure that one of the two decode function pointers is empty
277 // for the encode function pointer this is not required since we always set it
278 // to a meaningful value
279
280 c2->decode = NULL;
281 c2->decode_ber = NULL;
282
283 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode))
284 {
285 c2->encode = codec2_encode_3200;
286 c2->decode = codec2_decode_3200;
287 }
288
289 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode))
290 {
291 c2->encode = codec2_encode_2400;
292 c2->decode = codec2_decode_2400;
293 }
294
295 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode))
296 {
297 c2->encode = codec2_encode_1600;
298 c2->decode = codec2_decode_1600;
299 }
300
301 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode))
302 {
303 c2->encode = codec2_encode_1400;
304 c2->decode = codec2_decode_1400;
305 }
306
307 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode))
308 {
309 c2->encode = codec2_encode_1300;
310 c2->decode_ber = codec2_decode_1300;
311 }
312
313 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
314 {
315 c2->encode = codec2_encode_1200;
316 c2->decode = codec2_decode_1200;
317 }
318
319 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
320 {
321 c2->encode = codec2_encode_700;
322 c2->decode = codec2_decode_700;
323 }
324
325 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
326 {
327 c2->encode = codec2_encode_700b;
328 c2->decode = codec2_decode_700b;
329 }
330
331 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
332 {
333 c2->encode = codec2_encode_700c;
334 c2->decode = codec2_decode_700c;
335 }
336
337 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
338 {
339 c2->encode = codec2_encode_450;
340 c2->decode = codec2_decode_450;
341 }
342
343 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
344 {
345 //Encode PWB doesnt make sense
346 c2->encode = codec2_encode_450;
347 c2->decode = codec2_decode_450pwb;
348 }
349
350
351 return c2;
352} 271}
353 272
354/*---------------------------------------------------------------------------*\ 273/*---------------------------------------------------------------------------*\
@@ -361,31 +280,22 @@ struct CODEC2 * codec2_create(int mode)
361 280
362\*---------------------------------------------------------------------------*/ 281\*---------------------------------------------------------------------------*/
363 282
364void codec2_destroy(struct CODEC2 *c2) 283void codec2_destroy(struct CODEC2 *c2) {
365{ 284 assert(c2 != NULL);
366 assert(c2 != NULL); 285 FREE(c2->bpf_buf);
367 FREE(c2->bpf_buf); 286 nlp_destroy(c2->nlp);
368 nlp_destroy(c2->nlp); 287 codec2_fft_free(c2->fft_fwd_cfg);
369 codec2_fft_free(c2->fft_fwd_cfg); 288 codec2_fftr_free(c2->fftr_fwd_cfg);
370 codec2_fftr_free(c2->fftr_fwd_cfg); 289 codec2_fftr_free(c2->fftr_inv_cfg);
371 codec2_fftr_free(c2->fftr_inv_cfg); 290 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
372 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { 291 codec2_fft_free(c2->phase_fft_fwd_cfg);
373 codec2_fft_free(c2->phase_fft_fwd_cfg); 292 codec2_fft_free(c2->phase_fft_inv_cfg);
374 codec2_fft_free(c2->phase_fft_inv_cfg); 293 }
375 } 294 FREE(c2->Pn);
376 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { 295 FREE(c2->Sn);
377 codec2_fft_free(c2->phase_fft_fwd_cfg); 296 FREE(c2->w);
378 codec2_fft_free(c2->phase_fft_inv_cfg); 297 FREE(c2->Sn_);
379 } 298 FREE(c2);
380 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
381 codec2_fft_free(c2->phase_fft_fwd_cfg);
382 codec2_fft_free(c2->phase_fft_inv_cfg);
383 }
384 FREE(c2->Pn);
385 FREE(c2->Sn);
386 FREE(c2->w);
387 FREE(c2->Sn_);
388 FREE(c2);
389} 299}
390 300
391/*---------------------------------------------------------------------------*\ 301/*---------------------------------------------------------------------------*\
@@ -399,32 +309,31 @@ void codec2_destroy(struct CODEC2 *c2)
399\*---------------------------------------------------------------------------*/ 309\*---------------------------------------------------------------------------*/
400 310
401int codec2_bits_per_frame(struct CODEC2 *c2) { 311int codec2_bits_per_frame(struct CODEC2 *c2) {
402 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) 312 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) return 64;
403 return 64; 313 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) return 48;
404 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) 314 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) return 64;
405 return 48; 315 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) return 56;
406 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) 316 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 52;
407 return 64; 317 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 48;
408 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) 318 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 28;
409 return 56; 319
410 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) 320 return 0; /* shouldn't get here */
411 return 52;
412 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
413 return 48;
414 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
415 return 28;
416 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
417 return 28;
418 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
419 return 28;
420 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
421 return 18;
422 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
423 return 18;
424
425 return 0; /* shouldn't get here */
426} 321}
427 322
323/*---------------------------------------------------------------------------*\
324
325 FUNCTION....: codec2_bytes_per_frame
326 DATE CREATED: April 2021
327
328 Returns the number of bytes per frame. Useful for allocated storage for
329 codec2_encode()/codec2_decode(). Note the number of bits may not be a
330 multiple of 8, therefore some bits in the last byte may be unused.
331
332\*---------------------------------------------------------------------------*/
333
334int codec2_bytes_per_frame(struct CODEC2 *c2) {
335 return (codec2_bits_per_frame(c2) + 7) / 8;
336}
428 337
429/*---------------------------------------------------------------------------*\ 338/*---------------------------------------------------------------------------*\
430 339
@@ -437,60 +346,61 @@ int codec2_bits_per_frame(struct CODEC2 *c2) {
437\*---------------------------------------------------------------------------*/ 346\*---------------------------------------------------------------------------*/
438 347
439int codec2_samples_per_frame(struct CODEC2 *c2) { 348int codec2_samples_per_frame(struct CODEC2 *c2) {
440 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) 349 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) return 160;
441 return 160; 350 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) return 160;
442 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) 351 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) return 320;
443 return 160; 352 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) return 320;
444 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) 353 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 320;
445 return 320; 354 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 320;
446 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) 355 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 320;
447 return 320; 356 return 0; /* shouldn't get here */
448 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode))
449 return 320;
450 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
451 return 320;
452 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
453 return 320;
454 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
455 return 320;
456 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
457 return 320;
458 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
459 return 320;
460 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
461 return 640;
462 return 0; /* shouldnt get here */
463} 357}
464 358
465void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[]) 359/*---------------------------------------------------------------------------*\
466{ 360
467 assert(c2 != NULL); 361 FUNCTION....: codec2_encode
468 assert(c2->encode != NULL); 362 AUTHOR......: David Rowe
363 DATE CREATED: Nov 14 2011
364
365 Take an input buffer of speech samples, and compress them to a packed buffer
366 of bytes.
469 367
470 c2->encode(c2, bits, speech); 368\*---------------------------------------------------------------------------*/
471 369
472} 370void codec2_encode(struct CODEC2 *c2, unsigned char *bytes, short speech[]) {
371 assert(c2 != NULL);
372 assert(c2->encode != NULL);
473 373
474void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits) 374 c2->encode(c2, bytes, speech);
475{
476 codec2_decode_ber(c2, speech, bits, 0.0);
477} 375}
478 376
479void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est) 377/*---------------------------------------------------------------------------*\
480{
481 assert(c2 != NULL);
482 assert(c2->decode != NULL || c2->decode_ber != NULL);
483 378
484 if (c2->decode != NULL) 379 FUNCTION....: codec2_decode
485 { 380 AUTHOR......: David Rowe
486 c2->decode(c2, speech, bits); 381 DATE CREATED: Nov 14 2011
487 } 382
488 else 383 Take an input packed buffer of bytes, and decode them to a buffer of speech
489 { 384 samples.
490 c2->decode_ber(c2, speech, bits, ber_est); 385
491 } 386\*---------------------------------------------------------------------------*/
387
388void codec2_decode(struct CODEC2 *c2, short speech[],
389 const unsigned char *bytes) {
390 codec2_decode_ber(c2, speech, bytes, 0.0);
492} 391}
493 392
393void codec2_decode_ber(struct CODEC2 *c2, short speech[],
394 const unsigned char *bits, float ber_est) {
395 assert(c2 != NULL);
396 assert(c2->decode != NULL || c2->decode_ber != NULL);
397
398 if (c2->decode != NULL) {
399 c2->decode(c2, speech, bits);
400 } else {
401 c2->decode_ber(c2, speech, bits, ber_est);
402 }
403}
494 404
495/*---------------------------------------------------------------------------*\ 405/*---------------------------------------------------------------------------*\
496 406
@@ -503,60 +413,60 @@ void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *b
503 The codec2 algorithm actually operates internally on 10ms (80 413 The codec2 algorithm actually operates internally on 10ms (80
504 sample) frames, so we run the encoding algorithm twice. On the 414 sample) frames, so we run the encoding algorithm twice. On the
505 first frame we just send the voicing bits. On the second frame we 415 first frame we just send the voicing bits. On the second frame we
506 send all model parameters. Compared to 2400 we use a larger number 416 send all model parameters. Compared to 2400 we encode the LSP
507 of bits for the LSPs and non-VQ pitch and energy. 417 differences, a larger number of bits for the LSP(d)s and scalar
418 (non-VQ) quantisation for pitch and energy.
508 419
509 The bit allocation is: 420 The bit allocation is:
510 421
511 Parameter bits/frame 422 Parameter bits/frame
512 -------------------------------------- 423 ------------------------------------------------------
513 Harmonic magnitudes (LSPs) 50 424 Harmonic magnitudes (LSP differerences) 50
514 Pitch (Wo) 7 425 Pitch (Wo) 7
515 Energy 5 426 Energy 5
516 Voicing (10ms update) 2 427 Voicing (10ms update) 2
517 TOTAL 64 428 TOTAL 64
518 429
519\*---------------------------------------------------------------------------*/ 430\*---------------------------------------------------------------------------*/
520 431
521void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) 432void codec2_encode_3200(struct CODEC2 *c2, unsigned char *bits,
522{ 433 short speech[]) {
523 MODEL model; 434 MODEL model;
524 float ak[LPC_ORD+1]; 435 float ak[LPC_ORD + 1];
525 float lsps[LPC_ORD]; 436 float lsps[LPC_ORD];
526 float e; 437 float e;
527 int Wo_index, e_index; 438 int Wo_index, e_index;
528 int lspd_indexes[LPC_ORD]; 439 int lspd_indexes[LPC_ORD];
529 int i; 440 int i;
530 unsigned int nbit = 0; 441 unsigned int nbit = 0;
531 442
532 assert(c2 != NULL); 443 assert(c2 != NULL);
533 444
534 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 445 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
535 446
536 /* first 10ms analysis frame - we just want voicing */ 447 /* first 10ms analysis frame - we just want voicing */
537 448
538 analyse_one_frame(c2, &model, speech); 449 analyse_one_frame(c2, &model, speech);
539 pack(bits, &nbit, model.voiced, 1); 450 pack(bits, &nbit, model.voiced, 1);
540 451
541 /* second 10ms analysis frame */ 452 /* second 10ms analysis frame */
542 453
543 analyse_one_frame(c2, &model, &speech[c2->n_samp]); 454 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
544 pack(bits, &nbit, model.voiced, 1); 455 pack(bits, &nbit, model.voiced, 1);
545 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); 456 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
546 pack(bits, &nbit, Wo_index, WO_BITS); 457 pack(bits, &nbit, Wo_index, WO_BITS);
547 458
548 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 459 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
549 e_index = encode_energy(e, E_BITS); 460 e_index = encode_energy(e, E_BITS);
550 pack(bits, &nbit, e_index, E_BITS); 461 pack(bits, &nbit, e_index, E_BITS);
551 462
552 encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD); 463 encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD);
553 for(i=0; i<LSPD_SCALAR_INDEXES; i++) { 464 for (i = 0; i < LSPD_SCALAR_INDEXES; i++) {
554 pack(bits, &nbit, lspd_indexes[i], lspd_bits(i)); 465 pack(bits, &nbit, lspd_indexes[i], lspd_bits(i));
555 } 466 }
556 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 467 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
557} 468}
558 469
559
560/*---------------------------------------------------------------------------*\ 470/*---------------------------------------------------------------------------*\
561 471
562 FUNCTION....: codec2_decode_3200 472 FUNCTION....: codec2_decode_3200
@@ -567,77 +477,75 @@ void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[])
567 477
568\*---------------------------------------------------------------------------*/ 478\*---------------------------------------------------------------------------*/
569 479
570void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits) 480void codec2_decode_3200(struct CODEC2 *c2, short speech[],
571{ 481 const unsigned char *bits) {
572 MODEL model[2]; 482 MODEL model[2];
573 int lspd_indexes[LPC_ORD]; 483 int lspd_indexes[LPC_ORD];
574 float lsps[2][LPC_ORD]; 484 float lsps[2][LPC_ORD];
575 int Wo_index, e_index; 485 int Wo_index, e_index;
576 float e[2]; 486 float e[2];
577 float snr; 487 float snr;
578 float ak[2][LPC_ORD+1]; 488 float ak[2][LPC_ORD + 1];
579 int i,j; 489 int i, j;
580 unsigned int nbit = 0; 490 unsigned int nbit = 0;
581 COMP Aw[FFT_ENC]; 491 COMP Aw[FFT_ENC];
582 492
583 assert(c2 != NULL); 493 assert(c2 != NULL);
584 494
585 /* only need to zero these out due to (unused) snr calculation */ 495 /* only need to zero these out due to (unused) snr calculation */
586 496
587 for(i=0; i<2; i++) 497 for (i = 0; i < 2; i++)
588 for(j=1; j<=MAX_AMP; j++) 498 for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0;
589 model[i].A[j] = 0.0;
590 499
591 /* unpack bits from channel ------------------------------------*/ 500 /* unpack bits from channel ------------------------------------*/
592 501
593 /* this will partially fill the model params for the 2 x 10ms 502 /* this will partially fill the model params for the 2 x 10ms
594 frames */ 503 frames */
595 504
596 model[0].voiced = unpack(bits, &nbit, 1); 505 model[0].voiced = unpack(bits, &nbit, 1);
597 model[1].voiced = unpack(bits, &nbit, 1); 506 model[1].voiced = unpack(bits, &nbit, 1);
598 507
599 Wo_index = unpack(bits, &nbit, WO_BITS); 508 Wo_index = unpack(bits, &nbit, WO_BITS);
600 model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); 509 model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
601 model[1].L = PI/model[1].Wo; 510 model[1].L = PI / model[1].Wo;
602 511
603 e_index = unpack(bits, &nbit, E_BITS); 512 e_index = unpack(bits, &nbit, E_BITS);
604 e[1] = decode_energy(e_index, E_BITS); 513 e[1] = decode_energy(e_index, E_BITS);
605 514
606 for(i=0; i<LSPD_SCALAR_INDEXES; i++) { 515 for (i = 0; i < LSPD_SCALAR_INDEXES; i++) {
607 lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i)); 516 lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i));
608 } 517 }
609 decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD); 518 decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD);
610 519
611 /* interpolate ------------------------------------------------*/ 520 /* interpolate ------------------------------------------------*/
612 521
613 /* Wo and energy are sampled every 20ms, so we interpolate just 1 522 /* Wo and energy are sampled every 20ms, so we interpolate just 1
614 10ms frame between 20ms samples */ 523 10ms frame between 20ms samples */
615 524
616 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); 525 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
617 e[0] = interp_energy(c2->prev_e_dec, e[1]); 526 e[0] = interp_energy(c2->prev_e_dec, e[1]);
618 527
619 /* LSPs are sampled every 20ms so we interpolate the frame in 528 /* LSPs are sampled every 20ms so we interpolate the frame in
620 between, then recover spectral amplitudes */ 529 between, then recover spectral amplitudes */
621 530
622 interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); 531 interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5,
532 LPC_ORD);
623 533
624 for(i=0; i<2; i++) { 534 for (i = 0; i < 2; i++) {
625 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); 535 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
626 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, 536 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
627 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); 537 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
628 apply_lpc_correction(&model[i]); 538 apply_lpc_correction(&model[i]);
629 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); 539 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0);
630 } 540 }
631 541
632 /* update memories for next frame ----------------------------*/ 542 /* update memories for next frame ----------------------------*/
633 543
634 c2->prev_model_dec = model[1]; 544 c2->prev_model_dec = model[1];
635 c2->prev_e_dec = e[1]; 545 c2->prev_e_dec = e[1];
636 for(i=0; i<LPC_ORD; i++) 546 for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[1][i];
637 c2->prev_lsps_dec[i] = lsps[1][i];
638} 547}
639 548
640
641/*---------------------------------------------------------------------------*\ 549/*---------------------------------------------------------------------------*\
642 550
643 FUNCTION....: codec2_encode_2400 551 FUNCTION....: codec2_encode_2400
@@ -663,46 +571,45 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char *
663 571
664\*---------------------------------------------------------------------------*/ 572\*---------------------------------------------------------------------------*/
665 573
666void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) 574void codec2_encode_2400(struct CODEC2 *c2, unsigned char *bits,
667{ 575 short speech[]) {
668 MODEL model; 576 MODEL model;
669 float ak[LPC_ORD+1]; 577 float ak[LPC_ORD + 1];
670 float lsps[LPC_ORD]; 578 float lsps[LPC_ORD];
671 float e; 579 float e;
672 int WoE_index; 580 int WoE_index;
673 int lsp_indexes[LPC_ORD]; 581 int lsp_indexes[LPC_ORD];
674 int i; 582 int i;
675 int spare = 0; 583 int spare = 0;
676 unsigned int nbit = 0; 584 unsigned int nbit = 0;
677 585
678 assert(c2 != NULL); 586 assert(c2 != NULL);
679 587
680 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 588 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
681 589
682 /* first 10ms analysis frame - we just want voicing */ 590 /* first 10ms analysis frame - we just want voicing */
683 591
684 analyse_one_frame(c2, &model, speech); 592 analyse_one_frame(c2, &model, speech);
685 pack(bits, &nbit, model.voiced, 1); 593 pack(bits, &nbit, model.voiced, 1);
686 594
687 /* second 10ms analysis frame */ 595 /* second 10ms analysis frame */
688 596
689 analyse_one_frame(c2, &model, &speech[c2->n_samp]); 597 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
690 pack(bits, &nbit, model.voiced, 1); 598 pack(bits, &nbit, model.voiced, 1);
691 599
692 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 600 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
693 WoE_index = encode_WoE(&model, e, c2->xq_enc); 601 WoE_index = encode_WoE(&model, e, c2->xq_enc);
694 pack(bits, &nbit, WoE_index, WO_E_BITS); 602 pack(bits, &nbit, WoE_index, WO_E_BITS);
695 603
696 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); 604 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
697 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 605 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
698 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); 606 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
699 } 607 }
700 pack(bits, &nbit, spare, 2); 608 pack(bits, &nbit, spare, 2);
701 609
702 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 610 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
703} 611}
704 612
705
706/*---------------------------------------------------------------------------*\ 613/*---------------------------------------------------------------------------*\
707 614
708 FUNCTION....: codec2_decode_2400 615 FUNCTION....: codec2_decode_2400
@@ -713,86 +620,84 @@ void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[])
713 620
714\*---------------------------------------------------------------------------*/ 621\*---------------------------------------------------------------------------*/
715 622
716void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits) 623void codec2_decode_2400(struct CODEC2 *c2, short speech[],
717{ 624 const unsigned char *bits) {
718 MODEL model[2]; 625 MODEL model[2];
719 int lsp_indexes[LPC_ORD]; 626 int lsp_indexes[LPC_ORD];
720 float lsps[2][LPC_ORD]; 627 float lsps[2][LPC_ORD];
721 int WoE_index; 628 int WoE_index;
722 float e[2]; 629 float e[2];
723 float snr; 630 float snr;
724 float ak[2][LPC_ORD+1]; 631 float ak[2][LPC_ORD + 1];
725 int i,j; 632 int i, j;
726 unsigned int nbit = 0; 633 unsigned int nbit = 0;
727 COMP Aw[FFT_ENC]; 634 COMP Aw[FFT_ENC];
728 635
729 assert(c2 != NULL); 636 assert(c2 != NULL);
730 637
731 /* only need to zero these out due to (unused) snr calculation */ 638 /* only need to zero these out due to (unused) snr calculation */
732 639
733 for(i=0; i<2; i++) 640 for (i = 0; i < 2; i++)
734 for(j=1; j<=MAX_AMP; j++) 641 for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0;
735 model[i].A[j] = 0.0;
736 642
737 /* unpack bits from channel ------------------------------------*/ 643 /* unpack bits from channel ------------------------------------*/
738 644
739 /* this will partially fill the model params for the 2 x 10ms 645 /* this will partially fill the model params for the 2 x 10ms
740 frames */ 646 frames */
741 647
742 model[0].voiced = unpack(bits, &nbit, 1); 648 model[0].voiced = unpack(bits, &nbit, 1);
743 649
744 model[1].voiced = unpack(bits, &nbit, 1); 650 model[1].voiced = unpack(bits, &nbit, 1);
745 WoE_index = unpack(bits, &nbit, WO_E_BITS); 651 WoE_index = unpack(bits, &nbit, WO_E_BITS);
746 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); 652 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
747 653
748 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 654 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
749 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); 655 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
750 } 656 }
751 decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD); 657 decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD);
752 check_lsp_order(&lsps[1][0], LPC_ORD); 658 check_lsp_order(&lsps[1][0], LPC_ORD);
753 bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0); 659 bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0);
754 660
755 /* interpolate ------------------------------------------------*/ 661 /* interpolate ------------------------------------------------*/
756 662
757 /* Wo and energy are sampled every 20ms, so we interpolate just 1 663 /* Wo and energy are sampled every 20ms, so we interpolate just 1
758 10ms frame between 20ms samples */ 664 10ms frame between 20ms samples */
759 665
760 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); 666 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
761 e[0] = interp_energy(c2->prev_e_dec, e[1]); 667 e[0] = interp_energy(c2->prev_e_dec, e[1]);
762 668
763 /* LSPs are sampled every 20ms so we interpolate the frame in 669 /* LSPs are sampled every 20ms so we interpolate the frame in
764 between, then recover spectral amplitudes */ 670 between, then recover spectral amplitudes */
765 671
766 interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); 672 interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5,
767 for(i=0; i<2; i++) { 673 LPC_ORD);
768 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); 674 for (i = 0; i < 2; i++) {
769 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, 675 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
770 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); 676 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
771 apply_lpc_correction(&model[i]); 677 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
772 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); 678 apply_lpc_correction(&model[i]);
773 679 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0);
774 /* dump parameters for deep learning experiments */ 680
775 681 /* dump parameters for deep learning experiments */
776 if (c2->fmlfeat != NULL) { 682
777 /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ 683 if (c2->fmlfeat != NULL) {
778 fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); 684 /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
779 fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); 685 fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat);
780 fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); 686 fwrite(&e[i], 1, sizeof(float), c2->fmlfeat);
781 float voiced_float = model[i].voiced; 687 fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat);
782 fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); 688 float voiced_float = model[i].voiced;
783 fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); 689 fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat);
784 } 690 fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat);
785 } 691 }
692 }
786 693
787 /* update memories for next frame ----------------------------*/ 694 /* update memories for next frame ----------------------------*/
788 695
789 c2->prev_model_dec = model[1]; 696 c2->prev_model_dec = model[1];
790 c2->prev_e_dec = e[1]; 697 c2->prev_e_dec = e[1];
791 for(i=0; i<LPC_ORD; i++) 698 for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[1][i];
792 c2->prev_lsps_dec[i] = lsps[1][i];
793} 699}
794 700
795
796/*---------------------------------------------------------------------------*\ 701/*---------------------------------------------------------------------------*\
797 702
798 FUNCTION....: codec2_encode_1600 703 FUNCTION....: codec2_encode_1600
@@ -821,65 +726,64 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char *
821 726
822\*---------------------------------------------------------------------------*/ 727\*---------------------------------------------------------------------------*/
823 728
824void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) 729void codec2_encode_1600(struct CODEC2 *c2, unsigned char *bits,
825{ 730 short speech[]) {
826 MODEL model; 731 MODEL model;
827 float lsps[LPC_ORD]; 732 float lsps[LPC_ORD];
828 float ak[LPC_ORD+1]; 733 float ak[LPC_ORD + 1];
829 float e; 734 float e;
830 int lsp_indexes[LPC_ORD]; 735 int lsp_indexes[LPC_ORD];
831 int Wo_index, e_index; 736 int Wo_index, e_index;
832 int i; 737 int i;
833 unsigned int nbit = 0; 738 unsigned int nbit = 0;
834 739
835 assert(c2 != NULL); 740 assert(c2 != NULL);
836 741
837 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 742 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
838 743
839 /* frame 1: - voicing ---------------------------------------------*/ 744 /* frame 1: - voicing ---------------------------------------------*/
840 745
841 analyse_one_frame(c2, &model, speech); 746 analyse_one_frame(c2, &model, speech);
842 pack(bits, &nbit, model.voiced, 1); 747 pack(bits, &nbit, model.voiced, 1);
843 748
844 /* frame 2: - voicing, scalar Wo & E -------------------------------*/ 749 /* frame 2: - voicing, scalar Wo & E -------------------------------*/
845 750
846 analyse_one_frame(c2, &model, &speech[c2->n_samp]); 751 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
847 pack(bits, &nbit, model.voiced, 1); 752 pack(bits, &nbit, model.voiced, 1);
848 753
849 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); 754 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
850 pack(bits, &nbit, Wo_index, WO_BITS); 755 pack(bits, &nbit, Wo_index, WO_BITS);
851 756
852 /* need to run this just to get LPC energy */ 757 /* need to run this just to get LPC energy */
853 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 758 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
854 e_index = encode_energy(e, E_BITS); 759 e_index = encode_energy(e, E_BITS);
855 pack(bits, &nbit, e_index, E_BITS); 760 pack(bits, &nbit, e_index, E_BITS);
856 761
857 /* frame 3: - voicing ---------------------------------------------*/ 762 /* frame 3: - voicing ---------------------------------------------*/
858 763
859 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); 764 analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]);
860 pack(bits, &nbit, model.voiced, 1); 765 pack(bits, &nbit, model.voiced, 1);
861 766
862 /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ 767 /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
863 768
864 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); 769 analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]);
865 pack(bits, &nbit, model.voiced, 1); 770 pack(bits, &nbit, model.voiced, 1);
866 771
867 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); 772 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
868 pack(bits, &nbit, Wo_index, WO_BITS); 773 pack(bits, &nbit, Wo_index, WO_BITS);
869 774
870 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 775 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
871 e_index = encode_energy(e, E_BITS); 776 e_index = encode_energy(e, E_BITS);
872 pack(bits, &nbit, e_index, E_BITS); 777 pack(bits, &nbit, e_index, E_BITS);
873 778
874 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); 779 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
875 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 780 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
876 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); 781 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
877 } 782 }
878 783
879 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 784 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
880} 785}
881 786
882
883/*---------------------------------------------------------------------------*\ 787/*---------------------------------------------------------------------------*\
884 788
885 FUNCTION....: codec2_decode_1600 789 FUNCTION....: codec2_decode_1600
@@ -890,91 +794,89 @@ void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[])
890 794
891\*---------------------------------------------------------------------------*/ 795\*---------------------------------------------------------------------------*/
892 796
893void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits) 797void codec2_decode_1600(struct CODEC2 *c2, short speech[],
894{ 798 const unsigned char *bits) {
895 MODEL model[4]; 799 MODEL model[4];
896 int lsp_indexes[LPC_ORD]; 800 int lsp_indexes[LPC_ORD];
897 float lsps[4][LPC_ORD]; 801 float lsps[4][LPC_ORD];
898 int Wo_index, e_index; 802 int Wo_index, e_index;
899 float e[4]; 803 float e[4];
900 float snr; 804 float snr;
901 float ak[4][LPC_ORD+1]; 805 float ak[4][LPC_ORD + 1];
902 int i,j; 806 int i, j;
903 unsigned int nbit = 0; 807 unsigned int nbit = 0;
904 float weight; 808 float weight;
905 COMP Aw[FFT_ENC]; 809 COMP Aw[FFT_ENC];
906 810
907 assert(c2 != NULL); 811 assert(c2 != NULL);
908 812
909 /* only need to zero these out due to (unused) snr calculation */ 813 /* only need to zero these out due to (unused) snr calculation */
910 814
911 for(i=0; i<4; i++) 815 for (i = 0; i < 4; i++)
912 for(j=1; j<=MAX_AMP; j++) 816 for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0;
913 model[i].A[j] = 0.0; 817
914 818 /* unpack bits from channel ------------------------------------*/
915 /* unpack bits from channel ------------------------------------*/ 819
916 820 /* this will partially fill the model params for the 4 x 10ms
917 /* this will partially fill the model params for the 4 x 10ms 821 frames */
918 frames */ 822
919 823 model[0].voiced = unpack(bits, &nbit, 1);
920 model[0].voiced = unpack(bits, &nbit, 1); 824
921 825 model[1].voiced = unpack(bits, &nbit, 1);
922 model[1].voiced = unpack(bits, &nbit, 1); 826 Wo_index = unpack(bits, &nbit, WO_BITS);
923 Wo_index = unpack(bits, &nbit, WO_BITS); 827 model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
924 model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); 828 model[1].L = PI / model[1].Wo;
925 model[1].L = PI/model[1].Wo; 829
926 830 e_index = unpack(bits, &nbit, E_BITS);
927 e_index = unpack(bits, &nbit, E_BITS); 831 e[1] = decode_energy(e_index, E_BITS);
928 e[1] = decode_energy(e_index, E_BITS); 832
929 833 model[2].voiced = unpack(bits, &nbit, 1);
930 model[2].voiced = unpack(bits, &nbit, 1); 834
931 835 model[3].voiced = unpack(bits, &nbit, 1);
932 model[3].voiced = unpack(bits, &nbit, 1); 836 Wo_index = unpack(bits, &nbit, WO_BITS);
933 Wo_index = unpack(bits, &nbit, WO_BITS); 837 model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
934 model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); 838 model[3].L = PI / model[3].Wo;
935 model[3].L = PI/model[3].Wo; 839
936 840 e_index = unpack(bits, &nbit, E_BITS);
937 e_index = unpack(bits, &nbit, E_BITS); 841 e[3] = decode_energy(e_index, E_BITS);
938 e[3] = decode_energy(e_index, E_BITS); 842
939 843 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
940 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 844 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
941 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); 845 }
942 } 846 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
943 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); 847 check_lsp_order(&lsps[3][0], LPC_ORD);
944 check_lsp_order(&lsps[3][0], LPC_ORD); 848 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
945 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); 849
946 850 /* interpolate ------------------------------------------------*/
947 /* interpolate ------------------------------------------------*/ 851
948 852 /* Wo and energy are sampled every 20ms, so we interpolate just 1
949 /* Wo and energy are sampled every 20ms, so we interpolate just 1 853 10ms frame between 20ms samples */
950 10ms frame between 20ms samples */ 854
951 855 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
952 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); 856 e[0] = interp_energy(c2->prev_e_dec, e[1]);
953 e[0] = interp_energy(c2->prev_e_dec, e[1]); 857 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
954 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); 858 e[2] = interp_energy(e[1], e[3]);
955 e[2] = interp_energy(e[1], e[3]); 859
956 860 /* LSPs are sampled every 40ms so we interpolate the 3 frames in
957 /* LSPs are sampled every 40ms so we interpolate the 3 frames in 861 between, then recover spectral amplitudes */
958 between, then recover spectral amplitudes */ 862
959 863 for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) {
960 for(i=0, weight=0.25; i<3; i++, weight += 0.25) { 864 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight,
961 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); 865 LPC_ORD);
962 } 866 }
963 for(i=0; i<4; i++) { 867 for (i = 0; i < 4; i++) {
964 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); 868 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
965 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, 869 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
966 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); 870 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
967 apply_lpc_correction(&model[i]); 871 apply_lpc_correction(&model[i]);
968 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); 872 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0);
969 } 873 }
970 874
971 /* update memories for next frame ----------------------------*/ 875 /* update memories for next frame ----------------------------*/
972 876
973 c2->prev_model_dec = model[3]; 877 c2->prev_model_dec = model[3];
974 c2->prev_e_dec = e[3]; 878 c2->prev_e_dec = e[3];
975 for(i=0; i<LPC_ORD; i++) 879 for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i];
976 c2->prev_lsps_dec[i] = lsps[3][i];
977
978} 880}
979 881
980/*---------------------------------------------------------------------------*\ 882/*---------------------------------------------------------------------------*\
@@ -1004,60 +906,59 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char *
1004 906
1005\*---------------------------------------------------------------------------*/ 907\*---------------------------------------------------------------------------*/
1006 908
1007void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) 909void codec2_encode_1400(struct CODEC2 *c2, unsigned char *bits,
1008{ 910 short speech[]) {
1009 MODEL model; 911 MODEL model;
1010 float lsps[LPC_ORD]; 912 float lsps[LPC_ORD];
1011 float ak[LPC_ORD+1]; 913 float ak[LPC_ORD + 1];
1012 float e; 914 float e;
1013 int lsp_indexes[LPC_ORD]; 915 int lsp_indexes[LPC_ORD];
1014 int WoE_index; 916 int WoE_index;
1015 int i; 917 int i;
1016 unsigned int nbit = 0; 918 unsigned int nbit = 0;
1017 919
1018 assert(c2 != NULL); 920 assert(c2 != NULL);
1019 921
1020 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 922 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1021 923
1022 /* frame 1: - voicing ---------------------------------------------*/ 924 /* frame 1: - voicing ---------------------------------------------*/
1023 925
1024 analyse_one_frame(c2, &model, speech); 926 analyse_one_frame(c2, &model, speech);
1025 pack(bits, &nbit, model.voiced, 1); 927 pack(bits, &nbit, model.voiced, 1);
1026 928
1027 /* frame 2: - voicing, joint Wo & E -------------------------------*/ 929 /* frame 2: - voicing, joint Wo & E -------------------------------*/
1028 930
1029 analyse_one_frame(c2, &model, &speech[c2->n_samp]); 931 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
1030 pack(bits, &nbit, model.voiced, 1); 932 pack(bits, &nbit, model.voiced, 1);
1031 933
1032 /* need to run this just to get LPC energy */ 934 /* need to run this just to get LPC energy */
1033 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 935 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1034 936
1035 WoE_index = encode_WoE(&model, e, c2->xq_enc); 937 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1036 pack(bits, &nbit, WoE_index, WO_E_BITS); 938 pack(bits, &nbit, WoE_index, WO_E_BITS);
1037 939
1038 /* frame 3: - voicing ---------------------------------------------*/ 940 /* frame 3: - voicing ---------------------------------------------*/
1039 941
1040 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); 942 analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]);
1041 pack(bits, &nbit, model.voiced, 1); 943 pack(bits, &nbit, model.voiced, 1);
1042 944
1043 /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ 945 /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/
1044 946
1045 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); 947 analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]);
1046 pack(bits, &nbit, model.voiced, 1); 948 pack(bits, &nbit, model.voiced, 1);
1047 949
1048 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 950 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1049 WoE_index = encode_WoE(&model, e, c2->xq_enc); 951 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1050 pack(bits, &nbit, WoE_index, WO_E_BITS); 952 pack(bits, &nbit, WoE_index, WO_E_BITS);
1051 953
1052 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); 954 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
1053 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 955 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
1054 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); 956 pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
1055 } 957 }
1056 958
1057 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 959 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1058} 960}
1059 961
1060
1061/*---------------------------------------------------------------------------*\ 962/*---------------------------------------------------------------------------*\
1062 963
1063 FUNCTION....: codec2_decode_1400 964 FUNCTION....: codec2_decode_1400
@@ -1068,83 +969,81 @@ void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[])
1068 969
1069\*---------------------------------------------------------------------------*/ 970\*---------------------------------------------------------------------------*/
1070 971
1071void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits) 972void codec2_decode_1400(struct CODEC2 *c2, short speech[],
1072{ 973 const unsigned char *bits) {
1073 MODEL model[4]; 974 MODEL model[4];
1074 int lsp_indexes[LPC_ORD]; 975 int lsp_indexes[LPC_ORD];
1075 float lsps[4][LPC_ORD]; 976 float lsps[4][LPC_ORD];
1076 int WoE_index; 977 int WoE_index;
1077 float e[4]; 978 float e[4];
1078 float snr; 979 float snr;
1079 float ak[4][LPC_ORD+1]; 980 float ak[4][LPC_ORD + 1];
1080 int i,j; 981 int i, j;
1081 unsigned int nbit = 0; 982 unsigned int nbit = 0;
1082 float weight; 983 float weight;
1083 COMP Aw[FFT_ENC]; 984 COMP Aw[FFT_ENC];
1084 985
1085 assert(c2 != NULL); 986 assert(c2 != NULL);
1086 987
1087 /* only need to zero these out due to (unused) snr calculation */ 988 /* only need to zero these out due to (unused) snr calculation */
1088 989
1089 for(i=0; i<4; i++) 990 for (i = 0; i < 4; i++)
1090 for(j=1; j<=MAX_AMP; j++) 991 for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0;
1091 model[i].A[j] = 0.0; 992
1092 993 /* unpack bits from channel ------------------------------------*/
1093 /* unpack bits from channel ------------------------------------*/ 994
1094 995 /* this will partially fill the model params for the 4 x 10ms
1095 /* this will partially fill the model params for the 4 x 10ms 996 frames */
1096 frames */ 997
1097 998 model[0].voiced = unpack(bits, &nbit, 1);
1098 model[0].voiced = unpack(bits, &nbit, 1); 999
1099 1000 model[1].voiced = unpack(bits, &nbit, 1);
1100 model[1].voiced = unpack(bits, &nbit, 1); 1001 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1101 WoE_index = unpack(bits, &nbit, WO_E_BITS); 1002 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
1102 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); 1003
1103 1004 model[2].voiced = unpack(bits, &nbit, 1);
1104 model[2].voiced = unpack(bits, &nbit, 1); 1005
1105 1006 model[3].voiced = unpack(bits, &nbit, 1);
1106 model[3].voiced = unpack(bits, &nbit, 1); 1007 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1107 WoE_index = unpack(bits, &nbit, WO_E_BITS); 1008 decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
1108 decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); 1009
1109 1010 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
1110 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 1011 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
1111 lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); 1012 }
1112 } 1013 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
1113 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); 1014 check_lsp_order(&lsps[3][0], LPC_ORD);
1114 check_lsp_order(&lsps[3][0], LPC_ORD); 1015 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
1115 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); 1016
1116 1017 /* interpolate ------------------------------------------------*/
1117 /* interpolate ------------------------------------------------*/ 1018
1118 1019 /* Wo and energy are sampled every 20ms, so we interpolate just 1
1119 /* Wo and energy are sampled every 20ms, so we interpolate just 1 1020 10ms frame between 20ms samples */
1120 10ms frame between 20ms samples */ 1021
1121 1022 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
1122 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); 1023 e[0] = interp_energy(c2->prev_e_dec, e[1]);
1123 e[0] = interp_energy(c2->prev_e_dec, e[1]); 1024 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
1124 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); 1025 e[2] = interp_energy(e[1], e[3]);
1125 e[2] = interp_energy(e[1], e[3]); 1026
1126 1027 /* LSPs are sampled every 40ms so we interpolate the 3 frames in
1127 /* LSPs are sampled every 40ms so we interpolate the 3 frames in 1028 between, then recover spectral amplitudes */
1128 between, then recover spectral amplitudes */ 1029
1129 1030 for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) {
1130 for(i=0, weight=0.25; i<3; i++, weight += 0.25) { 1031 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight,
1131 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); 1032 LPC_ORD);
1132 } 1033 }
1133 for(i=0; i<4; i++) { 1034 for (i = 0; i < 4; i++) {
1134 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); 1035 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1135 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, 1036 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1136 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); 1037 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1137 apply_lpc_correction(&model[i]); 1038 apply_lpc_correction(&model[i]);
1138 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); 1039 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0);
1139 } 1040 }
1140 1041
1141 /* update memories for next frame ----------------------------*/ 1042 /* update memories for next frame ----------------------------*/
1142 1043
1143 c2->prev_model_dec = model[3]; 1044 c2->prev_model_dec = model[3];
1144 c2->prev_e_dec = e[3]; 1045 c2->prev_e_dec = e[3];
1145 for(i=0; i<LPC_ORD; i++) 1046 for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i];
1146 c2->prev_lsps_dec[i] = lsps[3][i];
1147
1148} 1047}
1149 1048
1150/*---------------------------------------------------------------------------*\ 1049/*---------------------------------------------------------------------------*\
@@ -1175,66 +1074,56 @@ void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char *
1175 1074
1176\*---------------------------------------------------------------------------*/ 1075\*---------------------------------------------------------------------------*/
1177 1076
1178void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) 1077void codec2_encode_1300(struct CODEC2 *c2, unsigned char *bits,
1179{ 1078 short speech[]) {
1180 MODEL model; 1079 MODEL model;
1181 float lsps[LPC_ORD]; 1080 float lsps[LPC_ORD];
1182 float ak[LPC_ORD+1]; 1081 float ak[LPC_ORD + 1];
1183 float e; 1082 float e;
1184 int lsp_indexes[LPC_ORD]; 1083 int lsp_indexes[LPC_ORD];
1185 int Wo_index, e_index; 1084 int Wo_index, e_index;
1186 int i; 1085 int i;
1187 unsigned int nbit = 0; 1086 unsigned int nbit = 0;
1188 //#ifdef PROFILE
1189 //unsigned int quant_start;
1190 //#endif
1191 1087
1192 assert(c2 != NULL); 1088 assert(c2 != NULL);
1193 1089
1194 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 1090 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1195 1091
1196 /* frame 1: - voicing ---------------------------------------------*/ 1092 /* frame 1: - voicing ---------------------------------------------*/
1197 1093
1198 analyse_one_frame(c2, &model, speech); 1094 analyse_one_frame(c2, &model, speech);
1199 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); 1095 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1200 1096
1201 /* frame 2: - voicing ---------------------------------------------*/ 1097 /* frame 2: - voicing ---------------------------------------------*/
1202 1098
1203 analyse_one_frame(c2, &model, &speech[c2->n_samp]); 1099 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
1204 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); 1100 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1205 1101
1206 /* frame 3: - voicing ---------------------------------------------*/ 1102 /* frame 3: - voicing ---------------------------------------------*/
1207 1103
1208 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); 1104 analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]);
1209 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); 1105 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1210 1106
1211 /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ 1107 /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
1212 1108
1213 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); 1109 analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]);
1214 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); 1110 pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
1215 1111
1216 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); 1112 Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
1217 pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); 1113 pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray);
1218 1114
1219 //#ifdef PROFILE 1115 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1220 //quant_start = machdep_profile_sample(); 1116 e_index = encode_energy(e, E_BITS);
1221 //#endif 1117 pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray);
1222 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1223 e_index = encode_energy(e, E_BITS);
1224 pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray);
1225 1118
1226 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); 1119 encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
1227 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 1120 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
1228 pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray); 1121 pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray);
1229 } 1122 }
1230 //#ifdef PROFILE
1231 //machdep_profile_sample_and_log(quant_start, " quant/packing");
1232 //#endif
1233 1123
1234 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 1124 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1235} 1125}
1236 1126
1237
1238/*---------------------------------------------------------------------------*\ 1127/*---------------------------------------------------------------------------*\
1239 1128
1240 FUNCTION....: codec2_decode_1300 1129 FUNCTION....: codec2_decode_1300
@@ -1244,118 +1133,106 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
1244 Decodes frames of 52 bits into 320 samples (40ms) of speech. 1133 Decodes frames of 52 bits into 320 samples (40ms) of speech.
1245 1134
1246\*---------------------------------------------------------------------------*/ 1135\*---------------------------------------------------------------------------*/
1247static int frames;
1248void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est)
1249{
1250 MODEL model[4];
1251 int lsp_indexes[LPC_ORD];
1252 float lsps[4][LPC_ORD];
1253 int Wo_index, e_index;
1254 float e[4];
1255 float snr;
1256 float ak[4][LPC_ORD+1];
1257 int i,j;
1258 unsigned int nbit = 0;
1259 float weight;
1260 COMP Aw[FFT_ENC];
1261 //PROFILE_VAR(recover_start);
1262
1263 assert(c2 != NULL);
1264 frames+= 4;
1265 /* only need to zero these out due to (unused) snr calculation */
1266
1267 for(i=0; i<4; i++)
1268 for(j=1; j<=MAX_AMP; j++)
1269 model[i].A[j] = 0.0;
1270
1271 /* unpack bits from channel ------------------------------------*/
1272
1273 /* this will partially fill the model params for the 4 x 10ms
1274 frames */
1275
1276 model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1277 model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1278 model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1279 model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1280
1281 Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray);
1282 model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
1283 model[3].L = PI/model[3].Wo;
1284 1136
1285 e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); 1137void codec2_decode_1300(struct CODEC2 *c2, short speech[],
1286 e[3] = decode_energy(e_index, E_BITS); 1138 const unsigned char *bits, float ber_est) {
1287 //fprintf(stderr, "%d %f\n", e_index, e[3]); 1139 MODEL model[4];
1140 int lsp_indexes[LPC_ORD];
1141 float lsps[4][LPC_ORD];
1142 int Wo_index, e_index;
1143 float e[4];
1144 float snr;
1145 float ak[4][LPC_ORD + 1];
1146 int i, j;
1147 unsigned int nbit = 0;
1148 float weight;
1149 COMP Aw[FFT_ENC];
1150
1151 assert(c2 != NULL);
1152
1153 /* only need to zero these out due to (unused) snr calculation */
1154
1155 for (i = 0; i < 4; i++)
1156 for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0;
1157
1158 /* unpack bits from channel ------------------------------------*/
1159
1160 /* this will partially fill the model params for the 4 x 10ms
1161 frames */
1162
1163 model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1164 model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1165 model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1166 model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
1167
1168 Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray);
1169 model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
1170 model[3].L = PI / model[3].Wo;
1171
1172 e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
1173 e[3] = decode_energy(e_index, E_BITS);
1174
1175 for (i = 0; i < LSP_SCALAR_INDEXES; i++) {
1176 lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray);
1177 }
1178 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
1179 check_lsp_order(&lsps[3][0], LPC_ORD);
1180 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
1181
1182 if (ber_est > 0.15) {
1183 model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0;
1184 e[3] = decode_energy(10, E_BITS);
1185 bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0);
1186 // fprintf(stderr, "soft mute\n");
1187 }
1188
1189 /* interpolate ------------------------------------------------*/
1190
1191 /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
1192 the 3 frames in between */
1193
1194 for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) {
1195 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight,
1196 LPC_ORD);
1197 interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight,
1198 c2->c2const.Wo_min);
1199 e[i] = interp_energy2(c2->prev_e_dec, e[3], weight);
1200 }
1201
1202 /* then recover spectral amplitudes */
1203
1204 for (i = 0; i < 4; i++) {
1205 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1206 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1207 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1208 apply_lpc_correction(&model[i]);
1209 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0);
1210
1211 /* dump parameters for deep learning experiments */
1288 1212
1289 for(i=0; i<LSP_SCALAR_INDEXES; i++) { 1213 if (c2->fmlfeat != NULL) {
1290 lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray); 1214 /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
1291 } 1215 fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat);
1292 decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); 1216 fwrite(&e[i], 1, sizeof(float), c2->fmlfeat);
1293 check_lsp_order(&lsps[3][0], LPC_ORD); 1217 fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat);
1294 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); 1218 float voiced_float = model[i].voiced;
1295 1219 fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat);
1296 if (ber_est > 0.15) { 1220 fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat);
1297 model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0; 1221 }
1298 e[3] = decode_energy(10, E_BITS); 1222 }
1299 bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0); 1223
1300 //fprintf(stderr, "soft mute\n"); 1224#ifdef DUMP
1301 } 1225 dump_lsp_(&lsps[3][0]);
1302 1226 dump_ak_(&ak[3][0], LPC_ORD);
1303 /* interpolate ------------------------------------------------*/ 1227#endif
1304
1305 /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
1306 the 3 frames in between */
1307
1308 //PROFILE_SAMPLE(recover_start);
1309 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1310 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
1311 interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
1312 e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
1313 }
1314 1228
1315 /* then recover spectral amplitudes */ 1229 /* update memories for next frame ----------------------------*/
1316
1317 for(i=0; i<4; i++) {
1318 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1319 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1320 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1321 apply_lpc_correction(&model[i]);
1322 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1323
1324 /* dump parameters for deep learning experiments */
1325
1326 if (c2->fmlfeat != NULL) {
1327 /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
1328 fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat);
1329 fwrite(&e[i], 1, sizeof(float), c2->fmlfeat);
1330 fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat);
1331 float voiced_float = model[i].voiced;
1332 fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat);
1333 fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat);
1334 }
1335 }
1336 /*
1337 for(i=0; i<4; i++) {
1338 printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced);
1339 }
1340 if (frames == 4*50)
1341 exit(0);
1342 */
1343 //PROFILE_SAMPLE_AND_LOG2(recover_start, " recover");
1344 #ifdef DUMP
1345 dump_lsp_(&lsps[3][0]);
1346 dump_ak_(&ak[3][0], LPC_ORD);
1347 #endif
1348
1349 /* update memories for next frame ----------------------------*/
1350
1351 c2->prev_model_dec = model[3];
1352 c2->prev_e_dec = e[3];
1353 for(i=0; i<LPC_ORD; i++)
1354 c2->prev_lsps_dec[i] = lsps[3][i];
1355 1230
1231 c2->prev_model_dec = model[3];
1232 c2->prev_e_dec = e[3];
1233 for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i];
1356} 1234}
1357 1235
1358
1359/*---------------------------------------------------------------------------*\ 1236/*---------------------------------------------------------------------------*\
1360 1237
1361 FUNCTION....: codec2_encode_1200 1238 FUNCTION....: codec2_encode_1200
@@ -1384,63 +1261,62 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char *
1384 1261
1385\*---------------------------------------------------------------------------*/ 1262\*---------------------------------------------------------------------------*/
1386 1263
1387void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) 1264void codec2_encode_1200(struct CODEC2 *c2, unsigned char *bits,
1388{ 1265 short speech[]) {
1389 MODEL model; 1266 MODEL model;
1390 float lsps[LPC_ORD]; 1267 float lsps[LPC_ORD];
1391 float lsps_[LPC_ORD]; 1268 float lsps_[LPC_ORD];
1392 float ak[LPC_ORD+1]; 1269 float ak[LPC_ORD + 1];
1393 float e; 1270 float e;
1394 int lsp_indexes[LPC_ORD]; 1271 int lsp_indexes[LPC_ORD];
1395 int WoE_index; 1272 int WoE_index;
1396 int i; 1273 int i;
1397 int spare = 0; 1274 int spare = 0;
1398 unsigned int nbit = 0; 1275 unsigned int nbit = 0;
1399 1276
1400 assert(c2 != NULL); 1277 assert(c2 != NULL);
1401 1278
1402 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 1279 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1403 1280
1404 /* frame 1: - voicing ---------------------------------------------*/ 1281 /* frame 1: - voicing ---------------------------------------------*/
1405 1282
1406 analyse_one_frame(c2, &model, speech); 1283 analyse_one_frame(c2, &model, speech);
1407 pack(bits, &nbit, model.voiced, 1); 1284 pack(bits, &nbit, model.voiced, 1);
1408 1285
1409 /* frame 2: - voicing, joint Wo & E -------------------------------*/ 1286 /* frame 2: - voicing, joint Wo & E -------------------------------*/
1410 1287
1411 analyse_one_frame(c2, &model, &speech[c2->n_samp]); 1288 analyse_one_frame(c2, &model, &speech[c2->n_samp]);
1412 pack(bits, &nbit, model.voiced, 1); 1289 pack(bits, &nbit, model.voiced, 1);
1413 1290
1414 /* need to run this just to get LPC energy */ 1291 /* need to run this just to get LPC energy */
1415 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 1292 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1416 1293
1417 WoE_index = encode_WoE(&model, e, c2->xq_enc); 1294 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1418 pack(bits, &nbit, WoE_index, WO_E_BITS); 1295 pack(bits, &nbit, WoE_index, WO_E_BITS);
1419 1296
1420 /* frame 3: - voicing ---------------------------------------------*/ 1297 /* frame 3: - voicing ---------------------------------------------*/
1421 1298
1422 analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); 1299 analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]);
1423 pack(bits, &nbit, model.voiced, 1); 1300 pack(bits, &nbit, model.voiced, 1);
1424 1301
1425 /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ 1302 /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/
1426 1303
1427 analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); 1304 analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]);
1428 pack(bits, &nbit, model.voiced, 1); 1305 pack(bits, &nbit, model.voiced, 1);
1429 1306
1430 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); 1307 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
1431 WoE_index = encode_WoE(&model, e, c2->xq_enc); 1308 WoE_index = encode_WoE(&model, e, c2->xq_enc);
1432 pack(bits, &nbit, WoE_index, WO_E_BITS); 1309 pack(bits, &nbit, WoE_index, WO_E_BITS);
1433 1310
1434 encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); 1311 encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD);
1435 for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { 1312 for (i = 0; i < LSP_PRED_VQ_INDEXES; i++) {
1436 pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i)); 1313 pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i));
1437 } 1314 }
1438 pack(bits, &nbit, spare, 1); 1315 pack(bits, &nbit, spare, 1);
1439 1316
1440 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 1317 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1441} 1318}
1442 1319
1443
1444/*---------------------------------------------------------------------------*\ 1320/*---------------------------------------------------------------------------*\
1445 1321
1446 FUNCTION....: codec2_decode_1200 1322 FUNCTION....: codec2_decode_1200
@@ -1451,494 +1327,83 @@ void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[])
1451 1327
1452\*---------------------------------------------------------------------------*/ 1328\*---------------------------------------------------------------------------*/
1453 1329
1454void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits) 1330void codec2_decode_1200(struct CODEC2 *c2, short speech[],
1455{ 1331 const unsigned char *bits) {
1456 MODEL model[4]; 1332 MODEL model[4];
1457 int lsp_indexes[LPC_ORD]; 1333 int lsp_indexes[LPC_ORD];
1458 float lsps[4][LPC_ORD]; 1334 float lsps[4][LPC_ORD];
1459 int WoE_index; 1335 int WoE_index;
1460 float e[4]; 1336 float e[4];
1461 float snr; 1337 float snr;
1462 float ak[4][LPC_ORD+1]; 1338 float ak[4][LPC_ORD + 1];
1463 int i,j; 1339 int i, j;
1464 unsigned int nbit = 0; 1340 unsigned int nbit = 0;
1465 float weight; 1341 float weight;
1466 COMP Aw[FFT_ENC]; 1342 COMP Aw[FFT_ENC];
1467 1343
1468 assert(c2 != NULL); 1344 assert(c2 != NULL);
1469 1345
1470 /* only need to zero these out due to (unused) snr calculation */ 1346 /* only need to zero these out due to (unused) snr calculation */
1471 1347
1472 for(i=0; i<4; i++) 1348 for (i = 0; i < 4; i++)
1473 for(j=1; j<=MAX_AMP; j++) 1349 for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0;
1474 model[i].A[j] = 0.0; 1350
1475 1351 /* unpack bits from channel ------------------------------------*/
1476 /* unpack bits from channel ------------------------------------*/ 1352
1477 1353 /* this will partially fill the model params for the 4 x 10ms
1478 /* this will partially fill the model params for the 4 x 10ms 1354 frames */
1479 frames */ 1355
1480 1356 model[0].voiced = unpack(bits, &nbit, 1);
1481 model[0].voiced = unpack(bits, &nbit, 1); 1357
1482 1358 model[1].voiced = unpack(bits, &nbit, 1);
1483 model[1].voiced = unpack(bits, &nbit, 1); 1359 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1484 WoE_index = unpack(bits, &nbit, WO_E_BITS); 1360 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
1485 decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); 1361
1486 1362 model[2].voiced = unpack(bits, &nbit, 1);
1487 model[2].voiced = unpack(bits, &nbit, 1); 1363
1488 1364 model[3].voiced = unpack(bits, &nbit, 1);
1489 model[3].voiced = unpack(bits, &nbit, 1); 1365 WoE_index = unpack(bits, &nbit, WO_E_BITS);
1490 WoE_index = unpack(bits, &nbit, WO_E_BITS); 1366 decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
1491 decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); 1367
1492 1368 for (i = 0; i < LSP_PRED_VQ_INDEXES; i++) {
1493 for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { 1369 lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i));
1494 lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i)); 1370 }
1495 } 1371 decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD, 0);
1496 decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD , 0); 1372 check_lsp_order(&lsps[3][0], LPC_ORD);
1497 check_lsp_order(&lsps[3][0], LPC_ORD); 1373 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
1498 bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); 1374
1499 1375 /* interpolate ------------------------------------------------*/
1500 /* interpolate ------------------------------------------------*/ 1376
1501 1377 /* Wo and energy are sampled every 20ms, so we interpolate just 1
1502 /* Wo and energy are sampled every 20ms, so we interpolate just 1 1378 10ms frame between 20ms samples */
1503 10ms frame between 20ms samples */ 1379
1504 1380 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
1505 interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); 1381 e[0] = interp_energy(c2->prev_e_dec, e[1]);
1506 e[0] = interp_energy(c2->prev_e_dec, e[1]); 1382 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
1507 interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); 1383 e[2] = interp_energy(e[1], e[3]);
1508 e[2] = interp_energy(e[1], e[3]); 1384
1509 1385 /* LSPs are sampled every 40ms so we interpolate the 3 frames in
1510 /* LSPs are sampled every 40ms so we interpolate the 3 frames in 1386 between, then recover spectral amplitudes */
1511 between, then recover spectral amplitudes */ 1387
1512 1388 for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) {
1513 for(i=0, weight=0.25; i<3; i++, weight += 0.25) { 1389 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight,
1514 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); 1390 LPC_ORD);
1515 } 1391 }
1516 for(i=0; i<4; i++) { 1392 for (i = 0; i < 4; i++) {
1517 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); 1393 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
1518 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, 1394 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
1519 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); 1395 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1520 apply_lpc_correction(&model[i]); 1396 apply_lpc_correction(&model[i]);
1521 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); 1397 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0);
1522 } 1398 }
1523 1399
1524 /* update memories for next frame ----------------------------*/ 1400 /* update memories for next frame ----------------------------*/
1525 1401
1526 c2->prev_model_dec = model[3]; 1402 c2->prev_model_dec = model[3];
1527 c2->prev_e_dec = e[3]; 1403 c2->prev_e_dec = e[3];
1528 for(i=0; i<LPC_ORD; i++) 1404 for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i];
1529 c2->prev_lsps_dec[i] = lsps[3][i];
1530} 1405}
1531 1406
1532
1533/*---------------------------------------------------------------------------*\
1534
1535 FUNCTION....: codec2_encode_700
1536 AUTHOR......: David Rowe
1537 DATE CREATED: April 2015
1538
1539 Encodes 320 speech samples (40ms of speech) into 28 bits.
1540
1541 The codec2 algorithm actually operates internally on 10ms (80
1542 sample) frames, so we run the encoding algorithm four times:
1543
1544 frame 0: nothing
1545 frame 1: nothing
1546 frame 2: nothing
1547 frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar, 2 spare
1548
1549 The bit allocation is:
1550
1551 Parameter frames 1-3 frame 4 Total
1552 -----------------------------------------------------------
1553 Harmonic magnitudes (LSPs) 0 17 17
1554 Energy 0 3 3
1555 log Wo 0 5 5
1556 Voicing 0 1 1
1557 spare 0 2 2
1558 TOTAL 0 28 28
1559
1560\*---------------------------------------------------------------------------*/
1561
1562void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[])
1563{
1564 MODEL model;
1565 float lsps[LPC_ORD_LOW];
1566 float mel[LPC_ORD_LOW];
1567 float ak[LPC_ORD_LOW+1];
1568 float e, f;
1569 int indexes[LPC_ORD_LOW];
1570 int Wo_index, e_index, i;
1571 unsigned int nbit = 0;
1572 float bpf_out[4*c2->n_samp];
1573 short bpf_speech[4*c2->n_samp];
1574 int spare = 0;
1575
1576 assert(c2 != NULL);
1577
1578 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1579
1580 /* band pass filter */
1581
1582 for(i=0; i<BPF_N; i++)
1583 c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i];
1584 for(i=0; i<4*c2->n_samp; i++)
1585 c2->bpf_buf[BPF_N+i] = speech[i];
1586 inverse_filter(&c2->bpf_buf[BPF_N], bpf, 4*c2->n_samp, bpf_out, BPF_N-1);
1587 for(i=0; i<4*c2->n_samp; i++)
1588 bpf_speech[i] = bpf_out[i];
1589
1590 /* frame 1 --------------------------------------------------------*/
1591
1592 analyse_one_frame(c2, &model, bpf_speech);
1593
1594 /* frame 2 --------------------------------------------------------*/
1595
1596 analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]);
1597
1598 /* frame 3 --------------------------------------------------------*/
1599
1600 analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]);
1601
1602 /* frame 4: - voicing, scalar Wo & E, scalar LSPs -----------------*/
1603
1604 analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
1605 pack(bits, &nbit, model.voiced, 1);
1606 Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
1607 pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
1608
1609 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
1610 e_index = encode_energy(e, 3);
1611 pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
1612
1613 for(i=0; i<LPC_ORD_LOW; i++) {
1614 f = (4000.0/PI)*lsps[i];
1615 mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
1616 }
1617 encode_mels_scalar(indexes, mel, LPC_ORD_LOW);
1618
1619 for(i=0; i<LPC_ORD_LOW; i++) {
1620 pack_natural_or_gray(bits, &nbit, indexes[i], mel_bits(i), c2->gray);
1621 }
1622
1623 pack_natural_or_gray(bits, &nbit, spare, 2, c2->gray);
1624
1625 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1626}
1627
1628
1629/*---------------------------------------------------------------------------*\
1630
1631 FUNCTION....: codec2_decode_700
1632 AUTHOR......: David Rowe
1633 DATE CREATED: April 2015
1634
1635 Decodes frames of 28 bits into 320 samples (40ms) of speech.
1636
1637\*---------------------------------------------------------------------------*/
1638
1639void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits)
1640{
1641 MODEL model[4];
1642 int indexes[LPC_ORD_LOW];
1643 float mel[LPC_ORD_LOW];
1644 float lsps[4][LPC_ORD_LOW];
1645 int Wo_index, e_index;
1646 float e[4];
1647 float snr, f_;
1648 float ak[4][LPC_ORD_LOW+1];
1649 int i,j;
1650 unsigned int nbit = 0;
1651 float weight;
1652 COMP Aw[FFT_ENC];
1653
1654 assert(c2 != NULL);
1655
1656 /* only need to zero these out due to (unused) snr calculation */
1657
1658 for(i=0; i<4; i++)
1659 for(j=1; j<=MAX_AMP; j++)
1660 model[i].A[j] = 0.0;
1661
1662 /* unpack bits from channel ------------------------------------*/
1663
1664 model[3].voiced = unpack(bits, &nbit, 1);
1665 model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced;
1666
1667 Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
1668 model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
1669 model[3].L = PI/model[3].Wo;
1670
1671 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
1672 e[3] = decode_energy(e_index, 3);
1673
1674 for(i=0; i<LPC_ORD_LOW; i++) {
1675 indexes[i] = unpack_natural_or_gray(bits, &nbit, mel_bits(i), c2->gray);
1676 }
1677
1678 decode_mels_scalar(mel, indexes, LPC_ORD_LOW);
1679 for(i=0; i<LPC_ORD_LOW; i++) {
1680 f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
1681 lsps[3][i] = f_*(PI/4000.0);
1682 //printf("lsps[3][%d] %f\n", i, lsps[3][i]);
1683 }
1684
1685 check_lsp_order(&lsps[3][0], LPC_ORD_LOW);
1686 bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0);
1687
1688 #ifdef MASK_NOT_FOR_NOW
1689 /* first pass at soft decn error masking, needs further work */
1690 /* If soft dec info available expand further for low power frames */
1691
1692 if (c2->softdec) {
1693 float e = 0.0;
1694 for(i=9; i<9+17; i++)
1695 e += c2->softdec[i]*c2->softdec[i];
1696 e /= 6.0;
1697 //fprintf(stderr, "e: %f\n", e);
1698 //if (e < 0.3)
1699 // bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 150.0, 300.0);
1700 }
1701 #endif
1702
1703 /* interpolate ------------------------------------------------*/
1704
1705 /* LSPs, Wo, and energy are sampled every 40ms so we interpolate
1706 the 3 frames in between, then recover spectral amplitudes */
1707
1708 for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
1709 interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
1710 interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
1711 e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
1712 }
1713 for(i=0; i<4; i++) {
1714 lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
1715 aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
1716 c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
1717 apply_lpc_correction(&model[i]);
1718 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
1719 }
1720
1721 #ifdef DUMP
1722 dump_lsp_(&lsps[3][0]);
1723 dump_ak_(&ak[3][0], LPC_ORD_LOW);
1724 dump_model(&model[3]);
1725 if (c2->softdec)
1726 dump_softdec(c2->softdec, nbit);
1727 #endif
1728
1729 /* update memories for next frame ----------------------------*/
1730
1731 c2->prev_model_dec = model[3];
1732 c2->prev_e_dec = e[3];
1733 for(i=0; i<LPC_ORD_LOW; i++)
1734 c2->prev_lsps_dec[i] = lsps[3][i];
1735}
1736
1737
1738/*---------------------------------------------------------------------------*\
1739
1740 FUNCTION....: codec2_encode_700b
1741 AUTHOR......: David Rowe
1742 DATE CREATED: August 2015
1743
1744 Version b of 700 bit/s codec. After some experiments over the air I
1745 wanted was unhappy with the rate 700 codec so spent a few weeks
1746 trying to improve the speech quality. This version uses a wider BPF
1747 and vector quantised mel-lsps.
1748
1749 Encodes 320 speech samples (40ms of speech) into 28 bits.
1750
1751 The codec2 algorithm actually operates internally on 10ms (80
1752 sample) frames, so we run the encoding algorithm four times:
1753
1754 frame 0: nothing
1755 frame 1: nothing
1756 frame 2: nothing
1757 frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ,
1758 1 spare
1759
1760 The bit allocation is:
1761
1762 Parameter frames 1-3 frame 4 Total
1763 -----------------------------------------------------------
1764 Harmonic magnitudes (LSPs) 0 18 18
1765 Energy 0 3 3
1766 log Wo 0 5 5
1767 Voicing 0 1 1
1768 spare 0 1 1
1769 TOTAL 0 28 28
1770
1771\*---------------------------------------------------------------------------*/
1772
1773void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[])
1774{
1775 MODEL model;
1776 float lsps[LPC_ORD_LOW];
1777 float mel[LPC_ORD_LOW];
1778 float mel_[LPC_ORD_LOW];
1779 float ak[LPC_ORD_LOW+1];
1780 float e, f;
1781 int indexes[3];
1782 int Wo_index, e_index, i;
1783 unsigned int nbit = 0;
1784 float bpf_out[4*c2->n_samp];
1785 short bpf_speech[4*c2->n_samp];
1786 int spare = 0;
1787
1788 assert(c2 != NULL);
1789
1790 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1791
1792 /* band pass filter */
1793
1794 for(i=0; i<BPF_N; i++)
1795 c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i];
1796 for(i=0; i<4*c2->n_samp; i++)
1797 c2->bpf_buf[BPF_N+i] = speech[i];
1798 inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*c2->n_samp, bpf_out, BPF_N-1);
1799 for(i=0; i<4*c2->n_samp; i++)
1800 bpf_speech[i] = bpf_out[i];
1801
1802 /* frame 1 --------------------------------------------------------*/
1803
1804 analyse_one_frame(c2, &model, bpf_speech);
1805
1806 /* frame 2 --------------------------------------------------------*/
1807
1808 analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]);
1809
1810 /* frame 3 --------------------------------------------------------*/
1811
1812 analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]);
1813
1814 /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/
1815
1816 analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
1817 pack(bits, &nbit, model.voiced, 1);
1818 Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
1819 pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
1820
1821 e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
1822 e_index = encode_energy(e, 3);
1823 pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
1824
1825 for(i=0; i<LPC_ORD_LOW; i++) {
1826 f = (4000.0/PI)*lsps[i];
1827 mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
1828 }
1829 lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5);
1830
1831 for(i=0; i<3; i++) {
1832 pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray);
1833 }
1834
1835 pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray);
1836
1837 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1838}
1839
1840
1841/*---------------------------------------------------------------------------*\
1842
1843 FUNCTION....: codec2_decode_700b
1844 AUTHOR......: David Rowe
1845 DATE CREATED: August 2015
1846
1847 Decodes frames of 28 bits into 320 samples (40ms) of speech.
1848
1849\*---------------------------------------------------------------------------*/
1850
void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL   model[4];                /* one model record per 10ms sub-frame  */
    int     indexes[3];              /* mel LSP VQ stage indexes             */
    float   mel[LPC_ORD_LOW];        /* decoded mel-scale LSPs               */
    float   lsps[4][LPC_ORD_LOW];    /* per-sub-frame LSPs (radians)         */
    int     Wo_index, e_index;
    float   e[4];                    /* per-sub-frame energy                 */
    float   snr, f_;
    float   ak[4][LPC_ORD_LOW+1];    /* per-sub-frame LPC coefficients       */
    int     i,j;
    unsigned int nbit = 0;           /* running bit cursor for unpack()      */
    float   weight;                  /* interpolation weight 0.25..0.75      */
    COMP    Aw[FFT_ENC];             /* LPC spectrum from aks_to_M2()        */

    assert(c2 != NULL);

    /* only need to zero these out due to (unused) snr calculation */

    for(i=0; i<4; i++)
	for(j=1; j<=MAX_AMP; j++)
	    model[i].A[j] = 0.0;

    /* unpack bits from channel ------------------------------------*/

    /* single voicing bit is shared by all four sub-frames */
    model[3].voiced = unpack(bits, &nbit, 1);
    model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced;

    Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
    model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
    model[3].L  = PI/model[3].Wo;

    e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
    e[3] = decode_energy(e_index, 3);

    for(i=0; i<3; i++) {
        indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray);
    }

    lspmelvq_decode(indexes, mel, LPC_ORD_LOW);

    /* enforce a minimum separation between adjacent mel LSPs: push a
       too-close pair apart by MEL_ROUND/2 each, then restart the scan
       from the beginning (i = 1, bumped to 2 by the loop increment) in
       case the adjustment broke ordering earlier in the vector */
    #define MEL_ROUND 10
    for(i=1; i<LPC_ORD_LOW; i++) {
        if (mel[i] <= mel[i-1]+MEL_ROUND) {
            mel[i]+=MEL_ROUND/2;
            mel[i-1]-=MEL_ROUND/2;
            i = 1;
        }
    }

    /* inverse mel warp back to LSP frequencies in radians */
    for(i=0; i<LPC_ORD_LOW; i++) {
        f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
        lsps[3][i] = f_*(PI/4000.0);
        //printf("lsps[3][%d] %f\n", i, lsps[3][i]);
    }

    /* interpolate ------------------------------------------------*/

    /* LSPs, Wo, and energy are sampled every 40ms so we interpolate
       the 3 frames in between, then recover spectral amplitudes */

    /* sub-frames 0..2 are blends of the previous decode (weight grows
       0.25 -> 0.75 toward the newly received sub-frame 3) */
    for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
        interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
        interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
        e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
    }
    /* LSPs -> LPC -> spectral magnitudes, then synthesise each 10ms frame */
    for(i=0; i<4; i++) {
	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
	apply_lpc_correction(&model[i]);
	synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
    }

    #ifdef DUMP
    dump_lsp_(&lsps[3][0]);
    dump_ak_(&ak[3][0], LPC_ORD_LOW);
    dump_model(&model[3]);
    if (c2->softdec)
        dump_softdec(c2->softdec, nbit);
    #endif

    /* update memories for next frame ----------------------------*/

    c2->prev_model_dec = model[3];
    c2->prev_e_dec = e[3];
    for(i=0; i<LPC_ORD_LOW; i++)
	c2->prev_lsps_dec[i] = lsps[3][i];
}
1940
1941
1942/*---------------------------------------------------------------------------*\ 1407/*---------------------------------------------------------------------------*\
1943 1408
1944 FUNCTION....: codec2_encode_700c 1409 FUNCTION....: codec2_encode_700c
@@ -1955,7 +1420,7 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char *
1955 frame 0: nothing 1420 frame 0: nothing
1956 frame 1: nothing 1421 frame 1: nothing
1957 frame 2: nothing 1422 frame 2: nothing
1958 frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy, 1423 frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy,
1959 6 bit scalar Wo/voicing. No spare bits. 1424 6 bit scalar Wo/voicing. No spare bits.
1960 1425
1961 Voicing is encoded using the 0 index of the Wo quantiser. 1426 Voicing is encoded using the 0 index of the Wo quantiser.
@@ -1971,52 +1436,54 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char *
1971 1436
1972\*---------------------------------------------------------------------------*/ 1437\*---------------------------------------------------------------------------*/
1973 1438
1974void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) 1439void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits,
1975{ 1440 short speech[]) {
1976 MODEL model; 1441 MODEL model;
1977 int indexes[4], i, M=4; 1442 int indexes[4], i, M = 4;
1978 unsigned int nbit = 0; 1443 unsigned int nbit = 0;
1979 1444
1980 assert(c2 != NULL); 1445 assert(c2 != NULL);
1981 1446
1982 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); 1447 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
1983 1448
1984 for(i=0; i<M; i++) { 1449 for (i = 0; i < M; i++) {
1985 analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); 1450 analyse_one_frame(c2, &model, &speech[i * c2->n_samp]);
1986 } 1451 }
1987 1452
1988 int K = 20; 1453 int K = 20;
1989 float rate_K_vec[K], mean; 1454 float rate_K_vec[K], mean;
1990 float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; 1455 float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K];
1991 1456
1992 newamp1_model_to_indexes(&c2->c2const, 1457 newamp1_model_to_indexes(&c2->c2const, indexes, &model, rate_K_vec,
1993 indexes, 1458 c2->rate_K_sample_freqs_kHz, K, &mean,
1994 &model, 1459 rate_K_vec_no_mean, rate_K_vec_no_mean_, &c2->se,
1995 rate_K_vec, 1460 c2->eq, c2->eq_en);
1996 c2->rate_K_sample_freqs_kHz, 1461 c2->nse += K;
1997 K,
1998 &mean,
1999 rate_K_vec_no_mean,
2000 rate_K_vec_no_mean_, &c2->se, c2->eq, c2->eq_en);
2001 c2->nse += K;
2002 1462
2003#ifndef CORTEX_M4 1463#ifndef CORTEX_M4
2004 /* dump features for deep learning experiments */ 1464 /* dump features for deep learning experiments */
2005 if (c2->fmlfeat != NULL) { 1465 if (c2->fmlfeat != NULL) {
2006 fwrite(&mean, 1, sizeof(float), c2->fmlfeat); 1466 fwrite(&mean, 1, sizeof(float), c2->fmlfeat);
2007 fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat); 1467 fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat);
2008 fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat); 1468 fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat);
2009 } 1469 MODEL model_;
1470 memcpy(&model_, &model, sizeof(model));
1471 float rate_K_vec_[K];
1472 for (int k = 0; k < K; k++) rate_K_vec_[k] = rate_K_vec_no_mean_[k] + mean;
1473 resample_rate_L(&c2->c2const, &model_, rate_K_vec_,
1474 c2->rate_K_sample_freqs_kHz, K);
1475 fwrite(&model_.A, MAX_AMP, sizeof(float), c2->fmlfeat);
1476 }
1477 if (c2->fmlmodel != NULL) fwrite(&model, sizeof(MODEL), 1, c2->fmlmodel);
2010#endif 1478#endif
2011
2012 pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
2013 pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0);
2014 pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0);
2015 pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);
2016 1479
2017 assert(nbit == (unsigned)codec2_bits_per_frame(c2)); 1480 pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
2018} 1481 pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0);
1482 pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0);
1483 pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);
2019 1484
1485 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
1486}
2020 1487
2021/*---------------------------------------------------------------------------*\ 1488/*---------------------------------------------------------------------------*\
2022 1489
@@ -2028,46 +1495,53 @@ void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[])
2028 1495
2029\*---------------------------------------------------------------------------*/ 1496\*---------------------------------------------------------------------------*/
2030 1497
2031void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits) 1498void codec2_decode_700c(struct CODEC2 *c2, short speech[],
2032{ 1499 const unsigned char *bits) {
2033 MODEL model[4]; 1500 MODEL model[4];
2034 int indexes[4]; 1501 int indexes[4];
2035 int i; 1502 int i;
2036 unsigned int nbit = 0; 1503 unsigned int nbit = 0;
2037 1504
2038 assert(c2 != NULL); 1505 assert(c2 != NULL);
2039 1506
2040 /* unpack bits from channel ------------------------------------*/ 1507 /* unpack bits from channel ------------------------------------*/
2041 1508
2042 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); 1509 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2043 indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); 1510 indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2044 indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); 1511 indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0);
2045 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); 1512 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2046 1513
2047 int M = 4; 1514 int M = 4;
2048 COMP HH[M][MAX_AMP+1]; 1515 COMP HH[M][MAX_AMP + 1];
2049 float interpolated_surface_[M][NEWAMP1_K]; 1516 float interpolated_surface_[M][NEWAMP1_K];
2050 1517
2051 newamp1_indexes_to_model(&c2->c2const, 1518 newamp1_indexes_to_model(
2052 model, 1519 &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_,
2053 (COMP*)HH, 1520 c2->prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left,
2054 (float*)interpolated_surface_, 1521 c2->rate_K_sample_freqs_kHz, NEWAMP1_K, c2->phase_fft_fwd_cfg,
2055 c2->prev_rate_K_vec_, 1522 c2->phase_fft_inv_cfg, indexes, c2->user_rate_K_vec_no_mean_,
2056 &c2->Wo_left, 1523 c2->post_filter_en);
2057 &c2->voicing_left, 1524
2058 c2->rate_K_sample_freqs_kHz, 1525 for (i = 0; i < M; i++) {
2059 NEWAMP1_K, 1526 if (c2->fmlfeat != NULL) {
2060 c2->phase_fft_fwd_cfg, 1527 /* We use standard nb_features=55 feature records for compatibility with
2061 c2->phase_fft_inv_cfg, 1528 * train_lpcnet.py */
2062 indexes, 1529 float features[55] = {0};
2063 c2->user_rate_K_vec_no_mean_, 1530 /* just using 18/20 for compatibility with LPCNet, coarse scaling for NN
2064 c2->post_filter_en); 1531 * input */
2065 1532 for (int j = 0; j < 18; j++)
2066 1533 features[j] = (interpolated_surface_[i][j] - 30) / 40;
2067 for(i=0; i<M; i++) { 1534 int pitch_index = 21 + 2.0 * M_PI / model[i].Wo;
2068 /* 700C is a little quiter so lets apply some experimentally derived audio gain */ 1535 features[36] = 0.02 * (pitch_index - 100);
2069 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); 1536 features[37] = model[i].voiced;
2070 } 1537 fwrite(features, 55, sizeof(float), c2->fmlfeat);
1538 }
1539
1540 /* 700C is a little quieter so lets apply some experimentally derived audio
1541 * gain */
1542 synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0],
1543 1.5);
1544 }
2071} 1545}
2072 1546
2073/*---------------------------------------------------------------------------*\ 1547/*---------------------------------------------------------------------------*\
@@ -2080,48 +1554,24 @@ void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char *
2080 1554
2081\*---------------------------------------------------------------------------*/ 1555\*---------------------------------------------------------------------------*/
2082 1556
2083float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits) 1557float codec2_energy_700c(struct CODEC2 *c2, const unsigned char *bits) {
2084{ 1558 int indexes[4];
2085 int indexes[4]; 1559 unsigned int nbit = 0;
2086 unsigned int nbit = 0;
2087 1560
2088 assert(c2 != NULL); 1561 assert(c2 != NULL);
2089 1562
2090 /* unpack bits from channel ------------------------------------*/ 1563 /* unpack bits from channel ------------------------------------*/
2091 1564
2092 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); 1565 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2093 indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); 1566 indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2094 indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); 1567 indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0);
2095 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); 1568 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2096 1569
2097 float mean = newamp1_energy_cb[0].cb[indexes[2]]; 1570 float mean = newamp1_energy_cb[0].cb[indexes[2]];
2098 mean -= 10; 1571 mean -= 10;
2099 if (indexes[3] == 0) 1572 if (indexes[3] == 0) mean -= 10;
2100 mean -= 10;
2101 1573
2102 return POW10F(mean/10.0); 1574 return POW10F(mean / 10.0);
2103}
2104
2105float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits)
2106{
2107 int indexes[4];
2108 unsigned int nbit = 0;
2109
2110 assert(c2 != NULL);
2111
2112 /* unpack bits from channel ------------------------------------*/
2113
2114 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2115 //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2116 indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
2117 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2118
2119 float mean = newamp2_energy_cb[0].cb[indexes[2]];
2120 mean -= 10;
2121 if (indexes[3] == 0)
2122 mean -= 10;
2123
2124 return POW10F(mean/10.0);
2125} 1575}
2126 1576
2127/*---------------------------------------------------------------------------*\ 1577/*---------------------------------------------------------------------------*\
@@ -2134,300 +1584,58 @@ float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits)
2134 1584
2135\*---------------------------------------------------------------------------*/ 1585\*---------------------------------------------------------------------------*/
2136 1586
2137float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) 1587float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) {
2138{ 1588 assert(c2 != NULL);
2139 assert(c2 != NULL); 1589 assert((CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) ||
2140 assert( 1590 (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) ||
2141 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) || 1591 (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) ||
2142 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) || 1592 (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) ||
2143 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) || 1593 (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) ||
2144 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || 1594 (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) ||
2145 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || 1595 (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)));
2146 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || 1596 MODEL model;
2147 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) || 1597 float xq_dec[2] = {};
2148 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) || 1598 int e_index, WoE_index;
2149 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) || 1599 float e = 0.0f;
2150 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) || 1600 unsigned int nbit;
2151 ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) 1601
2152 ); 1602 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) {
2153 MODEL model; 1603 nbit = 1 + 1 + WO_BITS;
2154 float xq_dec[2] = {}; 1604 e_index = unpack(bits, &nbit, E_BITS);
2155 int e_index, WoE_index; 1605 e = decode_energy(e_index, E_BITS);
2156 float e; 1606 }
2157 unsigned int nbit; 1607 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) {
2158 1608 nbit = 1 + 1;
2159 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { 1609 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2160 nbit = 1 + 1 + WO_BITS; 1610 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2161 e_index = unpack(bits, &nbit, E_BITS); 1611 }
2162 e = decode_energy(e_index, E_BITS); 1612 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) {
2163 } 1613 nbit = 1 + 1 + WO_BITS;
2164 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { 1614 e_index = unpack(bits, &nbit, E_BITS);
2165 nbit = 1 + 1; 1615 e = decode_energy(e_index, E_BITS);
2166 WoE_index = unpack(bits, &nbit, WO_E_BITS); 1616 }
2167 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); 1617 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) {
2168 } 1618 nbit = 1 + 1;
2169 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { 1619 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2170 nbit = 1 + 1 + WO_BITS; 1620 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2171 e_index = unpack(bits, &nbit, E_BITS); 1621 }
2172 e = decode_energy(e_index, E_BITS); 1622 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) {
2173 } 1623 nbit = 1 + 1 + 1 + 1 + WO_BITS;
2174 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { 1624 e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
2175 nbit = 1 + 1; 1625 e = decode_energy(e_index, E_BITS);
2176 WoE_index = unpack(bits, &nbit, WO_E_BITS); 1626 }
2177 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); 1627 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) {
2178 } 1628 nbit = 1 + 1;
2179 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { 1629 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2180 nbit = 1 + 1 + 1 + 1 + WO_BITS; 1630 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2181 e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); 1631 }
2182 e = decode_energy(e_index, E_BITS); 1632 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
2183 } 1633 e = codec2_energy_700c(c2, bits);
2184 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { 1634 }
2185 nbit = 1 + 1;
2186 WoE_index = unpack(bits, &nbit, WO_E_BITS);
2187 decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
2188 }
2189 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) {
2190 nbit = 1 + 5;
2191 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
2192 e = decode_energy(e_index, 3);
2193 }
2194 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) {
2195 nbit = 1 + 5;
2196 e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
2197 e = decode_energy(e_index, 3);
2198 }
2199 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
2200 e = codec2_energy_700c(c2, bits);
2201 }
2202 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
2203 e = codec2_energy_450(c2, bits);
2204 }
2205
2206 return e;
2207}
2208
2209
2210/*---------------------------------------------------------------------------*\
2211
2212 FUNCTION....: codec2_encode_450
2213 AUTHOR......: Thomas Kurin and Stefan Erhardt
2214 INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
2215 DATE CREATED: July 2018
2216
2217 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes.
2218
2219 Encodes 320 speech samples (40ms of speech) into 28 bits.
2220
2221 The codec2 algorithm actually operates internally on 10ms (80
2222 sample) frames, so we run the encoding algorithm four times:
2223
2224 frame 0: nothing
2225 frame 1: nothing
2226 frame 2: nothing
2227 frame 3: 9 bit 1 stage VQ, 3 bits energy,
2228 6 bit scalar Wo/voicing/plosive. No spare bits.
2229
2230 If a plosive is detected the frame at the energy-step is encoded.
2231
2232 Voicing is encoded using the 000000 index of the Wo quantiser.
2233 Plosive is encoded using the 111111 index of the Wo quantiser.
2234
2235 The bit allocation is:
2236
2237 Parameter frames 1-3 frame 4 Total
2238 -----------------------------------------------------------
2239 Harmonic magnitudes (rate k VQ) 0 9 9
2240 Energy 0 3 3
2241 log Wo/voicing/plosive 0 6 6
2242 TOTAL 0 18 18
2243
2244
2245\*---------------------------------------------------------------------------*/
2246
2247void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[])
2248{
2249 MODEL model;
2250 int indexes[4], i,h, M=4;
2251 unsigned int nbit = 0;
2252 int plosiv = 0;
2253 float energydelta[M];
2254 int spectralCounter;
2255
2256 assert(c2 != NULL);
2257
2258 memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
2259 for(i=0; i<M; i++){
2260 analyse_one_frame(c2, &model, &speech[i*c2->n_samp]);
2261 energydelta[i] = 0;
2262 spectralCounter = 0;
2263 for(h = 0;h<(model.L);h++){
2264 //only detect above 300 Hz
2265 if(h*model.Wo*(c2->c2const.Fs/2000.0)/M_PI > 0.3){
2266 energydelta[i] = energydelta[i] + 20.0*log10(model.A[10]+1E-16);
2267 spectralCounter = spectralCounter+1;
2268 }
2269
2270 }
2271 energydelta[i] = energydelta[i] / spectralCounter ;
2272 }
2273 //Constants for plosive Detection tdB = threshold; minPwr = from below this level plosives have to rise
2274 float tdB = 15; //not fixed can be changed
2275 float minPwr = 15; //not fixed can be changed
2276 if((c2->energy_prev)<minPwr && energydelta[0]>((c2->energy_prev)+tdB)){
2277
2278 plosiv = 1;
2279 }
2280 if(energydelta[0]<minPwr && energydelta[1]>(energydelta[0]+tdB)){
2281
2282 plosiv = 2;
2283 }
2284 if(energydelta[1]<minPwr &&energydelta[2]>(energydelta[1]+tdB)){
2285
2286 plosiv = 3;
2287 }
2288 if(energydelta[2]<minPwr &&energydelta[3]>(energydelta[2]+tdB)){
2289
2290 plosiv = 4;
2291 }
2292 if(plosiv != 0 && plosiv != 4){
2293 analyse_one_frame(c2, &model, &speech[(plosiv-1)*c2->n_samp]);
2294 }
2295
2296 c2->energy_prev = energydelta[3];
2297
2298
2299 int K = 29;
2300 float rate_K_vec[K], mean;
2301 float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K];
2302 if(plosiv > 0){
2303 plosiv = 1;
2304 }
2305 newamp2_model_to_indexes(&c2->c2const,
2306 indexes,
2307 &model,
2308 rate_K_vec,
2309 c2->n2_rate_K_sample_freqs_kHz,
2310 K,
2311 &mean,
2312 rate_K_vec_no_mean,
2313 rate_K_vec_no_mean_,
2314 plosiv);
2315
2316
2317 pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
2318 //pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0);
2319 pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0);
2320 pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);
2321
2322 assert(nbit == (unsigned)codec2_bits_per_frame(c2));
2323}
2324
2325
2326/*---------------------------------------------------------------------------*\
2327
2328 FUNCTION....: codec2_decode_450
2329 AUTHOR......: Thomas Kurin and Stefan Erhardt
2330 INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
2331 DATE CREATED: July 2018
2332
2333\*---------------------------------------------------------------------------*/
2334
2335void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits)
2336{
2337 MODEL model[4];
2338 int indexes[4];
2339 int i;
2340 unsigned int nbit = 0;
2341
2342 assert(c2 != NULL);
2343
2344 /* unpack bits from channel ------------------------------------*/
2345
2346 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2347 //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2348 indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
2349 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2350
2351 int M = 4;
2352 COMP HH[M][MAX_AMP+1];
2353 float interpolated_surface_[M][NEWAMP2_K];
2354 int pwbFlag = 0;
2355
2356 newamp2_indexes_to_model(&c2->c2const,
2357 model,
2358 (COMP*)HH,
2359 (float*)interpolated_surface_,
2360 c2->n2_prev_rate_K_vec_,
2361 &c2->Wo_left,
2362 &c2->voicing_left,
2363 c2->n2_rate_K_sample_freqs_kHz,
2364 NEWAMP2_K,
2365 c2->phase_fft_fwd_cfg,
2366 c2->phase_fft_inv_cfg,
2367 indexes,
2368 1.5,
2369 pwbFlag);
2370
2371
2372 for(i=0; i<M; i++) {
2373 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
2374 }
2375}
2376
2377/*---------------------------------------------------------------------------*\
2378
2379 FUNCTION....: codec2_decode_450pwb
2380 AUTHOR......: Thomas Kurin and Stefan Erhardt
2381 INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
2382 DATE CREATED: July 2018
2383
2384 Decodes the 450 codec data in pseudo wideband at 16kHz samplerate.
2385
2386\*---------------------------------------------------------------------------*/
2387 1635
2388void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits) 1636 return e;
2389{
2390 MODEL model[4];
2391 int indexes[4];
2392 int i;
2393 unsigned int nbit = 0;
2394
2395 assert(c2 != NULL);
2396
2397 /* unpack bits from channel ------------------------------------*/
2398
2399 indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2400 //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
2401 indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
2402 indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
2403
2404 int M = 4;
2405 COMP HH[M][MAX_AMP+1];
2406 float interpolated_surface_[M][NEWAMP2_16K_K];
2407 int pwbFlag = 1;
2408
2409 newamp2_indexes_to_model(&c2->c2const,
2410 model,
2411 (COMP*)HH,
2412 (float*)interpolated_surface_,
2413 c2->n2_pwb_prev_rate_K_vec_,
2414 &c2->Wo_left,
2415 &c2->voicing_left,
2416 c2->n2_pwb_rate_K_sample_freqs_kHz,
2417 NEWAMP2_16K_K,
2418 c2->phase_fft_fwd_cfg,
2419 c2->phase_fft_inv_cfg,
2420 indexes,
2421 1.5,
2422 pwbFlag);
2423
2424
2425 for(i=0; i<M; i++) {
2426 synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
2427 }
2428} 1637}
2429 1638
2430
2431/*---------------------------------------------------------------------------* \ 1639/*---------------------------------------------------------------------------* \
2432 1640
2433 FUNCTION....: synthesise_one_frame() 1641 FUNCTION....: synthesise_one_frame()
@@ -2438,56 +1646,41 @@ void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char
2438 1646
2439\*---------------------------------------------------------------------------*/ 1647\*---------------------------------------------------------------------------*/
2440 1648
2441void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain) 1649void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model,
2442{ 1650 COMP Aw[], float gain) {
2443 int i; 1651 int i;
2444 //PROFILE_VAR(phase_start, pf_start, synth_start); 1652
2445 1653 if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
2446 //#ifdef DUMP 1654 /* newamp1, we've already worked out rate L phase */
2447 //dump_quantised_model(model); 1655 COMP *H = Aw;
2448 //#endif 1656 phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H);
2449 1657 } else {
2450 //PROFILE_SAMPLE(phase_start); 1658 /* LPC based phase synthesis */
2451 1659 COMP H[MAX_AMP + 1];
2452 if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode) ) { 1660 sample_phase(model, H, Aw);
2453 /* newamp1/2, we've already worked out rate L phase */ 1661 phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H);
2454 COMP *H = Aw; 1662 }
2455 phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); 1663
2456 } else { 1664 postfilter(model, &c2->bg_est);
2457 /* LPC based phase synthesis */ 1665 synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1);
2458 COMP H[MAX_AMP+1]; 1666
2459 sample_phase(model, H, Aw); 1667 for (i = 0; i < c2->n_samp; i++) {
2460 phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); 1668 c2->Sn_[i] *= gain;
2461 } 1669 }
2462 1670
2463 //PROFILE_SAMPLE_AND_LOG(pf_start, phase_start, " phase_synth"); 1671 ear_protection(c2->Sn_, c2->n_samp);
2464 1672
2465 postfilter(model, &c2->bg_est); 1673 for (i = 0; i < c2->n_samp; i++) {
2466 1674 if (c2->Sn_[i] > 32767.0)
2467 //PROFILE_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter"); 1675 speech[i] = 32767;
2468 1676 else if (c2->Sn_[i] < -32767.0)
2469 synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1); 1677 speech[i] = -32767;
2470 1678 else
2471 for(i=0; i<c2->n_samp; i++) { 1679 speech[i] = c2->Sn_[i];
2472 c2->Sn_[i] *= gain; 1680 }
2473 }
2474
2475 //PROFILE_SAMPLE_AND_LOG2(synth_start, " synth");
2476
2477 ear_protection(c2->Sn_, c2->n_samp);
2478
2479 for(i=0; i<c2->n_samp; i++) {
2480 if (c2->Sn_[i] > 32767.0)
2481 speech[i] = 32767;
2482 else if (c2->Sn_[i] < -32767.0)
2483 speech[i] = -32767;
2484 else
2485 speech[i] = c2->Sn_[i];
2486 }
2487
2488} 1681}
2489 1682
2490/*---------------------------------------------------------------------------*\ 1683/*---------------------------------------------------------------------------* \
2491 1684
2492 FUNCTION....: analyse_one_frame() 1685 FUNCTION....: analyse_one_frame()
2493 AUTHOR......: David Rowe 1686 AUTHOR......: David Rowe
@@ -2498,48 +1691,40 @@ void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP
2498 1691
2499\*---------------------------------------------------------------------------*/ 1692\*---------------------------------------------------------------------------*/
2500 1693
2501void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) 1694void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) {
2502{ 1695 COMP Sw[FFT_ENC];
2503 COMP Sw[FFT_ENC]; 1696 float pitch;
2504 float pitch; 1697 int i;
2505 int i; 1698 int n_samp = c2->n_samp;
2506 //PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps); 1699 int m_pitch = c2->m_pitch;
2507 int n_samp = c2->n_samp;
2508 int m_pitch = c2->m_pitch;
2509 1700
2510 /* Read input speech */ 1701 /* Read input speech */
2511 1702
2512 for(i=0; i<m_pitch-n_samp; i++) 1703 for (i = 0; i < m_pitch - n_samp; i++) c2->Sn[i] = c2->Sn[i + n_samp];
2513 c2->Sn[i] = c2->Sn[i+n_samp]; 1704 for (i = 0; i < n_samp; i++) c2->Sn[i + m_pitch - n_samp] = speech[i];
2514 for(i=0; i<n_samp; i++)
2515 c2->Sn[i+m_pitch-n_samp] = speech[i];
2516 1705
2517 //PROFILE_SAMPLE(dft_start); 1706 dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
2518 dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
2519 //PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech");
2520 1707
2521 /* Estimate pitch */ 1708 /* Estimate pitch */
1709 nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc);
1710 model->Wo = TWO_PI / pitch;
1711 model->L = PI / model->Wo;
2522 1712
2523 nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc); 1713 /* estimate model parameters */
2524 //PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp"); 1714 two_stage_pitch_refinement(&c2->c2const, model, Sw);
2525 1715
2526 model->Wo = TWO_PI/pitch; 1716 /* estimate phases when doing ML experiments */
2527 model->L = PI/model->Wo; 1717 if (c2->fmlfeat != NULL)
2528 1718 estimate_amplitudes(model, Sw, c2->W, 1);
2529 /* estimate model parameters */ 1719 else
2530
2531 two_stage_pitch_refinement(&c2->c2const, model, Sw);
2532 //PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage");
2533 estimate_amplitudes(model, Sw, c2->W, 0); 1720 estimate_amplitudes(model, Sw, c2->W, 0);
2534 //PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps"); 1721 est_voicing_mbe(&c2->c2const, model, Sw, c2->W);
2535 est_voicing_mbe(&c2->c2const, model, Sw, c2->W); 1722#ifdef DUMP
2536 //PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing"); 1723 dump_model(model);
2537 #ifdef DUMP 1724#endif
2538 dump_model(model);
2539 #endif
2540} 1725}
2541 1726
/*---------------------------------------------------------------------------*\

  FUNCTION....: ear_protection()
  AUTHOR......: David Rowe

  Limits the output level of a frame of synthesised speech: if the frame
  peak exceeds a fixed set point the whole frame is attenuated, which
  protects the listener from loud bursts (e.g. caused by bit errors).

\*---------------------------------------------------------------------------*/

static void ear_protection(float in_out[], int n) {
  /* locate the largest (most positive) sample in the frame */

  float peak = 0.0;
  for (int k = 0; k < n; k++) {
    if (in_out[k] > peak) peak = in_out[k];
  }

  /* how far are we above the set point? */

  float over = peak / 30000.0;

  /* If we are x dB over set point we reduce level by 2x dB, this
     attenuates major excursions in amplitude (likely to be caused
     by bit errors) more than smaller ones */

  if (over > 1.0) {
    float gain = 1.0 / (over * over);
    for (int k = 0; k < n; k++) in_out[k] *= gain;
  }
}
2580 1762
2581void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma) 1763void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost,
2582{ 1764 float beta, float gamma) {
2583 assert((beta >= 0.0) && (beta <= 1.0)); 1765 assert((beta >= 0.0) && (beta <= 1.0));
2584 assert((gamma >= 0.0) && (gamma <= 1.0)); 1766 assert((gamma >= 0.0) && (gamma <= 1.0));
2585 c2->lpc_pf = enable; 1767 c2->lpc_pf = enable;
2586 c2->bass_boost = bass_boost; 1768 c2->bass_boost = bass_boost;
2587 c2->beta = beta; 1769 c2->beta = beta;
2588 c2->gamma = gamma; 1770 c2->gamma = gamma;
2589} 1771}
2590 1772
2591/* 1773/*
@@ -2594,29 +1776,22 @@ void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, f
2594 Experimental method of sending voice/data frames for FreeDV. 1776 Experimental method of sending voice/data frames for FreeDV.
2595*/ 1777*/
2596 1778
2597int codec2_get_spare_bit_index(struct CODEC2 *c2) 1779int codec2_get_spare_bit_index(struct CODEC2 *c2) {
2598{ 1780 assert(c2 != NULL);
2599 assert(c2 != NULL);
2600 1781
2601 switch(c2->mode) { 1782 switch (c2->mode) {
2602 case CODEC2_MODE_1300: 1783 case CODEC2_MODE_1300:
2603 return 2; // bit 2 (3th bit) is v2 (third voicing bit) 1784 return 2; // bit 2 (3th bit) is v2 (third voicing bit)
2604 break; 1785 break;
2605 case CODEC2_MODE_1400: 1786 case CODEC2_MODE_1400:
2606 return 10; // bit 10 (11th bit) is v2 (third voicing bit) 1787 return 10; // bit 10 (11th bit) is v2 (third voicing bit)
2607 break; 1788 break;
2608 case CODEC2_MODE_1600: 1789 case CODEC2_MODE_1600:
2609 return 15; // bit 15 (16th bit) is v2 (third voicing bit) 1790 return 15; // bit 15 (16th bit) is v2 (third voicing bit)
2610 break; 1791 break;
2611 case CODEC2_MODE_700: 1792 }
2612 return 26; // bits 26 and 27 are spare
2613 break;
2614 case CODEC2_MODE_700B:
2615 return 27; // bit 27 is spare
2616 break;
2617 }
2618 1793
2619 return -1; 1794 return -1;
2620} 1795}
2621 1796
2622/* 1797/*
@@ -2624,111 +1799,123 @@ int codec2_get_spare_bit_index(struct CODEC2 *c2)
2624 for convenience. 1799 for convenience.
2625*/ 1800*/
2626 1801
2627int codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[]) 1802int codec2_rebuild_spare_bit(struct CODEC2 *c2, char unpacked_bits[]) {
2628{ 1803 int v1, v3;
2629 int v1,v3;
2630 1804
2631 assert(c2 != NULL); 1805 assert(c2 != NULL);
2632 1806
2633 v1 = unpacked_bits[1]; 1807 v1 = unpacked_bits[1];
2634 1808
2635 switch(c2->mode) { 1809 switch (c2->mode) {
2636 case CODEC2_MODE_1300: 1810 case CODEC2_MODE_1300:
2637 1811
2638 v3 = unpacked_bits[1+1+1]; 1812 v3 = unpacked_bits[1 + 1 + 1];
2639 1813
2640 /* if either adjacent frame is voiced, make this one voiced */ 1814 /* if either adjacent frame is voiced, make this one voiced */
2641 1815
2642 unpacked_bits[2] = (v1 || v3); 1816 unpacked_bits[2] = (v1 || v3);
2643 1817
2644 return 0; 1818 return 0;
2645 1819
2646 break; 1820 break;
2647 1821
2648 case CODEC2_MODE_1400: 1822 case CODEC2_MODE_1400:
2649 1823
2650 v3 = unpacked_bits[1+1+8+1]; 1824 v3 = unpacked_bits[1 + 1 + 8 + 1];
2651 1825
2652 /* if either adjacent frame is voiced, make this one voiced */ 1826 /* if either adjacent frame is voiced, make this one voiced */
2653 1827
2654 unpacked_bits[10] = (v1 || v3); 1828 unpacked_bits[10] = (v1 || v3);
2655 1829
2656 return 0; 1830 return 0;
2657 1831
2658 break; 1832 break;
2659 1833
2660 case CODEC2_MODE_1600: 1834 case CODEC2_MODE_1600:
2661 v3 = unpacked_bits[1+1+8+5+1]; 1835 v3 = unpacked_bits[1 + 1 + 8 + 5 + 1];
2662 1836
2663 /* if either adjacent frame is voiced, make this one voiced */ 1837 /* if either adjacent frame is voiced, make this one voiced */
2664 1838
2665 unpacked_bits[15] = (v1 || v3); 1839 unpacked_bits[15] = (v1 || v3);
2666 1840
2667 return 0; 1841 return 0;
2668 1842
2669 break; 1843 break;
2670 } 1844 }
2671 1845
2672 return -1; 1846 return -1;
2673} 1847}
2674 1848
2675void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) 1849void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) {
2676{ 1850 assert(c2 != NULL);
2677 assert(c2 != NULL); 1851 c2->gray = gray;
2678 c2->gray = gray;
2679} 1852}
2680 1853
2681void codec2_set_softdec(struct CODEC2 *c2, float *softdec) 1854void codec2_set_softdec(struct CODEC2 *c2, float *softdec) {
2682{ 1855 assert(c2 != NULL);
2683 assert(c2 != NULL); 1856 c2->softdec = softdec;
2684 c2->softdec = softdec;
2685} 1857}
2686 1858
2687void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename) { 1859void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *feat_fn,
2688 if ((codec2_state->fmlfeat = fopen(filename, "wb")) == NULL) { 1860 char *model_fn) {
2689 fprintf(stderr, "error opening machine learning feature file: %s\n", filename); 1861 if ((codec2_state->fmlfeat = fopen(feat_fn, "wb")) == NULL) {
2690 exit(1); 1862 fprintf(stderr, "error opening machine learning feature file: %s\n",
2691 } 1863 feat_fn);
1864 exit(1);
1865 }
1866 if (model_fn) {
1867 if ((codec2_state->fmlmodel = fopen(model_fn, "wb")) == NULL) {
1868 fprintf(stderr, "error opening machine learning Codec 2 model file: %s\n",
1869 feat_fn);
1870 exit(1);
1871 }
1872 }
2692} 1873}
2693 1874
2694#ifndef __EMBEDDED__ 1875#ifndef __EMBEDDED__
2695void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename) { 1876void codec2_load_codebook(struct CODEC2 *codec2_state, int num,
2696 FILE *f; 1877 char *filename) {
2697 1878 FILE *f;
2698 if ((f = fopen(filename, "rb")) == NULL) { 1879
2699 fprintf(stderr, "error opening codebook file: %s\n", filename); 1880 if ((f = fopen(filename, "rb")) == NULL) {
2700 exit(1); 1881 fprintf(stderr, "error opening codebook file: %s\n", filename);
2701 } 1882 exit(1);
2702 //fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, newamp1vq_cb[num].k, newamp1vq_cb[num].m); 1883 }
2703 float tmp[newamp1vq_cb[num].k*newamp1vq_cb[num].m]; 1884 // fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num,
2704 int nread = fread(tmp, sizeof(float), newamp1vq_cb[num].k*newamp1vq_cb[num].m, f); 1885 // newamp1vq_cb[num].k, newamp1vq_cb[num].m);
2705 float *p = (float*)newamp1vq_cb[num].cb; 1886 float tmp[newamp1vq_cb[num].k * newamp1vq_cb[num].m];
2706 for(int i=0; i<newamp1vq_cb[num].k*newamp1vq_cb[num].m; i++) 1887 int nread =
2707 p[i] = tmp[i]; 1888 fread(tmp, sizeof(float), newamp1vq_cb[num].k * newamp1vq_cb[num].m, f);
2708 // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], newamp1vq_cb[num].cb[1]); 1889 float *p = (float *)newamp1vq_cb[num].cb;
2709 assert(nread == newamp1vq_cb[num].k*newamp1vq_cb[num].m); 1890 for (int i = 0; i < newamp1vq_cb[num].k * newamp1vq_cb[num].m; i++)
2710 fclose(f); 1891 p[i] = tmp[i];
1892 // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0],
1893 // newamp1vq_cb[num].cb[1]);
1894 assert(nread == newamp1vq_cb[num].k * newamp1vq_cb[num].m);
1895 fclose(f);
2711} 1896}
2712#endif 1897#endif
2713 1898
2714float codec2_get_var(struct CODEC2 *codec2_state) { 1899float codec2_get_var(struct CODEC2 *codec2_state) {
2715 if (codec2_state->nse) 1900 if (codec2_state->nse)
2716 return codec2_state->se/codec2_state->nse; 1901 return codec2_state->se / codec2_state->nse;
2717 else 1902 else
2718 return 0; 1903 return 0;
2719} 1904}
2720 1905
2721float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { 1906float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) {
2722 codec2_state->user_rate_K_vec_no_mean_ = (float*)malloc(sizeof(float)*NEWAMP1_K); 1907 codec2_state->user_rate_K_vec_no_mean_ =
2723 *K = NEWAMP1_K; 1908 (float *)malloc(sizeof(float) * NEWAMP1_K);
2724 return codec2_state->user_rate_K_vec_no_mean_; 1909 *K = NEWAMP1_K;
1910 return codec2_state->user_rate_K_vec_no_mean_;
2725} 1911}
2726 1912
2727void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) { 1913void codec2_700c_post_filter(struct CODEC2 *codec2_state, bool en) {
2728 codec2_state->post_filter_en = en; 1914 codec2_state->post_filter_en = en;
2729} 1915}
2730 1916
2731void codec2_700c_eq(struct CODEC2 *codec2_state, int en) { 1917void codec2_700c_eq(struct CODEC2 *codec2_state, bool en) {
2732 codec2_state->eq_en = en; 1918 codec2_state->eq_en = en;
2733 codec2_state->se = 0.0; codec2_state->nse = 0; 1919 codec2_state->se = 0.0;
1920 codec2_state->nse = 0;
2734} 1921}