diff options
Diffstat (limited to 'codec2.c')
-rw-r--r-- | codec2.c | 3331 |
1 files changed, 1259 insertions, 2072 deletions
@@ -26,33 +26,31 @@ | |||
26 | along with this program; if not, see <http://www.gnu.org/licenses/>. | 26 | along with this program; if not, see <http://www.gnu.org/licenses/>. |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include "codec2.h" | ||
30 | |||
29 | #include <assert.h> | 31 | #include <assert.h> |
32 | #include <math.h> | ||
33 | #include <stdbool.h> | ||
30 | #include <stdio.h> | 34 | #include <stdio.h> |
31 | #include <stdlib.h> | 35 | #include <stdlib.h> |
32 | #include <stdbool.h> | ||
33 | #include <string.h> | 36 | #include <string.h> |
34 | #include <math.h> | ||
35 | 37 | ||
36 | #include "defines.h" | 38 | #include "bpf.h" |
39 | #include "bpfb.h" | ||
37 | #include "codec2_fft.h" | 40 | #include "codec2_fft.h" |
38 | #include "sine.h" | 41 | #include "codec2_internal.h" |
39 | #include "nlp.h" | 42 | #include "debug_alloc.h" |
43 | #include "defines.h" | ||
40 | #include "dump.h" | 44 | #include "dump.h" |
41 | #include "lpc.h" | ||
42 | #include "quantise.h" | ||
43 | #include "phase.h" | ||
44 | #include "interp.h" | 45 | #include "interp.h" |
45 | #include "postfilter.h" | 46 | #include "lpc.h" |
46 | #include "codec2.h" | ||
47 | #include "lsp.h" | 47 | #include "lsp.h" |
48 | #include "newamp2.h" | ||
49 | #include "codec2_internal.h" | ||
50 | #include "machdep.h" | 48 | #include "machdep.h" |
51 | #include "bpf.h" | 49 | #include "nlp.h" |
52 | #include "bpfb.h" | 50 | #include "phase.h" |
53 | #include "c2wideband.h" | 51 | #include "postfilter.h" |
54 | 52 | #include "quantise.h" | |
55 | #include "debug_alloc.h" | 53 | #include "sine.h" |
56 | 54 | ||
57 | /*---------------------------------------------------------------------------* \ | 55 | /*---------------------------------------------------------------------------* \ |
58 | 56 | ||
@@ -62,32 +60,30 @@ | |||
62 | 60 | ||
63 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]); | 61 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]); |
64 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, | 62 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, |
65 | COMP Aw[], float gain); | 63 | COMP Aw[], float gain); |
66 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 64 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
67 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 65 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], |
68 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 66 | const unsigned char *bits); |
69 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 67 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
70 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 68 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], |
71 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 69 | const unsigned char *bits); |
72 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 70 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
73 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 71 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], |
74 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 72 | const unsigned char *bits); |
75 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est); | 73 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
76 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 74 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], |
77 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 75 | const unsigned char *bits); |
78 | void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 76 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
79 | void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 77 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], |
80 | void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 78 | const unsigned char *bits, float ber_est); |
81 | void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 79 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
82 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 80 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], |
83 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 81 | const unsigned char *bits); |
84 | void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 82 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
85 | void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 83 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], |
86 | void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 84 | const unsigned char *bits); |
87 | static void ear_protection(float in_out[], int n); | 85 | static void ear_protection(float in_out[], int n); |
88 | 86 | ||
89 | |||
90 | |||
91 | /*---------------------------------------------------------------------------*\ | 87 | /*---------------------------------------------------------------------------*\ |
92 | 88 | ||
93 | FUNCTIONS | 89 | FUNCTIONS |
@@ -108,247 +104,170 @@ static void ear_protection(float in_out[], int n); | |||
108 | 104 | ||
109 | \*---------------------------------------------------------------------------*/ | 105 | \*---------------------------------------------------------------------------*/ |
110 | 106 | ||
111 | 107 | struct CODEC2 *codec2_create(int mode) { | |
112 | //Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior ! | 108 | struct CODEC2 *c2; |
113 | struct CODEC2 * codec2_create(int mode) | 109 | int i, l; |
114 | { | 110 | |
115 | struct CODEC2 *c2; | 111 | // ALL POSSIBLE MODES MUST BE CHECKED HERE! |
116 | int i,l; | 112 | // we test if the desired mode is enabled at compile time |
117 | 113 | // and return NULL if not | |
118 | // ALL POSSIBLE MODES MUST BE CHECKED HERE! | 114 | |
119 | // we test if the desired mode is enabled at compile time | 115 | if (false == (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) || |
120 | // and return NULL if not | 116 | CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) || |
121 | 117 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) || | |
122 | if (false == ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) | 118 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) || |
123 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) | 119 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) || |
124 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) | 120 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) || |
125 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) | 121 | CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode))) { |
126 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) | 122 | return NULL; |
127 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) | 123 | } |
128 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700, mode) | 124 | |
129 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode) | 125 | c2 = (struct CODEC2 *)MALLOC(sizeof(struct CODEC2)); |
130 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode) | 126 | if (c2 == NULL) return NULL; |
131 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode) | 127 | |
132 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) | 128 | c2->mode = mode; |
133 | ) ) | 129 | |
134 | { | 130 | /* store constants in a few places for convenience */ |
135 | return NULL; | 131 | |
136 | } | 132 | c2->c2const = c2const_create(8000, N_S); |
137 | 133 | c2->Fs = c2->c2const.Fs; | |
138 | c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2)); | 134 | int n_samp = c2->n_samp = c2->c2const.n_samp; |
139 | if (c2 == NULL) | 135 | int m_pitch = c2->m_pitch = c2->c2const.m_pitch; |
140 | return NULL; | 136 | |
141 | 137 | c2->Pn = (float *)MALLOC(2 * n_samp * sizeof(float)); | |
142 | c2->mode = mode; | 138 | if (c2->Pn == NULL) { |
143 | 139 | return NULL; | |
144 | /* store constants in a few places for convenience */ | 140 | } |
145 | 141 | c2->Sn_ = (float *)MALLOC(2 * n_samp * sizeof(float)); | |
146 | if( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0){ | 142 | if (c2->Sn_ == NULL) { |
147 | c2->c2const = c2const_create(8000, N_S); | 143 | FREE(c2->Pn); |
148 | }else{ | 144 | return NULL; |
149 | c2->c2const = c2const_create(16000, N_S); | 145 | } |
150 | } | 146 | c2->w = (float *)MALLOC(m_pitch * sizeof(float)); |
151 | c2->Fs = c2->c2const.Fs; | 147 | if (c2->w == NULL) { |
152 | int n_samp = c2->n_samp = c2->c2const.n_samp; | 148 | FREE(c2->Pn); |
153 | int m_pitch = c2->m_pitch = c2->c2const.m_pitch; | 149 | FREE(c2->Sn_); |
154 | 150 | return NULL; | |
155 | c2->Pn = (float*)MALLOC(2*n_samp*sizeof(float)); | 151 | } |
156 | if (c2->Pn == NULL) { | 152 | c2->Sn = (float *)MALLOC(m_pitch * sizeof(float)); |
157 | return NULL; | 153 | if (c2->Sn == NULL) { |
158 | } | 154 | FREE(c2->Pn); |
159 | c2->Sn_ = (float*)MALLOC(2*n_samp*sizeof(float)); | 155 | FREE(c2->Sn_); |
160 | if (c2->Sn_ == NULL) { | 156 | FREE(c2->w); |
161 | FREE(c2->Pn); | 157 | return NULL; |
162 | return NULL; | 158 | } |
163 | } | 159 | |
164 | c2->w = (float*)MALLOC(m_pitch*sizeof(float)); | 160 | for (i = 0; i < m_pitch; i++) c2->Sn[i] = 1.0; |
165 | if (c2->w == NULL) { | 161 | c2->hpf_states[0] = c2->hpf_states[1] = 0.0; |
166 | FREE(c2->Pn); | 162 | for (i = 0; i < 2 * n_samp; i++) c2->Sn_[i] = 0; |
167 | FREE(c2->Sn_); | 163 | c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); |
168 | return NULL; | 164 | c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); |
169 | } | 165 | make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w, c2->W); |
170 | c2->Sn = (float*)MALLOC(m_pitch*sizeof(float)); | 166 | make_synthesis_window(&c2->c2const, c2->Pn); |
171 | if (c2->Sn == NULL) { | 167 | c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); |
172 | FREE(c2->Pn); | 168 | c2->prev_f0_enc = 1 / P_MAX_S; |
173 | FREE(c2->Sn_); | 169 | c2->bg_est = 0.0; |
174 | FREE(c2->w); | 170 | c2->ex_phase = 0.0; |
175 | return NULL; | 171 | |
176 | } | 172 | for (l = 1; l <= MAX_AMP; l++) c2->prev_model_dec.A[l] = 0.0; |
177 | 173 | c2->prev_model_dec.Wo = TWO_PI / c2->c2const.p_max; | |
178 | for(i=0; i<m_pitch; i++) | 174 | c2->prev_model_dec.L = PI / c2->prev_model_dec.Wo; |
179 | c2->Sn[i] = 1.0; | 175 | c2->prev_model_dec.voiced = 0; |
180 | c2->hpf_states[0] = c2->hpf_states[1] = 0.0; | 176 | |
181 | for(i=0; i<2*n_samp; i++) | 177 | for (i = 0; i < LPC_ORD; i++) { |
182 | c2->Sn_[i] = 0; | 178 | c2->prev_lsps_dec[i] = i * PI / (LPC_ORD + 1); |
183 | c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); | 179 | } |
184 | c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); | 180 | c2->prev_e_dec = 1; |
185 | make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W); | 181 | |
186 | make_synthesis_window(&c2->c2const, c2->Pn); | 182 | c2->nlp = nlp_create(&c2->c2const); |
187 | c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); | 183 | if (c2->nlp == NULL) { |
188 | quantise_init(); | 184 | return NULL; |
189 | c2->prev_f0_enc = 1/P_MAX_S; | 185 | } |
190 | c2->bg_est = 0.0; | 186 | |
191 | c2->ex_phase = 0.0; | 187 | c2->lpc_pf = 1; |
192 | 188 | c2->bass_boost = 1; | |
193 | for(l=1; l<=MAX_AMP; l++) | 189 | c2->beta = LPCPF_BETA; |
194 | c2->prev_model_dec.A[l] = 0.0; | 190 | c2->gamma = LPCPF_GAMMA; |
195 | c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max; | 191 | |
196 | c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo; | 192 | c2->xq_enc[0] = c2->xq_enc[1] = 0.0; |
197 | c2->prev_model_dec.voiced = 0; | 193 | c2->xq_dec[0] = c2->xq_dec[1] = 0.0; |
198 | 194 | ||
199 | for(i=0; i<LPC_ORD; i++) { | 195 | c2->smoothing = 0; |
200 | c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1); | 196 | c2->se = 0.0; |
201 | } | 197 | c2->nse = 0; |
202 | c2->prev_e_dec = 1; | 198 | c2->user_rate_K_vec_no_mean_ = NULL; |
203 | 199 | c2->post_filter_en = true; | |
204 | c2->nlp = nlp_create(&c2->c2const); | 200 | |
205 | if (c2->nlp == NULL) { | 201 | c2->bpf_buf = (float *)MALLOC(sizeof(float) * (BPF_N + 4 * c2->n_samp)); |
206 | return NULL; | 202 | assert(c2->bpf_buf != NULL); |
207 | } | 203 | for (i = 0; i < BPF_N + 4 * c2->n_samp; i++) c2->bpf_buf[i] = 0.0; |
208 | 204 | ||
209 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode)) | 205 | c2->softdec = NULL; |
210 | c2->gray = 0; // natural binary better for trellis decoding (hopefully added later) | 206 | c2->gray = 1; |
211 | else | 207 | |
212 | c2->gray = 1; | 208 | /* newamp1 initialisation */ |
213 | 209 | ||
214 | c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA; | 210 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
215 | 211 | mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), | |
216 | c2->xq_enc[0] = c2->xq_enc[1] = 0.0; | 212 | ftomel(3700.0)); |
217 | c2->xq_dec[0] = c2->xq_dec[1] = 0.0; | 213 | int k; |
218 | 214 | for (k = 0; k < NEWAMP1_K; k++) { | |
219 | c2->smoothing = 0; | 215 | c2->prev_rate_K_vec_[k] = 0.0; |
220 | c2->se = 0.0; c2->nse = 0; | 216 | c2->eq[k] = 0.0; |
221 | c2->user_rate_K_vec_no_mean_ = NULL; | 217 | } |
222 | c2->post_filter_en = 1; | 218 | c2->eq_en = false; |
223 | 219 | c2->Wo_left = 0.0; | |
224 | c2->bpf_buf = (float*)MALLOC(sizeof(float)*(BPF_N+4*c2->n_samp)); | 220 | c2->voicing_left = 0; |
225 | assert(c2->bpf_buf != NULL); | 221 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); |
226 | for(i=0; i<BPF_N+4*c2->n_samp; i++) | 222 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); |
227 | c2->bpf_buf[i] = 0.0; | 223 | } |
228 | 224 | ||
229 | c2->softdec = NULL; | 225 | c2->fmlfeat = NULL; |
230 | 226 | c2->fmlmodel = NULL; | |
231 | /* newamp1 initialisation */ | 227 | |
232 | 228 | // make sure that one of the two decode function pointers is empty | |
233 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | 229 | // for the encode function pointer this is not required since we always set it |
234 | mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) ); | 230 | // to a meaningful value |
235 | int k; | 231 | |
236 | for(k=0; k<NEWAMP1_K; k++) { | 232 | c2->decode = NULL; |
237 | c2->prev_rate_K_vec_[k] = 0.0; | 233 | c2->decode_ber = NULL; |
238 | c2->eq[k] = 0.0; | 234 | |
239 | } | 235 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { |
240 | c2->eq_en = 0; | 236 | c2->encode = codec2_encode_3200; |
241 | c2->Wo_left = 0.0; | 237 | c2->decode = codec2_decode_3200; |
242 | c2->voicing_left = 0;; | 238 | } |
243 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); | 239 | |
244 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); | 240 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { |
245 | } | 241 | c2->encode = codec2_encode_2400; |
246 | 242 | c2->decode = codec2_decode_2400; | |
247 | /* newamp2 initialisation */ | 243 | } |
248 | 244 | ||
249 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | 245 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { |
250 | n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K); | 246 | c2->encode = codec2_encode_1600; |
251 | int k; | 247 | c2->decode = codec2_decode_1600; |
252 | for(k=0; k<NEWAMP2_K; k++) { | 248 | } |
253 | c2->n2_prev_rate_K_vec_[k] = 0.0; | 249 | |
254 | } | 250 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { |
255 | c2->Wo_left = 0.0; | 251 | c2->encode = codec2_encode_1400; |
256 | c2->voicing_left = 0;; | 252 | c2->decode = codec2_decode_1400; |
257 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | 253 | } |
258 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | 254 | |
259 | } | 255 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { |
260 | /* newamp2 PWB initialisation */ | 256 | c2->encode = codec2_encode_1300; |
261 | 257 | c2->decode_ber = codec2_decode_1300; | |
262 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | 258 | } |
263 | n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K); | 259 | |
264 | int k; | 260 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { |
265 | for(k=0; k<NEWAMP2_16K_K; k++) { | 261 | c2->encode = codec2_encode_1200; |
266 | c2->n2_pwb_prev_rate_K_vec_[k] = 0.0; | 262 | c2->decode = codec2_decode_1200; |
267 | } | 263 | } |
268 | c2->Wo_left = 0.0; | 264 | |
269 | c2->voicing_left = 0;; | 265 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
270 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | 266 | c2->encode = codec2_encode_700c; |
271 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | 267 | c2->decode = codec2_decode_700c; |
272 | } | 268 | } |
273 | 269 | ||
274 | c2->fmlfeat = NULL; | 270 | return c2; |
275 | |||
276 | // make sure that one of the two decode function pointers is empty | ||
277 | // for the encode function pointer this is not required since we always set it | ||
278 | // to a meaningful value | ||
279 | |||
280 | c2->decode = NULL; | ||
281 | c2->decode_ber = NULL; | ||
282 | |||
283 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | ||
284 | { | ||
285 | c2->encode = codec2_encode_3200; | ||
286 | c2->decode = codec2_decode_3200; | ||
287 | } | ||
288 | |||
289 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | ||
290 | { | ||
291 | c2->encode = codec2_encode_2400; | ||
292 | c2->decode = codec2_decode_2400; | ||
293 | } | ||
294 | |||
295 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | ||
296 | { | ||
297 | c2->encode = codec2_encode_1600; | ||
298 | c2->decode = codec2_decode_1600; | ||
299 | } | ||
300 | |||
301 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | ||
302 | { | ||
303 | c2->encode = codec2_encode_1400; | ||
304 | c2->decode = codec2_decode_1400; | ||
305 | } | ||
306 | |||
307 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
308 | { | ||
309 | c2->encode = codec2_encode_1300; | ||
310 | c2->decode_ber = codec2_decode_1300; | ||
311 | } | ||
312 | |||
313 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
314 | { | ||
315 | c2->encode = codec2_encode_1200; | ||
316 | c2->decode = codec2_decode_1200; | ||
317 | } | ||
318 | |||
319 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
320 | { | ||
321 | c2->encode = codec2_encode_700; | ||
322 | c2->decode = codec2_decode_700; | ||
323 | } | ||
324 | |||
325 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
326 | { | ||
327 | c2->encode = codec2_encode_700b; | ||
328 | c2->decode = codec2_decode_700b; | ||
329 | } | ||
330 | |||
331 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
332 | { | ||
333 | c2->encode = codec2_encode_700c; | ||
334 | c2->decode = codec2_decode_700c; | ||
335 | } | ||
336 | |||
337 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
338 | { | ||
339 | c2->encode = codec2_encode_450; | ||
340 | c2->decode = codec2_decode_450; | ||
341 | } | ||
342 | |||
343 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
344 | { | ||
345 | //Encode PWB doesnt make sense | ||
346 | c2->encode = codec2_encode_450; | ||
347 | c2->decode = codec2_decode_450pwb; | ||
348 | } | ||
349 | |||
350 | |||
351 | return c2; | ||
352 | } | 271 | } |
353 | 272 | ||
354 | /*---------------------------------------------------------------------------*\ | 273 | /*---------------------------------------------------------------------------*\ |
@@ -361,31 +280,22 @@ struct CODEC2 * codec2_create(int mode) | |||
361 | 280 | ||
362 | \*---------------------------------------------------------------------------*/ | 281 | \*---------------------------------------------------------------------------*/ |
363 | 282 | ||
364 | void codec2_destroy(struct CODEC2 *c2) | 283 | void codec2_destroy(struct CODEC2 *c2) { |
365 | { | 284 | assert(c2 != NULL); |
366 | assert(c2 != NULL); | 285 | FREE(c2->bpf_buf); |
367 | FREE(c2->bpf_buf); | 286 | nlp_destroy(c2->nlp); |
368 | nlp_destroy(c2->nlp); | 287 | codec2_fft_free(c2->fft_fwd_cfg); |
369 | codec2_fft_free(c2->fft_fwd_cfg); | 288 | codec2_fftr_free(c2->fftr_fwd_cfg); |
370 | codec2_fftr_free(c2->fftr_fwd_cfg); | 289 | codec2_fftr_free(c2->fftr_inv_cfg); |
371 | codec2_fftr_free(c2->fftr_inv_cfg); | 290 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
372 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | 291 | codec2_fft_free(c2->phase_fft_fwd_cfg); |
373 | codec2_fft_free(c2->phase_fft_fwd_cfg); | 292 | codec2_fft_free(c2->phase_fft_inv_cfg); |
374 | codec2_fft_free(c2->phase_fft_inv_cfg); | 293 | } |
375 | } | 294 | FREE(c2->Pn); |
376 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | 295 | FREE(c2->Sn); |
377 | codec2_fft_free(c2->phase_fft_fwd_cfg); | 296 | FREE(c2->w); |
378 | codec2_fft_free(c2->phase_fft_inv_cfg); | 297 | FREE(c2->Sn_); |
379 | } | 298 | FREE(c2); |
380 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
381 | codec2_fft_free(c2->phase_fft_fwd_cfg); | ||
382 | codec2_fft_free(c2->phase_fft_inv_cfg); | ||
383 | } | ||
384 | FREE(c2->Pn); | ||
385 | FREE(c2->Sn); | ||
386 | FREE(c2->w); | ||
387 | FREE(c2->Sn_); | ||
388 | FREE(c2); | ||
389 | } | 299 | } |
390 | 300 | ||
391 | /*---------------------------------------------------------------------------*\ | 301 | /*---------------------------------------------------------------------------*\ |
@@ -399,32 +309,31 @@ void codec2_destroy(struct CODEC2 *c2) | |||
399 | \*---------------------------------------------------------------------------*/ | 309 | \*---------------------------------------------------------------------------*/ |
400 | 310 | ||
401 | int codec2_bits_per_frame(struct CODEC2 *c2) { | 311 | int codec2_bits_per_frame(struct CODEC2 *c2) { |
402 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | 312 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) return 64; |
403 | return 64; | 313 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) return 48; |
404 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | 314 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) return 64; |
405 | return 48; | 315 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) return 56; |
406 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | 316 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 52; |
407 | return 64; | 317 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 48; |
408 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | 318 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 28; |
409 | return 56; | 319 | |
410 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | 320 | return 0; /* shouldn't get here */ |
411 | return 52; | ||
412 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
413 | return 48; | ||
414 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
415 | return 28; | ||
416 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
417 | return 28; | ||
418 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
419 | return 28; | ||
420 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
421 | return 18; | ||
422 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
423 | return 18; | ||
424 | |||
425 | return 0; /* shouldn't get here */ | ||
426 | } | 321 | } |
427 | 322 | ||
323 | /*---------------------------------------------------------------------------*\ | ||
324 | |||
325 | FUNCTION....: codec2_bytes_per_frame | ||
326 | DATE CREATED: April 2021 | ||
327 | |||
328 | Returns the number of bytes per frame. Useful for allocating storage for | |
329 | codec2_encode()/codec2_decode(). Note the number of bits may not be a | ||
330 | multiple of 8, therefore some bits in the last byte may be unused. | ||
331 | |||
332 | \*---------------------------------------------------------------------------*/ | ||
333 | |||
334 | int codec2_bytes_per_frame(struct CODEC2 *c2) { | ||
335 | return (codec2_bits_per_frame(c2) + 7) / 8; | ||
336 | } | ||
428 | 337 | ||
429 | /*---------------------------------------------------------------------------*\ | 338 | /*---------------------------------------------------------------------------*\ |
430 | 339 | ||
@@ -437,60 +346,61 @@ int codec2_bits_per_frame(struct CODEC2 *c2) { | |||
437 | \*---------------------------------------------------------------------------*/ | 346 | \*---------------------------------------------------------------------------*/ |
438 | 347 | ||
439 | int codec2_samples_per_frame(struct CODEC2 *c2) { | 348 | int codec2_samples_per_frame(struct CODEC2 *c2) { |
440 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | 349 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) return 160; |
441 | return 160; | 350 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) return 160; |
442 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | 351 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) return 320; |
443 | return 160; | 352 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) return 320; |
444 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | 353 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 320; |
445 | return 320; | 354 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 320; |
446 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | 355 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 320; |
447 | return 320; | 356 | return 0; /* shouldn't get here */ |
448 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
449 | return 320; | ||
450 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
451 | return 320; | ||
452 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
453 | return 320; | ||
454 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
455 | return 320; | ||
456 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
457 | return 320; | ||
458 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
459 | return 320; | ||
460 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
461 | return 640; | ||
462 | return 0; /* shouldnt get here */ | ||
463 | } | 357 | } |
464 | 358 | ||
465 | void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[]) | 359 | /*---------------------------------------------------------------------------*\ |
466 | { | 360 | |
467 | assert(c2 != NULL); | 361 | FUNCTION....: codec2_encode |
468 | assert(c2->encode != NULL); | 362 | AUTHOR......: David Rowe |
363 | DATE CREATED: Nov 14 2011 | ||
364 | |||
365 | Take an input buffer of speech samples, and compress them to a packed buffer | ||
366 | of bytes. | ||
469 | 367 | ||
470 | c2->encode(c2, bits, speech); | 368 | \*---------------------------------------------------------------------------*/ |
471 | 369 | ||
472 | } | 370 | void codec2_encode(struct CODEC2 *c2, unsigned char *bytes, short speech[]) { |
371 | assert(c2 != NULL); | ||
372 | assert(c2->encode != NULL); | ||
473 | 373 | ||
474 | void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits) | 374 | c2->encode(c2, bytes, speech); |
475 | { | ||
476 | codec2_decode_ber(c2, speech, bits, 0.0); | ||
477 | } | 375 | } |
478 | 376 | ||
479 | void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est) | 377 | /*---------------------------------------------------------------------------*\ |
480 | { | ||
481 | assert(c2 != NULL); | ||
482 | assert(c2->decode != NULL || c2->decode_ber != NULL); | ||
483 | 378 | ||
484 | if (c2->decode != NULL) | 379 | FUNCTION....: codec2_decode |
485 | { | 380 | AUTHOR......: David Rowe |
486 | c2->decode(c2, speech, bits); | 381 | DATE CREATED: Nov 14 2011 |
487 | } | 382 | |
488 | else | 383 | Take an input packed buffer of bytes, and decode them to a buffer of speech |
489 | { | 384 | samples. |
490 | c2->decode_ber(c2, speech, bits, ber_est); | 385 | |
491 | } | 386 | \*---------------------------------------------------------------------------*/ |
387 | |||
388 | void codec2_decode(struct CODEC2 *c2, short speech[], | ||
389 | const unsigned char *bytes) { | ||
390 | codec2_decode_ber(c2, speech, bytes, 0.0); | ||
492 | } | 391 | } |
493 | 392 | ||
393 | void codec2_decode_ber(struct CODEC2 *c2, short speech[], | ||
394 | const unsigned char *bits, float ber_est) { | ||
395 | assert(c2 != NULL); | ||
396 | assert(c2->decode != NULL || c2->decode_ber != NULL); | ||
397 | |||
398 | if (c2->decode != NULL) { | ||
399 | c2->decode(c2, speech, bits); | ||
400 | } else { | ||
401 | c2->decode_ber(c2, speech, bits, ber_est); | ||
402 | } | ||
403 | } | ||
494 | 404 | ||
495 | /*---------------------------------------------------------------------------*\ | 405 | /*---------------------------------------------------------------------------*\ |
496 | 406 | ||
@@ -503,60 +413,60 @@ void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *b | |||
503 | The codec2 algorithm actually operates internally on 10ms (80 | 413 | The codec2 algorithm actually operates internally on 10ms (80 |
504 | sample) frames, so we run the encoding algorithm twice. On the | 414 | sample) frames, so we run the encoding algorithm twice. On the |
505 | first frame we just send the voicing bits. On the second frame we | 415 | first frame we just send the voicing bits. On the second frame we |
506 | send all model parameters. Compared to 2400 we use a larger number | 416 | send all model parameters. Compared to 2400 we encode the LSP |
507 | of bits for the LSPs and non-VQ pitch and energy. | 417 | differences, a larger number of bits for the LSP(d)s and scalar |
418 | (non-VQ) quantisation for pitch and energy. | ||
508 | 419 | ||
509 | The bit allocation is: | 420 | The bit allocation is: |
510 | 421 | ||
511 | Parameter bits/frame | 422 | Parameter bits/frame |
512 | -------------------------------------- | 423 | ------------------------------------------------------ |
513 | Harmonic magnitudes (LSPs) 50 | 424 | Harmonic magnitudes (LSP differences) 50 |
514 | Pitch (Wo) 7 | 425 | Pitch (Wo) 7 |
515 | Energy 5 | 426 | Energy 5 |
516 | Voicing (10ms update) 2 | 427 | Voicing (10ms update) 2 |
517 | TOTAL 64 | 428 | TOTAL 64 |
518 | 429 | ||
519 | \*---------------------------------------------------------------------------*/ | 430 | \*---------------------------------------------------------------------------*/ |
520 | 431 | ||
521 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 432 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char *bits, |
522 | { | 433 | short speech[]) { |
523 | MODEL model; | 434 | MODEL model; |
524 | float ak[LPC_ORD+1]; | 435 | float ak[LPC_ORD + 1]; |
525 | float lsps[LPC_ORD]; | 436 | float lsps[LPC_ORD]; |
526 | float e; | 437 | float e; |
527 | int Wo_index, e_index; | 438 | int Wo_index, e_index; |
528 | int lspd_indexes[LPC_ORD]; | 439 | int lspd_indexes[LPC_ORD]; |
529 | int i; | 440 | int i; |
530 | unsigned int nbit = 0; | 441 | unsigned int nbit = 0; |
531 | 442 | ||
532 | assert(c2 != NULL); | 443 | assert(c2 != NULL); |
533 | 444 | ||
534 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 445 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
535 | 446 | ||
536 | /* first 10ms analysis frame - we just want voicing */ | 447 | /* first 10ms analysis frame - we just want voicing */ |
537 | 448 | ||
538 | analyse_one_frame(c2, &model, speech); | 449 | analyse_one_frame(c2, &model, speech); |
539 | pack(bits, &nbit, model.voiced, 1); | 450 | pack(bits, &nbit, model.voiced, 1); |
540 | 451 | ||
541 | /* second 10ms analysis frame */ | 452 | /* second 10ms analysis frame */ |
542 | 453 | ||
543 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 454 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
544 | pack(bits, &nbit, model.voiced, 1); | 455 | pack(bits, &nbit, model.voiced, 1); |
545 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 456 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
546 | pack(bits, &nbit, Wo_index, WO_BITS); | 457 | pack(bits, &nbit, Wo_index, WO_BITS); |
547 | 458 | ||
548 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 459 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
549 | e_index = encode_energy(e, E_BITS); | 460 | e_index = encode_energy(e, E_BITS); |
550 | pack(bits, &nbit, e_index, E_BITS); | 461 | pack(bits, &nbit, e_index, E_BITS); |
551 | 462 | ||
552 | encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD); | 463 | encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD); |
553 | for(i=0; i<LSPD_SCALAR_INDEXES; i++) { | 464 | for (i = 0; i < LSPD_SCALAR_INDEXES; i++) { |
554 | pack(bits, &nbit, lspd_indexes[i], lspd_bits(i)); | 465 | pack(bits, &nbit, lspd_indexes[i], lspd_bits(i)); |
555 | } | 466 | } |
556 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 467 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
557 | } | 468 | } |
558 | 469 | ||
559 | |||
560 | /*---------------------------------------------------------------------------*\ | 470 | /*---------------------------------------------------------------------------*\ |
561 | 471 | ||
562 | FUNCTION....: codec2_decode_3200 | 472 | FUNCTION....: codec2_decode_3200 |
@@ -567,77 +477,75 @@ void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
567 | 477 | ||
568 | \*---------------------------------------------------------------------------*/ | 478 | \*---------------------------------------------------------------------------*/ |
569 | 479 | ||
570 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 480 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], |
571 | { | 481 | const unsigned char *bits) { |
572 | MODEL model[2]; | 482 | MODEL model[2]; |
573 | int lspd_indexes[LPC_ORD]; | 483 | int lspd_indexes[LPC_ORD]; |
574 | float lsps[2][LPC_ORD]; | 484 | float lsps[2][LPC_ORD]; |
575 | int Wo_index, e_index; | 485 | int Wo_index, e_index; |
576 | float e[2]; | 486 | float e[2]; |
577 | float snr; | 487 | float snr; |
578 | float ak[2][LPC_ORD+1]; | 488 | float ak[2][LPC_ORD + 1]; |
579 | int i,j; | 489 | int i, j; |
580 | unsigned int nbit = 0; | 490 | unsigned int nbit = 0; |
581 | COMP Aw[FFT_ENC]; | 491 | COMP Aw[FFT_ENC]; |
582 | 492 | ||
583 | assert(c2 != NULL); | 493 | assert(c2 != NULL); |
584 | 494 | ||
585 | /* only need to zero these out due to (unused) snr calculation */ | 495 | /* only need to zero these out due to (unused) snr calculation */ |
586 | 496 | ||
587 | for(i=0; i<2; i++) | 497 | for (i = 0; i < 2; i++) |
588 | for(j=1; j<=MAX_AMP; j++) | 498 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
589 | model[i].A[j] = 0.0; | ||
590 | 499 | ||
591 | /* unpack bits from channel ------------------------------------*/ | 500 | /* unpack bits from channel ------------------------------------*/ |
592 | 501 | ||
593 | /* this will partially fill the model params for the 2 x 10ms | 502 | /* this will partially fill the model params for the 2 x 10ms |
594 | frames */ | 503 | frames */ |
595 | 504 | ||
596 | model[0].voiced = unpack(bits, &nbit, 1); | 505 | model[0].voiced = unpack(bits, &nbit, 1); |
597 | model[1].voiced = unpack(bits, &nbit, 1); | 506 | model[1].voiced = unpack(bits, &nbit, 1); |
598 | 507 | ||
599 | Wo_index = unpack(bits, &nbit, WO_BITS); | 508 | Wo_index = unpack(bits, &nbit, WO_BITS); |
600 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | 509 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); |
601 | model[1].L = PI/model[1].Wo; | 510 | model[1].L = PI / model[1].Wo; |
602 | 511 | ||
603 | e_index = unpack(bits, &nbit, E_BITS); | 512 | e_index = unpack(bits, &nbit, E_BITS); |
604 | e[1] = decode_energy(e_index, E_BITS); | 513 | e[1] = decode_energy(e_index, E_BITS); |
605 | 514 | ||
606 | for(i=0; i<LSPD_SCALAR_INDEXES; i++) { | 515 | for (i = 0; i < LSPD_SCALAR_INDEXES; i++) { |
607 | lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i)); | 516 | lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i)); |
608 | } | 517 | } |
609 | decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD); | 518 | decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD); |
610 | 519 | ||
611 | /* interpolate ------------------------------------------------*/ | 520 | /* interpolate ------------------------------------------------*/ |
612 | 521 | ||
613 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 522 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 |
614 | 10ms frame between 20ms samples */ | 523 | 10ms frame between 20ms samples */ |
615 | 524 | ||
616 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 525 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); |
617 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 526 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
618 | 527 | ||
619 | /* LSPs are sampled every 20ms so we interpolate the frame in | 528 | /* LSPs are sampled every 20ms so we interpolate the frame in |
620 | between, then recover spectral amplitudes */ | 529 | between, then recover spectral amplitudes */ |
621 | 530 | ||
622 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); | 531 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, |
532 | LPC_ORD); | ||
623 | 533 | ||
624 | for(i=0; i<2; i++) { | 534 | for (i = 0; i < 2; i++) { |
625 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 535 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
626 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 536 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
627 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 537 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
628 | apply_lpc_correction(&model[i]); | 538 | apply_lpc_correction(&model[i]); |
629 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 539 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
630 | } | 540 | } |
631 | 541 | ||
632 | /* update memories for next frame ----------------------------*/ | 542 | /* update memories for next frame ----------------------------*/ |
633 | 543 | ||
634 | c2->prev_model_dec = model[1]; | 544 | c2->prev_model_dec = model[1]; |
635 | c2->prev_e_dec = e[1]; | 545 | c2->prev_e_dec = e[1]; |
636 | for(i=0; i<LPC_ORD; i++) | 546 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[1][i]; |
637 | c2->prev_lsps_dec[i] = lsps[1][i]; | ||
638 | } | 547 | } |
639 | 548 | ||
640 | |||
641 | /*---------------------------------------------------------------------------*\ | 549 | /*---------------------------------------------------------------------------*\ |
642 | 550 | ||
643 | FUNCTION....: codec2_encode_2400 | 551 | FUNCTION....: codec2_encode_2400 |
@@ -663,46 +571,45 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * | |||
663 | 571 | ||
664 | \*---------------------------------------------------------------------------*/ | 572 | \*---------------------------------------------------------------------------*/ |
665 | 573 | ||
666 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 574 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char *bits, |
667 | { | 575 | short speech[]) { |
668 | MODEL model; | 576 | MODEL model; |
669 | float ak[LPC_ORD+1]; | 577 | float ak[LPC_ORD + 1]; |
670 | float lsps[LPC_ORD]; | 578 | float lsps[LPC_ORD]; |
671 | float e; | 579 | float e; |
672 | int WoE_index; | 580 | int WoE_index; |
673 | int lsp_indexes[LPC_ORD]; | 581 | int lsp_indexes[LPC_ORD]; |
674 | int i; | 582 | int i; |
675 | int spare = 0; | 583 | int spare = 0; |
676 | unsigned int nbit = 0; | 584 | unsigned int nbit = 0; |
677 | 585 | ||
678 | assert(c2 != NULL); | 586 | assert(c2 != NULL); |
679 | 587 | ||
680 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 588 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
681 | 589 | ||
682 | /* first 10ms analysis frame - we just want voicing */ | 590 | /* first 10ms analysis frame - we just want voicing */ |
683 | 591 | ||
684 | analyse_one_frame(c2, &model, speech); | 592 | analyse_one_frame(c2, &model, speech); |
685 | pack(bits, &nbit, model.voiced, 1); | 593 | pack(bits, &nbit, model.voiced, 1); |
686 | 594 | ||
687 | /* second 10ms analysis frame */ | 595 | /* second 10ms analysis frame */ |
688 | 596 | ||
689 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 597 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
690 | pack(bits, &nbit, model.voiced, 1); | 598 | pack(bits, &nbit, model.voiced, 1); |
691 | 599 | ||
692 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 600 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
693 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 601 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
694 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 602 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
695 | 603 | ||
696 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 604 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
697 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 605 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
698 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | 606 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); |
699 | } | 607 | } |
700 | pack(bits, &nbit, spare, 2); | 608 | pack(bits, &nbit, spare, 2); |
701 | 609 | ||
702 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 610 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
703 | } | 611 | } |
704 | 612 | ||
705 | |||
706 | /*---------------------------------------------------------------------------*\ | 613 | /*---------------------------------------------------------------------------*\ |
707 | 614 | ||
708 | FUNCTION....: codec2_decode_2400 | 615 | FUNCTION....: codec2_decode_2400 |
@@ -713,86 +620,84 @@ void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
713 | 620 | ||
714 | \*---------------------------------------------------------------------------*/ | 621 | \*---------------------------------------------------------------------------*/ |
715 | 622 | ||
716 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 623 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], |
717 | { | 624 | const unsigned char *bits) { |
718 | MODEL model[2]; | 625 | MODEL model[2]; |
719 | int lsp_indexes[LPC_ORD]; | 626 | int lsp_indexes[LPC_ORD]; |
720 | float lsps[2][LPC_ORD]; | 627 | float lsps[2][LPC_ORD]; |
721 | int WoE_index; | 628 | int WoE_index; |
722 | float e[2]; | 629 | float e[2]; |
723 | float snr; | 630 | float snr; |
724 | float ak[2][LPC_ORD+1]; | 631 | float ak[2][LPC_ORD + 1]; |
725 | int i,j; | 632 | int i, j; |
726 | unsigned int nbit = 0; | 633 | unsigned int nbit = 0; |
727 | COMP Aw[FFT_ENC]; | 634 | COMP Aw[FFT_ENC]; |
728 | 635 | ||
729 | assert(c2 != NULL); | 636 | assert(c2 != NULL); |
730 | 637 | ||
731 | /* only need to zero these out due to (unused) snr calculation */ | 638 | /* only need to zero these out due to (unused) snr calculation */ |
732 | 639 | ||
733 | for(i=0; i<2; i++) | 640 | for (i = 0; i < 2; i++) |
734 | for(j=1; j<=MAX_AMP; j++) | 641 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
735 | model[i].A[j] = 0.0; | ||
736 | 642 | ||
737 | /* unpack bits from channel ------------------------------------*/ | 643 | /* unpack bits from channel ------------------------------------*/ |
738 | 644 | ||
739 | /* this will partially fill the model params for the 2 x 10ms | 645 | /* this will partially fill the model params for the 2 x 10ms |
740 | frames */ | 646 | frames */ |
741 | 647 | ||
742 | model[0].voiced = unpack(bits, &nbit, 1); | 648 | model[0].voiced = unpack(bits, &nbit, 1); |
743 | 649 | ||
744 | model[1].voiced = unpack(bits, &nbit, 1); | 650 | model[1].voiced = unpack(bits, &nbit, 1); |
745 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 651 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
746 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | 652 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); |
747 | 653 | ||
748 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 654 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
749 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | 655 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); |
750 | } | 656 | } |
751 | decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD); | 657 | decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD); |
752 | check_lsp_order(&lsps[1][0], LPC_ORD); | 658 | check_lsp_order(&lsps[1][0], LPC_ORD); |
753 | bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0); | 659 | bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0); |
754 | 660 | ||
755 | /* interpolate ------------------------------------------------*/ | 661 | /* interpolate ------------------------------------------------*/ |
756 | 662 | ||
757 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 663 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 |
758 | 10ms frame between 20ms samples */ | 664 | 10ms frame between 20ms samples */ |
759 | 665 | ||
760 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 666 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); |
761 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 667 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
762 | 668 | ||
763 | /* LSPs are sampled every 20ms so we interpolate the frame in | 669 | /* LSPs are sampled every 20ms so we interpolate the frame in |
764 | between, then recover spectral amplitudes */ | 670 | between, then recover spectral amplitudes */ |
765 | 671 | ||
766 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); | 672 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, |
767 | for(i=0; i<2; i++) { | 673 | LPC_ORD); |
768 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 674 | for (i = 0; i < 2; i++) { |
769 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 675 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
770 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 676 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
771 | apply_lpc_correction(&model[i]); | 677 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
772 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 678 | apply_lpc_correction(&model[i]); |
773 | 679 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); | |
774 | /* dump parameters for deep learning experiments */ | 680 | |
775 | 681 | /* dump parameters for deep learning experiments */ | |
776 | if (c2->fmlfeat != NULL) { | 682 | |
777 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ | 683 | if (c2->fmlfeat != NULL) { |
778 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); | 684 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ |
779 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); | 685 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); |
780 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); | 686 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); |
781 | float voiced_float = model[i].voiced; | 687 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); |
782 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | 688 | float voiced_float = model[i].voiced; |
783 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); | 689 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); |
784 | } | 690 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); |
785 | } | 691 | } |
692 | } | ||
786 | 693 | ||
787 | /* update memories for next frame ----------------------------*/ | 694 | /* update memories for next frame ----------------------------*/ |
788 | 695 | ||
789 | c2->prev_model_dec = model[1]; | 696 | c2->prev_model_dec = model[1]; |
790 | c2->prev_e_dec = e[1]; | 697 | c2->prev_e_dec = e[1]; |
791 | for(i=0; i<LPC_ORD; i++) | 698 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[1][i]; |
792 | c2->prev_lsps_dec[i] = lsps[1][i]; | ||
793 | } | 699 | } |
794 | 700 | ||
795 | |||
796 | /*---------------------------------------------------------------------------*\ | 701 | /*---------------------------------------------------------------------------*\ |
797 | 702 | ||
798 | FUNCTION....: codec2_encode_1600 | 703 | FUNCTION....: codec2_encode_1600 |
@@ -821,65 +726,64 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * | |||
821 | 726 | ||
822 | \*---------------------------------------------------------------------------*/ | 727 | \*---------------------------------------------------------------------------*/ |
823 | 728 | ||
824 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 729 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char *bits, |
825 | { | 730 | short speech[]) { |
826 | MODEL model; | 731 | MODEL model; |
827 | float lsps[LPC_ORD]; | 732 | float lsps[LPC_ORD]; |
828 | float ak[LPC_ORD+1]; | 733 | float ak[LPC_ORD + 1]; |
829 | float e; | 734 | float e; |
830 | int lsp_indexes[LPC_ORD]; | 735 | int lsp_indexes[LPC_ORD]; |
831 | int Wo_index, e_index; | 736 | int Wo_index, e_index; |
832 | int i; | 737 | int i; |
833 | unsigned int nbit = 0; | 738 | unsigned int nbit = 0; |
834 | 739 | ||
835 | assert(c2 != NULL); | 740 | assert(c2 != NULL); |
836 | 741 | ||
837 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 742 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
838 | 743 | ||
839 | /* frame 1: - voicing ---------------------------------------------*/ | 744 | /* frame 1: - voicing ---------------------------------------------*/ |
840 | 745 | ||
841 | analyse_one_frame(c2, &model, speech); | 746 | analyse_one_frame(c2, &model, speech); |
842 | pack(bits, &nbit, model.voiced, 1); | 747 | pack(bits, &nbit, model.voiced, 1); |
843 | 748 | ||
844 | /* frame 2: - voicing, scalar Wo & E -------------------------------*/ | 749 | /* frame 2: - voicing, scalar Wo & E -------------------------------*/ |
845 | 750 | ||
846 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 751 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
847 | pack(bits, &nbit, model.voiced, 1); | 752 | pack(bits, &nbit, model.voiced, 1); |
848 | 753 | ||
849 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 754 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
850 | pack(bits, &nbit, Wo_index, WO_BITS); | 755 | pack(bits, &nbit, Wo_index, WO_BITS); |
851 | 756 | ||
852 | /* need to run this just to get LPC energy */ | 757 | /* need to run this just to get LPC energy */ |
853 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 758 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
854 | e_index = encode_energy(e, E_BITS); | 759 | e_index = encode_energy(e, E_BITS); |
855 | pack(bits, &nbit, e_index, E_BITS); | 760 | pack(bits, &nbit, e_index, E_BITS); |
856 | 761 | ||
857 | /* frame 3: - voicing ---------------------------------------------*/ | 762 | /* frame 3: - voicing ---------------------------------------------*/ |
858 | 763 | ||
859 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 764 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
860 | pack(bits, &nbit, model.voiced, 1); | 765 | pack(bits, &nbit, model.voiced, 1); |
861 | 766 | ||
862 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ | 767 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ |
863 | 768 | ||
864 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 769 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
865 | pack(bits, &nbit, model.voiced, 1); | 770 | pack(bits, &nbit, model.voiced, 1); |
866 | 771 | ||
867 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 772 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
868 | pack(bits, &nbit, Wo_index, WO_BITS); | 773 | pack(bits, &nbit, Wo_index, WO_BITS); |
869 | 774 | ||
870 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 775 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
871 | e_index = encode_energy(e, E_BITS); | 776 | e_index = encode_energy(e, E_BITS); |
872 | pack(bits, &nbit, e_index, E_BITS); | 777 | pack(bits, &nbit, e_index, E_BITS); |
873 | 778 | ||
874 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 779 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
875 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 780 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
876 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | 781 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); |
877 | } | 782 | } |
878 | 783 | ||
879 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 784 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
880 | } | 785 | } |
881 | 786 | ||
882 | |||
883 | /*---------------------------------------------------------------------------*\ | 787 | /*---------------------------------------------------------------------------*\ |
884 | 788 | ||
885 | FUNCTION....: codec2_decode_1600 | 789 | FUNCTION....: codec2_decode_1600 |
@@ -890,91 +794,89 @@ void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
890 | 794 | ||
891 | \*---------------------------------------------------------------------------*/ | 795 | \*---------------------------------------------------------------------------*/ |
892 | 796 | ||
893 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 797 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], |
894 | { | 798 | const unsigned char *bits) { |
895 | MODEL model[4]; | 799 | MODEL model[4]; |
896 | int lsp_indexes[LPC_ORD]; | 800 | int lsp_indexes[LPC_ORD]; |
897 | float lsps[4][LPC_ORD]; | 801 | float lsps[4][LPC_ORD]; |
898 | int Wo_index, e_index; | 802 | int Wo_index, e_index; |
899 | float e[4]; | 803 | float e[4]; |
900 | float snr; | 804 | float snr; |
901 | float ak[4][LPC_ORD+1]; | 805 | float ak[4][LPC_ORD + 1]; |
902 | int i,j; | 806 | int i, j; |
903 | unsigned int nbit = 0; | 807 | unsigned int nbit = 0; |
904 | float weight; | 808 | float weight; |
905 | COMP Aw[FFT_ENC]; | 809 | COMP Aw[FFT_ENC]; |
906 | 810 | ||
907 | assert(c2 != NULL); | 811 | assert(c2 != NULL); |
908 | 812 | ||
909 | /* only need to zero these out due to (unused) snr calculation */ | 813 | /* only need to zero these out due to (unused) snr calculation */ |
910 | 814 | ||
911 | for(i=0; i<4; i++) | 815 | for (i = 0; i < 4; i++) |
912 | for(j=1; j<=MAX_AMP; j++) | 816 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
913 | model[i].A[j] = 0.0; | 817 | |
914 | 818 | /* unpack bits from channel ------------------------------------*/ | |
915 | /* unpack bits from channel ------------------------------------*/ | 819 | |
916 | 820 | /* this will partially fill the model params for the 4 x 10ms | |
917 | /* this will partially fill the model params for the 4 x 10ms | 821 | frames */ |
918 | frames */ | 822 | |
919 | 823 | model[0].voiced = unpack(bits, &nbit, 1); | |
920 | model[0].voiced = unpack(bits, &nbit, 1); | 824 | |
921 | 825 | model[1].voiced = unpack(bits, &nbit, 1); | |
922 | model[1].voiced = unpack(bits, &nbit, 1); | 826 | Wo_index = unpack(bits, &nbit, WO_BITS); |
923 | Wo_index = unpack(bits, &nbit, WO_BITS); | 827 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); |
924 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | 828 | model[1].L = PI / model[1].Wo; |
925 | model[1].L = PI/model[1].Wo; | 829 | |
926 | 830 | e_index = unpack(bits, &nbit, E_BITS); | |
927 | e_index = unpack(bits, &nbit, E_BITS); | 831 | e[1] = decode_energy(e_index, E_BITS); |
928 | e[1] = decode_energy(e_index, E_BITS); | 832 | |
929 | 833 | model[2].voiced = unpack(bits, &nbit, 1); | |
930 | model[2].voiced = unpack(bits, &nbit, 1); | 834 | |
931 | 835 | model[3].voiced = unpack(bits, &nbit, 1); | |
932 | model[3].voiced = unpack(bits, &nbit, 1); | 836 | Wo_index = unpack(bits, &nbit, WO_BITS); |
933 | Wo_index = unpack(bits, &nbit, WO_BITS); | 837 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); |
934 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | 838 | model[3].L = PI / model[3].Wo; |
935 | model[3].L = PI/model[3].Wo; | 839 | |
936 | 840 | e_index = unpack(bits, &nbit, E_BITS); | |
937 | e_index = unpack(bits, &nbit, E_BITS); | 841 | e[3] = decode_energy(e_index, E_BITS); |
938 | e[3] = decode_energy(e_index, E_BITS); | 842 | |
939 | 843 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { | |
940 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 844 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); |
941 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | 845 | } |
942 | } | 846 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); |
943 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | 847 | check_lsp_order(&lsps[3][0], LPC_ORD); |
944 | check_lsp_order(&lsps[3][0], LPC_ORD); | 848 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); |
945 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 849 | |
946 | 850 | /* interpolate ------------------------------------------------*/ | |
947 | /* interpolate ------------------------------------------------*/ | 851 | |
948 | 852 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | |
949 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 853 | 10ms frame between 20ms samples */ |
950 | 10ms frame between 20ms samples */ | 854 | |
951 | 855 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | |
952 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 856 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
953 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 857 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); |
954 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | 858 | e[2] = interp_energy(e[1], e[3]); |
955 | e[2] = interp_energy(e[1], e[3]); | 859 | |
956 | 860 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | |
957 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | 861 | between, then recover spectral amplitudes */ |
958 | between, then recover spectral amplitudes */ | 862 | |
959 | 863 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | |
960 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | 864 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, |
961 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | 865 | LPC_ORD); |
962 | } | 866 | } |
963 | for(i=0; i<4; i++) { | 867 | for (i = 0; i < 4; i++) { |
964 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 868 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
965 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 869 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
966 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 870 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
967 | apply_lpc_correction(&model[i]); | 871 | apply_lpc_correction(&model[i]); |
968 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 872 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
969 | } | 873 | } |
970 | 874 | ||
971 | /* update memories for next frame ----------------------------*/ | 875 | /* update memories for next frame ----------------------------*/ |
972 | 876 | ||
973 | c2->prev_model_dec = model[3]; | 877 | c2->prev_model_dec = model[3]; |
974 | c2->prev_e_dec = e[3]; | 878 | c2->prev_e_dec = e[3]; |
975 | for(i=0; i<LPC_ORD; i++) | 879 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; |
976 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
977 | |||
978 | } | 880 | } |
979 | 881 | ||
980 | /*---------------------------------------------------------------------------*\ | 882 | /*---------------------------------------------------------------------------*\ |
@@ -1004,60 +906,59 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1004 | 906 | ||
1005 | \*---------------------------------------------------------------------------*/ | 907 | \*---------------------------------------------------------------------------*/ |
1006 | 908 | ||
1007 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 909 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char *bits, |
1008 | { | 910 | short speech[]) { |
1009 | MODEL model; | 911 | MODEL model; |
1010 | float lsps[LPC_ORD]; | 912 | float lsps[LPC_ORD]; |
1011 | float ak[LPC_ORD+1]; | 913 | float ak[LPC_ORD + 1]; |
1012 | float e; | 914 | float e; |
1013 | int lsp_indexes[LPC_ORD]; | 915 | int lsp_indexes[LPC_ORD]; |
1014 | int WoE_index; | 916 | int WoE_index; |
1015 | int i; | 917 | int i; |
1016 | unsigned int nbit = 0; | 918 | unsigned int nbit = 0; |
1017 | 919 | ||
1018 | assert(c2 != NULL); | 920 | assert(c2 != NULL); |
1019 | 921 | ||
1020 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 922 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
1021 | 923 | ||
1022 | /* frame 1: - voicing ---------------------------------------------*/ | 924 | /* frame 1: - voicing ---------------------------------------------*/ |
1023 | 925 | ||
1024 | analyse_one_frame(c2, &model, speech); | 926 | analyse_one_frame(c2, &model, speech); |
1025 | pack(bits, &nbit, model.voiced, 1); | 927 | pack(bits, &nbit, model.voiced, 1); |
1026 | 928 | ||
1027 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ | 929 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ |
1028 | 930 | ||
1029 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 931 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
1030 | pack(bits, &nbit, model.voiced, 1); | 932 | pack(bits, &nbit, model.voiced, 1); |
1031 | 933 | ||
1032 | /* need to run this just to get LPC energy */ | 934 | /* need to run this just to get LPC energy */ |
1033 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 935 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
1034 | 936 | ||
1035 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 937 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
1036 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 938 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
1037 | 939 | ||
1038 | /* frame 3: - voicing ---------------------------------------------*/ | 940 | /* frame 3: - voicing ---------------------------------------------*/ |
1039 | 941 | ||
1040 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 942 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
1041 | pack(bits, &nbit, model.voiced, 1); | 943 | pack(bits, &nbit, model.voiced, 1); |
1042 | 944 | ||
1043 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ | 945 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ |
1044 | 946 | ||
1045 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 947 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
1046 | pack(bits, &nbit, model.voiced, 1); | 948 | pack(bits, &nbit, model.voiced, 1); |
1047 | 949 | ||
1048 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 950 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
1049 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 951 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
1050 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 952 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
1051 | 953 | ||
1052 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 954 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
1053 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 955 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
1054 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | 956 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); |
1055 | } | 957 | } |
1056 | 958 | ||
1057 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 959 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
1058 | } | 960 | } |
1059 | 961 | ||
1060 | |||
1061 | /*---------------------------------------------------------------------------*\ | 962 | /*---------------------------------------------------------------------------*\ |
1062 | 963 | ||
1063 | FUNCTION....: codec2_decode_1400 | 964 | FUNCTION....: codec2_decode_1400 |
@@ -1068,83 +969,81 @@ void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
1068 | 969 | ||
1069 | \*---------------------------------------------------------------------------*/ | 970 | \*---------------------------------------------------------------------------*/ |
1070 | 971 | ||
1071 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 972 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], |
1072 | { | 973 | const unsigned char *bits) { |
1073 | MODEL model[4]; | 974 | MODEL model[4]; |
1074 | int lsp_indexes[LPC_ORD]; | 975 | int lsp_indexes[LPC_ORD]; |
1075 | float lsps[4][LPC_ORD]; | 976 | float lsps[4][LPC_ORD]; |
1076 | int WoE_index; | 977 | int WoE_index; |
1077 | float e[4]; | 978 | float e[4]; |
1078 | float snr; | 979 | float snr; |
1079 | float ak[4][LPC_ORD+1]; | 980 | float ak[4][LPC_ORD + 1]; |
1080 | int i,j; | 981 | int i, j; |
1081 | unsigned int nbit = 0; | 982 | unsigned int nbit = 0; |
1082 | float weight; | 983 | float weight; |
1083 | COMP Aw[FFT_ENC]; | 984 | COMP Aw[FFT_ENC]; |
1084 | 985 | ||
1085 | assert(c2 != NULL); | 986 | assert(c2 != NULL); |
1086 | 987 | ||
1087 | /* only need to zero these out due to (unused) snr calculation */ | 988 | /* only need to zero these out due to (unused) snr calculation */ |
1088 | 989 | ||
1089 | for(i=0; i<4; i++) | 990 | for (i = 0; i < 4; i++) |
1090 | for(j=1; j<=MAX_AMP; j++) | 991 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
1091 | model[i].A[j] = 0.0; | 992 | |
1092 | 993 | /* unpack bits from channel ------------------------------------*/ | |
1093 | /* unpack bits from channel ------------------------------------*/ | 994 | |
1094 | 995 | /* this will partially fill the model params for the 4 x 10ms | |
1095 | /* this will partially fill the model params for the 4 x 10ms | 996 | frames */ |
1096 | frames */ | 997 | |
1097 | 998 | model[0].voiced = unpack(bits, &nbit, 1); | |
1098 | model[0].voiced = unpack(bits, &nbit, 1); | 999 | |
1099 | 1000 | model[1].voiced = unpack(bits, &nbit, 1); | |
1100 | model[1].voiced = unpack(bits, &nbit, 1); | 1001 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
1101 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1002 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); |
1102 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | 1003 | |
1103 | 1004 | model[2].voiced = unpack(bits, &nbit, 1); | |
1104 | model[2].voiced = unpack(bits, &nbit, 1); | 1005 | |
1105 | 1006 | model[3].voiced = unpack(bits, &nbit, 1); | |
1106 | model[3].voiced = unpack(bits, &nbit, 1); | 1007 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
1107 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1008 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); |
1108 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); | 1009 | |
1109 | 1010 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { | |
1110 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 1011 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); |
1111 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | 1012 | } |
1112 | } | 1013 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); |
1113 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | 1014 | check_lsp_order(&lsps[3][0], LPC_ORD); |
1114 | check_lsp_order(&lsps[3][0], LPC_ORD); | 1015 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); |
1115 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 1016 | |
1116 | 1017 | /* interpolate ------------------------------------------------*/ | |
1117 | /* interpolate ------------------------------------------------*/ | 1018 | |
1118 | 1019 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | |
1119 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 1020 | 10ms frame between 20ms samples */ |
1120 | 10ms frame between 20ms samples */ | 1021 | |
1121 | 1022 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | |
1122 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 1023 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
1123 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 1024 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); |
1124 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | 1025 | e[2] = interp_energy(e[1], e[3]); |
1125 | e[2] = interp_energy(e[1], e[3]); | 1026 | |
1126 | 1027 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | |
1127 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | 1028 | between, then recover spectral amplitudes */ |
1128 | between, then recover spectral amplitudes */ | 1029 | |
1129 | 1030 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | |
1130 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | 1031 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, |
1131 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | 1032 | LPC_ORD); |
1132 | } | 1033 | } |
1133 | for(i=0; i<4; i++) { | 1034 | for (i = 0; i < 4; i++) { |
1134 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 1035 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
1135 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 1036 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
1136 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 1037 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
1137 | apply_lpc_correction(&model[i]); | 1038 | apply_lpc_correction(&model[i]); |
1138 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 1039 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
1139 | } | 1040 | } |
1140 | 1041 | ||
1141 | /* update memories for next frame ----------------------------*/ | 1042 | /* update memories for next frame ----------------------------*/ |
1142 | 1043 | ||
1143 | c2->prev_model_dec = model[3]; | 1044 | c2->prev_model_dec = model[3]; |
1144 | c2->prev_e_dec = e[3]; | 1045 | c2->prev_e_dec = e[3]; |
1145 | for(i=0; i<LPC_ORD; i++) | 1046 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; |
1146 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1147 | |||
1148 | } | 1047 | } |
1149 | 1048 | ||
1150 | /*---------------------------------------------------------------------------*\ | 1049 | /*---------------------------------------------------------------------------*\ |
@@ -1175,66 +1074,56 @@ void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1175 | 1074 | ||
1176 | \*---------------------------------------------------------------------------*/ | 1075 | \*---------------------------------------------------------------------------*/ |
1177 | 1076 | ||
1178 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 1077 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char *bits, |
1179 | { | 1078 | short speech[]) { |
1180 | MODEL model; | 1079 | MODEL model; |
1181 | float lsps[LPC_ORD]; | 1080 | float lsps[LPC_ORD]; |
1182 | float ak[LPC_ORD+1]; | 1081 | float ak[LPC_ORD + 1]; |
1183 | float e; | 1082 | float e; |
1184 | int lsp_indexes[LPC_ORD]; | 1083 | int lsp_indexes[LPC_ORD]; |
1185 | int Wo_index, e_index; | 1084 | int Wo_index, e_index; |
1186 | int i; | 1085 | int i; |
1187 | unsigned int nbit = 0; | 1086 | unsigned int nbit = 0; |
1188 | //#ifdef PROFILE | ||
1189 | //unsigned int quant_start; | ||
1190 | //#endif | ||
1191 | 1087 | ||
1192 | assert(c2 != NULL); | 1088 | assert(c2 != NULL); |
1193 | 1089 | ||
1194 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 1090 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
1195 | 1091 | ||
1196 | /* frame 1: - voicing ---------------------------------------------*/ | 1092 | /* frame 1: - voicing ---------------------------------------------*/ |
1197 | 1093 | ||
1198 | analyse_one_frame(c2, &model, speech); | 1094 | analyse_one_frame(c2, &model, speech); |
1199 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1095 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
1200 | 1096 | ||
1201 | /* frame 2: - voicing ---------------------------------------------*/ | 1097 | /* frame 2: - voicing ---------------------------------------------*/ |
1202 | 1098 | ||
1203 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 1099 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
1204 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1100 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
1205 | 1101 | ||
1206 | /* frame 3: - voicing ---------------------------------------------*/ | 1102 | /* frame 3: - voicing ---------------------------------------------*/ |
1207 | 1103 | ||
1208 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 1104 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
1209 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1105 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
1210 | 1106 | ||
1211 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ | 1107 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ |
1212 | 1108 | ||
1213 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 1109 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
1214 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1110 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
1215 | 1111 | ||
1216 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 1112 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
1217 | pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); | 1113 | pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); |
1218 | 1114 | ||
1219 | //#ifdef PROFILE | 1115 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
1220 | //quant_start = machdep_profile_sample(); | 1116 | e_index = encode_energy(e, E_BITS); |
1221 | //#endif | 1117 | pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray); |
1222 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
1223 | e_index = encode_energy(e, E_BITS); | ||
1224 | pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray); | ||
1225 | 1118 | ||
1226 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 1119 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
1227 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 1120 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
1228 | pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray); | 1121 | pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray); |
1229 | } | 1122 | } |
1230 | //#ifdef PROFILE | ||
1231 | //machdep_profile_sample_and_log(quant_start, " quant/packing"); | ||
1232 | //#endif | ||
1233 | 1123 | ||
1234 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 1124 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
1235 | } | 1125 | } |
1236 | 1126 | ||
1237 | |||
1238 | /*---------------------------------------------------------------------------*\ | 1127 | /*---------------------------------------------------------------------------*\ |
1239 | 1128 | ||
1240 | FUNCTION....: codec2_decode_1300 | 1129 | FUNCTION....: codec2_decode_1300 |
@@ -1244,118 +1133,106 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
1244 | Decodes frames of 52 bits into 320 samples (40ms) of speech. | 1133 | Decodes frames of 52 bits into 320 samples (40ms) of speech. |
1245 | 1134 | ||
1246 | \*---------------------------------------------------------------------------*/ | 1135 | \*---------------------------------------------------------------------------*/ |
1247 | static int frames; | ||
1248 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est) | ||
1249 | { | ||
1250 | MODEL model[4]; | ||
1251 | int lsp_indexes[LPC_ORD]; | ||
1252 | float lsps[4][LPC_ORD]; | ||
1253 | int Wo_index, e_index; | ||
1254 | float e[4]; | ||
1255 | float snr; | ||
1256 | float ak[4][LPC_ORD+1]; | ||
1257 | int i,j; | ||
1258 | unsigned int nbit = 0; | ||
1259 | float weight; | ||
1260 | COMP Aw[FFT_ENC]; | ||
1261 | //PROFILE_VAR(recover_start); | ||
1262 | |||
1263 | assert(c2 != NULL); | ||
1264 | frames+= 4; | ||
1265 | /* only need to zero these out due to (unused) snr calculation */ | ||
1266 | |||
1267 | for(i=0; i<4; i++) | ||
1268 | for(j=1; j<=MAX_AMP; j++) | ||
1269 | model[i].A[j] = 0.0; | ||
1270 | |||
1271 | /* unpack bits from channel ------------------------------------*/ | ||
1272 | |||
1273 | /* this will partially fill the model params for the 4 x 10ms | ||
1274 | frames */ | ||
1275 | |||
1276 | model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1277 | model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1278 | model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1279 | model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1280 | |||
1281 | Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray); | ||
1282 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
1283 | model[3].L = PI/model[3].Wo; | ||
1284 | 1136 | ||
1285 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | 1137 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], |
1286 | e[3] = decode_energy(e_index, E_BITS); | 1138 | const unsigned char *bits, float ber_est) { |
1287 | //fprintf(stderr, "%d %f\n", e_index, e[3]); | 1139 | MODEL model[4]; |
1140 | int lsp_indexes[LPC_ORD]; | ||
1141 | float lsps[4][LPC_ORD]; | ||
1142 | int Wo_index, e_index; | ||
1143 | float e[4]; | ||
1144 | float snr; | ||
1145 | float ak[4][LPC_ORD + 1]; | ||
1146 | int i, j; | ||
1147 | unsigned int nbit = 0; | ||
1148 | float weight; | ||
1149 | COMP Aw[FFT_ENC]; | ||
1150 | |||
1151 | assert(c2 != NULL); | ||
1152 | |||
1153 | /* only need to zero these out due to (unused) snr calculation */ | ||
1154 | |||
1155 | for (i = 0; i < 4; i++) | ||
1156 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; | ||
1157 | |||
1158 | /* unpack bits from channel ------------------------------------*/ | ||
1159 | |||
1160 | /* this will partially fill the model params for the 4 x 10ms | ||
1161 | frames */ | ||
1162 | |||
1163 | model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1164 | model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1165 | model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1166 | model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
1167 | |||
1168 | Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray); | ||
1169 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
1170 | model[3].L = PI / model[3].Wo; | ||
1171 | |||
1172 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | ||
1173 | e[3] = decode_energy(e_index, E_BITS); | ||
1174 | |||
1175 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { | ||
1176 | lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray); | ||
1177 | } | ||
1178 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | ||
1179 | check_lsp_order(&lsps[3][0], LPC_ORD); | ||
1180 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | ||
1181 | |||
1182 | if (ber_est > 0.15) { | ||
1183 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0; | ||
1184 | e[3] = decode_energy(10, E_BITS); | ||
1185 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0); | ||
1186 | // fprintf(stderr, "soft mute\n"); | ||
1187 | } | ||
1188 | |||
1189 | /* interpolate ------------------------------------------------*/ | ||
1190 | |||
1191 | /* Wo, energy, and LSPs are sampled every 40ms so we interpolate | ||
1192 | the 3 frames in between */ | ||
1193 | |||
1194 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | ||
1195 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, | ||
1196 | LPC_ORD); | ||
1197 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, | ||
1198 | c2->c2const.Wo_min); | ||
1199 | e[i] = interp_energy2(c2->prev_e_dec, e[3], weight); | ||
1200 | } | ||
1201 | |||
1202 | /* then recover spectral amplitudes */ | ||
1203 | |||
1204 | for (i = 0; i < 4; i++) { | ||
1205 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
1206 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
1207 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1208 | apply_lpc_correction(&model[i]); | ||
1209 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); | ||
1210 | |||
1211 | /* dump parameters for deep learning experiments */ | ||
1288 | 1212 | ||
1289 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 1213 | if (c2->fmlfeat != NULL) { |
1290 | lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray); | 1214 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ |
1291 | } | 1215 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); |
1292 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | 1216 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); |
1293 | check_lsp_order(&lsps[3][0], LPC_ORD); | 1217 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); |
1294 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 1218 | float voiced_float = model[i].voiced; |
1295 | 1219 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | |
1296 | if (ber_est > 0.15) { | 1220 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); |
1297 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0; | 1221 | } |
1298 | e[3] = decode_energy(10, E_BITS); | 1222 | } |
1299 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0); | 1223 | |
1300 | //fprintf(stderr, "soft mute\n"); | 1224 | #ifdef DUMP |
1301 | } | 1225 | dump_lsp_(&lsps[3][0]); |
1302 | 1226 | dump_ak_(&ak[3][0], LPC_ORD); | |
1303 | /* interpolate ------------------------------------------------*/ | 1227 | #endif |
1304 | |||
1305 | /* Wo, energy, and LSPs are sampled every 40ms so we interpolate | ||
1306 | the 3 frames in between */ | ||
1307 | |||
1308 | //PROFILE_SAMPLE(recover_start); | ||
1309 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1310 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | ||
1311 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
1312 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
1313 | } | ||
1314 | 1228 | ||
1315 | /* then recover spectral amplitudes */ | 1229 | /* update memories for next frame ----------------------------*/ |
1316 | |||
1317 | for(i=0; i<4; i++) { | ||
1318 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
1319 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
1320 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1321 | apply_lpc_correction(&model[i]); | ||
1322 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1323 | |||
1324 | /* dump parameters for deep learning experiments */ | ||
1325 | |||
1326 | if (c2->fmlfeat != NULL) { | ||
1327 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ | ||
1328 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
1329 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); | ||
1330 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); | ||
1331 | float voiced_float = model[i].voiced; | ||
1332 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | ||
1333 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
1334 | } | ||
1335 | } | ||
1336 | /* | ||
1337 | for(i=0; i<4; i++) { | ||
1338 | printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced); | ||
1339 | } | ||
1340 | if (frames == 4*50) | ||
1341 | exit(0); | ||
1342 | */ | ||
1343 | //PROFILE_SAMPLE_AND_LOG2(recover_start, " recover"); | ||
1344 | #ifdef DUMP | ||
1345 | dump_lsp_(&lsps[3][0]); | ||
1346 | dump_ak_(&ak[3][0], LPC_ORD); | ||
1347 | #endif | ||
1348 | |||
1349 | /* update memories for next frame ----------------------------*/ | ||
1350 | |||
1351 | c2->prev_model_dec = model[3]; | ||
1352 | c2->prev_e_dec = e[3]; | ||
1353 | for(i=0; i<LPC_ORD; i++) | ||
1354 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1355 | 1230 | ||
1231 | c2->prev_model_dec = model[3]; | ||
1232 | c2->prev_e_dec = e[3]; | ||
1233 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1356 | } | 1234 | } |
1357 | 1235 | ||
1358 | |||
1359 | /*---------------------------------------------------------------------------*\ | 1236 | /*---------------------------------------------------------------------------*\ |
1360 | 1237 | ||
1361 | FUNCTION....: codec2_encode_1200 | 1238 | FUNCTION....: codec2_encode_1200 |
@@ -1384,63 +1261,62 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1384 | 1261 | ||
1385 | \*---------------------------------------------------------------------------*/ | 1262 | \*---------------------------------------------------------------------------*/ |
1386 | 1263 | ||
1387 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 1264 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char *bits, |
1388 | { | 1265 | short speech[]) { |
1389 | MODEL model; | 1266 | MODEL model; |
1390 | float lsps[LPC_ORD]; | 1267 | float lsps[LPC_ORD]; |
1391 | float lsps_[LPC_ORD]; | 1268 | float lsps_[LPC_ORD]; |
1392 | float ak[LPC_ORD+1]; | 1269 | float ak[LPC_ORD + 1]; |
1393 | float e; | 1270 | float e; |
1394 | int lsp_indexes[LPC_ORD]; | 1271 | int lsp_indexes[LPC_ORD]; |
1395 | int WoE_index; | 1272 | int WoE_index; |
1396 | int i; | 1273 | int i; |
1397 | int spare = 0; | 1274 | int spare = 0; |
1398 | unsigned int nbit = 0; | 1275 | unsigned int nbit = 0; |
1399 | 1276 | ||
1400 | assert(c2 != NULL); | 1277 | assert(c2 != NULL); |
1401 | 1278 | ||
1402 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 1279 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
1403 | 1280 | ||
1404 | /* frame 1: - voicing ---------------------------------------------*/ | 1281 | /* frame 1: - voicing ---------------------------------------------*/ |
1405 | 1282 | ||
1406 | analyse_one_frame(c2, &model, speech); | 1283 | analyse_one_frame(c2, &model, speech); |
1407 | pack(bits, &nbit, model.voiced, 1); | 1284 | pack(bits, &nbit, model.voiced, 1); |
1408 | 1285 | ||
1409 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ | 1286 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ |
1410 | 1287 | ||
1411 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 1288 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
1412 | pack(bits, &nbit, model.voiced, 1); | 1289 | pack(bits, &nbit, model.voiced, 1); |
1413 | 1290 | ||
1414 | /* need to run this just to get LPC energy */ | 1291 | /* need to run this just to get LPC energy */ |
1415 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 1292 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
1416 | 1293 | ||
1417 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 1294 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
1418 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 1295 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
1419 | 1296 | ||
1420 | /* frame 3: - voicing ---------------------------------------------*/ | 1297 | /* frame 3: - voicing ---------------------------------------------*/ |
1421 | 1298 | ||
1422 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 1299 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
1423 | pack(bits, &nbit, model.voiced, 1); | 1300 | pack(bits, &nbit, model.voiced, 1); |
1424 | 1301 | ||
1425 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ | 1302 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ |
1426 | 1303 | ||
1427 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 1304 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
1428 | pack(bits, &nbit, model.voiced, 1); | 1305 | pack(bits, &nbit, model.voiced, 1); |
1429 | 1306 | ||
1430 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 1307 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
1431 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 1308 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
1432 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 1309 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
1433 | 1310 | ||
1434 | encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); | 1311 | encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); |
1435 | for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { | 1312 | for (i = 0; i < LSP_PRED_VQ_INDEXES; i++) { |
1436 | pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i)); | 1313 | pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i)); |
1437 | } | 1314 | } |
1438 | pack(bits, &nbit, spare, 1); | 1315 | pack(bits, &nbit, spare, 1); |
1439 | 1316 | ||
1440 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 1317 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
1441 | } | 1318 | } |
1442 | 1319 | ||
1443 | |||
1444 | /*---------------------------------------------------------------------------*\ | 1320 | /*---------------------------------------------------------------------------*\ |
1445 | 1321 | ||
1446 | FUNCTION....: codec2_decode_1200 | 1322 | FUNCTION....: codec2_decode_1200 |
@@ -1451,494 +1327,83 @@ void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
1451 | 1327 | ||
1452 | \*---------------------------------------------------------------------------*/ | 1328 | \*---------------------------------------------------------------------------*/ |
1453 | 1329 | ||
1454 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 1330 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], |
1455 | { | 1331 | const unsigned char *bits) { |
1456 | MODEL model[4]; | 1332 | MODEL model[4]; |
1457 | int lsp_indexes[LPC_ORD]; | 1333 | int lsp_indexes[LPC_ORD]; |
1458 | float lsps[4][LPC_ORD]; | 1334 | float lsps[4][LPC_ORD]; |
1459 | int WoE_index; | 1335 | int WoE_index; |
1460 | float e[4]; | 1336 | float e[4]; |
1461 | float snr; | 1337 | float snr; |
1462 | float ak[4][LPC_ORD+1]; | 1338 | float ak[4][LPC_ORD + 1]; |
1463 | int i,j; | 1339 | int i, j; |
1464 | unsigned int nbit = 0; | 1340 | unsigned int nbit = 0; |
1465 | float weight; | 1341 | float weight; |
1466 | COMP Aw[FFT_ENC]; | 1342 | COMP Aw[FFT_ENC]; |
1467 | 1343 | ||
1468 | assert(c2 != NULL); | 1344 | assert(c2 != NULL); |
1469 | 1345 | ||
1470 | /* only need to zero these out due to (unused) snr calculation */ | 1346 | /* only need to zero these out due to (unused) snr calculation */ |
1471 | 1347 | ||
1472 | for(i=0; i<4; i++) | 1348 | for (i = 0; i < 4; i++) |
1473 | for(j=1; j<=MAX_AMP; j++) | 1349 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
1474 | model[i].A[j] = 0.0; | 1350 | |
1475 | 1351 | /* unpack bits from channel ------------------------------------*/ | |
1476 | /* unpack bits from channel ------------------------------------*/ | 1352 | |
1477 | 1353 | /* this will partially fill the model params for the 4 x 10ms | |
1478 | /* this will partially fill the model params for the 4 x 10ms | 1354 | frames */ |
1479 | frames */ | 1355 | |
1480 | 1356 | model[0].voiced = unpack(bits, &nbit, 1); | |
1481 | model[0].voiced = unpack(bits, &nbit, 1); | 1357 | |
1482 | 1358 | model[1].voiced = unpack(bits, &nbit, 1); | |
1483 | model[1].voiced = unpack(bits, &nbit, 1); | 1359 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
1484 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1360 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); |
1485 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | 1361 | |
1486 | 1362 | model[2].voiced = unpack(bits, &nbit, 1); | |
1487 | model[2].voiced = unpack(bits, &nbit, 1); | 1363 | |
1488 | 1364 | model[3].voiced = unpack(bits, &nbit, 1); | |
1489 | model[3].voiced = unpack(bits, &nbit, 1); | 1365 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
1490 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1366 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); |
1491 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); | 1367 | |
1492 | 1368 | for (i = 0; i < LSP_PRED_VQ_INDEXES; i++) { | |
1493 | for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { | 1369 | lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i)); |
1494 | lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i)); | 1370 | } |
1495 | } | 1371 | decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD, 0); |
1496 | decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD , 0); | 1372 | check_lsp_order(&lsps[3][0], LPC_ORD); |
1497 | check_lsp_order(&lsps[3][0], LPC_ORD); | 1373 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); |
1498 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 1374 | |
1499 | 1375 | /* interpolate ------------------------------------------------*/ | |
1500 | /* interpolate ------------------------------------------------*/ | 1376 | |
1501 | 1377 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | |
1502 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 1378 | 10ms frame between 20ms samples */ |
1503 | 10ms frame between 20ms samples */ | 1379 | |
1504 | 1380 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | |
1505 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 1381 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
1506 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 1382 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); |
1507 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | 1383 | e[2] = interp_energy(e[1], e[3]); |
1508 | e[2] = interp_energy(e[1], e[3]); | 1384 | |
1509 | 1385 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | |
1510 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | 1386 | between, then recover spectral amplitudes */ |
1511 | between, then recover spectral amplitudes */ | 1387 | |
1512 | 1388 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | |
1513 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | 1389 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, |
1514 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | 1390 | LPC_ORD); |
1515 | } | 1391 | } |
1516 | for(i=0; i<4; i++) { | 1392 | for (i = 0; i < 4; i++) { |
1517 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 1393 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
1518 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 1394 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
1519 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 1395 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
1520 | apply_lpc_correction(&model[i]); | 1396 | apply_lpc_correction(&model[i]); |
1521 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 1397 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
1522 | } | 1398 | } |
1523 | 1399 | ||
1524 | /* update memories for next frame ----------------------------*/ | 1400 | /* update memories for next frame ----------------------------*/ |
1525 | 1401 | ||
1526 | c2->prev_model_dec = model[3]; | 1402 | c2->prev_model_dec = model[3]; |
1527 | c2->prev_e_dec = e[3]; | 1403 | c2->prev_e_dec = e[3]; |
1528 | for(i=0; i<LPC_ORD; i++) | 1404 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; |
1529 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1530 | } | 1405 | } |
1531 | 1406 | ||
1532 | |||
1533 | /*---------------------------------------------------------------------------*\ | ||
1534 | |||
1535 | FUNCTION....: codec2_encode_700 | ||
1536 | AUTHOR......: David Rowe | ||
1537 | DATE CREATED: April 2015 | ||
1538 | |||
1539 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
1540 | |||
1541 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1542 | sample) frames, so we run the encoding algorithm four times: | ||
1543 | |||
1544 | frame 0: nothing | ||
1545 | frame 1: nothing | ||
1546 | frame 2: nothing | ||
1547 | frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar, 2 spare | ||
1548 | |||
1549 | The bit allocation is: | ||
1550 | |||
1551 | Parameter frames 1-3 frame 4 Total | ||
1552 | ----------------------------------------------------------- | ||
1553 | Harmonic magnitudes (LSPs) 0 17 17 | ||
1554 | Energy 0 3 3 | ||
1555 | log Wo 0 5 5 | ||
1556 | Voicing 0 1 1 | ||
1557 | spare 0 2 2 | ||
1558 | TOTAL 0 28 28 | ||
1559 | |||
1560 | \*---------------------------------------------------------------------------*/ | ||
1561 | |||
1562 | void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1563 | { | ||
1564 | MODEL model; | ||
1565 | float lsps[LPC_ORD_LOW]; | ||
1566 | float mel[LPC_ORD_LOW]; | ||
1567 | float ak[LPC_ORD_LOW+1]; | ||
1568 | float e, f; | ||
1569 | int indexes[LPC_ORD_LOW]; | ||
1570 | int Wo_index, e_index, i; | ||
1571 | unsigned int nbit = 0; | ||
1572 | float bpf_out[4*c2->n_samp]; | ||
1573 | short bpf_speech[4*c2->n_samp]; | ||
1574 | int spare = 0; | ||
1575 | |||
1576 | assert(c2 != NULL); | ||
1577 | |||
1578 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1579 | |||
1580 | /* band pass filter */ | ||
1581 | |||
1582 | for(i=0; i<BPF_N; i++) | ||
1583 | c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i]; | ||
1584 | for(i=0; i<4*c2->n_samp; i++) | ||
1585 | c2->bpf_buf[BPF_N+i] = speech[i]; | ||
1586 | inverse_filter(&c2->bpf_buf[BPF_N], bpf, 4*c2->n_samp, bpf_out, BPF_N-1); | ||
1587 | for(i=0; i<4*c2->n_samp; i++) | ||
1588 | bpf_speech[i] = bpf_out[i]; | ||
1589 | |||
1590 | /* frame 1 --------------------------------------------------------*/ | ||
1591 | |||
1592 | analyse_one_frame(c2, &model, bpf_speech); | ||
1593 | |||
1594 | /* frame 2 --------------------------------------------------------*/ | ||
1595 | |||
1596 | analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]); | ||
1597 | |||
1598 | /* frame 3 --------------------------------------------------------*/ | ||
1599 | |||
1600 | analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]); | ||
1601 | |||
1602 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs -----------------*/ | ||
1603 | |||
1604 | analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); | ||
1605 | pack(bits, &nbit, model.voiced, 1); | ||
1606 | Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); | ||
1607 | pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); | ||
1608 | |||
1609 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); | ||
1610 | e_index = encode_energy(e, 3); | ||
1611 | pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); | ||
1612 | |||
1613 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1614 | f = (4000.0/PI)*lsps[i]; | ||
1615 | mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); | ||
1616 | } | ||
1617 | encode_mels_scalar(indexes, mel, LPC_ORD_LOW); | ||
1618 | |||
1619 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1620 | pack_natural_or_gray(bits, &nbit, indexes[i], mel_bits(i), c2->gray); | ||
1621 | } | ||
1622 | |||
1623 | pack_natural_or_gray(bits, &nbit, spare, 2, c2->gray); | ||
1624 | |||
1625 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1626 | } | ||
1627 | |||
1628 | |||
1629 | /*---------------------------------------------------------------------------*\ | ||
1630 | |||
1631 | FUNCTION....: codec2_decode_700 | ||
1632 | AUTHOR......: David Rowe | ||
1633 | DATE CREATED: April 2015 | ||
1634 | |||
1635 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
1636 | |||
1637 | \*---------------------------------------------------------------------------*/ | ||
1638 | |||
1639 | void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
1640 | { | ||
1641 | MODEL model[4]; | ||
1642 | int indexes[LPC_ORD_LOW]; | ||
1643 | float mel[LPC_ORD_LOW]; | ||
1644 | float lsps[4][LPC_ORD_LOW]; | ||
1645 | int Wo_index, e_index; | ||
1646 | float e[4]; | ||
1647 | float snr, f_; | ||
1648 | float ak[4][LPC_ORD_LOW+1]; | ||
1649 | int i,j; | ||
1650 | unsigned int nbit = 0; | ||
1651 | float weight; | ||
1652 | COMP Aw[FFT_ENC]; | ||
1653 | |||
1654 | assert(c2 != NULL); | ||
1655 | |||
1656 | /* only need to zero these out due to (unused) snr calculation */ | ||
1657 | |||
1658 | for(i=0; i<4; i++) | ||
1659 | for(j=1; j<=MAX_AMP; j++) | ||
1660 | model[i].A[j] = 0.0; | ||
1661 | |||
1662 | /* unpack bits from channel ------------------------------------*/ | ||
1663 | |||
1664 | model[3].voiced = unpack(bits, &nbit, 1); | ||
1665 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; | ||
1666 | |||
1667 | Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); | ||
1668 | model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); | ||
1669 | model[3].L = PI/model[3].Wo; | ||
1670 | |||
1671 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
1672 | e[3] = decode_energy(e_index, 3); | ||
1673 | |||
1674 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1675 | indexes[i] = unpack_natural_or_gray(bits, &nbit, mel_bits(i), c2->gray); | ||
1676 | } | ||
1677 | |||
1678 | decode_mels_scalar(mel, indexes, LPC_ORD_LOW); | ||
1679 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1680 | f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); | ||
1681 | lsps[3][i] = f_*(PI/4000.0); | ||
1682 | //printf("lsps[3][%d] %f\n", i, lsps[3][i]); | ||
1683 | } | ||
1684 | |||
1685 | check_lsp_order(&lsps[3][0], LPC_ORD_LOW); | ||
1686 | bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0); | ||
1687 | |||
1688 | #ifdef MASK_NOT_FOR_NOW | ||
1689 | /* first pass at soft decn error masking, needs further work */ | ||
1690 | /* If soft dec info available expand further for low power frames */ | ||
1691 | |||
1692 | if (c2->softdec) { | ||
1693 | float e = 0.0; | ||
1694 | for(i=9; i<9+17; i++) | ||
1695 | e += c2->softdec[i]*c2->softdec[i]; | ||
1696 | e /= 6.0; | ||
1697 | //fprintf(stderr, "e: %f\n", e); | ||
1698 | //if (e < 0.3) | ||
1699 | // bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 150.0, 300.0); | ||
1700 | } | ||
1701 | #endif | ||
1702 | |||
1703 | /* interpolate ------------------------------------------------*/ | ||
1704 | |||
1705 | /* LSPs, Wo, and energy are sampled every 40ms so we interpolate | ||
1706 | the 3 frames in between, then recover spectral amplitudes */ | ||
1707 | |||
1708 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1709 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); | ||
1710 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
1711 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
1712 | } | ||
1713 | for(i=0; i<4; i++) { | ||
1714 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); | ||
1715 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, | ||
1716 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1717 | apply_lpc_correction(&model[i]); | ||
1718 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1719 | } | ||
1720 | |||
1721 | #ifdef DUMP | ||
1722 | dump_lsp_(&lsps[3][0]); | ||
1723 | dump_ak_(&ak[3][0], LPC_ORD_LOW); | ||
1724 | dump_model(&model[3]); | ||
1725 | if (c2->softdec) | ||
1726 | dump_softdec(c2->softdec, nbit); | ||
1727 | #endif | ||
1728 | |||
1729 | /* update memories for next frame ----------------------------*/ | ||
1730 | |||
1731 | c2->prev_model_dec = model[3]; | ||
1732 | c2->prev_e_dec = e[3]; | ||
1733 | for(i=0; i<LPC_ORD_LOW; i++) | ||
1734 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1735 | } | ||
1736 | |||
1737 | |||
1738 | /*---------------------------------------------------------------------------*\ | ||
1739 | |||
1740 | FUNCTION....: codec2_encode_700b | ||
1741 | AUTHOR......: David Rowe | ||
1742 | DATE CREATED: August 2015 | ||
1743 | |||
1744 | Version b of 700 bit/s codec. After some experiments over the air I | ||
1745 | wanted was unhappy with the rate 700 codec so spent a few weeks | ||
1746 | trying to improve the speech quality. This version uses a wider BPF | ||
1747 | and vector quantised mel-lsps. | ||
1748 | |||
1749 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
1750 | |||
1751 | The codec2 algorithm actually operates internally on 10ms (80 | ||
1752 | sample) frames, so we run the encoding algorithm four times: | ||
1753 | |||
1754 | frame 0: nothing | ||
1755 | frame 1: nothing | ||
1756 | frame 2: nothing | ||
1757 | frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ, | ||
1758 | 1 spare | ||
1759 | |||
1760 | The bit allocation is: | ||
1761 | |||
1762 | Parameter frames 1-3 frame 4 Total | ||
1763 | ----------------------------------------------------------- | ||
1764 | Harmonic magnitudes (LSPs) 0 18 18 | ||
1765 | Energy 0 3 3 | ||
1766 | log Wo 0 5 5 | ||
1767 | Voicing 0 1 1 | ||
1768 | spare 0 1 1 | ||
1769 | TOTAL 0 28 28 | ||
1770 | |||
1771 | \*---------------------------------------------------------------------------*/ | ||
1772 | |||
1773 | void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
1774 | { | ||
1775 | MODEL model; | ||
1776 | float lsps[LPC_ORD_LOW]; | ||
1777 | float mel[LPC_ORD_LOW]; | ||
1778 | float mel_[LPC_ORD_LOW]; | ||
1779 | float ak[LPC_ORD_LOW+1]; | ||
1780 | float e, f; | ||
1781 | int indexes[3]; | ||
1782 | int Wo_index, e_index, i; | ||
1783 | unsigned int nbit = 0; | ||
1784 | float bpf_out[4*c2->n_samp]; | ||
1785 | short bpf_speech[4*c2->n_samp]; | ||
1786 | int spare = 0; | ||
1787 | |||
1788 | assert(c2 != NULL); | ||
1789 | |||
1790 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
1791 | |||
1792 | /* band pass filter */ | ||
1793 | |||
1794 | for(i=0; i<BPF_N; i++) | ||
1795 | c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i]; | ||
1796 | for(i=0; i<4*c2->n_samp; i++) | ||
1797 | c2->bpf_buf[BPF_N+i] = speech[i]; | ||
1798 | inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*c2->n_samp, bpf_out, BPF_N-1); | ||
1799 | for(i=0; i<4*c2->n_samp; i++) | ||
1800 | bpf_speech[i] = bpf_out[i]; | ||
1801 | |||
1802 | /* frame 1 --------------------------------------------------------*/ | ||
1803 | |||
1804 | analyse_one_frame(c2, &model, bpf_speech); | ||
1805 | |||
1806 | /* frame 2 --------------------------------------------------------*/ | ||
1807 | |||
1808 | analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]); | ||
1809 | |||
1810 | /* frame 3 --------------------------------------------------------*/ | ||
1811 | |||
1812 | analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]); | ||
1813 | |||
1814 | /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/ | ||
1815 | |||
1816 | analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); | ||
1817 | pack(bits, &nbit, model.voiced, 1); | ||
1818 | Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); | ||
1819 | pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); | ||
1820 | |||
1821 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); | ||
1822 | e_index = encode_energy(e, 3); | ||
1823 | pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); | ||
1824 | |||
1825 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1826 | f = (4000.0/PI)*lsps[i]; | ||
1827 | mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); | ||
1828 | } | ||
1829 | lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5); | ||
1830 | |||
1831 | for(i=0; i<3; i++) { | ||
1832 | pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray); | ||
1833 | } | ||
1834 | |||
1835 | pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray); | ||
1836 | |||
1837 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1838 | } | ||
1839 | |||
1840 | |||
1841 | /*---------------------------------------------------------------------------*\ | ||
1842 | |||
1843 | FUNCTION....: codec2_decode_700b | ||
1844 | AUTHOR......: David Rowe | ||
1845 | DATE CREATED: August 2015 | ||
1846 | |||
1847 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
1848 | |||
1849 | \*---------------------------------------------------------------------------*/ | ||
1850 | |||
1851 | void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
1852 | { | ||
1853 | MODEL model[4]; | ||
1854 | int indexes[3]; | ||
1855 | float mel[LPC_ORD_LOW]; | ||
1856 | float lsps[4][LPC_ORD_LOW]; | ||
1857 | int Wo_index, e_index; | ||
1858 | float e[4]; | ||
1859 | float snr, f_; | ||
1860 | float ak[4][LPC_ORD_LOW+1]; | ||
1861 | int i,j; | ||
1862 | unsigned int nbit = 0; | ||
1863 | float weight; | ||
1864 | COMP Aw[FFT_ENC]; | ||
1865 | |||
1866 | assert(c2 != NULL); | ||
1867 | |||
1868 | /* only need to zero these out due to (unused) snr calculation */ | ||
1869 | |||
1870 | for(i=0; i<4; i++) | ||
1871 | for(j=1; j<=MAX_AMP; j++) | ||
1872 | model[i].A[j] = 0.0; | ||
1873 | |||
1874 | /* unpack bits from channel ------------------------------------*/ | ||
1875 | |||
1876 | model[3].voiced = unpack(bits, &nbit, 1); | ||
1877 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; | ||
1878 | |||
1879 | Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); | ||
1880 | model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); | ||
1881 | model[3].L = PI/model[3].Wo; | ||
1882 | |||
1883 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
1884 | e[3] = decode_energy(e_index, 3); | ||
1885 | |||
1886 | for(i=0; i<3; i++) { | ||
1887 | indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray); | ||
1888 | } | ||
1889 | |||
1890 | lspmelvq_decode(indexes, mel, LPC_ORD_LOW); | ||
1891 | |||
1892 | #define MEL_ROUND 10 | ||
1893 | for(i=1; i<LPC_ORD_LOW; i++) { | ||
1894 | if (mel[i] <= mel[i-1]+MEL_ROUND) { | ||
1895 | mel[i]+=MEL_ROUND/2; | ||
1896 | mel[i-1]-=MEL_ROUND/2; | ||
1897 | i = 1; | ||
1898 | } | ||
1899 | } | ||
1900 | |||
1901 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
1902 | f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); | ||
1903 | lsps[3][i] = f_*(PI/4000.0); | ||
1904 | //printf("lsps[3][%d] %f\n", i, lsps[3][i]); | ||
1905 | } | ||
1906 | |||
1907 | /* interpolate ------------------------------------------------*/ | ||
1908 | |||
1909 | /* LSPs, Wo, and energy are sampled every 40ms so we interpolate | ||
1910 | the 3 frames in between, then recover spectral amplitudes */ | ||
1911 | |||
1912 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
1913 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); | ||
1914 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
1915 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
1916 | } | ||
1917 | for(i=0; i<4; i++) { | ||
1918 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); | ||
1919 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, | ||
1920 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
1921 | apply_lpc_correction(&model[i]); | ||
1922 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
1923 | } | ||
1924 | |||
1925 | #ifdef DUMP | ||
1926 | dump_lsp_(&lsps[3][0]); | ||
1927 | dump_ak_(&ak[3][0], LPC_ORD_LOW); | ||
1928 | dump_model(&model[3]); | ||
1929 | if (c2->softdec) | ||
1930 | dump_softdec(c2->softdec, nbit); | ||
1931 | #endif | ||
1932 | |||
1933 | /* update memories for next frame ----------------------------*/ | ||
1934 | |||
1935 | c2->prev_model_dec = model[3]; | ||
1936 | c2->prev_e_dec = e[3]; | ||
1937 | for(i=0; i<LPC_ORD_LOW; i++) | ||
1938 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
1939 | } | ||
1940 | |||
1941 | |||
1942 | /*---------------------------------------------------------------------------*\ | 1407 | /*---------------------------------------------------------------------------*\ |
1943 | 1408 | ||
1944 | FUNCTION....: codec2_encode_700c | 1409 | FUNCTION....: codec2_encode_700c |
@@ -1955,7 +1420,7 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1955 | frame 0: nothing | 1420 | frame 0: nothing |
1956 | frame 1: nothing | 1421 | frame 1: nothing |
1957 | frame 2: nothing | 1422 | frame 2: nothing |
1958 | frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy, | 1423 | frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy, |
1959 | 6 bit scalar Wo/voicing. No spare bits. | 1424 | 6 bit scalar Wo/voicing. No spare bits. |
1960 | 1425 | ||
1961 | Voicing is encoded using the 0 index of the Wo quantiser. | 1426 | Voicing is encoded using the 0 index of the Wo quantiser. |
@@ -1971,52 +1436,54 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1971 | 1436 | ||
1972 | \*---------------------------------------------------------------------------*/ | 1437 | \*---------------------------------------------------------------------------*/ |
1973 | 1438 | ||
1974 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 1439 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, |
1975 | { | 1440 | short speech[]) { |
1976 | MODEL model; | 1441 | MODEL model; |
1977 | int indexes[4], i, M=4; | 1442 | int indexes[4], i, M = 4; |
1978 | unsigned int nbit = 0; | 1443 | unsigned int nbit = 0; |
1979 | 1444 | ||
1980 | assert(c2 != NULL); | 1445 | assert(c2 != NULL); |
1981 | 1446 | ||
1982 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 1447 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
1983 | 1448 | ||
1984 | for(i=0; i<M; i++) { | 1449 | for (i = 0; i < M; i++) { |
1985 | analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); | 1450 | analyse_one_frame(c2, &model, &speech[i * c2->n_samp]); |
1986 | } | 1451 | } |
1987 | 1452 | ||
1988 | int K = 20; | 1453 | int K = 20; |
1989 | float rate_K_vec[K], mean; | 1454 | float rate_K_vec[K], mean; |
1990 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; | 1455 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; |
1991 | 1456 | ||
1992 | newamp1_model_to_indexes(&c2->c2const, | 1457 | newamp1_model_to_indexes(&c2->c2const, indexes, &model, rate_K_vec, |
1993 | indexes, | 1458 | c2->rate_K_sample_freqs_kHz, K, &mean, |
1994 | &model, | 1459 | rate_K_vec_no_mean, rate_K_vec_no_mean_, &c2->se, |
1995 | rate_K_vec, | 1460 | c2->eq, c2->eq_en); |
1996 | c2->rate_K_sample_freqs_kHz, | 1461 | c2->nse += K; |
1997 | K, | ||
1998 | &mean, | ||
1999 | rate_K_vec_no_mean, | ||
2000 | rate_K_vec_no_mean_, &c2->se, c2->eq, c2->eq_en); | ||
2001 | c2->nse += K; | ||
2002 | 1462 | ||
2003 | #ifndef CORTEX_M4 | 1463 | #ifndef CORTEX_M4 |
2004 | /* dump features for deep learning experiments */ | 1464 | /* dump features for deep learning experiments */ |
2005 | if (c2->fmlfeat != NULL) { | 1465 | if (c2->fmlfeat != NULL) { |
2006 | fwrite(&mean, 1, sizeof(float), c2->fmlfeat); | 1466 | fwrite(&mean, 1, sizeof(float), c2->fmlfeat); |
2007 | fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat); | 1467 | fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat); |
2008 | fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat); | 1468 | fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat); |
2009 | } | 1469 | MODEL model_; |
1470 | memcpy(&model_, &model, sizeof(model)); | ||
1471 | float rate_K_vec_[K]; | ||
1472 | for (int k = 0; k < K; k++) rate_K_vec_[k] = rate_K_vec_no_mean_[k] + mean; | ||
1473 | resample_rate_L(&c2->c2const, &model_, rate_K_vec_, | ||
1474 | c2->rate_K_sample_freqs_kHz, K); | ||
1475 | fwrite(&model_.A, MAX_AMP, sizeof(float), c2->fmlfeat); | ||
1476 | } | ||
1477 | if (c2->fmlmodel != NULL) fwrite(&model, sizeof(MODEL), 1, c2->fmlmodel); | ||
2010 | #endif | 1478 | #endif |
2011 | |||
2012 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); | ||
2013 | pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); | ||
2014 | pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0); | ||
2015 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
2016 | 1479 | ||
2017 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 1480 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); |
2018 | } | 1481 | pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); |
1482 | pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0); | ||
1483 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
2019 | 1484 | ||
1485 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
1486 | } | ||
2020 | 1487 | ||
2021 | /*---------------------------------------------------------------------------*\ | 1488 | /*---------------------------------------------------------------------------*\ |
2022 | 1489 | ||
@@ -2028,46 +1495,53 @@ void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
2028 | 1495 | ||
2029 | \*---------------------------------------------------------------------------*/ | 1496 | \*---------------------------------------------------------------------------*/ |
2030 | 1497 | ||
2031 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 1498 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], |
2032 | { | 1499 | const unsigned char *bits) { |
2033 | MODEL model[4]; | 1500 | MODEL model[4]; |
2034 | int indexes[4]; | 1501 | int indexes[4]; |
2035 | int i; | 1502 | int i; |
2036 | unsigned int nbit = 0; | 1503 | unsigned int nbit = 0; |
2037 | 1504 | ||
2038 | assert(c2 != NULL); | 1505 | assert(c2 != NULL); |
2039 | 1506 | ||
2040 | /* unpack bits from channel ------------------------------------*/ | 1507 | /* unpack bits from channel ------------------------------------*/ |
2041 | 1508 | ||
2042 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1509 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
2043 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1510 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
2044 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); | 1511 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); |
2045 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | 1512 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); |
2046 | 1513 | ||
2047 | int M = 4; | 1514 | int M = 4; |
2048 | COMP HH[M][MAX_AMP+1]; | 1515 | COMP HH[M][MAX_AMP + 1]; |
2049 | float interpolated_surface_[M][NEWAMP1_K]; | 1516 | float interpolated_surface_[M][NEWAMP1_K]; |
2050 | 1517 | ||
2051 | newamp1_indexes_to_model(&c2->c2const, | 1518 | newamp1_indexes_to_model( |
2052 | model, | 1519 | &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_, |
2053 | (COMP*)HH, | 1520 | c2->prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left, |
2054 | (float*)interpolated_surface_, | 1521 | c2->rate_K_sample_freqs_kHz, NEWAMP1_K, c2->phase_fft_fwd_cfg, |
2055 | c2->prev_rate_K_vec_, | 1522 | c2->phase_fft_inv_cfg, indexes, c2->user_rate_K_vec_no_mean_, |
2056 | &c2->Wo_left, | 1523 | c2->post_filter_en); |
2057 | &c2->voicing_left, | 1524 | |
2058 | c2->rate_K_sample_freqs_kHz, | 1525 | for (i = 0; i < M; i++) { |
2059 | NEWAMP1_K, | 1526 | if (c2->fmlfeat != NULL) { |
2060 | c2->phase_fft_fwd_cfg, | 1527 | /* We use standard nb_features=55 feature records for compatibility with |
2061 | c2->phase_fft_inv_cfg, | 1528 | * train_lpcnet.py */ |
2062 | indexes, | 1529 | float features[55] = {0}; |
2063 | c2->user_rate_K_vec_no_mean_, | 1530 | /* just using 18/20 for compatibility with LPCNet, coarse scaling for NN |
2064 | c2->post_filter_en); | 1531 | * input */ |
2065 | 1532 | for (int j = 0; j < 18; j++) | |
2066 | 1533 | features[j] = (interpolated_surface_[i][j] - 30) / 40; | |
2067 | for(i=0; i<M; i++) { | 1534 | int pitch_index = 21 + 2.0 * M_PI / model[i].Wo; |
2068 | /* 700C is a little quiter so lets apply some experimentally derived audio gain */ | 1535 | features[36] = 0.02 * (pitch_index - 100); |
2069 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | 1536 | features[37] = model[i].voiced; |
2070 | } | 1537 | fwrite(features, 55, sizeof(float), c2->fmlfeat); |
1538 | } | ||
1539 | |||
1540 | /* 700C is a little quieter so lets apply some experimentally derived audio | ||
1541 | * gain */ | ||
1542 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0], | ||
1543 | 1.5); | ||
1544 | } | ||
2071 | } | 1545 | } |
2072 | 1546 | ||
2073 | /*---------------------------------------------------------------------------*\ | 1547 | /*---------------------------------------------------------------------------*\ |
@@ -2080,48 +1554,24 @@ void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * | |||
2080 | 1554 | ||
2081 | \*---------------------------------------------------------------------------*/ | 1555 | \*---------------------------------------------------------------------------*/ |
2082 | 1556 | ||
2083 | float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits) | 1557 | float codec2_energy_700c(struct CODEC2 *c2, const unsigned char *bits) { |
2084 | { | 1558 | int indexes[4]; |
2085 | int indexes[4]; | 1559 | unsigned int nbit = 0; |
2086 | unsigned int nbit = 0; | ||
2087 | 1560 | ||
2088 | assert(c2 != NULL); | 1561 | assert(c2 != NULL); |
2089 | 1562 | ||
2090 | /* unpack bits from channel ------------------------------------*/ | 1563 | /* unpack bits from channel ------------------------------------*/ |
2091 | 1564 | ||
2092 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1565 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
2093 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1566 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
2094 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); | 1567 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); |
2095 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | 1568 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); |
2096 | 1569 | ||
2097 | float mean = newamp1_energy_cb[0].cb[indexes[2]]; | 1570 | float mean = newamp1_energy_cb[0].cb[indexes[2]]; |
2098 | mean -= 10; | 1571 | mean -= 10; |
2099 | if (indexes[3] == 0) | 1572 | if (indexes[3] == 0) mean -= 10; |
2100 | mean -= 10; | ||
2101 | 1573 | ||
2102 | return POW10F(mean/10.0); | 1574 | return POW10F(mean / 10.0); |
2103 | } | ||
2104 | |||
2105 | float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) | ||
2106 | { | ||
2107 | int indexes[4]; | ||
2108 | unsigned int nbit = 0; | ||
2109 | |||
2110 | assert(c2 != NULL); | ||
2111 | |||
2112 | /* unpack bits from channel ------------------------------------*/ | ||
2113 | |||
2114 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2115 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2116 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
2117 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2118 | |||
2119 | float mean = newamp2_energy_cb[0].cb[indexes[2]]; | ||
2120 | mean -= 10; | ||
2121 | if (indexes[3] == 0) | ||
2122 | mean -= 10; | ||
2123 | |||
2124 | return POW10F(mean/10.0); | ||
2125 | } | 1575 | } |
2126 | 1576 | ||
2127 | /*---------------------------------------------------------------------------*\ | 1577 | /*---------------------------------------------------------------------------*\ |
@@ -2134,300 +1584,58 @@ float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) | |||
2134 | 1584 | ||
2135 | \*---------------------------------------------------------------------------*/ | 1585 | \*---------------------------------------------------------------------------*/ |
2136 | 1586 | ||
2137 | float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) | 1587 | float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) { |
2138 | { | 1588 | assert(c2 != NULL); |
2139 | assert(c2 != NULL); | 1589 | assert((CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) || |
2140 | assert( | 1590 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) || |
2141 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) || | 1591 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) || |
2142 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) || | 1592 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || |
2143 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) || | 1593 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || |
2144 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || | 1594 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || |
2145 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || | 1595 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))); |
2146 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || | 1596 | MODEL model; |
2147 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) || | 1597 | float xq_dec[2] = {}; |
2148 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) || | 1598 | int e_index, WoE_index; |
2149 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) || | 1599 | float e = 0.0f; |
2150 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) || | 1600 | unsigned int nbit; |
2151 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | 1601 | |
2152 | ); | 1602 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { |
2153 | MODEL model; | 1603 | nbit = 1 + 1 + WO_BITS; |
2154 | float xq_dec[2] = {}; | 1604 | e_index = unpack(bits, &nbit, E_BITS); |
2155 | int e_index, WoE_index; | 1605 | e = decode_energy(e_index, E_BITS); |
2156 | float e; | 1606 | } |
2157 | unsigned int nbit; | 1607 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { |
2158 | 1608 | nbit = 1 + 1; | |
2159 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { | 1609 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
2160 | nbit = 1 + 1 + WO_BITS; | 1610 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); |
2161 | e_index = unpack(bits, &nbit, E_BITS); | 1611 | } |
2162 | e = decode_energy(e_index, E_BITS); | 1612 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { |
2163 | } | 1613 | nbit = 1 + 1 + WO_BITS; |
2164 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { | 1614 | e_index = unpack(bits, &nbit, E_BITS); |
2165 | nbit = 1 + 1; | 1615 | e = decode_energy(e_index, E_BITS); |
2166 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1616 | } |
2167 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | 1617 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { |
2168 | } | 1618 | nbit = 1 + 1; |
2169 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { | 1619 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
2170 | nbit = 1 + 1 + WO_BITS; | 1620 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); |
2171 | e_index = unpack(bits, &nbit, E_BITS); | 1621 | } |
2172 | e = decode_energy(e_index, E_BITS); | 1622 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { |
2173 | } | 1623 | nbit = 1 + 1 + 1 + 1 + WO_BITS; |
2174 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { | 1624 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); |
2175 | nbit = 1 + 1; | 1625 | e = decode_energy(e_index, E_BITS); |
2176 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1626 | } |
2177 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | 1627 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { |
2178 | } | 1628 | nbit = 1 + 1; |
2179 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { | 1629 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
2180 | nbit = 1 + 1 + 1 + 1 + WO_BITS; | 1630 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); |
2181 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | 1631 | } |
2182 | e = decode_energy(e_index, E_BITS); | 1632 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
2183 | } | 1633 | e = codec2_energy_700c(c2, bits); |
2184 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { | 1634 | } |
2185 | nbit = 1 + 1; | ||
2186 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
2187 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | ||
2188 | } | ||
2189 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) { | ||
2190 | nbit = 1 + 5; | ||
2191 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
2192 | e = decode_energy(e_index, 3); | ||
2193 | } | ||
2194 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) { | ||
2195 | nbit = 1 + 5; | ||
2196 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
2197 | e = decode_energy(e_index, 3); | ||
2198 | } | ||
2199 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | ||
2200 | e = codec2_energy_700c(c2, bits); | ||
2201 | } | ||
2202 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
2203 | e = codec2_energy_450(c2, bits); | ||
2204 | } | ||
2205 | |||
2206 | return e; | ||
2207 | } | ||
2208 | |||
2209 | |||
2210 | /*---------------------------------------------------------------------------*\ | ||
2211 | |||
2212 | FUNCTION....: codec2_encode_450 | ||
2213 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
2214 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
2215 | DATE CREATED: July 2018 | ||
2216 | |||
2217 | 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes. | ||
2218 | |||
2219 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
2220 | |||
2221 | The codec2 algorithm actually operates internally on 10ms (80 | ||
2222 | sample) frames, so we run the encoding algorithm four times: | ||
2223 | |||
2224 | frame 0: nothing | ||
2225 | frame 1: nothing | ||
2226 | frame 2: nothing | ||
2227 | frame 3: 9 bit 1 stage VQ, 3 bits energy, | ||
2228 | 6 bit scalar Wo/voicing/plosive. No spare bits. | ||
2229 | |||
2230 | If a plosive is detected the frame at the energy-step is encoded. | ||
2231 | |||
2232 | Voicing is encoded using the 000000 index of the Wo quantiser. | ||
2233 | Plosive is encoded using the 111111 index of the Wo quantiser. | ||
2234 | |||
2235 | The bit allocation is: | ||
2236 | |||
2237 | Parameter frames 1-3 frame 4 Total | ||
2238 | ----------------------------------------------------------- | ||
2239 | Harmonic magnitudes (rate k VQ) 0 9 9 | ||
2240 | Energy 0 3 3 | ||
2241 | log Wo/voicing/plosive 0 6 6 | ||
2242 | TOTAL 0 18 18 | ||
2243 | |||
2244 | |||
2245 | \*---------------------------------------------------------------------------*/ | ||
2246 | |||
2247 | void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
2248 | { | ||
2249 | MODEL model; | ||
2250 | int indexes[4], i,h, M=4; | ||
2251 | unsigned int nbit = 0; | ||
2252 | int plosiv = 0; | ||
2253 | float energydelta[M]; | ||
2254 | int spectralCounter; | ||
2255 | |||
2256 | assert(c2 != NULL); | ||
2257 | |||
2258 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
2259 | for(i=0; i<M; i++){ | ||
2260 | analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); | ||
2261 | energydelta[i] = 0; | ||
2262 | spectralCounter = 0; | ||
2263 | for(h = 0;h<(model.L);h++){ | ||
2264 | //only detect above 300 Hz | ||
2265 | if(h*model.Wo*(c2->c2const.Fs/2000.0)/M_PI > 0.3){ | ||
2266 | energydelta[i] = energydelta[i] + 20.0*log10(model.A[10]+1E-16); | ||
2267 | spectralCounter = spectralCounter+1; | ||
2268 | } | ||
2269 | |||
2270 | } | ||
2271 | energydelta[i] = energydelta[i] / spectralCounter ; | ||
2272 | } | ||
2273 | //Constants for plosive Detection tdB = threshold; minPwr = from below this level plosives have to rise | ||
2274 | float tdB = 15; //not fixed can be changed | ||
2275 | float minPwr = 15; //not fixed can be changed | ||
2276 | if((c2->energy_prev)<minPwr && energydelta[0]>((c2->energy_prev)+tdB)){ | ||
2277 | |||
2278 | plosiv = 1; | ||
2279 | } | ||
2280 | if(energydelta[0]<minPwr && energydelta[1]>(energydelta[0]+tdB)){ | ||
2281 | |||
2282 | plosiv = 2; | ||
2283 | } | ||
2284 | if(energydelta[1]<minPwr &&energydelta[2]>(energydelta[1]+tdB)){ | ||
2285 | |||
2286 | plosiv = 3; | ||
2287 | } | ||
2288 | if(energydelta[2]<minPwr &&energydelta[3]>(energydelta[2]+tdB)){ | ||
2289 | |||
2290 | plosiv = 4; | ||
2291 | } | ||
2292 | if(plosiv != 0 && plosiv != 4){ | ||
2293 | analyse_one_frame(c2, &model, &speech[(plosiv-1)*c2->n_samp]); | ||
2294 | } | ||
2295 | |||
2296 | c2->energy_prev = energydelta[3]; | ||
2297 | |||
2298 | |||
2299 | int K = 29; | ||
2300 | float rate_K_vec[K], mean; | ||
2301 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; | ||
2302 | if(plosiv > 0){ | ||
2303 | plosiv = 1; | ||
2304 | } | ||
2305 | newamp2_model_to_indexes(&c2->c2const, | ||
2306 | indexes, | ||
2307 | &model, | ||
2308 | rate_K_vec, | ||
2309 | c2->n2_rate_K_sample_freqs_kHz, | ||
2310 | K, | ||
2311 | &mean, | ||
2312 | rate_K_vec_no_mean, | ||
2313 | rate_K_vec_no_mean_, | ||
2314 | plosiv); | ||
2315 | |||
2316 | |||
2317 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); | ||
2318 | //pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); | ||
2319 | pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0); | ||
2320 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
2321 | |||
2322 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
2323 | } | ||
2324 | |||
2325 | |||
2326 | /*---------------------------------------------------------------------------*\ | ||
2327 | |||
2328 | FUNCTION....: codec2_decode_450 | ||
2329 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
2330 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
2331 | DATE CREATED: July 2018 | ||
2332 | |||
2333 | \*---------------------------------------------------------------------------*/ | ||
2334 | |||
2335 | void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
2336 | { | ||
2337 | MODEL model[4]; | ||
2338 | int indexes[4]; | ||
2339 | int i; | ||
2340 | unsigned int nbit = 0; | ||
2341 | |||
2342 | assert(c2 != NULL); | ||
2343 | |||
2344 | /* unpack bits from channel ------------------------------------*/ | ||
2345 | |||
2346 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2347 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2348 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
2349 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2350 | |||
2351 | int M = 4; | ||
2352 | COMP HH[M][MAX_AMP+1]; | ||
2353 | float interpolated_surface_[M][NEWAMP2_K]; | ||
2354 | int pwbFlag = 0; | ||
2355 | |||
2356 | newamp2_indexes_to_model(&c2->c2const, | ||
2357 | model, | ||
2358 | (COMP*)HH, | ||
2359 | (float*)interpolated_surface_, | ||
2360 | c2->n2_prev_rate_K_vec_, | ||
2361 | &c2->Wo_left, | ||
2362 | &c2->voicing_left, | ||
2363 | c2->n2_rate_K_sample_freqs_kHz, | ||
2364 | NEWAMP2_K, | ||
2365 | c2->phase_fft_fwd_cfg, | ||
2366 | c2->phase_fft_inv_cfg, | ||
2367 | indexes, | ||
2368 | 1.5, | ||
2369 | pwbFlag); | ||
2370 | |||
2371 | |||
2372 | for(i=0; i<M; i++) { | ||
2373 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | /*---------------------------------------------------------------------------*\ | ||
2378 | |||
2379 | FUNCTION....: codec2_decode_450pwb | ||
2380 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
2381 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
2382 | DATE CREATED: July 2018 | ||
2383 | |||
2384 | Decodes the 450 codec data in pseudo wideband at 16kHz samplerate. | ||
2385 | |||
2386 | \*---------------------------------------------------------------------------*/ | ||
2387 | 1635 | ||
2388 | void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 1636 | return e; |
2389 | { | ||
2390 | MODEL model[4]; | ||
2391 | int indexes[4]; | ||
2392 | int i; | ||
2393 | unsigned int nbit = 0; | ||
2394 | |||
2395 | assert(c2 != NULL); | ||
2396 | |||
2397 | /* unpack bits from channel ------------------------------------*/ | ||
2398 | |||
2399 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2400 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
2401 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
2402 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
2403 | |||
2404 | int M = 4; | ||
2405 | COMP HH[M][MAX_AMP+1]; | ||
2406 | float interpolated_surface_[M][NEWAMP2_16K_K]; | ||
2407 | int pwbFlag = 1; | ||
2408 | |||
2409 | newamp2_indexes_to_model(&c2->c2const, | ||
2410 | model, | ||
2411 | (COMP*)HH, | ||
2412 | (float*)interpolated_surface_, | ||
2413 | c2->n2_pwb_prev_rate_K_vec_, | ||
2414 | &c2->Wo_left, | ||
2415 | &c2->voicing_left, | ||
2416 | c2->n2_pwb_rate_K_sample_freqs_kHz, | ||
2417 | NEWAMP2_16K_K, | ||
2418 | c2->phase_fft_fwd_cfg, | ||
2419 | c2->phase_fft_inv_cfg, | ||
2420 | indexes, | ||
2421 | 1.5, | ||
2422 | pwbFlag); | ||
2423 | |||
2424 | |||
2425 | for(i=0; i<M; i++) { | ||
2426 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
2427 | } | ||
2428 | } | 1637 | } |
2429 | 1638 | ||
2430 | |||
2431 | /*---------------------------------------------------------------------------* \ | 1639 | /*---------------------------------------------------------------------------* \ |
2432 | 1640 | ||
2433 | FUNCTION....: synthesise_one_frame() | 1641 | FUNCTION....: synthesise_one_frame() |
@@ -2438,56 +1646,41 @@ void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char | |||
2438 | 1646 | ||
2439 | \*---------------------------------------------------------------------------*/ | 1647 | \*---------------------------------------------------------------------------*/ |
2440 | 1648 | ||
2441 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain) | 1649 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, |
2442 | { | 1650 | COMP Aw[], float gain) { |
2443 | int i; | 1651 | int i; |
2444 | //PROFILE_VAR(phase_start, pf_start, synth_start); | 1652 | |
2445 | 1653 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | |
2446 | //#ifdef DUMP | 1654 | /* newamp1, we've already worked out rate L phase */ |
2447 | //dump_quantised_model(model); | 1655 | COMP *H = Aw; |
2448 | //#endif | 1656 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); |
2449 | 1657 | } else { | |
2450 | //PROFILE_SAMPLE(phase_start); | 1658 | /* LPC based phase synthesis */ |
2451 | 1659 | COMP H[MAX_AMP + 1]; | |
2452 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode) ) { | 1660 | sample_phase(model, H, Aw); |
2453 | /* newamp1/2, we've already worked out rate L phase */ | 1661 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); |
2454 | COMP *H = Aw; | 1662 | } |
2455 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); | 1663 | |
2456 | } else { | 1664 | postfilter(model, &c2->bg_est); |
2457 | /* LPC based phase synthesis */ | 1665 | synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1); |
2458 | COMP H[MAX_AMP+1]; | 1666 | |
2459 | sample_phase(model, H, Aw); | 1667 | for (i = 0; i < c2->n_samp; i++) { |
2460 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); | 1668 | c2->Sn_[i] *= gain; |
2461 | } | 1669 | } |
2462 | 1670 | ||
2463 | //PROFILE_SAMPLE_AND_LOG(pf_start, phase_start, " phase_synth"); | 1671 | ear_protection(c2->Sn_, c2->n_samp); |
2464 | 1672 | ||
2465 | postfilter(model, &c2->bg_est); | 1673 | for (i = 0; i < c2->n_samp; i++) { |
2466 | 1674 | if (c2->Sn_[i] > 32767.0) | |
2467 | //PROFILE_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter"); | 1675 | speech[i] = 32767; |
2468 | 1676 | else if (c2->Sn_[i] < -32767.0) | |
2469 | synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1); | 1677 | speech[i] = -32767; |
2470 | 1678 | else | |
2471 | for(i=0; i<c2->n_samp; i++) { | 1679 | speech[i] = c2->Sn_[i]; |
2472 | c2->Sn_[i] *= gain; | 1680 | } |
2473 | } | ||
2474 | |||
2475 | //PROFILE_SAMPLE_AND_LOG2(synth_start, " synth"); | ||
2476 | |||
2477 | ear_protection(c2->Sn_, c2->n_samp); | ||
2478 | |||
2479 | for(i=0; i<c2->n_samp; i++) { | ||
2480 | if (c2->Sn_[i] > 32767.0) | ||
2481 | speech[i] = 32767; | ||
2482 | else if (c2->Sn_[i] < -32767.0) | ||
2483 | speech[i] = -32767; | ||
2484 | else | ||
2485 | speech[i] = c2->Sn_[i]; | ||
2486 | } | ||
2487 | |||
2488 | } | 1681 | } |
2489 | 1682 | ||
2490 | /*---------------------------------------------------------------------------*\ | 1683 | /*---------------------------------------------------------------------------* \ |
2491 | 1684 | ||
2492 | FUNCTION....: analyse_one_frame() | 1685 | FUNCTION....: analyse_one_frame() |
2493 | AUTHOR......: David Rowe | 1686 | AUTHOR......: David Rowe |
@@ -2498,48 +1691,40 @@ void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP | |||
2498 | 1691 | ||
2499 | \*---------------------------------------------------------------------------*/ | 1692 | \*---------------------------------------------------------------------------*/ |
2500 | 1693 | ||
2501 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) | 1694 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) { |
2502 | { | 1695 | COMP Sw[FFT_ENC]; |
2503 | COMP Sw[FFT_ENC]; | 1696 | float pitch; |
2504 | float pitch; | 1697 | int i; |
2505 | int i; | 1698 | int n_samp = c2->n_samp; |
2506 | //PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps); | 1699 | int m_pitch = c2->m_pitch; |
2507 | int n_samp = c2->n_samp; | ||
2508 | int m_pitch = c2->m_pitch; | ||
2509 | 1700 | ||
2510 | /* Read input speech */ | 1701 | /* Read input speech */ |
2511 | 1702 | ||
2512 | for(i=0; i<m_pitch-n_samp; i++) | 1703 | for (i = 0; i < m_pitch - n_samp; i++) c2->Sn[i] = c2->Sn[i + n_samp]; |
2513 | c2->Sn[i] = c2->Sn[i+n_samp]; | 1704 | for (i = 0; i < n_samp; i++) c2->Sn[i + m_pitch - n_samp] = speech[i]; |
2514 | for(i=0; i<n_samp; i++) | ||
2515 | c2->Sn[i+m_pitch-n_samp] = speech[i]; | ||
2516 | 1705 | ||
2517 | //PROFILE_SAMPLE(dft_start); | 1706 | dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); |
2518 | dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); | ||
2519 | //PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech"); | ||
2520 | 1707 | ||
2521 | /* Estimate pitch */ | 1708 | /* Estimate pitch */ |
1709 | nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc); | ||
1710 | model->Wo = TWO_PI / pitch; | ||
1711 | model->L = PI / model->Wo; | ||
2522 | 1712 | ||
2523 | nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc); | 1713 | /* estimate model parameters */ |
2524 | //PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp"); | 1714 | two_stage_pitch_refinement(&c2->c2const, model, Sw); |
2525 | 1715 | ||
2526 | model->Wo = TWO_PI/pitch; | 1716 | /* estimate phases when doing ML experiments */ |
2527 | model->L = PI/model->Wo; | 1717 | if (c2->fmlfeat != NULL) |
2528 | 1718 | estimate_amplitudes(model, Sw, c2->W, 1); | |
2529 | /* estimate model parameters */ | 1719 | else |
2530 | |||
2531 | two_stage_pitch_refinement(&c2->c2const, model, Sw); | ||
2532 | //PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage"); | ||
2533 | estimate_amplitudes(model, Sw, c2->W, 0); | 1720 | estimate_amplitudes(model, Sw, c2->W, 0); |
2534 | //PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps"); | 1721 | est_voicing_mbe(&c2->c2const, model, Sw, c2->W); |
2535 | est_voicing_mbe(&c2->c2const, model, Sw, c2->W); | 1722 | #ifdef DUMP |
2536 | //PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing"); | 1723 | dump_model(model); |
2537 | #ifdef DUMP | 1724 | #endif |
2538 | dump_model(model); | ||
2539 | #endif | ||
2540 | } | 1725 | } |
2541 | 1726 | ||
2542 | /*---------------------------------------------------------------------------*\ | 1727 | /*---------------------------------------------------------------------------* \ |
2543 | 1728 | ||
2544 | FUNCTION....: ear_protection() | 1729 | FUNCTION....: ear_protection() |
2545 | AUTHOR......: David Rowe | 1730 | AUTHOR......: David Rowe |
@@ -2552,40 +1737,37 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) | |||
2552 | \*---------------------------------------------------------------------------*/ | 1737 | \*---------------------------------------------------------------------------*/ |
2553 | 1738 | ||
2554 | static void ear_protection(float in_out[], int n) { | 1739 | static void ear_protection(float in_out[], int n) { |
2555 | float max_sample, over, gain; | 1740 | float max_sample, over, gain; |
2556 | int i; | 1741 | int i; |
2557 | 1742 | ||
2558 | /* find maximum sample in frame */ | 1743 | /* find maximum sample in frame */ |
2559 | 1744 | ||
2560 | max_sample = 0.0; | 1745 | max_sample = 0.0; |
2561 | for(i=0; i<n; i++) | 1746 | for (i = 0; i < n; i++) |
2562 | if (in_out[i] > max_sample) | 1747 | if (in_out[i] > max_sample) max_sample = in_out[i]; |
2563 | max_sample = in_out[i]; | ||
2564 | 1748 | ||
2565 | /* determine how far above set point */ | 1749 | /* determine how far above set point */ |
2566 | 1750 | ||
2567 | over = max_sample/30000.0; | 1751 | over = max_sample / 30000.0; |
2568 | 1752 | ||
2569 | /* If we are x dB over set point we reduce level by 2x dB, this | 1753 | /* If we are x dB over set point we reduce level by 2x dB, this |
2570 | attenuates major excursions in amplitude (likely to be caused | 1754 | attenuates major excursions in amplitude (likely to be caused |
2571 | by bit errors) more than smaller ones */ | 1755 | by bit errors) more than smaller ones */ |
2572 | 1756 | ||
2573 | if (over > 1.0) { | 1757 | if (over > 1.0) { |
2574 | gain = 1.0/(over*over); | 1758 | gain = 1.0 / (over * over); |
2575 | //fprintf(stderr, "gain: %f\n", gain); | 1759 | for (i = 0; i < n; i++) in_out[i] *= gain; |
2576 | for(i=0; i<n; i++) | 1760 | } |
2577 | in_out[i] *= gain; | ||
2578 | } | ||
2579 | } | 1761 | } |
2580 | 1762 | ||
2581 | void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma) | 1763 | void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, |
2582 | { | 1764 | float beta, float gamma) { |
2583 | assert((beta >= 0.0) && (beta <= 1.0)); | 1765 | assert((beta >= 0.0) && (beta <= 1.0)); |
2584 | assert((gamma >= 0.0) && (gamma <= 1.0)); | 1766 | assert((gamma >= 0.0) && (gamma <= 1.0)); |
2585 | c2->lpc_pf = enable; | 1767 | c2->lpc_pf = enable; |
2586 | c2->bass_boost = bass_boost; | 1768 | c2->bass_boost = bass_boost; |
2587 | c2->beta = beta; | 1769 | c2->beta = beta; |
2588 | c2->gamma = gamma; | 1770 | c2->gamma = gamma; |
2589 | } | 1771 | } |
2590 | 1772 | ||
2591 | /* | 1773 | /* |
@@ -2594,29 +1776,22 @@ void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, f | |||
2594 | Experimental method of sending voice/data frames for FreeDV. | 1776 | Experimental method of sending voice/data frames for FreeDV. |
2595 | */ | 1777 | */ |
2596 | 1778 | ||
2597 | int codec2_get_spare_bit_index(struct CODEC2 *c2) | 1779 | int codec2_get_spare_bit_index(struct CODEC2 *c2) { |
2598 | { | 1780 | assert(c2 != NULL); |
2599 | assert(c2 != NULL); | ||
2600 | 1781 | ||
2601 | switch(c2->mode) { | 1782 | switch (c2->mode) { |
2602 | case CODEC2_MODE_1300: | 1783 | case CODEC2_MODE_1300: |
2603 | return 2; // bit 2 (3th bit) is v2 (third voicing bit) | 1784 | return 2; // bit 2 (3th bit) is v2 (third voicing bit) |
2604 | break; | 1785 | break; |
2605 | case CODEC2_MODE_1400: | 1786 | case CODEC2_MODE_1400: |
2606 | return 10; // bit 10 (11th bit) is v2 (third voicing bit) | 1787 | return 10; // bit 10 (11th bit) is v2 (third voicing bit) |
2607 | break; | 1788 | break; |
2608 | case CODEC2_MODE_1600: | 1789 | case CODEC2_MODE_1600: |
2609 | return 15; // bit 15 (16th bit) is v2 (third voicing bit) | 1790 | return 15; // bit 15 (16th bit) is v2 (third voicing bit) |
2610 | break; | 1791 | break; |
2611 | case CODEC2_MODE_700: | 1792 | } |
2612 | return 26; // bits 26 and 27 are spare | ||
2613 | break; | ||
2614 | case CODEC2_MODE_700B: | ||
2615 | return 27; // bit 27 is spare | ||
2616 | break; | ||
2617 | } | ||
2618 | 1793 | ||
2619 | return -1; | 1794 | return -1; |
2620 | } | 1795 | } |
2621 | 1796 | ||
2622 | /* | 1797 | /* |
@@ -2624,111 +1799,123 @@ int codec2_get_spare_bit_index(struct CODEC2 *c2) | |||
2624 | for convenience. | 1799 | for convenience. |
2625 | */ | 1800 | */ |
2626 | 1801 | ||
2627 | int codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[]) | 1802 | int codec2_rebuild_spare_bit(struct CODEC2 *c2, char unpacked_bits[]) { |
2628 | { | 1803 | int v1, v3; |
2629 | int v1,v3; | ||
2630 | 1804 | ||
2631 | assert(c2 != NULL); | 1805 | assert(c2 != NULL); |
2632 | 1806 | ||
2633 | v1 = unpacked_bits[1]; | 1807 | v1 = unpacked_bits[1]; |
2634 | 1808 | ||
2635 | switch(c2->mode) { | 1809 | switch (c2->mode) { |
2636 | case CODEC2_MODE_1300: | 1810 | case CODEC2_MODE_1300: |
2637 | 1811 | ||
2638 | v3 = unpacked_bits[1+1+1]; | 1812 | v3 = unpacked_bits[1 + 1 + 1]; |
2639 | 1813 | ||
2640 | /* if either adjacent frame is voiced, make this one voiced */ | 1814 | /* if either adjacent frame is voiced, make this one voiced */ |
2641 | 1815 | ||
2642 | unpacked_bits[2] = (v1 || v3); | 1816 | unpacked_bits[2] = (v1 || v3); |
2643 | 1817 | ||
2644 | return 0; | 1818 | return 0; |
2645 | 1819 | ||
2646 | break; | 1820 | break; |
2647 | 1821 | ||
2648 | case CODEC2_MODE_1400: | 1822 | case CODEC2_MODE_1400: |
2649 | 1823 | ||
2650 | v3 = unpacked_bits[1+1+8+1]; | 1824 | v3 = unpacked_bits[1 + 1 + 8 + 1]; |
2651 | 1825 | ||
2652 | /* if either adjacent frame is voiced, make this one voiced */ | 1826 | /* if either adjacent frame is voiced, make this one voiced */ |
2653 | 1827 | ||
2654 | unpacked_bits[10] = (v1 || v3); | 1828 | unpacked_bits[10] = (v1 || v3); |
2655 | 1829 | ||
2656 | return 0; | 1830 | return 0; |
2657 | 1831 | ||
2658 | break; | 1832 | break; |
2659 | 1833 | ||
2660 | case CODEC2_MODE_1600: | 1834 | case CODEC2_MODE_1600: |
2661 | v3 = unpacked_bits[1+1+8+5+1]; | 1835 | v3 = unpacked_bits[1 + 1 + 8 + 5 + 1]; |
2662 | 1836 | ||
2663 | /* if either adjacent frame is voiced, make this one voiced */ | 1837 | /* if either adjacent frame is voiced, make this one voiced */ |
2664 | 1838 | ||
2665 | unpacked_bits[15] = (v1 || v3); | 1839 | unpacked_bits[15] = (v1 || v3); |
2666 | 1840 | ||
2667 | return 0; | 1841 | return 0; |
2668 | 1842 | ||
2669 | break; | 1843 | break; |
2670 | } | 1844 | } |
2671 | 1845 | ||
2672 | return -1; | 1846 | return -1; |
2673 | } | 1847 | } |
2674 | 1848 | ||
2675 | void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) | 1849 | void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) { |
2676 | { | 1850 | assert(c2 != NULL); |
2677 | assert(c2 != NULL); | 1851 | c2->gray = gray; |
2678 | c2->gray = gray; | ||
2679 | } | 1852 | } |
2680 | 1853 | ||
2681 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec) | 1854 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec) { |
2682 | { | 1855 | assert(c2 != NULL); |
2683 | assert(c2 != NULL); | 1856 | c2->softdec = softdec; |
2684 | c2->softdec = softdec; | ||
2685 | } | 1857 | } |
2686 | 1858 | ||
2687 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename) { | 1859 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *feat_fn, |
2688 | if ((codec2_state->fmlfeat = fopen(filename, "wb")) == NULL) { | 1860 | char *model_fn) { |
2689 | fprintf(stderr, "error opening machine learning feature file: %s\n", filename); | 1861 | if ((codec2_state->fmlfeat = fopen(feat_fn, "wb")) == NULL) { |
2690 | exit(1); | 1862 | fprintf(stderr, "error opening machine learning feature file: %s\n", |
2691 | } | 1863 | feat_fn); |
1864 | exit(1); | ||
1865 | } | ||
1866 | if (model_fn) { | ||
1867 | if ((codec2_state->fmlmodel = fopen(model_fn, "wb")) == NULL) { | ||
1868 | fprintf(stderr, "error opening machine learning Codec 2 model file: %s\n", | ||
1869 | feat_fn); | ||
1870 | exit(1); | ||
1871 | } | ||
1872 | } | ||
2692 | } | 1873 | } |
2693 | 1874 | ||
2694 | #ifndef __EMBEDDED__ | 1875 | #ifndef __EMBEDDED__ |
2695 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename) { | 1876 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, |
2696 | FILE *f; | 1877 | char *filename) { |
2697 | 1878 | FILE *f; | |
2698 | if ((f = fopen(filename, "rb")) == NULL) { | 1879 | |
2699 | fprintf(stderr, "error opening codebook file: %s\n", filename); | 1880 | if ((f = fopen(filename, "rb")) == NULL) { |
2700 | exit(1); | 1881 | fprintf(stderr, "error opening codebook file: %s\n", filename); |
2701 | } | 1882 | exit(1); |
2702 | //fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, newamp1vq_cb[num].k, newamp1vq_cb[num].m); | 1883 | } |
2703 | float tmp[newamp1vq_cb[num].k*newamp1vq_cb[num].m]; | 1884 | // fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, |
2704 | int nread = fread(tmp, sizeof(float), newamp1vq_cb[num].k*newamp1vq_cb[num].m, f); | 1885 | // newamp1vq_cb[num].k, newamp1vq_cb[num].m); |
2705 | float *p = (float*)newamp1vq_cb[num].cb; | 1886 | float tmp[newamp1vq_cb[num].k * newamp1vq_cb[num].m]; |
2706 | for(int i=0; i<newamp1vq_cb[num].k*newamp1vq_cb[num].m; i++) | 1887 | int nread = |
2707 | p[i] = tmp[i]; | 1888 | fread(tmp, sizeof(float), newamp1vq_cb[num].k * newamp1vq_cb[num].m, f); |
2708 | // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], newamp1vq_cb[num].cb[1]); | 1889 | float *p = (float *)newamp1vq_cb[num].cb; |
2709 | assert(nread == newamp1vq_cb[num].k*newamp1vq_cb[num].m); | 1890 | for (int i = 0; i < newamp1vq_cb[num].k * newamp1vq_cb[num].m; i++) |
2710 | fclose(f); | 1891 | p[i] = tmp[i]; |
1892 | // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], | ||
1893 | // newamp1vq_cb[num].cb[1]); | ||
1894 | assert(nread == newamp1vq_cb[num].k * newamp1vq_cb[num].m); | ||
1895 | fclose(f); | ||
2711 | } | 1896 | } |
2712 | #endif | 1897 | #endif |
2713 | 1898 | ||
2714 | float codec2_get_var(struct CODEC2 *codec2_state) { | 1899 | float codec2_get_var(struct CODEC2 *codec2_state) { |
2715 | if (codec2_state->nse) | 1900 | if (codec2_state->nse) |
2716 | return codec2_state->se/codec2_state->nse; | 1901 | return codec2_state->se / codec2_state->nse; |
2717 | else | 1902 | else |
2718 | return 0; | 1903 | return 0; |
2719 | } | 1904 | } |
2720 | 1905 | ||
2721 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { | 1906 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { |
2722 | codec2_state->user_rate_K_vec_no_mean_ = (float*)malloc(sizeof(float)*NEWAMP1_K); | 1907 | codec2_state->user_rate_K_vec_no_mean_ = |
2723 | *K = NEWAMP1_K; | 1908 | (float *)malloc(sizeof(float) * NEWAMP1_K); |
2724 | return codec2_state->user_rate_K_vec_no_mean_; | 1909 | *K = NEWAMP1_K; |
1910 | return codec2_state->user_rate_K_vec_no_mean_; | ||
2725 | } | 1911 | } |
2726 | 1912 | ||
2727 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) { | 1913 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, bool en) { |
2728 | codec2_state->post_filter_en = en; | 1914 | codec2_state->post_filter_en = en; |
2729 | } | 1915 | } |
2730 | 1916 | ||
2731 | void codec2_700c_eq(struct CODEC2 *codec2_state, int en) { | 1917 | void codec2_700c_eq(struct CODEC2 *codec2_state, bool en) { |
2732 | codec2_state->eq_en = en; | 1918 | codec2_state->eq_en = en; |
2733 | codec2_state->se = 0.0; codec2_state->nse = 0; | 1919 | codec2_state->se = 0.0; |
1920 | codec2_state->nse = 0; | ||
2734 | } | 1921 | } |