summaryrefslogtreecommitdiff
path: root/newamp1.c
diff options
context:
space:
mode:
authorerdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
committererdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
commitf02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree45556e6104242d4702689760433d7321ae74ec17 /newamp1.c
stripdown of version 0.9
Diffstat (limited to 'newamp1.c')
-rw-r--r--newamp1.c625
1 files changed, 625 insertions, 0 deletions
diff --git a/newamp1.c b/newamp1.c
new file mode 100644
index 0000000..575620a
--- /dev/null
+++ b/newamp1.c
@@ -0,0 +1,625 @@
1/*---------------------------------------------------------------------------*\
2
3 FILE........: newamp1.c
4 AUTHOR......: David Rowe
5 DATE CREATED: Jan 2017
6
7 Quantisation functions for the sinusoidal coder, using "newamp1"
8 algorithm that resamples variable rate L [Am} to a fixed rate K then
9 VQs.
10
11\*---------------------------------------------------------------------------*/
12
13/*
14 Copyright David Rowe 2017
15
16 All rights reserved.
17
18 This program is free software; you can redistribute it and/or modify
19 it under the terms of the GNU Lesser General Public License version 2.1, as
20 published by the Free Software Foundation. This program is
21 distributed in the hope that it will be useful, but WITHOUT ANY
22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
24 License for more details.
25
26 You should have received a copy of the GNU Lesser General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
28
29*/
30
31#include <assert.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <math.h>
36
37#include "defines.h"
38#include "phase.h"
39#include "quantise.h"
40#include "mbest.h"
41#include "newamp1.h"
42
43#define NEWAMP1_VQ_MBEST_DEPTH 5 /* how many candidates we keep for each stage of mbest search */
44
45/*---------------------------------------------------------------------------*\
46
47 FUNCTION....: interp_para()
48 AUTHOR......: David Rowe
49 DATE CREATED: Jan 2017
50
51 General 2nd order parabolic interpolator. Used splines orginally,
52 but this is much simpler and we don't need much accuracy. Given two
53 vectors of points xp and yp, find interpolated values y at points x.
54
55\*---------------------------------------------------------------------------*/
56
57void interp_para(float y[], float xp[], float yp[], int np, float x[], int n)
58{
59 assert(np >= 3);
60
61 int k,i;
62 float xi, x1, y1, x2, y2, x3, y3, a, b;
63
64 k = 0;
65 for (i=0; i<n; i++) {
66 xi = x[i];
67
68 /* k is index into xp of where we start 3 points used to form parabola */
69
70 while ((xp[k+1] < xi) && (k < (np-3)))
71 k++;
72
73 x1 = xp[k]; y1 = yp[k]; x2 = xp[k+1]; y2 = yp[k+1]; x3 = xp[k+2]; y3 = yp[k+2];
74
75 //printf("k: %d np: %d i: %d xi: %f x1: %f y1: %f\n", k, np, i, xi, x1, y1);
76
77 a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
78 b = ((y3-y2)/(x3-x2)*(x2-x1)+(y2-y1)/(x2-x1)*(x3-x2))/(x3-x1);
79
80 y[i] = a*(xi-x2)*(xi-x2) + b*(xi-x2) + y2;
81 }
82}
83
84
85/*---------------------------------------------------------------------------*\
86
87 FUNCTION....: ftomel()
88 AUTHOR......: David Rowe
89 DATE CREATED: Jan 2017
90
91 Non linear sampling of frequency axis, reducing the "rate" is a
92 first step before VQ
93
94\*---------------------------------------------------------------------------*/
95
96float ftomel(float fHz) {
97 float mel = floorf(2595.0*log10f(1.0 + fHz/700.0)+0.5);
98 return mel;
99}
100
101void mel_sample_freqs_kHz(float rate_K_sample_freqs_kHz[], int K, float mel_start, float mel_end)
102{
103 float step = (mel_end-mel_start)/(K-1);
104 float mel;
105 int k;
106
107 mel = mel_start;
108 for (k=0; k<K; k++) {
109 rate_K_sample_freqs_kHz[k] = 0.7*(POW10F(mel/2595.0) - 1.0);
110 mel += step;
111 }
112}
113
114
115/*---------------------------------------------------------------------------*\
116
117 FUNCTION....: resample_const_rate_f()
118 AUTHOR......: David Rowe
119 DATE CREATED: Jan 2017
120
121 Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K.
122
123\*---------------------------------------------------------------------------*/
124
125void resample_const_rate_f(C2CONST *c2const, MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K)
126{
127 int m;
128 float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1], AmdB_peak;
129
130 /* convert rate L=pi/Wo amplitude samples to fixed rate K */
131
132 AmdB_peak = -100.0;
133 for(m=1; m<=model->L; m++) {
134 AmdB[m] = 20.0*log10f(model->A[m]+1E-16);
135 if (AmdB[m] > AmdB_peak) {
136 AmdB_peak = AmdB[m];
137 }
138 rate_L_sample_freqs_kHz[m] = m*model->Wo*(c2const->Fs/2000.0)/M_PI;
139 //printf("m: %d AmdB: %f AmdB_peak: %f sf: %f\n", m, AmdB[m], AmdB_peak, rate_L_sample_freqs_kHz[m]);
140 }
141
142 /* clip between peak and peak -50dB, to reduce dynamic range */
143
144 for(m=1; m<=model->L; m++) {
145 if (AmdB[m] < (AmdB_peak-50.0)) {
146 AmdB[m] = AmdB_peak-50.0;
147 }
148 }
149
150 interp_para(rate_K_vec, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, rate_K_sample_freqs_kHz, K);
151}
152
153
154/*---------------------------------------------------------------------------*\
155
156 FUNCTION....: rate_K_mbest_encode
157 AUTHOR......: David Rowe
158 DATE CREATED: Jan 2017
159
160 Two stage rate K newamp1 VQ quantiser using mbest search.
161
162\*---------------------------------------------------------------------------*/
163
164float rate_K_mbest_encode(int *indexes, float *x, float *xq, int ndim, int mbest_entries)
165{
166 int i, j, n1, n2;
167 const float *codebook1 = newamp1vq_cb[0].cb;
168 const float *codebook2 = newamp1vq_cb[1].cb;
169 struct MBEST *mbest_stage1, *mbest_stage2;
170 float target[ndim];
171 float w[ndim];
172 int index[MBEST_STAGES];
173 float mse, tmp;
174
175 /* codebook is compiled for a fixed K */
176
177 assert(ndim == newamp1vq_cb[0].k);
178
179 /* equal weights, could be argued mel freq axis gives freq dep weighting */
180
181 for(i=0; i<ndim; i++)
182 w[i] = 1.0;
183
184 mbest_stage1 = mbest_create(mbest_entries);
185 mbest_stage2 = mbest_create(mbest_entries);
186 for(i=0; i<MBEST_STAGES; i++)
187 index[i] = 0;
188
189 /* Stage 1 */
190
191 mbest_search(codebook1, x, w, ndim, newamp1vq_cb[0].m, mbest_stage1, index);
192 MBEST_PRINT("Stage 1:", mbest_stage1);
193
194 /* Stage 2 */
195
196 for (j=0; j<mbest_entries; j++) {
197 index[1] = n1 = mbest_stage1->list[j].index[0];
198 for(i=0; i<ndim; i++)
199 target[i] = x[i] - codebook1[ndim*n1+i];
200 mbest_search(codebook2, target, w, ndim, newamp1vq_cb[1].m, mbest_stage2, index);
201 }
202 MBEST_PRINT("Stage 2:", mbest_stage2);
203
204 n1 = mbest_stage2->list[0].index[1];
205 n2 = mbest_stage2->list[0].index[0];
206 mse = 0.0;
207 for (i=0;i<ndim;i++) {
208 tmp = codebook1[ndim*n1+i] + codebook2[ndim*n2+i];
209 mse += (x[i]-tmp)*(x[i]-tmp);
210 xq[i] = tmp;
211 }
212
213 mbest_destroy(mbest_stage1);
214 mbest_destroy(mbest_stage2);
215
216 indexes[0] = n1; indexes[1] = n2;
217
218 return mse;
219}
220
221
222/*---------------------------------------------------------------------------*\
223
224 FUNCTION....: post_filter
225 AUTHOR......: David Rowe
226 DATE CREATED: Jan 2017
227
228 Post Filter, has a big impact on speech quality after VQ. When used
229 on a mean removed rate K vector, it raises formants, and supresses
230 anti-formants. As it manipulates amplitudes, we normalise energy to
231 prevent clipping or large level variations. pf_gain of 1.2 to 1.5
232 (dB) seems to work OK. Good area for further investigations and
233 improvements in speech quality.
234
235\*---------------------------------------------------------------------------*/
236
237void post_filter_newamp1(float vec[], float sample_freq_kHz[], int K, float pf_gain)
238{
239 int k;
240
241 /*
242 vec is rate K vector describing spectrum of current frame lets
243 pre-emp before applying PF. 20dB/dec over 300Hz. Postfilter
244 affects energy of frame so we measure energy before and after
245 and normalise. Plenty of room for experiment here as well.
246 */
247
248 float pre[K];
249 float e_before = 0.0;
250 float e_after = 0.0;
251 for(k=0; k<K; k++) {
252 pre[k] = 20.0*log10f(sample_freq_kHz[k]/0.3);
253 vec[k] += pre[k];
254 e_before += POW10F(vec[k]/10.0);
255 vec[k] *= pf_gain;
256 e_after += POW10F(vec[k]/10.0);
257 }
258
259 float gain = e_after/e_before;
260 float gaindB = 10*log10f(gain);
261
262 for(k=0; k<K; k++) {
263 vec[k] -= gaindB;
264 vec[k] -= pre[k];
265 }
266}
267
268
269/*---------------------------------------------------------------------------*\
270
271 FUNCTION....: interp_Wo_v
272 AUTHOR......: David Rowe
273 DATE CREATED: Jan 2017
274
275 Decoder side interpolation of Wo and voicing, to go from 25 Hz
276 sample rate used over channle to 100Hz internal sample rate of Codec 2.
277
278\*---------------------------------------------------------------------------*/
279
280void interp_Wo_v(float Wo_[], int L_[], int voicing_[], float Wo1, float Wo2, int voicing1, int voicing2)
281{
282 int i;
283 int M = 4; /* interpolation rate */
284
285 for(i=0; i<M; i++)
286 voicing_[i] = 0;
287
288 if (!voicing1 && !voicing2) {
289 for(i=0; i<M; i++)
290 Wo_[i] = 2.0*M_PI/100.0;
291 }
292
293 if (voicing1 && !voicing2) {
294 Wo_[0] = Wo_[1] = Wo1;
295 Wo_[2] = Wo_[3] = 2.0*M_PI/100.0;
296 voicing_[0] = voicing_[1] = 1;
297 }
298
299 if (!voicing1 && voicing2) {
300 Wo_[0] = Wo_[1] = 2.0*M_PI/100.0;
301 Wo_[2] = Wo_[3] = Wo2;
302 voicing_[2] = voicing_[3] = 1;
303 }
304
305 if (voicing1 && voicing2) {
306 float c;
307 for(i=0,c=1.0; i<M; i++,c-=1.0/M) {
308 Wo_[i] = Wo1*c + Wo2*(1.0-c);
309 voicing_[i] = 1;
310 }
311 }
312
313 for(i=0; i<M; i++) {
314 L_[i] = floorf(M_PI/Wo_[i]);
315 }
316}
317
318
319/*---------------------------------------------------------------------------*\
320
321 FUNCTION....: resample_rate_L
322 AUTHOR......: David Rowe
323 DATE CREATED: Jan 2017
324
325 Decoder side conversion of rate K vector back to rate L.
326
327\*---------------------------------------------------------------------------*/
328
329void resample_rate_L(C2CONST *c2const, MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K)
330{
331 float rate_K_vec_term[K+2], rate_K_sample_freqs_kHz_term[K+2];
332 float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1];
333 int m,k;
334
335 /* terminate either end of the rate K vecs with 0dB points */
336
337 rate_K_vec_term[0] = rate_K_vec_term[K+1] = 0.0;
338 rate_K_sample_freqs_kHz_term[0] = 0.0;
339 rate_K_sample_freqs_kHz_term[K+1] = 4.0;
340
341 for(k=0; k<K; k++) {
342 rate_K_vec_term[k+1] = rate_K_vec[k];
343 rate_K_sample_freqs_kHz_term[k+1] = rate_K_sample_freqs_kHz[k];
344
345 //printf("k: %d f: %f rate_K: %f\n", k, rate_K_sample_freqs_kHz[k], rate_K_vec[k]);
346 }
347
348 for(m=1; m<=model->L; m++) {
349 rate_L_sample_freqs_kHz[m] = m*model->Wo*(c2const->Fs/2000.0)/M_PI;
350 }
351
352 interp_para(&AmdB[1], rate_K_sample_freqs_kHz_term, rate_K_vec_term, K+2, &rate_L_sample_freqs_kHz[1], model->L);
353 for(m=1; m<=model->L; m++) {
354 model->A[m] = POW10F(AmdB[m]/20.0);
355 // printf("m: %d f: %f AdB: %f A: %f\n", m, rate_L_sample_freqs_kHz[m], AmdB[m], model->A[m]);
356 }
357}
358
359
360/*---------------------------------------------------------------------------*\
361
362 FUNCTION....: determine_phase
363 AUTHOR......: David Rowe
364 DATE CREATED: Jan 2017
365
366 Given a magnitude spectrum determine a phase spectrum, used for
367 phase synthesis with newamp1.
368
369\*---------------------------------------------------------------------------*/
370
371void determine_phase(C2CONST *c2const, COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg)
372{
373 int i,m,b;
374 int Ns = Nfft/2+1;
375 float Gdbfk[Ns], sample_freqs_kHz[Ns], phase[Ns];
376 float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1];
377
378 for(m=1; m<=model->L; m++) {
379 assert(model->A[m] != 0.0);
380 AmdB[m] = 20.0*log10f(model->A[m]);
381 rate_L_sample_freqs_kHz[m] = (float)m*model->Wo*(c2const->Fs/2000.0)/M_PI;
382 }
383
384 for(i=0; i<Ns; i++) {
385 sample_freqs_kHz[i] = (c2const->Fs/1000.0)*(float)i/Nfft;
386 }
387
388 interp_para(Gdbfk, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, sample_freqs_kHz, Ns);
389 mag_to_phase(phase, Gdbfk, Nfft, fwd_cfg, inv_cfg);
390
391 for(m=1; m<=model->L; m++) {
392 b = floorf(0.5+m*model->Wo*Nfft/(2.0*M_PI));
393 H[m].real = cosf(phase[b]); H[m].imag = sinf(phase[b]);
394 }
395}
396
397
398/*---------------------------------------------------------------------------*\
399
400 FUNCTION....: newamp1_model_to_indexes
401 AUTHOR......: David Rowe
402 DATE CREATED: Jan 2017
403
404 newamp1 encoder for amplitdues {Am}. Given the rate L model
405 parameters, outputs VQ and energy quantiser indexes.
406
407\*---------------------------------------------------------------------------*/
408
409void newamp1_model_to_indexes(C2CONST *c2const,
410 int indexes[],
411 MODEL *model,
412 float rate_K_vec[],
413 float rate_K_sample_freqs_kHz[],
414 int K,
415 float *mean,
416 float rate_K_vec_no_mean[],
417 float rate_K_vec_no_mean_[],
418 float *se
419 )
420{
421 int k;
422
423 /* convert variable rate L to fixed rate K */
424
425 resample_const_rate_f(c2const, model, rate_K_vec, rate_K_sample_freqs_kHz, K);
426
427 /* remove mean and two stage VQ */
428
429 float sum = 0.0;
430 for(k=0; k<K; k++)
431 sum += rate_K_vec[k];
432 *mean = sum/K;;
433 for(k=0; k<K; k++)
434 rate_K_vec_no_mean[k] = rate_K_vec[k] - *mean;
435 rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, NEWAMP1_VQ_MBEST_DEPTH);
436
437 /* running sum of squared error for variance calculation */
438 for(k=0; k<K; k++)
439 *se += pow(rate_K_vec_no_mean[k]-rate_K_vec_no_mean_[k],2.0);
440
441 /* scalar quantise mean (effectively the frame energy) */
442
443 float w[1] = {1.0};
444 float se_mean;
445 indexes[2] = quantise(newamp1_energy_cb[0].cb,
446 mean,
447 w,
448 newamp1_energy_cb[0].k,
449 newamp1_energy_cb[0].m,
450 &se_mean);
451
452 /* scalar quantise Wo. We steal the smallest Wo index to signal
453 an unvoiced frame */
454
455 if (model->voiced) {
456 int index = encode_log_Wo(c2const, model->Wo, 6);
457 if (index == 0) {
458 index = 1;
459 }
460 indexes[3] = index;
461 }
462 else {
463 indexes[3] = 0;
464 }
465
466 }
467
468
469/*---------------------------------------------------------------------------*\
470
471 FUNCTION....: newamp1_interpolate
472 AUTHOR......: David Rowe
473 DATE CREATED: Jan 2017
474
475\*---------------------------------------------------------------------------*/
476
477void newamp1_interpolate(float interpolated_surface_[], float left_vec[], float right_vec[], int K)
478{
479 int i, k;
480 int M = 4;
481 float c;
482
483 /* (linearly) interpolate 25Hz amplitude vectors back to 100Hz */
484
485 for(i=0,c=1.0; i<M; i++,c-=1.0/M) {
486 for(k=0; k<K; k++) {
487 interpolated_surface_[i*K+k] = left_vec[k]*c + right_vec[k]*(1.0-c);
488 }
489 }
490}
491
492
493/*---------------------------------------------------------------------------*\
494
495 FUNCTION....: newamp1_indexes_to_rate_K_vec
496 AUTHOR......: David Rowe
497 DATE CREATED: Jan 2017
498
499 newamp1 decoder for amplitudes {Am}. Given the rate K VQ and energy
500 indexes, outputs rate K vector.
501
502\*---------------------------------------------------------------------------*/
503
504void newamp1_indexes_to_rate_K_vec(float rate_K_vec_[],
505 float rate_K_vec_no_mean_[],
506 float rate_K_sample_freqs_kHz[],
507 int K,
508 float *mean_,
509 int indexes[],
510 float user_rate_K_vec_no_mean_[],
511 int post_filter_en)
512{
513 int k;
514 const float *codebook1 = newamp1vq_cb[0].cb;
515 const float *codebook2 = newamp1vq_cb[1].cb;
516 int n1 = indexes[0];
517 int n2 = indexes[1];
518
519 if (user_rate_K_vec_no_mean_ == NULL) {
520 /* normal operation */
521 for(k=0; k<K; k++) {
522 rate_K_vec_no_mean_[k] = codebook1[K*n1+k] + codebook2[K*n2+k];
523 }
524 } else {
525 /* for development we can optionally inject the quantised rate K vector here */
526 for(k=0; k<K; k++)
527 rate_K_vec_no_mean_[k] = user_rate_K_vec_no_mean_[k];
528 }
529
530 if (post_filter_en)
531 post_filter_newamp1(rate_K_vec_no_mean_, rate_K_sample_freqs_kHz, K, 1.5);
532
533 *mean_ = newamp1_energy_cb[0].cb[indexes[2]];
534
535 for(k=0; k<K; k++) {
536 rate_K_vec_[k] = rate_K_vec_no_mean_[k] + *mean_;
537 }
538}
539
540
541/*---------------------------------------------------------------------------*\
542
543 FUNCTION....: newamp1_indexes_to_model
544 AUTHOR......: David Rowe
545 DATE CREATED: Jan 2017
546
547 newamp1 decoder.
548
549\*---------------------------------------------------------------------------*/
550
551void newamp1_indexes_to_model(C2CONST *c2const,
552 MODEL model_[],
553 COMP H[],
554 float *interpolated_surface_,
555 float prev_rate_K_vec_[],
556 float *Wo_left,
557 int *voicing_left,
558 float rate_K_sample_freqs_kHz[],
559 int K,
560 codec2_fft_cfg fwd_cfg,
561 codec2_fft_cfg inv_cfg,
562 int indexes[],
563 float user_rate_K_vec_no_mean_[],
564 int post_filter_en)
565{
566 float rate_K_vec_[K], rate_K_vec_no_mean_[K], mean_, Wo_right;
567 int voicing_right, k;
568 int M = 4;
569
570 /* extract latest rate K vector */
571
572 newamp1_indexes_to_rate_K_vec(rate_K_vec_,
573 rate_K_vec_no_mean_,
574 rate_K_sample_freqs_kHz,
575 K,
576 &mean_,
577 indexes,
578 user_rate_K_vec_no_mean_,
579 post_filter_en);
580
581 /* decode latest Wo and voicing */
582
583 if (indexes[3]) {
584 Wo_right = decode_log_Wo(c2const, indexes[3], 6);
585 voicing_right = 1;
586 }
587 else {
588 Wo_right = 2.0*M_PI/100.0;
589 voicing_right = 0;
590 }
591
592 /* interpolate 25Hz rate K vec back to 100Hz */
593
594 float *left_vec = prev_rate_K_vec_;
595 float *right_vec = rate_K_vec_;
596 newamp1_interpolate(interpolated_surface_, left_vec, right_vec, K);
597
598 /* interpolate 25Hz v and Wo back to 100Hz */
599
600 float aWo_[M];
601 int avoicing_[M], aL_[M], i;
602
603 interp_Wo_v(aWo_, aL_, avoicing_, *Wo_left, Wo_right, *voicing_left, voicing_right);
604
605 /* back to rate L amplitudes, synthesis phase for each frame */
606
607 for(i=0; i<M; i++) {
608 model_[i].Wo = aWo_[i];
609 model_[i].L = aL_[i];
610 model_[i].voiced = avoicing_[i];
611
612 resample_rate_L(c2const, &model_[i], &interpolated_surface_[K*i], rate_K_sample_freqs_kHz, K);
613 determine_phase(c2const, &H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg);
614 }
615
616 /* update memories for next time */
617
618 for(k=0; k<K; k++) {
619 prev_rate_K_vec_[k] = rate_K_vec_[k];
620 }
621 *Wo_left = Wo_right;
622 *voicing_left = voicing_right;
623
624}
625