stripdown of version 0.9

author: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
committer: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
commit: f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree: 45556e6104242d4702689760433d7321ae74ec17 /newamp1.c
1 files changed, 625 insertions, 0 deletions
diff --git a/newamp1.c b/newamp1.c
new file mode 100644
index 0000000..575620a
--- /dev/null
+++ b/newamp1.c
@@ -0,0 +1,625 @@
+/*---------------------------------------------------------------------------*\
+  FILE........: newamp1.c
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Quantisation functions for the sinusoidal coder, using "newamp1"
+  algorithm that resamples variable rate L [Am} to a fixed rate K then
+  VQs.
+\*---------------------------------------------------------------------------*/
+/*
+  Copyright David Rowe 2017
+  All rights reserved.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "defines.h"
+#include "phase.h"
+#include "quantise.h"
+#include "mbest.h"
+#include "newamp1.h"
+#define NEWAMP1_VQ_MBEST_DEPTH 5  /* how many candidates we keep for each stage of mbest search */
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: interp_para()
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  General 2nd order parabolic interpolator.  Used splines orginally,
+  but this is much simpler and we don't need much accuracy.  Given two
+  vectors of points xp and yp, find interpolated values y at points x.
+\*---------------------------------------------------------------------------*/
+void interp_para(float y[], float xp[], float yp[], int np, float x[], int n)
+{
+    assert(np >= 3);
+    int k,i;
+    float xi, x1, y1, x2, y2, x3, y3, a, b;
+    k = 0;
+    for (i=0; i<n; i++) {
+        xi = x[i];
+        /* k is index into xp of where we start 3 points used to form parabola */
+        while ((xp[k+1] < xi) && (k < (np-3)))
+            k++;
+    
+        x1 = xp[k]; y1 = yp[k]; x2 = xp[k+1]; y2 = yp[k+1]; x3 = xp[k+2]; y3 = yp[k+2];
+        //printf("k: %d np: %d i: %d xi: %f x1: %f y1: %f\n", k, np, i, xi, x1, y1);
+        a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
+        b = ((y3-y2)/(x3-x2)*(x2-x1)+(y2-y1)/(x2-x1)*(x3-x2))/(x3-x1);
+  
+        y[i] = a*(xi-x2)*(xi-x2) + b*(xi-x2) + y2;
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: ftomel()
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Non linear sampling of frequency axis, reducing the "rate" is a
+  first step before VQ
+\*---------------------------------------------------------------------------*/
+float ftomel(float fHz) {
+    float mel = floorf(2595.0*log10f(1.0 + fHz/700.0)+0.5);
+    return mel;
+}
+void mel_sample_freqs_kHz(float rate_K_sample_freqs_kHz[], int K, float mel_start, float mel_end)
+{
+    float step = (mel_end-mel_start)/(K-1);
+    float mel;
+    int k;
+    mel = mel_start;
+    for (k=0; k<K; k++) {
+        rate_K_sample_freqs_kHz[k] = 0.7*(POW10F(mel/2595.0) - 1.0);
+        mel += step;
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: resample_const_rate_f()
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K.
+\*---------------------------------------------------------------------------*/
+void resample_const_rate_f(C2CONST *c2const, MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K)
+{
+    int m;
+    float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1], AmdB_peak;
+    /* convert rate L=pi/Wo amplitude samples to fixed rate K */
+    AmdB_peak = -100.0;
+    for(m=1; m<=model->L; m++) {
+        AmdB[m] = 20.0*log10f(model->A[m]+1E-16);
+        if (AmdB[m] > AmdB_peak) {
+            AmdB_peak = AmdB[m];
+        }
+        rate_L_sample_freqs_kHz[m] = m*model->Wo*(c2const->Fs/2000.0)/M_PI;
+        //printf("m: %d AmdB: %f AmdB_peak: %f  sf: %f\n", m, AmdB[m], AmdB_peak, rate_L_sample_freqs_kHz[m]);
+    }
+    
+    /* clip between peak and peak -50dB, to reduce dynamic range */
+    for(m=1; m<=model->L; m++) {
+        if (AmdB[m] < (AmdB_peak-50.0)) {
+            AmdB[m] = AmdB_peak-50.0;
+        }
+    }
+    interp_para(rate_K_vec, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, rate_K_sample_freqs_kHz, K);    
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: rate_K_mbest_encode
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Two stage rate K newamp1 VQ quantiser using mbest search.
+\*---------------------------------------------------------------------------*/
+float rate_K_mbest_encode(int *indexes, float *x, float *xq, int ndim, int mbest_entries)
+{
+  int i, j, n1, n2;
+  const float *codebook1 = newamp1vq_cb[0].cb;
+  const float *codebook2 = newamp1vq_cb[1].cb;
+  struct MBEST *mbest_stage1, *mbest_stage2;
+  float target[ndim];
+  float w[ndim];
+  int   index[MBEST_STAGES];
+  float mse, tmp;
+  /* codebook is compiled for a fixed K */
+  assert(ndim == newamp1vq_cb[0].k);
+  /* equal weights, could be argued mel freq axis gives freq dep weighting */
+  for(i=0; i<ndim; i++)
+      w[i] = 1.0;
+  mbest_stage1 = mbest_create(mbest_entries);
+  mbest_stage2 = mbest_create(mbest_entries);
+  for(i=0; i<MBEST_STAGES; i++)
+      index[i] = 0;
+  /* Stage 1 */
+  mbest_search(codebook1, x, w, ndim, newamp1vq_cb[0].m, mbest_stage1, index);
+  MBEST_PRINT("Stage 1:", mbest_stage1);
+  /* Stage 2 */
+  for (j=0; j<mbest_entries; j++) {
+      index[1] = n1 = mbest_stage1->list[j].index[0];
+      for(i=0; i<ndim; i++)
+          target[i] = x[i] - codebook1[ndim*n1+i];
+      mbest_search(codebook2, target, w, ndim, newamp1vq_cb[1].m, mbest_stage2, index);
+  }
+  MBEST_PRINT("Stage 2:", mbest_stage2);
+  n1 = mbest_stage2->list[0].index[1];
+  n2 = mbest_stage2->list[0].index[0];
+  mse = 0.0;
+  for (i=0;i<ndim;i++) {
+      tmp = codebook1[ndim*n1+i] + codebook2[ndim*n2+i];
+      mse += (x[i]-tmp)*(x[i]-tmp);
+      xq[i] = tmp;
+  }
+  mbest_destroy(mbest_stage1);
+  mbest_destroy(mbest_stage2);
+  indexes[0] = n1; indexes[1] = n2;
+  return mse;
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: post_filter
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Post Filter, has a big impact on speech quality after VQ.  When used
+  on a mean removed rate K vector, it raises formants, and supresses
+  anti-formants.  As it manipulates amplitudes, we normalise energy to
+  prevent clipping or large level variations.  pf_gain of 1.2 to 1.5
+  (dB) seems to work OK.  Good area for further investigations and
+  improvements in speech quality.
+\*---------------------------------------------------------------------------*/
+void post_filter_newamp1(float vec[], float sample_freq_kHz[], int K, float pf_gain)
+{
+    int k;
+    /*
+      vec is rate K vector describing spectrum of current frame lets
+      pre-emp before applying PF. 20dB/dec over 300Hz.  Postfilter
+      affects energy of frame so we measure energy before and after
+      and normalise.  Plenty of room for experiment here as well.
+    */
+    
+    float pre[K];
+    float e_before = 0.0;
+    float e_after = 0.0;
+    for(k=0; k<K; k++) {
+        pre[k] = 20.0*log10f(sample_freq_kHz[k]/0.3);
+        vec[k] += pre[k];
+        e_before += POW10F(vec[k]/10.0);
+        vec[k] *= pf_gain;
+        e_after += POW10F(vec[k]/10.0);
+    }
+    float gain = e_after/e_before;
+    float gaindB = 10*log10f(gain);
+  
+    for(k=0; k<K; k++) {
+        vec[k] -= gaindB;
+        vec[k] -= pre[k];
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: interp_Wo_v
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Decoder side interpolation of Wo and voicing, to go from 25 Hz
+  sample rate used over channle to 100Hz internal sample rate of Codec 2.
+\*---------------------------------------------------------------------------*/
+void interp_Wo_v(float Wo_[], int L_[], int voicing_[], float Wo1, float Wo2, int voicing1, int voicing2)
+{
+    int i;
+    int M = 4;  /* interpolation rate */
+    for(i=0; i<M; i++)
+        voicing_[i] = 0;
+    if (!voicing1 && !voicing2) {
+        for(i=0; i<M; i++)
+            Wo_[i] = 2.0*M_PI/100.0;
+    }
+    if (voicing1 && !voicing2) {
+       Wo_[0] = Wo_[1] = Wo1;
+       Wo_[2] = Wo_[3] = 2.0*M_PI/100.0;
+       voicing_[0] = voicing_[1] = 1;
+    }
+    if (!voicing1 && voicing2) {
+       Wo_[0] = Wo_[1] = 2.0*M_PI/100.0;
+       Wo_[2] = Wo_[3] = Wo2;
+       voicing_[2] = voicing_[3] = 1;
+    }
+    if (voicing1 && voicing2) {
+        float c;
+        for(i=0,c=1.0; i<M; i++,c-=1.0/M) {
+            Wo_[i] = Wo1*c + Wo2*(1.0-c);
+            voicing_[i] = 1;
+        }
+    }
+    for(i=0; i<M; i++) {
+        L_[i] = floorf(M_PI/Wo_[i]);
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: resample_rate_L
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Decoder side conversion of rate K vector back to rate L.
+\*---------------------------------------------------------------------------*/
+void resample_rate_L(C2CONST *c2const, MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K)
+{
+   float rate_K_vec_term[K+2], rate_K_sample_freqs_kHz_term[K+2];
+   float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1];
+   int m,k;
+   /* terminate either end of the rate K vecs with 0dB points */
+   rate_K_vec_term[0] = rate_K_vec_term[K+1] = 0.0;
+   rate_K_sample_freqs_kHz_term[0] = 0.0;
+   rate_K_sample_freqs_kHz_term[K+1] = 4.0;
+   for(k=0; k<K; k++) {
+       rate_K_vec_term[k+1] = rate_K_vec[k];
+       rate_K_sample_freqs_kHz_term[k+1] = rate_K_sample_freqs_kHz[k];
+  
+       //printf("k: %d f: %f rate_K: %f\n", k, rate_K_sample_freqs_kHz[k], rate_K_vec[k]);
+   }
+   for(m=1; m<=model->L; m++) {
+       rate_L_sample_freqs_kHz[m] = m*model->Wo*(c2const->Fs/2000.0)/M_PI;
+   }
+   interp_para(&AmdB[1], rate_K_sample_freqs_kHz_term, rate_K_vec_term, K+2, &rate_L_sample_freqs_kHz[1], model->L);    
+   for(m=1; m<=model->L; m++) {
+       model->A[m] = POW10F(AmdB[m]/20.0);
+       // printf("m: %d f: %f AdB: %f A: %f\n", m, rate_L_sample_freqs_kHz[m], AmdB[m], model->A[m]);
+   }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: determine_phase
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Given a magnitude spectrum determine a phase spectrum, used for
+  phase synthesis with newamp1.
+\*---------------------------------------------------------------------------*/
+void determine_phase(C2CONST *c2const, COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg)
+{
+    int i,m,b;
+    int Ns = Nfft/2+1;
+    float Gdbfk[Ns], sample_freqs_kHz[Ns], phase[Ns];
+    float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1];
+    for(m=1; m<=model->L; m++) {
+        assert(model->A[m] != 0.0);
+        AmdB[m] = 20.0*log10f(model->A[m]);
+        rate_L_sample_freqs_kHz[m] = (float)m*model->Wo*(c2const->Fs/2000.0)/M_PI;        
+    }
+    
+    for(i=0; i<Ns; i++) {
+        sample_freqs_kHz[i] = (c2const->Fs/1000.0)*(float)i/Nfft;
+    }
+    interp_para(Gdbfk, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, sample_freqs_kHz, Ns);
+    mag_to_phase(phase, Gdbfk, Nfft, fwd_cfg, inv_cfg);
+    for(m=1; m<=model->L; m++) {
+        b = floorf(0.5+m*model->Wo*Nfft/(2.0*M_PI));
+        H[m].real = cosf(phase[b]); H[m].imag = sinf(phase[b]);
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: newamp1_model_to_indexes
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  newamp1 encoder for amplitdues {Am}.  Given the rate L model
+  parameters, outputs VQ and energy quantiser indexes.
+\*---------------------------------------------------------------------------*/
+void newamp1_model_to_indexes(C2CONST *c2const,
+                              int    indexes[], 
+                              MODEL *model, 
+                              float  rate_K_vec[], 
+                              float  rate_K_sample_freqs_kHz[], 
+                              int    K,
+                              float *mean,
+                              float  rate_K_vec_no_mean[], 
+                              float  rate_K_vec_no_mean_[],
+                              float *se
+                              )
+{
+    int k;
+    /* convert variable rate L to fixed rate K */
+    resample_const_rate_f(c2const, model, rate_K_vec, rate_K_sample_freqs_kHz, K);
+    /* remove mean and two stage VQ */
+    float sum = 0.0;
+    for(k=0; k<K; k++)
+        sum += rate_K_vec[k];   
+    *mean = sum/K;;
+    for(k=0; k<K; k++)
+        rate_K_vec_no_mean[k] = rate_K_vec[k] - *mean;
+    rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, NEWAMP1_VQ_MBEST_DEPTH);
+    /* running sum of squared error for variance calculation */
+    for(k=0; k<K; k++)
+        *se += pow(rate_K_vec_no_mean[k]-rate_K_vec_no_mean_[k],2.0);
+    /* scalar quantise mean (effectively the frame energy) */
+    float w[1] = {1.0};
+    float se_mean;
+    indexes[2] = quantise(newamp1_energy_cb[0].cb, 
+                          mean, 
+                          w, 
+                          newamp1_energy_cb[0].k, 
+                          newamp1_energy_cb[0].m, 
+                          &se_mean);
+    /* scalar quantise Wo.  We steal the smallest Wo index to signal
+       an unvoiced frame */
+    if (model->voiced) {
+        int index = encode_log_Wo(c2const, model->Wo, 6);
+        if (index == 0) {
+            index = 1;
+        }
+        indexes[3] = index;
+    }
+    else {
+        indexes[3] = 0;
+    }
+ }
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: newamp1_interpolate
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+\*---------------------------------------------------------------------------*/
+void newamp1_interpolate(float interpolated_surface_[], float left_vec[], float right_vec[], int K)
+{
+    int  i, k;
+    int  M = 4;
+    float c;
+    /* (linearly) interpolate 25Hz amplitude vectors back to 100Hz */
+    for(i=0,c=1.0; i<M; i++,c-=1.0/M) {
+        for(k=0; k<K; k++) {
+            interpolated_surface_[i*K+k] = left_vec[k]*c + right_vec[k]*(1.0-c);
+        }
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: newamp1_indexes_to_rate_K_vec
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  newamp1 decoder for amplitudes {Am}.  Given the rate K VQ and energy
+  indexes, outputs rate K vector.
+\*---------------------------------------------------------------------------*/
+void newamp1_indexes_to_rate_K_vec(float  rate_K_vec_[],  
+                                   float  rate_K_vec_no_mean_[],
+                                   float  rate_K_sample_freqs_kHz[], 
+                                   int    K,
+                                   float *mean_,
+                                   int    indexes[],
+                                   float user_rate_K_vec_no_mean_[],
+                                   int post_filter_en)
+{
+    int   k;
+    const float *codebook1 = newamp1vq_cb[0].cb;
+    const float *codebook2 = newamp1vq_cb[1].cb;
+    int n1 = indexes[0];
+    int n2 = indexes[1];
+    
+    if (user_rate_K_vec_no_mean_ == NULL) {
+        /* normal operation */
+        for(k=0; k<K; k++) {
+            rate_K_vec_no_mean_[k] = codebook1[K*n1+k] + codebook2[K*n2+k];
+        }
+    } else {
+        /* for development we can optionally inject the quantised rate K vector here */
+        for(k=0; k<K; k++)
+            rate_K_vec_no_mean_[k] = user_rate_K_vec_no_mean_[k];
+    }
+        
+    if (post_filter_en)
+        post_filter_newamp1(rate_K_vec_no_mean_, rate_K_sample_freqs_kHz, K, 1.5);
+    *mean_ = newamp1_energy_cb[0].cb[indexes[2]];
+    for(k=0; k<K; k++) {
+        rate_K_vec_[k] = rate_K_vec_no_mean_[k] + *mean_;
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: newamp1_indexes_to_model
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  newamp1 decoder.
+\*---------------------------------------------------------------------------*/
+void newamp1_indexes_to_model(C2CONST *c2const,
+                              MODEL  model_[],
+                              COMP   H[],
+                              float *interpolated_surface_,
+                              float  prev_rate_K_vec_[],
+                              float  *Wo_left,
+                              int    *voicing_left,
+                              float  rate_K_sample_freqs_kHz[], 
+                              int    K,
+                              codec2_fft_cfg fwd_cfg, 
+                              codec2_fft_cfg inv_cfg,
+                              int    indexes[],
+                              float  user_rate_K_vec_no_mean_[],
+                              int    post_filter_en)
+{
+    float rate_K_vec_[K], rate_K_vec_no_mean_[K], mean_, Wo_right;
+    int   voicing_right, k;
+    int   M = 4;
+    /* extract latest rate K vector */
+    newamp1_indexes_to_rate_K_vec(rate_K_vec_, 
+                                  rate_K_vec_no_mean_,
+                                  rate_K_sample_freqs_kHz, 
+                                  K,
+                                  &mean_,
+                                  indexes,
+                                  user_rate_K_vec_no_mean_,
+                                  post_filter_en);
+    /* decode latest Wo and voicing */
+    if (indexes[3]) {
+        Wo_right = decode_log_Wo(c2const, indexes[3], 6);
+        voicing_right = 1;
+    }
+    else {
+        Wo_right  = 2.0*M_PI/100.0;
+        voicing_right = 0;
+    }
+    /* interpolate 25Hz rate K vec back to 100Hz */
+    float *left_vec = prev_rate_K_vec_;
+    float *right_vec = rate_K_vec_;
+    newamp1_interpolate(interpolated_surface_, left_vec, right_vec, K);
+    /* interpolate 25Hz v and Wo back to 100Hz */
+    float aWo_[M];
+    int avoicing_[M], aL_[M], i;
+    interp_Wo_v(aWo_, aL_, avoicing_, *Wo_left, Wo_right, *voicing_left, voicing_right);
+    /* back to rate L amplitudes, synthesis phase for each frame */
+    for(i=0; i<M; i++) {
+        model_[i].Wo = aWo_[i];
+        model_[i].L  = aL_[i];
+        model_[i].voiced = avoicing_[i];
+        resample_rate_L(c2const, &model_[i], &interpolated_surface_[K*i], rate_K_sample_freqs_kHz, K);
+        determine_phase(c2const, &H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg);
+    }
+    /* update memories for next time */
+    for(k=0; k<K; k++) {
+        prev_rate_K_vec_[k] = rate_K_vec_[k];
+    }
+    *Wo_left = Wo_right;
+    *voicing_left = voicing_right;
+}
author	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
committer	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
commit	f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree	45556e6104242d4702689760433d7321ae74ec17 /newamp1.c

diff --git a/newamp1.c b/newamp1.c new file mode 100644 index 0000000..575620a --- /dev/null +++ b/newamp1.c
@@ -0,0 +1,625 @@
	1	/---------------------------------------------------------------------------\
	2
	3	FILE........: newamp1.c
	4	AUTHOR......: David Rowe
	5	DATE CREATED: Jan 2017
	6
	7	Quantisation functions for the sinusoidal coder, using "newamp1"
	8	algorithm that resamples variable rate L [Am} to a fixed rate K then
	9	VQs.
	10
	11	\---------------------------------------------------------------------------/
	12
	13	/*
	14	Copyright David Rowe 2017
	15
	16	All rights reserved.
	17
	18	This program is free software; you can redistribute it and/or modify
	19	it under the terms of the GNU Lesser General Public License version 2.1, as
	20	published by the Free Software Foundation. This program is
	21	distributed in the hope that it will be useful, but WITHOUT ANY
	22	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	23	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
	24	License for more details.
	25
	26	You should have received a copy of the GNU Lesser General Public License
	27	along with this program; if not, see <http://www.gnu.org/licenses/>.
	28
	29	*/
	30
	31	#include <assert.h>
	32	#include <stdio.h>
	33	#include <stdlib.h>
	34	#include <string.h>
	35	#include <math.h>
	36
	37	#include "defines.h"
	38	#include "phase.h"
	39	#include "quantise.h"
	40	#include "mbest.h"
	41	#include "newamp1.h"
	42
	43	#define NEWAMP1_VQ_MBEST_DEPTH 5 /* how many candidates we keep for each stage of mbest search */
	44
	45	/---------------------------------------------------------------------------\
	46
	47	FUNCTION....: interp_para()
	48	AUTHOR......: David Rowe
	49	DATE CREATED: Jan 2017
	50
	51	General 2nd order parabolic interpolator. Used splines orginally,
	52	but this is much simpler and we don't need much accuracy. Given two
	53	vectors of points xp and yp, find interpolated values y at points x.
	54
	55	\---------------------------------------------------------------------------/
	56
	57	void interp_para(float y[], float xp[], float yp[], int np, float x[], int n)
	58	{
	59	assert(np >= 3);
	60
	61	int k,i;
	62	float xi, x1, y1, x2, y2, x3, y3, a, b;
	63
	64	k = 0;
	65	for (i=0; i<n; i++) {
	66	xi = x[i];
	67
	68	/* k is index into xp of where we start 3 points used to form parabola */
	69
	70	while ((xp[k+1] < xi) && (k < (np-3)))
	71	k++;
	72
	73	x1 = xp[k]; y1 = yp[k]; x2 = xp[k+1]; y2 = yp[k+1]; x3 = xp[k+2]; y3 = yp[k+2];
	74
	75	//printf("k: %d np: %d i: %d xi: %f x1: %f y1: %f\n", k, np, i, xi, x1, y1);
	76
	77	a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
	78	b = ((y3-y2)/(x3-x2)(x2-x1)+(y2-y1)/(x2-x1)(x3-x2))/(x3-x1);
	79
	80	y[i] = a(xi-x2)(xi-x2) + b*(xi-x2) + y2;
	81	}
	82	}
	83
	84
	85	/---------------------------------------------------------------------------\
	86
	87	FUNCTION....: ftomel()
	88	AUTHOR......: David Rowe
	89	DATE CREATED: Jan 2017
	90
	91	Non linear sampling of frequency axis, reducing the "rate" is a
	92	first step before VQ
	93
	94	\---------------------------------------------------------------------------/
	95
	96	float ftomel(float fHz) {
	97	float mel = floorf(2595.0*log10f(1.0 + fHz/700.0)+0.5);
	98	return mel;
	99	}
	100
	101	void mel_sample_freqs_kHz(float rate_K_sample_freqs_kHz[], int K, float mel_start, float mel_end)
	102	{
	103	float step = (mel_end-mel_start)/(K-1);
	104	float mel;
	105	int k;
	106
	107	mel = mel_start;
	108	for (k=0; k<K; k++) {
	109	rate_K_sample_freqs_kHz[k] = 0.7*(POW10F(mel/2595.0) - 1.0);
	110	mel += step;
	111	}
	112	}
	113
	114
	115	/---------------------------------------------------------------------------\
	116
	117	FUNCTION....: resample_const_rate_f()
	118	AUTHOR......: David Rowe
	119	DATE CREATED: Jan 2017
	120
	121	Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K.
	122
	123	\---------------------------------------------------------------------------/
	124
	125	void resample_const_rate_f(C2CONST c2const, MODEL model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K)
	126	{
	127	int m;
	128	float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1], AmdB_peak;
	129
	130	/* convert rate L=pi/Wo amplitude samples to fixed rate K */
	131
	132	AmdB_peak = -100.0;
	133	for(m=1; m<=model->L; m++) {
	134	AmdB[m] = 20.0*log10f(model->A[m]+1E-16);
	135	if (AmdB[m] > AmdB_peak) {
	136	AmdB_peak = AmdB[m];
	137	}
	138	rate_L_sample_freqs_kHz[m] = mmodel->Wo(c2const->Fs/2000.0)/M_PI;
	139	//printf("m: %d AmdB: %f AmdB_peak: %f sf: %f\n", m, AmdB[m], AmdB_peak, rate_L_sample_freqs_kHz[m]);
	140	}
	141
	142	/* clip between peak and peak -50dB, to reduce dynamic range */
	143
	144	for(m=1; m<=model->L; m++) {
	145	if (AmdB[m] < (AmdB_peak-50.0)) {
	146	AmdB[m] = AmdB_peak-50.0;
	147	}
	148	}
	149
	150	interp_para(rate_K_vec, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, rate_K_sample_freqs_kHz, K);
	151	}
	152
	153
	154	/---------------------------------------------------------------------------\
	155
	156	FUNCTION....: rate_K_mbest_encode
	157	AUTHOR......: David Rowe
	158	DATE CREATED: Jan 2017
	159
	160	Two stage rate K newamp1 VQ quantiser using mbest search.
	161
	162	\---------------------------------------------------------------------------/
	163
	164	float rate_K_mbest_encode(int indexes, float x, float *xq, int ndim, int mbest_entries)
	165	{
	166	int i, j, n1, n2;
	167	const float *codebook1 = newamp1vq_cb[0].cb;
	168	const float *codebook2 = newamp1vq_cb[1].cb;
	169	struct MBEST mbest_stage1, mbest_stage2;
	170	float target[ndim];
	171	float w[ndim];
	172	int index[MBEST_STAGES];
	173	float mse, tmp;
	174
	175	/* codebook is compiled for a fixed K */
	176
	177	assert(ndim == newamp1vq_cb[0].k);
	178
	179	/* equal weights, could be argued mel freq axis gives freq dep weighting */
	180
	181	for(i=0; i<ndim; i++)
	182	w[i] = 1.0;
	183
	184	mbest_stage1 = mbest_create(mbest_entries);
	185	mbest_stage2 = mbest_create(mbest_entries);
	186	for(i=0; i<MBEST_STAGES; i++)
	187	index[i] = 0;
	188
	189	/* Stage 1 */
	190
	191	mbest_search(codebook1, x, w, ndim, newamp1vq_cb[0].m, mbest_stage1, index);
	192	MBEST_PRINT("Stage 1:", mbest_stage1);
	193
	194	/* Stage 2 */
	195
	196	for (j=0; j<mbest_entries; j++) {
	197	index[1] = n1 = mbest_stage1->list[j].index[0];
	198	for(i=0; i<ndim; i++)
	199	target[i] = x[i] - codebook1[ndim*n1+i];
	200	mbest_search(codebook2, target, w, ndim, newamp1vq_cb[1].m, mbest_stage2, index);
	201	}
	202	MBEST_PRINT("Stage 2:", mbest_stage2);
	203
	204	n1 = mbest_stage2->list[0].index[1];
	205	n2 = mbest_stage2->list[0].index[0];
	206	mse = 0.0;
	207	for (i=0;i<ndim;i++) {
	208	tmp = codebook1[ndimn1+i] + codebook2[ndimn2+i];
	209	mse += (x[i]-tmp)*(x[i]-tmp);
	210	xq[i] = tmp;
	211	}
	212
	213	mbest_destroy(mbest_stage1);
	214	mbest_destroy(mbest_stage2);
	215
	216	indexes[0] = n1; indexes[1] = n2;
	217
	218	return mse;
	219	}
	220
	221
	222	/---------------------------------------------------------------------------\
	223
	224	FUNCTION....: post_filter
	225	AUTHOR......: David Rowe
	226	DATE CREATED: Jan 2017
	227
	228	Post Filter, has a big impact on speech quality after VQ. When used
	229	on a mean removed rate K vector, it raises formants, and supresses
	230	anti-formants. As it manipulates amplitudes, we normalise energy to
	231	prevent clipping or large level variations. pf_gain of 1.2 to 1.5
	232	(dB) seems to work OK. Good area for further investigations and
	233	improvements in speech quality.
	234
	235	\---------------------------------------------------------------------------/
	236
	237	void post_filter_newamp1(float vec[], float sample_freq_kHz[], int K, float pf_gain)
	238	{
	239	int k;
	240
	241	/*
	242	vec is rate K vector describing spectrum of current frame lets
	243	pre-emp before applying PF. 20dB/dec over 300Hz. Postfilter
	244	affects energy of frame so we measure energy before and after
	245	and normalise. Plenty of room for experiment here as well.
	246	*/
	247
	248	float pre[K];
	249	float e_before = 0.0;
	250	float e_after = 0.0;
	251	for(k=0; k<K; k++) {
	252	pre[k] = 20.0*log10f(sample_freq_kHz[k]/0.3);
	253	vec[k] += pre[k];
	254	e_before += POW10F(vec[k]/10.0);
	255	vec[k] *= pf_gain;
	256	e_after += POW10F(vec[k]/10.0);
	257	}
	258
	259	float gain = e_after/e_before;
	260	float gaindB = 10*log10f(gain);
	261
	262	for(k=0; k<K; k++) {
	263	vec[k] -= gaindB;
	264	vec[k] -= pre[k];
	265	}
	266	}
	267
	268
	269	/---------------------------------------------------------------------------\
	270
	271	FUNCTION....: interp_Wo_v
	272	AUTHOR......: David Rowe
	273	DATE CREATED: Jan 2017
	274
	275	Decoder side interpolation of Wo and voicing, to go from 25 Hz
	276	sample rate used over channle to 100Hz internal sample rate of Codec 2.
	277
	278	\---------------------------------------------------------------------------/
	279
	280	void interp_Wo_v(float Wo_[], int L_[], int voicing_[], float Wo1, float Wo2, int voicing1, int voicing2)
	281	{
	282	int i;
	283	int M = 4; /* interpolation rate */
	284
	285	for(i=0; i<M; i++)
	286	voicing_[i] = 0;
	287
	288	if (!voicing1 && !voicing2) {
	289	for(i=0; i<M; i++)
	290	Wo_[i] = 2.0*M_PI/100.0;
	291	}
	292
	293	if (voicing1 && !voicing2) {
	294	Wo_[0] = Wo_[1] = Wo1;
	295	Wo_[2] = Wo_[3] = 2.0*M_PI/100.0;
	296	voicing_[0] = voicing_[1] = 1;
	297	}
	298
	299	if (!voicing1 && voicing2) {
	300	Wo_[0] = Wo_[1] = 2.0*M_PI/100.0;
	301	Wo_[2] = Wo_[3] = Wo2;
	302	voicing_[2] = voicing_[3] = 1;
	303	}
	304
	305	if (voicing1 && voicing2) {
	306	float c;
	307	for(i=0,c=1.0; i<M; i++,c-=1.0/M) {
	308	Wo_[i] = Wo1c + Wo2(1.0-c);
	309	voicing_[i] = 1;
	310	}
	311	}
	312
	313	for(i=0; i<M; i++) {
	314	L_[i] = floorf(M_PI/Wo_[i]);
	315	}
	316	}
	317
	318
	319	/---------------------------------------------------------------------------\
	320
	321	FUNCTION....: resample_rate_L
	322	AUTHOR......: David Rowe
	323	DATE CREATED: Jan 2017
	324
	325	Decoder side conversion of rate K vector back to rate L.
	326
	327	\---------------------------------------------------------------------------/
	328
	329	void resample_rate_L(C2CONST c2const, MODEL model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K)
	330	{
	331	float rate_K_vec_term[K+2], rate_K_sample_freqs_kHz_term[K+2];
	332	float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1];
	333	int m,k;
	334
	335	/* terminate either end of the rate K vecs with 0dB points */
	336
	337	rate_K_vec_term[0] = rate_K_vec_term[K+1] = 0.0;
	338	rate_K_sample_freqs_kHz_term[0] = 0.0;
	339	rate_K_sample_freqs_kHz_term[K+1] = 4.0;
	340
	341	for(k=0; k<K; k++) {
	342	rate_K_vec_term[k+1] = rate_K_vec[k];
	343	rate_K_sample_freqs_kHz_term[k+1] = rate_K_sample_freqs_kHz[k];
	344
	345	//printf("k: %d f: %f rate_K: %f\n", k, rate_K_sample_freqs_kHz[k], rate_K_vec[k]);
	346	}
	347
	348	for(m=1; m<=model->L; m++) {
	349	rate_L_sample_freqs_kHz[m] = mmodel->Wo(c2const->Fs/2000.0)/M_PI;
	350	}
	351
	352	interp_para(&AmdB[1], rate_K_sample_freqs_kHz_term, rate_K_vec_term, K+2, &rate_L_sample_freqs_kHz[1], model->L);
	353	for(m=1; m<=model->L; m++) {
	354	model->A[m] = POW10F(AmdB[m]/20.0);
	355	// printf("m: %d f: %f AdB: %f A: %f\n", m, rate_L_sample_freqs_kHz[m], AmdB[m], model->A[m]);
	356	}
	357	}
	358
	359
	360	/---------------------------------------------------------------------------\
	361
	362	FUNCTION....: determine_phase
	363	AUTHOR......: David Rowe
	364	DATE CREATED: Jan 2017
	365
	366	Given a magnitude spectrum determine a phase spectrum, used for
	367	phase synthesis with newamp1.
	368
	369	\---------------------------------------------------------------------------/
	370
	371	void determine_phase(C2CONST c2const, COMP H[], MODEL model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg)
	372	{
	373	int i,m,b;
	374	int Ns = Nfft/2+1;
	375	float Gdbfk[Ns], sample_freqs_kHz[Ns], phase[Ns];
	376	float AmdB[MAX_AMP+1], rate_L_sample_freqs_kHz[MAX_AMP+1];
	377
	378	for(m=1; m<=model->L; m++) {
	379	assert(model->A[m] != 0.0);
	380	AmdB[m] = 20.0*log10f(model->A[m]);
	381	rate_L_sample_freqs_kHz[m] = (float)mmodel->Wo(c2const->Fs/2000.0)/M_PI;
	382	}
	383
	384	for(i=0; i<Ns; i++) {
	385	sample_freqs_kHz[i] = (c2const->Fs/1000.0)*(float)i/Nfft;
	386	}
	387
	388	interp_para(Gdbfk, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, sample_freqs_kHz, Ns);
	389	mag_to_phase(phase, Gdbfk, Nfft, fwd_cfg, inv_cfg);
	390
	391	for(m=1; m<=model->L; m++) {
	392	b = floorf(0.5+mmodel->WoNfft/(2.0*M_PI));
	393	H[m].real = cosf(phase[b]); H[m].imag = sinf(phase[b]);
	394	}
	395	}
	396
	397
	398	/---------------------------------------------------------------------------\
	399
	400	FUNCTION....: newamp1_model_to_indexes
	401	AUTHOR......: David Rowe
	402	DATE CREATED: Jan 2017
	403
	404	newamp1 encoder for amplitdues {Am}. Given the rate L model
	405	parameters, outputs VQ and energy quantiser indexes.
	406
	407	\---------------------------------------------------------------------------/
	408
	409	void newamp1_model_to_indexes(C2CONST *c2const,
	410	int indexes[],
	411	MODEL *model,
	412	float rate_K_vec[],
	413	float rate_K_sample_freqs_kHz[],
	414	int K,
	415	float *mean,
	416	float rate_K_vec_no_mean[],
	417	float rate_K_vec_no_mean_[],
	418	float *se
	419	)
	420	{
	421	int k;
	422
	423	/* convert variable rate L to fixed rate K */
	424
	425	resample_const_rate_f(c2const, model, rate_K_vec, rate_K_sample_freqs_kHz, K);
	426
	427	/* remove mean and two stage VQ */
	428
	429	float sum = 0.0;
	430	for(k=0; k<K; k++)
	431	sum += rate_K_vec[k];
	432	*mean = sum/K;;
	433	for(k=0; k<K; k++)
	434	rate_K_vec_no_mean[k] = rate_K_vec[k] - *mean;
	435	rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, NEWAMP1_VQ_MBEST_DEPTH);
	436
	437	/* running sum of squared error for variance calculation */
	438	for(k=0; k<K; k++)
	439	*se += pow(rate_K_vec_no_mean[k]-rate_K_vec_no_mean_[k],2.0);
	440
	441	/* scalar quantise mean (effectively the frame energy) */
	442
	443	float w[1] = {1.0};
	444	float se_mean;
	445	indexes[2] = quantise(newamp1_energy_cb[0].cb,
	446	mean,
	447	w,
	448	newamp1_energy_cb[0].k,
	449	newamp1_energy_cb[0].m,
	450	&se_mean);
	451
	452	/* scalar quantise Wo. We steal the smallest Wo index to signal
	453	an unvoiced frame */
	454
	455	if (model->voiced) {
	456	int index = encode_log_Wo(c2const, model->Wo, 6);
	457	if (index == 0) {
	458	index = 1;
	459	}
	460	indexes[3] = index;
	461	}
	462	else {
	463	indexes[3] = 0;
	464	}
	465
	466	}
	467
	468
	469	/---------------------------------------------------------------------------\
	470
	471	FUNCTION....: newamp1_interpolate
	472	AUTHOR......: David Rowe
	473	DATE CREATED: Jan 2017
	474
	475	\---------------------------------------------------------------------------/
	476
	477	void newamp1_interpolate(float interpolated_surface_[], float left_vec[], float right_vec[], int K)
	478	{
	479	int i, k;
	480	int M = 4;
	481	float c;
	482
	483	/* (linearly) interpolate 25Hz amplitude vectors back to 100Hz */
	484
	485	for(i=0,c=1.0; i<M; i++,c-=1.0/M) {
	486	for(k=0; k<K; k++) {
	487	interpolated_surface_[iK+k] = left_vec[k]c + right_vec[k]*(1.0-c);
	488	}
	489	}
	490	}
	491
	492
	493	/---------------------------------------------------------------------------\
	494
	495	FUNCTION....: newamp1_indexes_to_rate_K_vec
	496	AUTHOR......: David Rowe
	497	DATE CREATED: Jan 2017
	498
	499	newamp1 decoder for amplitudes {Am}. Given the rate K VQ and energy
	500	indexes, outputs rate K vector.
	501
	502	\---------------------------------------------------------------------------/
	503
	504	void newamp1_indexes_to_rate_K_vec(float rate_K_vec_[],
	505	float rate_K_vec_no_mean_[],
	506	float rate_K_sample_freqs_kHz[],
	507	int K,
	508	float *mean_,
	509	int indexes[],
	510	float user_rate_K_vec_no_mean_[],
	511	int post_filter_en)
	512	{
	513	int k;
	514	const float *codebook1 = newamp1vq_cb[0].cb;
	515	const float *codebook2 = newamp1vq_cb[1].cb;
	516	int n1 = indexes[0];
	517	int n2 = indexes[1];
	518
	519	if (user_rate_K_vec_no_mean_ == NULL) {
	520	/* normal operation */
	521	for(k=0; k<K; k++) {
	522	rate_K_vec_no_mean_[k] = codebook1[Kn1+k] + codebook2[Kn2+k];
	523	}
	524	} else {
	525	/* for development we can optionally inject the quantised rate K vector here */
	526	for(k=0; k<K; k++)
	527	rate_K_vec_no_mean_[k] = user_rate_K_vec_no_mean_[k];
	528	}
	529
	530	if (post_filter_en)
	531	post_filter_newamp1(rate_K_vec_no_mean_, rate_K_sample_freqs_kHz, K, 1.5);
	532
	533	*mean_ = newamp1_energy_cb[0].cb[indexes[2]];
	534
	535	for(k=0; k<K; k++) {
	536	rate_K_vec_[k] = rate_K_vec_no_mean_[k] + *mean_;
	537	}
	538	}
	539
	540
	541	/---------------------------------------------------------------------------\
	542
	543	FUNCTION....: newamp1_indexes_to_model
	544	AUTHOR......: David Rowe
	545	DATE CREATED: Jan 2017
	546
	547	newamp1 decoder.
	548
	549	\---------------------------------------------------------------------------/
	550
	551	void newamp1_indexes_to_model(C2CONST *c2const,
	552	MODEL model_[],
	553	COMP H[],
	554	float *interpolated_surface_,
	555	float prev_rate_K_vec_[],
	556	float *Wo_left,
	557	int *voicing_left,
	558	float rate_K_sample_freqs_kHz[],
	559	int K,
	560	codec2_fft_cfg fwd_cfg,
	561	codec2_fft_cfg inv_cfg,
	562	int indexes[],
	563	float user_rate_K_vec_no_mean_[],
	564	int post_filter_en)
	565	{
	566	float rate_K_vec_[K], rate_K_vec_no_mean_[K], mean_, Wo_right;
	567	int voicing_right, k;
	568	int M = 4;
	569
	570	/* extract latest rate K vector */
	571
	572	newamp1_indexes_to_rate_K_vec(rate_K_vec_,
	573	rate_K_vec_no_mean_,
	574	rate_K_sample_freqs_kHz,
	575	K,
	576	&mean_,
	577	indexes,
	578	user_rate_K_vec_no_mean_,
	579	post_filter_en);
	580
	581	/* decode latest Wo and voicing */
	582
	583	if (indexes[3]) {
	584	Wo_right = decode_log_Wo(c2const, indexes[3], 6);
	585	voicing_right = 1;
	586	}
	587	else {
	588	Wo_right = 2.0*M_PI/100.0;
	589	voicing_right = 0;
	590	}
	591
	592	/* interpolate 25Hz rate K vec back to 100Hz */
	593
	594	float *left_vec = prev_rate_K_vec_;
	595	float *right_vec = rate_K_vec_;
	596	newamp1_interpolate(interpolated_surface_, left_vec, right_vec, K);
	597
	598	/* interpolate 25Hz v and Wo back to 100Hz */
	599
	600	float aWo_[M];
	601	int avoicing_[M], aL_[M], i;
	602
	603	interp_Wo_v(aWo_, aL_, avoicing_, Wo_left, Wo_right, voicing_left, voicing_right);
	604
	605	/* back to rate L amplitudes, synthesis phase for each frame */
	606
	607	for(i=0; i<M; i++) {
	608	model_[i].Wo = aWo_[i];
	609	model_[i].L = aL_[i];
	610	model_[i].voiced = avoicing_[i];
	611
	612	resample_rate_L(c2const, &model_[i], &interpolated_surface_[K*i], rate_K_sample_freqs_kHz, K);
	613	determine_phase(c2const, &H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg);
	614	}
	615
	616	/* update memories for next time */
	617
	618	for(k=0; k<K; k++) {
	619	prev_rate_K_vec_[k] = rate_K_vec_[k];
	620	}
	621	*Wo_left = Wo_right;
	622	*voicing_left = voicing_right;
	623
	624	}
	625