1 files changed, 689 insertions, 0 deletions
diff --git a/sine.c b/sine.c
new file mode 100644
index 0000000..4e31f37
--- /dev/null
+++ b/sine.c
@@ -0,0 +1,689 @@
+/*---------------------------------------------------------------------------*\
+  FILE........: sine.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 19/8/2010
+  Sinusoidal analysis and synthesis functions.
+\*---------------------------------------------------------------------------*/
+/*
+  Copyright (C) 1990-2010 David Rowe
+  All rights reserved.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+/*---------------------------------------------------------------------------*\
+                                INCLUDES
+\*---------------------------------------------------------------------------*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "defines.h"
+#include "sine.h"
+#include "kiss_fft.h"
+#define HPF_BETA 0.125
+/*---------------------------------------------------------------------------*\
+                                HEADERS
+\*---------------------------------------------------------------------------*/
+void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax,
+                         float pstep);
+/*---------------------------------------------------------------------------*\
+                                FUNCTIONS
+\*---------------------------------------------------------------------------*/
+C2CONST c2const_create(int Fs, float framelength_s) {
+    C2CONST c2const;
+    assert((Fs == 8000) || (Fs = 16000));
+    c2const.Fs = Fs;
+    c2const.n_samp = round(Fs*framelength_s);
+    c2const.max_amp = floor(Fs*P_MIN_S/2);
+    c2const.p_min = floor(Fs*P_MIN_S);
+    c2const.p_max = floor(Fs*P_MAX_S);
+    c2const.m_pitch = floor(Fs*M_PITCH_S);
+    c2const.Wo_min = TWO_PI/c2const.p_max;
+    c2const.Wo_max = TWO_PI/c2const.p_min;
+    if (Fs == 8000) {
+        c2const.nw = 279;
+    } else {
+        c2const.nw = 511;  /* actually a bit shorter in time but lets us maintain constant FFT size */
+    }
+    c2const.tw = Fs*TW_S;
+    /*
+    fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
+    fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
+    fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
+    fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
+    */
+    return c2const;
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: make_analysis_window
+  AUTHOR......: David Rowe
+  DATE CREATED: 11/5/94
+  Init function that generates the time domain analysis window and it's DFT.
+\*---------------------------------------------------------------------------*/
+void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[])
+{
+  float m;
+  COMP  wshift[FFT_ENC];
+  COMP  temp;
+  int   i,j;
+  int   m_pitch = c2const->m_pitch;
+  int   nw      = c2const->nw;
+  /*
+     Generate Hamming window centered on M-sample pitch analysis window
+  0            M/2           M-1
+  |-------------|-------------|
+        |-------|-------|
+            nw samples
+     All our analysis/synthsis is centred on the M/2 sample.
+  */
+  m = 0.0;
+  for(i=0; i<m_pitch/2-nw/2; i++)
+    w[i] = 0.0;
+  for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) {
+    w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1));
+    m += w[i]*w[i];
+  }
+  for(i=m_pitch/2+nw/2; i<m_pitch; i++)
+    w[i] = 0.0;
+  /* Normalise - makes freq domain amplitude estimation straight
+     forward */
+  m = 1.0/sqrtf(m*FFT_ENC);
+  for(i=0; i<m_pitch; i++) {
+    w[i] *= m;
+  }
+  /*
+     Generate DFT of analysis window, used for later processing.  Note
+     we modulo FFT_ENC shift the time domain window w[], this makes the
+     imaginary part of the DFT W[] equal to zero as the shifted w[] is
+     even about the n=0 time axis if nw is odd.  Having the imag part
+     of the DFT W[] makes computation easier.
+     0                      FFT_ENC-1
+     |-------------------------|
+      ----\               /----
+           \             /
+            \           /          <- shifted version of window w[n]
+             \         /
+              \       /
+               -------
+     |---------|     |---------|
+       nw/2              nw/2
+  */
+  for(i=0; i<FFT_ENC; i++) {
+    wshift[i].real = 0.0;
+    wshift[i].imag = 0.0;
+  }
+  for(i=0; i<nw/2; i++)
+    wshift[i].real = w[i+m_pitch/2];
+  for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++)
+   wshift[i].real = w[j];
+  codec2_fft(fft_fwd_cfg, wshift, W);
+  /*
+      Re-arrange W[] to be symmetrical about FFT_ENC/2.  Makes later
+      analysis convenient.
+   Before:
+     0                 FFT_ENC-1
+     |----------|---------|
+     __                   _
+       \                 /
+        \_______________/
+   After:
+     0                 FFT_ENC-1
+     |----------|---------|
+               ___
+              /   \
+     ________/     \_______
+  */
+  for(i=0; i<FFT_ENC/2; i++) {
+    temp.real = W[i].real;
+    temp.imag = W[i].imag;
+    W[i].real = W[i+FFT_ENC/2].real;
+    W[i].imag = W[i+FFT_ENC/2].imag;
+    W[i+FFT_ENC/2].real = temp.real;
+    W[i+FFT_ENC/2].imag = temp.imag;
+  }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: hpf
+  AUTHOR......: David Rowe
+  DATE CREATED: 16 Nov 2010
+  High pass filter with a -3dB point of about 160Hz.
+    y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1)
+\*---------------------------------------------------------------------------*/
+float hpf(float x, float states[])
+{
+    states[0] = -HPF_BETA*states[0] + x - states[1];
+    states[1] = x;
+    return states[0];
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: dft_speech
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94
+  Finds the DFT of the current speech input speech frame.
+\*---------------------------------------------------------------------------*/
+// TODO: we can either go for a faster FFT using fftr and some stack usage
+// or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace
+#if 1
+void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[])
+{
+    int  i;
+    int  m_pitch = c2const->m_pitch;
+    int   nw      = c2const->nw;
+    for(i=0; i<FFT_ENC; i++) {
+        Sw[i].real = 0.0;
+        Sw[i].imag = 0.0;
+    }
+    /* Centre analysis window on time axis, we need to arrange input
+       to FFT this way to make FFT phases correct */
+    /* move 2nd half to start of FFT input vector */
+    for(i=0; i<nw/2; i++)
+        Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2];
+    /* move 1st half to end of FFT input vector */
+    for(i=0; i<nw/2; i++)
+        Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
+    codec2_fft_inplace(fft_fwd_cfg, Sw);
+}
+#else
+void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
+{
+    int  i;
+  float sw[FFT_ENC];
+  for(i=0; i<FFT_ENC; i++) {
+    sw[i] = 0.0;
+  }
+  /* Centre analysis window on time axis, we need to arrange input
+     to FFT this way to make FFT phases correct */
+  /* move 2nd half to start of FFT input vector */
+  for(i=0; i<nw/2; i++)
+    sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2];
+  /* move 1st half to end of FFT input vector */
+  for(i=0; i<nw/2; i++)
+    sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
+  codec2_fftr(fftr_fwd_cfg, sw, Sw);
+}
+#endif
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: two_stage_pitch_refinement
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94
+  Refines the current pitch estimate using the harmonic sum pitch
+  estimation technique.
+\*---------------------------------------------------------------------------*/
+void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[])
+{
+  float pmin,pmax,pstep;        /* pitch refinment minimum, maximum and step */
+  /* Coarse refinement */
+  pmax = TWO_PI/model->Wo + 5;
+  pmin = TWO_PI/model->Wo - 5;
+  pstep = 1.0;
+  hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
+  /* Fine refinement */
+  pmax = TWO_PI/model->Wo + 1;
+  pmin = TWO_PI/model->Wo - 1;
+  pstep = 0.25;
+  hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
+  /* Limit range */
+  if (model->Wo < TWO_PI/c2const->p_max)
+    model->Wo = TWO_PI/c2const->p_max;
+  if (model->Wo > TWO_PI/c2const->p_min)
+    model->Wo = TWO_PI/c2const->p_min;
+  model->L = floorf(PI/model->Wo);
+  /* trap occasional round off issues with floorf() */
+  if (model->Wo*model->L >= 0.95*PI) {
+      model->L--;
+  }
+  assert(model->Wo*model->L < PI);
+}
+/*---------------------------------------------------------------------------*\
+ FUNCTION....: hs_pitch_refinement
+ AUTHOR......: David Rowe
+ DATE CREATED: 27/5/94
+ Harmonic sum pitch refinement function.
+ pmin   pitch search range minimum
+ pmax   pitch search range maximum
+ step   pitch search step size
+ model  current pitch estimate in model.Wo
+ model  refined pitch estimate in model.Wo
+\*---------------------------------------------------------------------------*/
+void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep)
+{
+  int m;                /* loop variable */
+  int b;                /* bin for current harmonic centre */
+  float E;              /* energy for current pitch*/
+  float Wo;             /* current "test" fundamental freq. */
+  float Wom;            /* Wo that maximises E */
+  float Em;             /* mamimum energy */
+  float r, one_on_r;    /* number of rads/bin */
+  float p;              /* current pitch */
+  /* Initialisation */
+  model->L = PI/model->Wo;      /* use initial pitch est. for L */
+  Wom = model->Wo;
+  Em = 0.0;
+  r = TWO_PI/FFT_ENC;
+  one_on_r = 1.0/r;
+  /* Determine harmonic sum for a range of Wo values */
+  for(p=pmin; p<=pmax; p+=pstep) {
+    E = 0.0;
+    Wo = TWO_PI/p;
+    /* Sum harmonic magnitudes */
+    for(m=1; m<=model->L; m++) {
+        b = (int)(m*Wo*one_on_r + 0.5);
+        E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
+    }
+    /* Compare to see if this is a maximum */
+    if (E > Em) {
+      Em = E;
+      Wom = Wo;
+    }
+  }
+  model->Wo = Wom;
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: estimate_amplitudes
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94
+  Estimates the complex amplitudes of the harmonics.
+\*---------------------------------------------------------------------------*/
+void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase)
+{
+  int   i,m;            /* loop variables */
+  int   am,bm;          /* bounds of current harmonic */
+  int   b;              /* DFT bin of centre of current harmonic */
+  float den;            /* denominator of amplitude expression */
+  float r, one_on_r;    /* number of rads/bin */
+  int   offset;
+  COMP  Am;
+  r = TWO_PI/FFT_ENC;
+  one_on_r = 1.0/r;
+  for(m=1; m<=model->L; m++) {
+    den = 0.0;
+    am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
+    bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
+    b = (int)(m*model->Wo/r + 0.5);
+    /* Estimate ampltude of harmonic */
+    den = 0.0;
+    Am.real = Am.imag = 0.0;
+    offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5);
+    for(i=am; i<bm; i++) {
+      den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag;
+      Am.real += Sw[i].real*W[i + offset].real;
+      Am.imag += Sw[i].imag*W[i + offset].real;
+    }
+    model->A[m] = sqrtf(den);
+    if (est_phase) {
+        /* Estimate phase of harmonic, this is expensive in CPU for
+           embedded devicesso we make it an option */
+        model->phi[m] = atan2f(Sw[b].imag,Sw[b].real);
+    }
+  }
+}
+/*---------------------------------------------------------------------------*\
+  est_voicing_mbe()
+  Returns the error of the MBE cost function for a fiven F0.
+  Note: I think a lot of the operations below can be simplified as
+  W[].imag = 0 and has been normalised such that den always equals 1.
+\*---------------------------------------------------------------------------*/
+float est_voicing_mbe(
+                      C2CONST *c2const,
+                      MODEL *model,
+                      COMP   Sw[],
+                      COMP   W[]
+                      )
+{
+    int   l,al,bl,m;    /* loop variables */
+    COMP  Am;             /* amplitude sample for this band */
+    int   offset;         /* centers Hw[] about current harmonic */
+    float den;            /* denominator of Am expression */
+    float error;          /* accumulated error between original and synthesised */
+    float Wo;
+    float sig, snr;
+    float elow, ehigh, eratio;
+    float sixty;
+    COMP   Ew;
+    Ew.real = 0;
+    Ew.imag = 0;
+    int l_1000hz = model->L*1000.0/(c2const->Fs/2);
+    sig = 1E-4;
+    for(l=1; l<=l_1000hz; l++) {
+        sig += model->A[l]*model->A[l];
+    }
+    Wo = model->Wo;
+    error = 1E-4;
+    /* Just test across the harmonics in the first 1000 Hz */
+    for(l=1; l<=l_1000hz; l++) {
+        Am.real = 0.0;
+        Am.imag = 0.0;
+        den = 0.0;
+        al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
+        bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
+        /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+        offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
+        for(m=al; m<bl; m++) {
+            Am.real += Sw[m].real*W[offset+m].real;
+            Am.imag += Sw[m].imag*W[offset+m].real;
+            den += W[offset+m].real*W[offset+m].real;
+        }
+        Am.real = Am.real/den;
+        Am.imag = Am.imag/den;
+        /* Determine error between estimated harmonic and original */
+// Redundant!        offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
+        for(m=al; m<bl; m++) {
+            Ew.real = Sw[m].real - Am.real*W[offset+m].real;
+            Ew.imag = Sw[m].imag - Am.imag*W[offset+m].real;
+            error += Ew.real*Ew.real;
+            error += Ew.imag*Ew.imag;
+        }
+    }
+    snr = 10.0*log10f(sig/error);
+    if (snr > V_THRESH)
+        model->voiced = 1;
+    else
+        model->voiced = 0;
+    /* post processing, helps clean up some voicing errors ------------------*/
+    /*
+       Determine the ratio of low freqency to high frequency energy,
+       voiced speech tends to be dominated by low frequency energy,
+       unvoiced by high frequency. This measure can be used to
+       determine if we have made any gross errors.
+    */
+    int l_2000hz = model->L*2000.0/(c2const->Fs/2);
+    int l_4000hz = model->L*4000.0/(c2const->Fs/2);
+    elow = ehigh = 1E-4;
+    for(l=1; l<=l_2000hz; l++) {
+        elow += model->A[l]*model->A[l];
+    }
+    for(l=l_2000hz; l<=l_4000hz; l++) {
+        ehigh += model->A[l]*model->A[l];
+    }
+    eratio = 10.0*log10f(elow/ehigh);
+    /* Look for Type 1 errors, strongly V speech that has been
+       accidentally declared UV */
+    if (model->voiced == 0)
+        if (eratio > 10.0)
+            model->voiced = 1;
+    /* Look for Type 2 errors, strongly UV speech that has been
+       accidentally declared V */
+    if (model->voiced == 1) {
+        if (eratio < -10.0)
+            model->voiced = 0;
+        /* A common source of Type 2 errors is the pitch estimator
+           gives a low (50Hz) estimate for UV speech, which gives a
+           good match with noise due to the close harmoonic spacing.
+           These errors are much more common than people with 50Hz3
+           pitch, so we have just a small eratio threshold. */
+        sixty = 60.0*TWO_PI/c2const->Fs;
+        if ((eratio < -4.0) && (model->Wo <= sixty))
+            model->voiced = 0;
+    }
+    //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
+    return snr;
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: make_synthesis_window
+  AUTHOR......: David Rowe
+  DATE CREATED: 11/5/94
+  Init function that generates the trapezoidal (Parzen) sythesis window.
+\*---------------------------------------------------------------------------*/
+void make_synthesis_window(C2CONST *c2const, float Pn[])
+{
+  int   i;
+  float win;
+  int   n_samp = c2const->n_samp;
+  int   tw     = c2const->tw;
+  /* Generate Parzen window in time domain */
+  win = 0.0;
+  for(i=0; i<n_samp/2-tw; i++)
+    Pn[i] = 0.0;
+  win = 0.0;
+  for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ )
+    Pn[i] = win;
+  for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++)
+    Pn[i] = 1.0;
+  win = 1.0;
+  for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++)
+    Pn[i] = win;
+  for(i=3*n_samp/2+tw; i<2*n_samp; i++)
+    Pn[i] = 0.0;
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: synthesise
+  AUTHOR......: David Rowe
+  DATE CREATED: 20/2/95
+  Synthesise a speech signal in the frequency domain from the
+  sinusodal model parameters.  Uses overlap-add with a trapezoidal
+  window to smoothly interpolate betwen frames.
+\*---------------------------------------------------------------------------*/
+void synthesise(
+  int    n_samp,
+  codec2_fftr_cfg fftr_inv_cfg,
+  float  Sn_[],         /* time domain synthesised signal              */
+  MODEL *model,         /* ptr to model parameters for this frame      */
+  float  Pn[],          /* time domain Parzen window                   */
+  int    shift          /* flag used to handle transition frames       */
+)
+{
+    int   i,l,j,b;              /* loop variables */
+    COMP  Sw_[FFT_DEC/2+1];     /* DFT of synthesised signal */
+    float sw_[FFT_DEC];         /* synthesised signal */
+    if (shift) {
+        /* Update memories */
+        for(i=0; i<n_samp-1; i++) {
+            Sn_[i] = Sn_[i+n_samp];
+        }
+        Sn_[n_samp-1] = 0.0;
+    }
+    for(i=0; i<FFT_DEC/2+1; i++) {
+        Sw_[i].real = 0.0;
+        Sw_[i].imag = 0.0;
+    }
+    /* Now set up frequency domain synthesised speech */
+    for(l=1; l<=model->L; l++) {
+        b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
+        if (b > ((FFT_DEC/2)-1)) {
+            b = (FFT_DEC/2)-1;
+        }
+        Sw_[b].real = model->A[l]*cosf(model->phi[l]);
+        Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
+    }
+    /* Perform inverse DFT */
+    codec2_fftri(fftr_inv_cfg, Sw_,sw_);
+    /* Overlap add to previous samples */
+    #ifdef USE_KISS_FFT
+    #define    FFTI_FACTOR ((float)1.0)
+    #else
+    #define    FFTI_FACTOR ((float32_t)FFT_DEC)
+    #endif
+    for(i=0; i<n_samp-1; i++) {
+        Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR;
+    }
+    if (shift)
+        for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
+            Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR;
+    else
+        for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
+            Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR;
+}
+/* todo: this should probably be in some states rather than a static */
+static unsigned long next = 1;
+int codec2_rand(void) {
+    next = next * 1103515245 + 12345;
+    return((unsigned)(next/65536) % 32768);
+}

diff --git a/sine.c b/sine.c new file mode 100644 index 0000000..4e31f37 --- /dev/null +++ b/sine.c
@@ -0,0 +1,689 @@
	1	/---------------------------------------------------------------------------\
	2
	3	FILE........: sine.c
	4	AUTHOR......: David Rowe
	5	DATE CREATED: 19/8/2010
	6
	7	Sinusoidal analysis and synthesis functions.
	8
	9	\---------------------------------------------------------------------------/
	10
	11	/*
	12	Copyright (C) 1990-2010 David Rowe
	13
	14	All rights reserved.
	15
	16	This program is free software; you can redistribute it and/or modify
	17	it under the terms of the GNU Lesser General Public License version 2.1, as
	18	published by the Free Software Foundation. This program is
	19	distributed in the hope that it will be useful, but WITHOUT ANY
	20	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	21	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
	22	License for more details.
	23
	24	You should have received a copy of the GNU Lesser General Public License
	25	along with this program; if not, see <http://www.gnu.org/licenses/>.
	26	*/
	27
	28	/---------------------------------------------------------------------------\
	29
	30	INCLUDES
	31
	32	\---------------------------------------------------------------------------/
	33
	34	#include <stdlib.h>
	35	#include <stdio.h>
	36	#include <math.h>
	37
	38	#include "defines.h"
	39	#include "sine.h"
	40	#include "kiss_fft.h"
	41
	42	#define HPF_BETA 0.125
	43
	44	/---------------------------------------------------------------------------\
	45
	46	HEADERS
	47
	48	\---------------------------------------------------------------------------/
	49
	50	void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax,
	51	float pstep);
	52
	53	/---------------------------------------------------------------------------\
	54
	55	FUNCTIONS
	56
	57	\---------------------------------------------------------------------------/
	58
	59	C2CONST c2const_create(int Fs, float framelength_s) {
	60	C2CONST c2const;
	61
	62	assert((Fs == 8000) \|\| (Fs = 16000));
	63	c2const.Fs = Fs;
	64	c2const.n_samp = round(Fs*framelength_s);
	65	c2const.max_amp = floor(Fs*P_MIN_S/2);
	66	c2const.p_min = floor(Fs*P_MIN_S);
	67	c2const.p_max = floor(Fs*P_MAX_S);
	68	c2const.m_pitch = floor(Fs*M_PITCH_S);
	69	c2const.Wo_min = TWO_PI/c2const.p_max;
	70	c2const.Wo_max = TWO_PI/c2const.p_min;
	71
	72	if (Fs == 8000) {
	73	c2const.nw = 279;
	74	} else {
	75	c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */
	76	}
	77
	78	c2const.tw = Fs*TW_S;
	79
	80	/*
	81	fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
	82	fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
	83	fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
	84	fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
	85	*/
	86
	87	return c2const;
	88	}
	89
	90	/---------------------------------------------------------------------------\
	91
	92	FUNCTION....: make_analysis_window
	93	AUTHOR......: David Rowe
	94	DATE CREATED: 11/5/94
	95
	96	Init function that generates the time domain analysis window and it's DFT.
	97
	98	\---------------------------------------------------------------------------/
	99
	100	void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[])
	101	{
	102	float m;
	103	COMP wshift[FFT_ENC];
	104	COMP temp;
	105	int i,j;
	106	int m_pitch = c2const->m_pitch;
	107	int nw = c2const->nw;
	108
	109	/*
	110	Generate Hamming window centered on M-sample pitch analysis window
	111
	112	0 M/2 M-1
	113	\|-------------\|-------------\|
	114	\|-------\|-------\|
	115	nw samples
	116
	117	All our analysis/synthsis is centred on the M/2 sample.
	118	*/
	119
	120	m = 0.0;
	121	for(i=0; i<m_pitch/2-nw/2; i++)
	122	w[i] = 0.0;
	123	for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) {
	124	w[i] = 0.5 - 0.5cosf(TWO_PIj/(nw-1));
	125	m += w[i]*w[i];
	126	}
	127	for(i=m_pitch/2+nw/2; i<m_pitch; i++)
	128	w[i] = 0.0;
	129
	130	/* Normalise - makes freq domain amplitude estimation straight
	131	forward */
	132
	133	m = 1.0/sqrtf(m*FFT_ENC);
	134	for(i=0; i<m_pitch; i++) {
	135	w[i] *= m;
	136	}
	137
	138	/*
	139	Generate DFT of analysis window, used for later processing. Note
	140	we modulo FFT_ENC shift the time domain window w[], this makes the
	141	imaginary part of the DFT W[] equal to zero as the shifted w[] is
	142	even about the n=0 time axis if nw is odd. Having the imag part
	143	of the DFT W[] makes computation easier.
	144
	145	0 FFT_ENC-1
	146	\|-------------------------\|
	147
	148	----\ /----
	149	\ /
	150	\ / <- shifted version of window w[n]
	151	\ /
	152	\ /
	153	-------
	154
	155	\|---------\| \|---------\|
	156	nw/2 nw/2
	157	*/
	158
	159	for(i=0; i<FFT_ENC; i++) {
	160	wshift[i].real = 0.0;
	161	wshift[i].imag = 0.0;
	162	}
	163	for(i=0; i<nw/2; i++)
	164	wshift[i].real = w[i+m_pitch/2];
	165	for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++)
	166	wshift[i].real = w[j];
	167
	168	codec2_fft(fft_fwd_cfg, wshift, W);
	169
	170	/*
	171	Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later
	172	analysis convenient.
	173
	174	Before:
	175
	176
	177	0 FFT_ENC-1
	178	\|----------\|---------\|
	179	__ _
	180	\ /
	181	\_______________/
	182
	183	After:
	184
	185	0 FFT_ENC-1
	186	\|----------\|---------\|
	187	___
	188	/ \
	189	________/ \_______
	190
	191	*/
	192
	193
	194	for(i=0; i<FFT_ENC/2; i++) {
	195	temp.real = W[i].real;
	196	temp.imag = W[i].imag;
	197	W[i].real = W[i+FFT_ENC/2].real;
	198	W[i].imag = W[i+FFT_ENC/2].imag;
	199	W[i+FFT_ENC/2].real = temp.real;
	200	W[i+FFT_ENC/2].imag = temp.imag;
	201	}
	202
	203	}
	204
	205	/---------------------------------------------------------------------------\
	206
	207	FUNCTION....: hpf
	208	AUTHOR......: David Rowe
	209	DATE CREATED: 16 Nov 2010
	210
	211	High pass filter with a -3dB point of about 160Hz.
	212
	213	y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1)
	214
	215	\---------------------------------------------------------------------------/
	216
	217	float hpf(float x, float states[])
	218	{
	219	states[0] = -HPF_BETA*states[0] + x - states[1];
	220	states[1] = x;
	221
	222	return states[0];
	223	}
	224
	225	/---------------------------------------------------------------------------\
	226
	227	FUNCTION....: dft_speech
	228	AUTHOR......: David Rowe
	229	DATE CREATED: 27/5/94
	230
	231	Finds the DFT of the current speech input speech frame.
	232
	233	\---------------------------------------------------------------------------/
	234
	235	// TODO: we can either go for a faster FFT using fftr and some stack usage
	236	// or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace
	237	#if 1
	238	void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[])
	239	{
	240	int i;
	241	int m_pitch = c2const->m_pitch;
	242	int nw = c2const->nw;
	243
	244	for(i=0; i<FFT_ENC; i++) {
	245	Sw[i].real = 0.0;
	246	Sw[i].imag = 0.0;
	247	}
	248
	249	/* Centre analysis window on time axis, we need to arrange input
	250	to FFT this way to make FFT phases correct */
	251
	252	/* move 2nd half to start of FFT input vector */
	253
	254	for(i=0; i<nw/2; i++)
	255	Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2];
	256
	257	/* move 1st half to end of FFT input vector */
	258
	259	for(i=0; i<nw/2; i++)
	260	Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
	261
	262	codec2_fft_inplace(fft_fwd_cfg, Sw);
	263	}
	264	#else
	265	void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
	266	{
	267	int i;
	268	float sw[FFT_ENC];
	269
	270	for(i=0; i<FFT_ENC; i++) {
	271	sw[i] = 0.0;
	272	}
	273
	274	/* Centre analysis window on time axis, we need to arrange input
	275	to FFT this way to make FFT phases correct */
	276
	277	/* move 2nd half to start of FFT input vector */
	278
	279	for(i=0; i<nw/2; i++)
	280	sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2];
	281
	282	/* move 1st half to end of FFT input vector */
	283
	284	for(i=0; i<nw/2; i++)
	285	sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2];
	286
	287	codec2_fftr(fftr_fwd_cfg, sw, Sw);
	288	}
	289	#endif
	290
	291
	292	/---------------------------------------------------------------------------\
	293
	294	FUNCTION....: two_stage_pitch_refinement
	295	AUTHOR......: David Rowe
	296	DATE CREATED: 27/5/94
	297
	298	Refines the current pitch estimate using the harmonic sum pitch
	299	estimation technique.
	300
	301	\---------------------------------------------------------------------------/
	302
	303	void two_stage_pitch_refinement(C2CONST c2const, MODEL model, COMP Sw[])
	304	{
	305	float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */
	306
	307	/* Coarse refinement */
	308
	309	pmax = TWO_PI/model->Wo + 5;
	310	pmin = TWO_PI/model->Wo - 5;
	311	pstep = 1.0;
	312	hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
	313
	314	/* Fine refinement */
	315
	316	pmax = TWO_PI/model->Wo + 1;
	317	pmin = TWO_PI/model->Wo - 1;
	318	pstep = 0.25;
	319	hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
	320
	321	/* Limit range */
	322
	323	if (model->Wo < TWO_PI/c2const->p_max)
	324	model->Wo = TWO_PI/c2const->p_max;
	325	if (model->Wo > TWO_PI/c2const->p_min)
	326	model->Wo = TWO_PI/c2const->p_min;
	327
	328	model->L = floorf(PI/model->Wo);
	329
	330	/* trap occasional round off issues with floorf() */
	331	if (model->Womodel->L >= 0.95PI) {
	332	model->L--;
	333	}
	334	assert(model->Wo*model->L < PI);
	335	}
	336
	337	/---------------------------------------------------------------------------\
	338
	339	FUNCTION....: hs_pitch_refinement
	340	AUTHOR......: David Rowe
	341	DATE CREATED: 27/5/94
	342
	343	Harmonic sum pitch refinement function.
	344
	345	pmin pitch search range minimum
	346	pmax pitch search range maximum
	347	step pitch search step size
	348	model current pitch estimate in model.Wo
	349
	350	model refined pitch estimate in model.Wo
	351
	352	\---------------------------------------------------------------------------/
	353
	354	void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep)
	355	{
	356	int m; /* loop variable */
	357	int b; /* bin for current harmonic centre */
	358	float E; /* energy for current pitch*/
	359	float Wo; /* current "test" fundamental freq. */
	360	float Wom; /* Wo that maximises E */
	361	float Em; /* mamimum energy */
	362	float r, one_on_r; /* number of rads/bin */
	363	float p; /* current pitch */
	364
	365	/* Initialisation */
	366
	367	model->L = PI/model->Wo; /* use initial pitch est. for L */
	368	Wom = model->Wo;
	369	Em = 0.0;
	370	r = TWO_PI/FFT_ENC;
	371	one_on_r = 1.0/r;
	372
	373	/* Determine harmonic sum for a range of Wo values */
	374
	375	for(p=pmin; p<=pmax; p+=pstep) {
	376	E = 0.0;
	377	Wo = TWO_PI/p;
	378
	379	/* Sum harmonic magnitudes */
	380	for(m=1; m<=model->L; m++) {
	381	b = (int)(mWoone_on_r + 0.5);
	382	E += Sw[b].realSw[b].real + Sw[b].imagSw[b].imag;
	383	}
	384	/* Compare to see if this is a maximum */
	385
	386	if (E > Em) {
	387	Em = E;
	388	Wom = Wo;
	389	}
	390	}
	391
	392	model->Wo = Wom;
	393	}
	394
	395	/---------------------------------------------------------------------------\
	396
	397	FUNCTION....: estimate_amplitudes
	398	AUTHOR......: David Rowe
	399	DATE CREATED: 27/5/94
	400
	401	Estimates the complex amplitudes of the harmonics.
	402
	403	\---------------------------------------------------------------------------/
	404
	405	void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase)
	406	{
	407	int i,m; /* loop variables */
	408	int am,bm; /* bounds of current harmonic */
	409	int b; /* DFT bin of centre of current harmonic */
	410	float den; /* denominator of amplitude expression */
	411	float r, one_on_r; /* number of rads/bin */
	412	int offset;
	413	COMP Am;
	414
	415	r = TWO_PI/FFT_ENC;
	416	one_on_r = 1.0/r;
	417
	418	for(m=1; m<=model->L; m++) {
	419	den = 0.0;
	420	am = (int)((m - 0.5)model->Woone_on_r + 0.5);
	421	bm = (int)((m + 0.5)model->Woone_on_r + 0.5);
	422	b = (int)(m*model->Wo/r + 0.5);
	423
	424	/* Estimate ampltude of harmonic */
	425
	426	den = 0.0;
	427	Am.real = Am.imag = 0.0;
	428	offset = FFT_ENC/2 - (int)(mmodel->Woone_on_r + 0.5);
	429	for(i=am; i<bm; i++) {
	430	den += Sw[i].realSw[i].real + Sw[i].imagSw[i].imag;
	431	Am.real += Sw[i].real*W[i + offset].real;
	432	Am.imag += Sw[i].imag*W[i + offset].real;
	433	}
	434
	435	model->A[m] = sqrtf(den);
	436
	437	if (est_phase) {
	438
	439	/* Estimate phase of harmonic, this is expensive in CPU for
	440	embedded devicesso we make it an option */
	441
	442	model->phi[m] = atan2f(Sw[b].imag,Sw[b].real);
	443	}
	444	}
	445	}
	446
	447	/---------------------------------------------------------------------------\
	448
	449	est_voicing_mbe()
	450
	451	Returns the error of the MBE cost function for a fiven F0.
	452
	453	Note: I think a lot of the operations below can be simplified as
	454	W[].imag = 0 and has been normalised such that den always equals 1.
	455
	456	\---------------------------------------------------------------------------/
	457
	458	float est_voicing_mbe(
	459	C2CONST *c2const,
	460	MODEL *model,
	461	COMP Sw[],
	462	COMP W[]
	463	)
	464	{
	465	int l,al,bl,m; /* loop variables */
	466	COMP Am; /* amplitude sample for this band */
	467	int offset; /* centers Hw[] about current harmonic */
	468	float den; /* denominator of Am expression */
	469	float error; /* accumulated error between original and synthesised */
	470	float Wo;
	471	float sig, snr;
	472	float elow, ehigh, eratio;
	473	float sixty;
	474	COMP Ew;
	475	Ew.real = 0;
	476	Ew.imag = 0;
	477
	478	int l_1000hz = model->L*1000.0/(c2const->Fs/2);
	479	sig = 1E-4;
	480	for(l=1; l<=l_1000hz; l++) {
	481	sig += model->A[l]*model->A[l];
	482	}
	483
	484	Wo = model->Wo;
	485	error = 1E-4;
	486
	487	/* Just test across the harmonics in the first 1000 Hz */
	488
	489	for(l=1; l<=l_1000hz; l++) {
	490	Am.real = 0.0;
	491	Am.imag = 0.0;
	492	den = 0.0;
	493	al = ceilf((l - 0.5)WoFFT_ENC/TWO_PI);
	494	bl = ceilf((l + 0.5)WoFFT_ENC/TWO_PI);
	495
	496	/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
	497
	498	offset = FFT_ENC/2 - lWoFFT_ENC/TWO_PI + 0.5;
	499	for(m=al; m<bl; m++) {
	500	Am.real += Sw[m].real*W[offset+m].real;
	501	Am.imag += Sw[m].imag*W[offset+m].real;
	502	den += W[offset+m].real*W[offset+m].real;
	503	}
	504
	505	Am.real = Am.real/den;
	506	Am.imag = Am.imag/den;
	507
	508	/* Determine error between estimated harmonic and original */
	509
	510	// Redundant! offset = FFT_ENC/2 - lWoFFT_ENC/TWO_PI + 0.5;
	511	for(m=al; m<bl; m++) {
	512	Ew.real = Sw[m].real - Am.real*W[offset+m].real;
	513	Ew.imag = Sw[m].imag - Am.imag*W[offset+m].real;
	514	error += Ew.real*Ew.real;
	515	error += Ew.imag*Ew.imag;
	516	}
	517	}
	518
	519	snr = 10.0*log10f(sig/error);
	520	if (snr > V_THRESH)
	521	model->voiced = 1;
	522	else
	523	model->voiced = 0;
	524
	525	/* post processing, helps clean up some voicing errors ------------------*/
	526
	527	/*
	528	Determine the ratio of low freqency to high frequency energy,
	529	voiced speech tends to be dominated by low frequency energy,
	530	unvoiced by high frequency. This measure can be used to
	531	determine if we have made any gross errors.
	532	*/
	533
	534	int l_2000hz = model->L*2000.0/(c2const->Fs/2);
	535	int l_4000hz = model->L*4000.0/(c2const->Fs/2);
	536	elow = ehigh = 1E-4;
	537	for(l=1; l<=l_2000hz; l++) {
	538	elow += model->A[l]*model->A[l];
	539	}
	540	for(l=l_2000hz; l<=l_4000hz; l++) {
	541	ehigh += model->A[l]*model->A[l];
	542	}
	543	eratio = 10.0*log10f(elow/ehigh);
	544
	545	/* Look for Type 1 errors, strongly V speech that has been
	546	accidentally declared UV */
	547
	548	if (model->voiced == 0)
	549	if (eratio > 10.0)
	550	model->voiced = 1;
	551
	552	/* Look for Type 2 errors, strongly UV speech that has been
	553	accidentally declared V */
	554
	555	if (model->voiced == 1) {
	556	if (eratio < -10.0)
	557	model->voiced = 0;
	558
	559	/* A common source of Type 2 errors is the pitch estimator
	560	gives a low (50Hz) estimate for UV speech, which gives a
	561	good match with noise due to the close harmoonic spacing.
	562	These errors are much more common than people with 50Hz3
	563	pitch, so we have just a small eratio threshold. */
	564
	565	sixty = 60.0*TWO_PI/c2const->Fs;
	566	if ((eratio < -4.0) && (model->Wo <= sixty))
	567	model->voiced = 0;
	568	}
	569	//printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
	570
	571	return snr;
	572	}
	573
	574	/---------------------------------------------------------------------------\
	575
	576	FUNCTION....: make_synthesis_window
	577	AUTHOR......: David Rowe
	578	DATE CREATED: 11/5/94
	579
	580	Init function that generates the trapezoidal (Parzen) sythesis window.
	581
	582	\---------------------------------------------------------------------------/
	583
	584	void make_synthesis_window(C2CONST *c2const, float Pn[])
	585	{
	586	int i;
	587	float win;
	588	int n_samp = c2const->n_samp;
	589	int tw = c2const->tw;
	590
	591	/* Generate Parzen window in time domain */
	592
	593	win = 0.0;
	594	for(i=0; i<n_samp/2-tw; i++)
	595	Pn[i] = 0.0;
	596	win = 0.0;
	597	for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ )
	598	Pn[i] = win;
	599	for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++)
	600	Pn[i] = 1.0;
	601	win = 1.0;
	602	for(i=3n_samp/2-tw; i<3n_samp/2+tw; win-=1.0/(2*tw), i++)
	603	Pn[i] = win;
	604	for(i=3n_samp/2+tw; i<2n_samp; i++)
	605	Pn[i] = 0.0;
	606	}
	607
	608	/---------------------------------------------------------------------------\
	609
	610	FUNCTION....: synthesise
	611	AUTHOR......: David Rowe
	612	DATE CREATED: 20/2/95
	613
	614	Synthesise a speech signal in the frequency domain from the
	615	sinusodal model parameters. Uses overlap-add with a trapezoidal
	616	window to smoothly interpolate betwen frames.
	617
	618	\---------------------------------------------------------------------------/
	619
	620	void synthesise(
	621	int n_samp,
	622	codec2_fftr_cfg fftr_inv_cfg,
	623	float Sn_[], /* time domain synthesised signal */
	624	MODEL model, / ptr to model parameters for this frame */
	625	float Pn[], /* time domain Parzen window */
	626	int shift /* flag used to handle transition frames */
	627	)
	628	{
	629	int i,l,j,b; /* loop variables */
	630	COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */
	631	float sw_[FFT_DEC]; /* synthesised signal */
	632
	633	if (shift) {
	634	/* Update memories */
	635	for(i=0; i<n_samp-1; i++) {
	636	Sn_[i] = Sn_[i+n_samp];
	637	}
	638	Sn_[n_samp-1] = 0.0;
	639	}
	640
	641	for(i=0; i<FFT_DEC/2+1; i++) {
	642	Sw_[i].real = 0.0;
	643	Sw_[i].imag = 0.0;
	644	}
	645
	646	/* Now set up frequency domain synthesised speech */
	647
	648	for(l=1; l<=model->L; l++) {
	649	b = (int)(lmodel->WoFFT_DEC/TWO_PI + 0.5);
	650	if (b > ((FFT_DEC/2)-1)) {
	651	b = (FFT_DEC/2)-1;
	652	}
	653	Sw_[b].real = model->A[l]*cosf(model->phi[l]);
	654	Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
	655	}
	656
	657	/* Perform inverse DFT */
	658
	659	codec2_fftri(fftr_inv_cfg, Sw_,sw_);
	660
	661	/* Overlap add to previous samples */
	662
	663	#ifdef USE_KISS_FFT
	664	#define FFTI_FACTOR ((float)1.0)
	665	#else
	666	#define FFTI_FACTOR ((float32_t)FFT_DEC)
	667	#endif
	668
	669	for(i=0; i<n_samp-1; i++) {
	670	Sn_[i] += sw_[FFT_DEC-n_samp+1+i]Pn[i] FFTI_FACTOR;
	671	}
	672
	673	if (shift)
	674	for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
	675	Sn_[i] = sw_[j]Pn[i] FFTI_FACTOR;
	676	else
	677	for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
	678	Sn_[i] += sw_[j]Pn[i] FFTI_FACTOR;
	679	}
	680
	681
	682	/* todo: this should probably be in some states rather than a static */
	683	static unsigned long next = 1;
	684
	685	int codec2_rand(void) {
	686	next = next * 1103515245 + 12345;
	687	return((unsigned)(next/65536) % 32768);
	688	}
	689