stripdown of version 0.9

author: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
committer: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
commit: f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree: 45556e6104242d4702689760433d7321ae74ec17 /nlp.c
1 files changed, 702 insertions, 0 deletions
diff --git a/nlp.c b/nlp.c
new file mode 100644
index 0000000..8c8d5f1
--- /dev/null
+++ b/nlp.c
@@ -0,0 +1,702 @@
+/*---------------------------------------------------------------------------*\
+  FILE........: nlp.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 23/3/93
+  Non Linear Pitch (NLP) estimation functions.
+\*---------------------------------------------------------------------------*/
+/*
+  Copyright (C) 2009 David Rowe
+  All rights reserved.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "defines.h"
+#include "nlp.h"
+#include "dump.h"
+#include "codec2_fft.h"
+#undef PROFILE
+#include "machdep.h"
+#include "os.h"
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+/*---------------------------------------------------------------------------*\
+                                DEFINES
+\*---------------------------------------------------------------------------*/
+#define PMAX_M      320         /* maximum NLP analysis window size     */
+#define COEFF       0.95        /* notch filter parameter               */
+#define PE_FFT_SIZE 512         /* DFT size for pitch estimation        */
+#define DEC         5           /* decimation factor                    */
+#define SAMPLE_RATE 8000
+#define PI          3.141592654 /* mathematical constant                */
+#define T           0.1         /* threshold for local minima candidate */
+#define F0_MAX      500
+#define CNLP        0.3         /* post processor constant              */
+#define NLP_NTAP 48             /* Decimation LPF order */
+#undef  POST_PROCESS_MBE        /* choose post processor                */
+/* 8 to 16 kHz sample rate conversion */
+#define FDMDV_OS                 2                            /* oversampling rate                   */
+#define FDMDV_OS_TAPS_16K       48                            /* number of OS filter taps at 16kHz   */
+#define FDMDV_OS_TAPS_8K        (FDMDV_OS_TAPS_16K/FDMDV_OS)  /* number of OS filter taps at 8kHz    */
+/*---------------------------------------------------------------------------*\
+                                GLOBALS
+\*---------------------------------------------------------------------------*/
+/* 48 tap 600Hz low pass FIR filter coefficients */
+const float nlp_fir[] = {
+  -1.0818124e-03,
+  -1.1008344e-03,
+  -9.2768838e-04,
+  -4.2289438e-04,
+   5.5034190e-04,
+   2.0029849e-03,
+   3.7058509e-03,
+   5.1449415e-03,
+   5.5924666e-03,
+   4.3036754e-03,
+   8.0284511e-04,
+  -4.8204610e-03,
+  -1.1705810e-02,
+  -1.8199275e-02,
+  -2.2065282e-02,
+  -2.0920610e-02,
+  -1.2808831e-02,
+   3.2204775e-03,
+   2.6683811e-02,
+   5.5520624e-02,
+   8.6305944e-02,
+   1.1480192e-01,
+   1.3674206e-01,
+   1.4867556e-01,
+   1.4867556e-01,
+   1.3674206e-01,
+   1.1480192e-01,
+   8.6305944e-02,
+   5.5520624e-02,
+   2.6683811e-02,
+   3.2204775e-03,
+  -1.2808831e-02,
+  -2.0920610e-02,
+  -2.2065282e-02,
+  -1.8199275e-02,
+  -1.1705810e-02,
+  -4.8204610e-03,
+   8.0284511e-04,
+   4.3036754e-03,
+   5.5924666e-03,
+   5.1449415e-03,
+   3.7058509e-03,
+   2.0029849e-03,
+   5.5034190e-04,
+  -4.2289438e-04,
+  -9.2768838e-04,
+  -1.1008344e-03,
+  -1.0818124e-03
+};
+typedef struct {
+    int           Fs;                /* sample rate in Hz            */
+    int           m;
+    float         w[PMAX_M/DEC];     /* DFT window                   */
+    float         sq[PMAX_M];        /* squared speech samples       */
+    float         mem_x,mem_y;       /* memory for notch filter      */
+    float         mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
+    codec2_fft_cfg  fft_cfg;         /* kiss FFT config              */
+    float        *Sn16k;             /* Fs=16kHz input speech vector */
+    FILE         *f;
+} NLP;
+#ifdef POST_PROCESS_MBE
+float test_candidate_mbe(COMP Sw[], COMP W[], float f0);
+float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo);
+#endif
+float post_process_sub_multiples(COMP Fw[],
+                                 int pmin, int pmax, float gmax, int gmax_bin,
+                                 float *prev_f0);
+static void fdmdv_16_to_8(float out8k[], float in16k[], int n);
+/*---------------------------------------------------------------------------*\
+  nlp_create()
+  Initialisation function for NLP pitch estimator.
+\*---------------------------------------------------------------------------*/
+void *nlp_create(C2CONST *c2const)
+{
+    NLP *nlp;
+    int  i;
+    int  m = c2const->m_pitch;
+    int  Fs = c2const->Fs;
+    nlp = (NLP*)malloc(sizeof(NLP));
+    if (nlp == NULL)
+        return NULL;
+    assert((Fs == 8000) || (Fs == 16000));
+    nlp->Fs = Fs;
+    nlp->m = m;
+    /* if running at 16kHz allocate storage for decimating filter memory */
+    if (Fs == 16000) {
+        nlp->Sn16k = (float*)malloc(sizeof(float)*(FDMDV_OS_TAPS_16K + c2const->n_samp));
+        for(i=0; i<FDMDV_OS_TAPS_16K; i++) {
+           nlp->Sn16k[i] = 0.0;
+        }
+        if (nlp->Sn16k == NULL) {
+            free(nlp);
+            return NULL;
+        }
+        /* most processing occurs at 8 kHz sample rate so halve m */
+        m /= 2;
+    }
+    assert(m <= PMAX_M);
+    
+    for(i=0; i<m/DEC; i++) {
+        nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
+    }
+    for(i=0; i<PMAX_M; i++)
+        nlp->sq[i] = 0.0;
+    nlp->mem_x = 0.0;
+    nlp->mem_y = 0.0;
+    for(i=0; i<NLP_NTAP; i++)
+        nlp->mem_fir[i] = 0.0;
+    nlp->fft_cfg = codec2_fft_alloc (PE_FFT_SIZE, 0, NULL, NULL);
+    assert(nlp->fft_cfg != NULL);
+    return (void*)nlp;
+}
+/*---------------------------------------------------------------------------*\
+  nlp_destroy()
+  Shut down function for NLP pitch estimator.
+\*---------------------------------------------------------------------------*/
+void nlp_destroy(void *nlp_state)
+{
+    NLP   *nlp;
+    assert(nlp_state != NULL);
+    nlp = (NLP*)nlp_state;
+    codec2_fft_free(nlp->fft_cfg);
+    if (nlp->Fs == 16000) {
+        free(nlp->Sn16k);
+    }
+    free(nlp_state);
+}
+/*---------------------------------------------------------------------------*\
+  nlp()
+  Determines the pitch in samples using the Non Linear Pitch (NLP)
+  algorithm [1]. Returns the fundamental in Hz.  Note that the actual
+  pitch estimate is for the centre of the M sample Sn[] vector, not
+  the current N sample input vector.  This is (I think) a delay of 2.5
+  frames with N=80 samples.  You should align further analysis using
+  this pitch estimate to be centred on the middle of Sn[].
+  Two post processors have been tried, the MBE version (as discussed
+  in [1]), and a post processor that checks sub-multiples.  Both
+  suffer occasional gross pitch errors (i.e. neither are perfect).  In
+  the presence of background noise the sub-multiple algorithm tends
+  towards low F0 which leads to better sounding background noise than
+  the MBE post processor.
+  A good way to test and develop the NLP pitch estimator is using the
+  tnlp (codec2/unittest) and the codec2/octave/plnlp.m Octave script.
+  A pitch tracker searching a few frames forward and backward in time
+  would be a useful addition.
+  References:
+    [1] http://rowetel.com/downloads/1997_rowe_phd_thesis.pdf Chapter 4
+\*---------------------------------------------------------------------------*/
+float nlp(
+  void *nlp_state,
+  float  Sn[],                  /* input speech vector                                */
+  int    n,                     /* frames shift (no. new samples in Sn[])             */
+  float *pitch,                 /* estimated pitch period in samples at current Fs    */
+  COMP   Sw[],                  /* Freq domain version of Sn[]                        */
+  COMP   W[],                   /* Freq domain window                                 */
+  float *prev_f0                /* previous pitch f0 in Hz, memory for pitch tracking */
+)
+{
+    NLP   *nlp;
+    float  notch;                   /* current notch filter output          */
+    COMP   Fw[PE_FFT_SIZE];         /* DFT of squared signal (input/output) */
+    float  gmax;
+    int    gmax_bin;
+    int    m, i, j;
+    float  best_f0;
+    PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
+    assert(nlp_state != NULL);
+    nlp = (NLP*)nlp_state;
+    m = nlp->m;
+    /* Square, notch filter at DC, and LP filter vector */
+    /* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for
+       Fs = 8kHz. The decimating filter introduces about 3ms of delay,
+       that shouldn't be a problem as pitch changes slowly. */
+    if (nlp->Fs == 8000) {
+        /* Square latest input samples */
+        for(i=m-n; i<m; i++) {
+          nlp->sq[i] = Sn[i]*Sn[i];
+        }
+    }
+    else {
+        assert(nlp->Fs == 16000);
+        /* re-sample at 8 KHz */
+        for(i=0; i<n; i++) {
+            nlp->Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i];
+        }
+        m /= 2; n /= 2;
+        float Sn8k[n];
+        fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n);
+        /* Square latest input samples */
+        for(i=m-n, j=0; i<m; i++, j++) {
+            nlp->sq[i] = Sn8k[j]*Sn8k[j];
+        }
+        assert(j <= n);
+    }
+    //fprintf(stderr, "n: %d m: %d\n", n, m);
+    PROFILE_SAMPLE(start);
+    for(i=m-n; i<m; i++) {      /* notch filter at DC */
+        notch = nlp->sq[i] - nlp->mem_x;
+        notch += COEFF*nlp->mem_y;
+        nlp->mem_x = nlp->sq[i];
+        nlp->mem_y = notch;
+        nlp->sq[i] = notch + 1.0;  /* With 0 input vectors to codec,
+                                      kiss_fft() would take a long
+                                      time to execute when running in
+                                      real time.  Problem was traced
+                                      to kiss_fft function call in
+                                      this function. Adding this small
+                                      constant fixed problem.  Not
+                                      exactly sure why. */
+    }
+    PROFILE_SAMPLE_AND_LOG(tnotch, start, "      square and notch");
+    for(i=m-n; i<m; i++) {      /* FIR filter vector */
+        for(j=0; j<NLP_NTAP-1; j++)
+            nlp->mem_fir[j] = nlp->mem_fir[j+1];
+        nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
+        nlp->sq[i] = 0.0;
+        for(j=0; j<NLP_NTAP; j++)
+            nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
+    }
+    PROFILE_SAMPLE_AND_LOG(filter, tnotch, "      filter");
+    /* Decimate and DFT */
+    for(i=0; i<PE_FFT_SIZE; i++) {
+        Fw[i].real = 0.0;
+        Fw[i].imag = 0.0;
+    }
+    for(i=0; i<m/DEC; i++) {
+        Fw[i].real = nlp->sq[i*DEC]*nlp->w[i];
+    }
+    PROFILE_SAMPLE_AND_LOG(window, filter, "      window");
+    #ifdef DUMP
+    dump_dec(Fw);
+    #endif
+    // FIXME: check if this can be converted to a real fft
+    // since all imag inputs are 0
+    codec2_fft_inplace(nlp->fft_cfg, Fw);
+    PROFILE_SAMPLE_AND_LOG(fft, window, "      fft");
+    for(i=0; i<PE_FFT_SIZE; i++)
+        Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
+    PROFILE_SAMPLE_AND_LOG(magsq, fft, "      mag sq");
+    #ifdef DUMP
+    dump_sq(m, nlp->sq);
+    dump_Fw(Fw);
+    #endif
+    /* todo: express everything in f0, as pitch in samples is dep on Fs */
+    int pmin = floor(SAMPLE_RATE*P_MIN_S);
+    int pmax = floor(SAMPLE_RATE*P_MAX_S);
+    /* find global peak */
+    gmax = 0.0;
+    gmax_bin = PE_FFT_SIZE*DEC/pmax;
+    for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+        if (Fw[i].real > gmax) {
+            gmax = Fw[i].real;
+            gmax_bin = i;
+        }
+    }
+    PROFILE_SAMPLE_AND_LOG(peakpick, magsq, "      peak pick");
+    #ifdef POST_PROCESS_MBE
+    best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0);
+    #else
+    best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0);
+    #endif
+    PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick,  "      post process");
+    /* Shift samples in buffer to make room for new samples */
+    for(i=0; i<m-n; i++)
+        nlp->sq[i] = nlp->sq[i+n];
+    /* return pitch period in samples and F0 estimate */
+    *pitch = (float)nlp->Fs/best_f0;
+    PROFILE_SAMPLE_AND_LOG2(shiftmem,  "      shift mem");
+    PROFILE_SAMPLE_AND_LOG2(start,  "      nlp int");
+    *prev_f0 = best_f0;
+    return(best_f0);
+}
+/*---------------------------------------------------------------------------*\
+  post_process_sub_multiples()
+  Given the global maximma of Fw[] we search integer submultiples for
+  local maxima.  If local maxima exist and they are above an
+  experimentally derived threshold (OK a magic number I pulled out of
+  the air) we choose the submultiple as the F0 estimate.
+  The rational for this is that the lowest frequency peak of Fw[]
+  should be F0, as Fw[] can be considered the autocorrelation function
+  of Sw[] (the speech spectrum).  However sometimes due to phase
+  effects the lowest frequency maxima may not be the global maxima.
+  This works OK in practice and favours low F0 values in the presence
+  of background noise which means the sinusoidal codec does an OK job
+  of synthesising the background noise.  High F0 in background noise
+  tends to sound more periodic introducing annoying artifacts.
+\*---------------------------------------------------------------------------*/
+float post_process_sub_multiples(COMP Fw[],
+                                 int pmin, int pmax, float gmax, int gmax_bin,
+                                 float *prev_f0)
+{
+    int   min_bin, cmax_bin;
+    int   mult;
+    float thresh, best_f0;
+    int   b, bmin, bmax, lmax_bin;
+    float lmax;
+    int   prev_f0_bin;
+    /* post process estimate by searching submultiples */
+    mult = 2;
+    min_bin = PE_FFT_SIZE*DEC/pmax;
+    cmax_bin = gmax_bin;
+    prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
+    while(gmax_bin/mult >= min_bin) {
+        b = gmax_bin/mult;                      /* determine search interval */
+        bmin = 0.8*b;
+        bmax = 1.2*b;
+        if (bmin < min_bin)
+            bmin = min_bin;
+        /* lower threshold to favour previous frames pitch estimate,
+            this is a form of pitch tracking */
+        if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax))
+            thresh = CNLP*0.5*gmax;
+        else
+            thresh = CNLP*gmax;
+        lmax = 0;
+        lmax_bin = bmin;
+        for (b=bmin; b<=bmax; b++)           /* look for maximum in interval */
+            if (Fw[b].real > lmax) {
+                lmax = Fw[b].real;
+                lmax_bin = b;
+            }
+        if (lmax > thresh)
+            if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) {
+                cmax_bin = lmax_bin;
+            }
+        mult++;
+    }
+    best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
+    return best_f0;
+}
+#ifdef POST_PROCESS_MBE
+/*---------------------------------------------------------------------------*\
+  post_process_mbe()
+  Use the MBE pitch estimation algorithm to evaluate pitch candidates.  This
+  works OK but the accuracy at low F0 is affected by NW, the analysis window
+  size used for the DFT of the input speech Sw[].  Also favours high F0 in
+  the presence of background noise which causes periodic artifacts in the
+  synthesised speech.
+\*---------------------------------------------------------------------------*/
+float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo)
+{
+  float candidate_f0;
+  float f0,best_f0;             /* fundamental frequency */
+  float e,e_min;                /* MBE cost function */
+  int   i;
+  #ifdef DUMP
+  float e_hz[F0_MAX];
+  #endif
+  #if !defined(NDEBUG) || defined(DUMP)
+  int   bin;
+  #endif
+  float f0_min, f0_max;
+  float f0_start, f0_end;
+  f0_min = (float)SAMPLE_RATE/pmax;
+  f0_max = (float)SAMPLE_RATE/pmin;
+  /* Now look for local maxima.  Each local maxima is a candidate
+     that we test using the MBE pitch estimation algotithm */
+  #ifdef DUMP
+  for(i=0; i<F0_MAX; i++)
+      e_hz[i] = -1;
+  #endif
+  e_min = 1E32;
+  best_f0 = 50;
+  for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+    if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) {
+        /* local maxima found, lets test if it's big enough */
+        if (Fw[i].real > T*gmax) {
+            /* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */
+            candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
+            f0_start = candidate_f0-20;
+            f0_end = candidate_f0+20;
+            if (f0_start < f0_min) f0_start = f0_min;
+            if (f0_end > f0_max) f0_end = f0_max;
+            for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
+                e = test_candidate_mbe(Sw, W, f0);
+                #if !defined(NDEBUG) || defined(DUMP)
+                bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX));
+                #endif
+                #ifdef DUMP
+                e_hz[bin] = e;
+                #endif
+                if (e < e_min) {
+                    e_min = e;
+                    best_f0 = f0;
+                }
+            }
+        }
+    }
+  }
+  /* finally sample MBE cost function around previous pitch estimate
+     (form of pitch tracking) */
+  candidate_f0 = *prev_Wo * SAMPLE_RATE/TWO_PI;
+  f0_start = candidate_f0-20;
+  f0_end = candidate_f0+20;
+  if (f0_start < f0_min) f0_start = f0_min;
+  if (f0_end > f0_max) f0_end = f0_max;
+  for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
+      e = test_candidate_mbe(Sw, W, f0);
+      #if !defined(NDEBUG) || defined(DUMP)
+      bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX));
+      #endif
+      #ifdef DUMP
+      e_hz[bin] = e;
+      #endif
+      if (e < e_min) {
+          e_min = e;
+          best_f0 = f0;
+      }
+  }
+  #ifdef DUMP
+  dump_e(e_hz);
+  #endif
+  return best_f0;
+}
+/*---------------------------------------------------------------------------*\
+  test_candidate_mbe()
+  Returns the error of the MBE cost function for the input f0.
+  Note: I think a lot of the operations below can be simplified as
+  W[].imag = 0 and has been normalised such that den always equals 1.
+\*---------------------------------------------------------------------------*/
+float test_candidate_mbe(
+    COMP  Sw[],
+    COMP  W[],
+    float f0
+)
+{
+    COMP  Sw_[FFT_ENC];   /* DFT of all voiced synthesised signal */
+    int   l,al,bl,m;      /* loop variables */
+    COMP  Am;             /* amplitude sample for this band */
+    int   offset;         /* centers Hw[] about current harmonic */
+    float den;            /* denominator of Am expression */
+    float error;          /* accumulated error between originl and synthesised */
+    float Wo;             /* current "test" fundamental freq. */
+    int   L;
+    L = floorf((SAMPLE_RATE/2.0)/f0);
+    Wo = f0*(2*PI/SAMPLE_RATE);
+    error = 0.0;
+    /* Just test across the harmonics in the first 1000 Hz (L/4) */
+    for(l=1; l<L/4; l++) {
+        Am.real = 0.0;
+        Am.imag = 0.0;
+        den = 0.0;
+        al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
+        bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
+        /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+        for(m=al; m<bl; m++) {
+            offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+            Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
+            Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
+            den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
+        }
+        Am.real = Am.real/den;
+        Am.imag = Am.imag/den;
+        /* Determine error between estimated harmonic and original */
+        for(m=al; m<bl; m++) {
+            offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+            Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
+            Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
+            error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real);
+            error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag);
+        }
+    }
+    return error;
+}
+#endif
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: fdmdv_16_to_8()
+  AUTHOR......: David Rowe
+  DATE CREATED: 9 May 2012
+  Changes the sample rate of a signal from 16 to 8 kHz.
+  n is the number of samples at the 8 kHz rate, there are FDMDV_OS*n
+  samples at the 48 kHz rate.  As above however a memory of
+  FDMDV_OS_TAPS samples is reqd for in16k[] (see t16_8.c unit test as example).
+  Low pass filter the 16 kHz signal at 4 kHz using the same filter as
+  the upsampler, then just output every FDMDV_OS-th filtered sample.
+  Note: this function copied from fdmdv.c, included in nlp.c as a convenience
+  to avoid linking with another source file.
+\*---------------------------------------------------------------------------*/
+static void fdmdv_16_to_8(float out8k[], float in16k[], int n)
+{
+    float acc;
+    int   i,j,k;
+    for(i=0, k=0; k<n; i+=FDMDV_OS, k++) {
+        acc = 0.0;
+        for(j=0; j<FDMDV_OS_TAPS_16K; j++)
+            acc += fdmdv_os_filter[j]*in16k[i-j];
+        out8k[k] = acc;
+    }
+    /* update filter memory */
+    for(i=-FDMDV_OS_TAPS_16K; i<0; i++)
+        in16k[i] = in16k[i + n*FDMDV_OS];
+}
author	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
committer	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
commit	f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree	45556e6104242d4702689760433d7321ae74ec17 /nlp.c

diff --git a/nlp.c b/nlp.c new file mode 100644 index 0000000..8c8d5f1 --- /dev/null +++ b/nlp.c
@@ -0,0 +1,702 @@
	1	/---------------------------------------------------------------------------\
	2
	3	FILE........: nlp.c
	4	AUTHOR......: David Rowe
	5	DATE CREATED: 23/3/93
	6
	7	Non Linear Pitch (NLP) estimation functions.
	8
	9	\---------------------------------------------------------------------------/
	10
	11	/*
	12	Copyright (C) 2009 David Rowe
	13
	14	All rights reserved.
	15
	16	This program is free software; you can redistribute it and/or modify
	17	it under the terms of the GNU Lesser General Public License version 2.1, as
	18	published by the Free Software Foundation. This program is
	19	distributed in the hope that it will be useful, but WITHOUT ANY
	20	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	21	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
	22	License for more details.
	23
	24	You should have received a copy of the GNU Lesser General Public License
	25	along with this program; if not, see <http://www.gnu.org/licenses/>.
	26	*/
	27
	28	#include "defines.h"
	29	#include "nlp.h"
	30	#include "dump.h"
	31	#include "codec2_fft.h"
	32	#undef PROFILE
	33	#include "machdep.h"
	34	#include "os.h"
	35
	36	#include <assert.h>
	37	#include <math.h>
	38	#include <stdlib.h>
	39
	40	/---------------------------------------------------------------------------\
	41
	42	DEFINES
	43
	44	\---------------------------------------------------------------------------/
	45
	46	#define PMAX_M 320 /* maximum NLP analysis window size */
	47	#define COEFF 0.95 /* notch filter parameter */
	48	#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */
	49	#define DEC 5 /* decimation factor */
	50	#define SAMPLE_RATE 8000
	51	#define PI 3.141592654 /* mathematical constant */
	52	#define T 0.1 /* threshold for local minima candidate */
	53	#define F0_MAX 500
	54	#define CNLP 0.3 /* post processor constant */
	55	#define NLP_NTAP 48 /* Decimation LPF order */
	56	#undef POST_PROCESS_MBE /* choose post processor */
	57
	58	/* 8 to 16 kHz sample rate conversion */
	59
	60	#define FDMDV_OS 2 /* oversampling rate */
	61	#define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */
	62	#define FDMDV_OS_TAPS_8K (FDMDV_OS_TAPS_16K/FDMDV_OS) /* number of OS filter taps at 8kHz */
	63
	64	/---------------------------------------------------------------------------\
	65
	66	GLOBALS
	67
	68	\---------------------------------------------------------------------------/
	69
	70	/* 48 tap 600Hz low pass FIR filter coefficients */
	71
	72	const float nlp_fir[] = {
	73	-1.0818124e-03,
	74	-1.1008344e-03,
	75	-9.2768838e-04,
	76	-4.2289438e-04,
	77	5.5034190e-04,
	78	2.0029849e-03,
	79	3.7058509e-03,
	80	5.1449415e-03,
	81	5.5924666e-03,
	82	4.3036754e-03,
	83	8.0284511e-04,
	84	-4.8204610e-03,
	85	-1.1705810e-02,
	86	-1.8199275e-02,
	87	-2.2065282e-02,
	88	-2.0920610e-02,
	89	-1.2808831e-02,
	90	3.2204775e-03,
	91	2.6683811e-02,
	92	5.5520624e-02,
	93	8.6305944e-02,
	94	1.1480192e-01,
	95	1.3674206e-01,
	96	1.4867556e-01,
	97	1.4867556e-01,
	98	1.3674206e-01,
	99	1.1480192e-01,
	100	8.6305944e-02,
	101	5.5520624e-02,
	102	2.6683811e-02,
	103	3.2204775e-03,
	104	-1.2808831e-02,
	105	-2.0920610e-02,
	106	-2.2065282e-02,
	107	-1.8199275e-02,
	108	-1.1705810e-02,
	109	-4.8204610e-03,
	110	8.0284511e-04,
	111	4.3036754e-03,
	112	5.5924666e-03,
	113	5.1449415e-03,
	114	3.7058509e-03,
	115	2.0029849e-03,
	116	5.5034190e-04,
	117	-4.2289438e-04,
	118	-9.2768838e-04,
	119	-1.1008344e-03,
	120	-1.0818124e-03
	121	};
	122
	123	typedef struct {
	124	int Fs; /* sample rate in Hz */
	125	int m;
	126	float w[PMAX_M/DEC]; /* DFT window */
	127	float sq[PMAX_M]; /* squared speech samples */
	128	float mem_x,mem_y; /* memory for notch filter */
	129	float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
	130	codec2_fft_cfg fft_cfg; /* kiss FFT config */
	131	float Sn16k; / Fs=16kHz input speech vector */
	132	FILE *f;
	133	} NLP;
	134
	135	#ifdef POST_PROCESS_MBE
	136	float test_candidate_mbe(COMP Sw[], COMP W[], float f0);
	137	float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo);
	138	#endif
	139	float post_process_sub_multiples(COMP Fw[],
	140	int pmin, int pmax, float gmax, int gmax_bin,
	141	float *prev_f0);
	142	static void fdmdv_16_to_8(float out8k[], float in16k[], int n);
	143
	144	/---------------------------------------------------------------------------\
	145
	146	nlp_create()
	147
	148	Initialisation function for NLP pitch estimator.
	149
	150	\---------------------------------------------------------------------------/
	151
	152	void nlp_create(C2CONST c2const)
	153	{
	154	NLP *nlp;
	155	int i;
	156	int m = c2const->m_pitch;
	157	int Fs = c2const->Fs;
	158
	159	nlp = (NLP*)malloc(sizeof(NLP));
	160	if (nlp == NULL)
	161	return NULL;
	162
	163	assert((Fs == 8000) \|\| (Fs == 16000));
	164	nlp->Fs = Fs;
	165
	166	nlp->m = m;
	167
	168	/* if running at 16kHz allocate storage for decimating filter memory */
	169
	170	if (Fs == 16000) {
	171	nlp->Sn16k = (float)malloc(sizeof(float)(FDMDV_OS_TAPS_16K + c2const->n_samp));
	172	for(i=0; i<FDMDV_OS_TAPS_16K; i++) {
	173	nlp->Sn16k[i] = 0.0;
	174	}
	175	if (nlp->Sn16k == NULL) {
	176	free(nlp);
	177	return NULL;
	178	}
	179
	180	/* most processing occurs at 8 kHz sample rate so halve m */
	181
	182	m /= 2;
	183	}
	184
	185	assert(m <= PMAX_M);
	186
	187	for(i=0; i<m/DEC; i++) {
	188	nlp->w[i] = 0.5 - 0.5cosf(2PI*i/(m/DEC-1));
	189	}
	190
	191	for(i=0; i<PMAX_M; i++)
	192	nlp->sq[i] = 0.0;
	193	nlp->mem_x = 0.0;
	194	nlp->mem_y = 0.0;
	195	for(i=0; i<NLP_NTAP; i++)
	196	nlp->mem_fir[i] = 0.0;
	197
	198	nlp->fft_cfg = codec2_fft_alloc (PE_FFT_SIZE, 0, NULL, NULL);
	199	assert(nlp->fft_cfg != NULL);
	200
	201	return (void*)nlp;
	202	}
	203
	204	/---------------------------------------------------------------------------\
	205
	206	nlp_destroy()
	207
	208	Shut down function for NLP pitch estimator.
	209
	210	\---------------------------------------------------------------------------/
	211
	212	void nlp_destroy(void *nlp_state)
	213	{
	214	NLP *nlp;
	215	assert(nlp_state != NULL);
	216	nlp = (NLP*)nlp_state;
	217
	218	codec2_fft_free(nlp->fft_cfg);
	219	if (nlp->Fs == 16000) {
	220	free(nlp->Sn16k);
	221	}
	222	free(nlp_state);
	223	}
	224
	225	/---------------------------------------------------------------------------\
	226
	227	nlp()
	228
	229	Determines the pitch in samples using the Non Linear Pitch (NLP)
	230	algorithm [1]. Returns the fundamental in Hz. Note that the actual
	231	pitch estimate is for the centre of the M sample Sn[] vector, not
	232	the current N sample input vector. This is (I think) a delay of 2.5
	233	frames with N=80 samples. You should align further analysis using
	234	this pitch estimate to be centred on the middle of Sn[].
	235
	236	Two post processors have been tried, the MBE version (as discussed
	237	in [1]), and a post processor that checks sub-multiples. Both
	238	suffer occasional gross pitch errors (i.e. neither are perfect). In
	239	the presence of background noise the sub-multiple algorithm tends
	240	towards low F0 which leads to better sounding background noise than
	241	the MBE post processor.
	242
	243	A good way to test and develop the NLP pitch estimator is using the
	244	tnlp (codec2/unittest) and the codec2/octave/plnlp.m Octave script.
	245
	246	A pitch tracker searching a few frames forward and backward in time
	247	would be a useful addition.
	248
	249	References:
	250
	251	[1] http://rowetel.com/downloads/1997_rowe_phd_thesis.pdf Chapter 4
	252
	253	\---------------------------------------------------------------------------/
	254
	255	float nlp(
	256	void *nlp_state,
	257	float Sn[], /* input speech vector */
	258	int n, /* frames shift (no. new samples in Sn[]) */
	259	float pitch, / estimated pitch period in samples at current Fs */
	260	COMP Sw[], /* Freq domain version of Sn[] */
	261	COMP W[], /* Freq domain window */
	262	float prev_f0 / previous pitch f0 in Hz, memory for pitch tracking */
	263	)
	264	{
	265	NLP *nlp;
	266	float notch; /* current notch filter output */
	267	COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */
	268	float gmax;
	269	int gmax_bin;
	270	int m, i, j;
	271	float best_f0;
	272	PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
	273
	274	assert(nlp_state != NULL);
	275	nlp = (NLP*)nlp_state;
	276	m = nlp->m;
	277
	278	/* Square, notch filter at DC, and LP filter vector */
	279
	280	/* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for
	281	Fs = 8kHz. The decimating filter introduces about 3ms of delay,
	282	that shouldn't be a problem as pitch changes slowly. */
	283
	284	if (nlp->Fs == 8000) {
	285	/* Square latest input samples */
	286
	287	for(i=m-n; i<m; i++) {
	288	nlp->sq[i] = Sn[i]*Sn[i];
	289	}
	290	}
	291	else {
	292	assert(nlp->Fs == 16000);
	293
	294	/* re-sample at 8 KHz */
	295
	296	for(i=0; i<n; i++) {
	297	nlp->Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i];
	298	}
	299
	300	m /= 2; n /= 2;
	301
	302	float Sn8k[n];
	303	fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n);
	304
	305	/* Square latest input samples */
	306
	307	for(i=m-n, j=0; i<m; i++, j++) {
	308	nlp->sq[i] = Sn8k[j]*Sn8k[j];
	309	}
	310	assert(j <= n);
	311	}
	312	//fprintf(stderr, "n: %d m: %d\n", n, m);
	313
	314	PROFILE_SAMPLE(start);
	315
	316	for(i=m-n; i<m; i++) { /* notch filter at DC */
	317	notch = nlp->sq[i] - nlp->mem_x;
	318	notch += COEFF*nlp->mem_y;
	319	nlp->mem_x = nlp->sq[i];
	320	nlp->mem_y = notch;
	321	nlp->sq[i] = notch + 1.0; /* With 0 input vectors to codec,
	322	kiss_fft() would take a long
	323	time to execute when running in
	324	real time. Problem was traced
	325	to kiss_fft function call in
	326	this function. Adding this small
	327	constant fixed problem. Not
	328	exactly sure why. */
	329	}
	330
	331	PROFILE_SAMPLE_AND_LOG(tnotch, start, " square and notch");
	332
	333	for(i=m-n; i<m; i++) { /* FIR filter vector */
	334
	335	for(j=0; j<NLP_NTAP-1; j++)
	336	nlp->mem_fir[j] = nlp->mem_fir[j+1];
	337	nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
	338
	339	nlp->sq[i] = 0.0;
	340	for(j=0; j<NLP_NTAP; j++)
	341	nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
	342	}
	343
	344	PROFILE_SAMPLE_AND_LOG(filter, tnotch, " filter");
	345
	346	/* Decimate and DFT */
	347
	348	for(i=0; i<PE_FFT_SIZE; i++) {
	349	Fw[i].real = 0.0;
	350	Fw[i].imag = 0.0;
	351	}
	352	for(i=0; i<m/DEC; i++) {
	353	Fw[i].real = nlp->sq[iDEC]nlp->w[i];
	354	}
	355	PROFILE_SAMPLE_AND_LOG(window, filter, " window");
	356	#ifdef DUMP
	357	dump_dec(Fw);
	358	#endif
	359
	360	// FIXME: check if this can be converted to a real fft
	361	// since all imag inputs are 0
	362	codec2_fft_inplace(nlp->fft_cfg, Fw);
	363	PROFILE_SAMPLE_AND_LOG(fft, window, " fft");
	364
	365	for(i=0; i<PE_FFT_SIZE; i++)
	366	Fw[i].real = Fw[i].realFw[i].real + Fw[i].imagFw[i].imag;
	367
	368	PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq");
	369	#ifdef DUMP
	370	dump_sq(m, nlp->sq);
	371	dump_Fw(Fw);
	372	#endif
	373
	374	/* todo: express everything in f0, as pitch in samples is dep on Fs */
	375
	376	int pmin = floor(SAMPLE_RATE*P_MIN_S);
	377	int pmax = floor(SAMPLE_RATE*P_MAX_S);
	378
	379	/* find global peak */
	380
	381	gmax = 0.0;
	382	gmax_bin = PE_FFT_SIZE*DEC/pmax;
	383	for(i=PE_FFT_SIZEDEC/pmax; i<=PE_FFT_SIZEDEC/pmin; i++) {
	384	if (Fw[i].real > gmax) {
	385	gmax = Fw[i].real;
	386	gmax_bin = i;
	387	}
	388	}
	389
	390	PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick");
	391
	392	#ifdef POST_PROCESS_MBE
	393	best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0);
	394	#else
	395	best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0);
	396	#endif
	397
	398	PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process");
	399
	400	/* Shift samples in buffer to make room for new samples */
	401
	402	for(i=0; i<m-n; i++)
	403	nlp->sq[i] = nlp->sq[i+n];
	404
	405	/* return pitch period in samples and F0 estimate */
	406
	407	*pitch = (float)nlp->Fs/best_f0;
	408
	409	PROFILE_SAMPLE_AND_LOG2(shiftmem, " shift mem");
	410
	411	PROFILE_SAMPLE_AND_LOG2(start, " nlp int");
	412
	413	*prev_f0 = best_f0;
	414
	415	return(best_f0);
	416	}
	417
	418	/---------------------------------------------------------------------------\
	419
	420	post_process_sub_multiples()
	421
	422	Given the global maximma of Fw[] we search integer submultiples for
	423	local maxima. If local maxima exist and they are above an
	424	experimentally derived threshold (OK a magic number I pulled out of
	425	the air) we choose the submultiple as the F0 estimate.
	426
	427	The rational for this is that the lowest frequency peak of Fw[]
	428	should be F0, as Fw[] can be considered the autocorrelation function
	429	of Sw[] (the speech spectrum). However sometimes due to phase
	430	effects the lowest frequency maxima may not be the global maxima.
	431
	432	This works OK in practice and favours low F0 values in the presence
	433	of background noise which means the sinusoidal codec does an OK job
	434	of synthesising the background noise. High F0 in background noise
	435	tends to sound more periodic introducing annoying artifacts.
	436
	437	\---------------------------------------------------------------------------/
	438
	439	float post_process_sub_multiples(COMP Fw[],
	440	int pmin, int pmax, float gmax, int gmax_bin,
	441	float *prev_f0)
	442	{
	443	int min_bin, cmax_bin;
	444	int mult;
	445	float thresh, best_f0;
	446	int b, bmin, bmax, lmax_bin;
	447	float lmax;
	448	int prev_f0_bin;
	449
	450	/* post process estimate by searching submultiples */
	451
	452	mult = 2;
	453	min_bin = PE_FFT_SIZE*DEC/pmax;
	454	cmax_bin = gmax_bin;
	455	prev_f0_bin = prev_f0(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
	456
	457	while(gmax_bin/mult >= min_bin) {
	458
	459	b = gmax_bin/mult; /* determine search interval */
	460	bmin = 0.8*b;
	461	bmax = 1.2*b;
	462	if (bmin < min_bin)
	463	bmin = min_bin;
	464
	465	/* lower threshold to favour previous frames pitch estimate,
	466	this is a form of pitch tracking */
	467
	468	if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax))
	469	thresh = CNLP0.5gmax;
	470	else
	471	thresh = CNLP*gmax;
	472
	473	lmax = 0;
	474	lmax_bin = bmin;
	475	for (b=bmin; b<=bmax; b++) /* look for maximum in interval */
	476	if (Fw[b].real > lmax) {
	477	lmax = Fw[b].real;
	478	lmax_bin = b;
	479	}
	480
	481	if (lmax > thresh)
	482	if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) {
	483	cmax_bin = lmax_bin;
	484	}
	485
	486	mult++;
	487	}
	488
	489	best_f0 = (float)cmax_binSAMPLE_RATE/(PE_FFT_SIZEDEC);
	490
	491	return best_f0;
	492	}
	493
	494	#ifdef POST_PROCESS_MBE
	495
	496	/---------------------------------------------------------------------------\
	497
	498	post_process_mbe()
	499
	500	Use the MBE pitch estimation algorithm to evaluate pitch candidates. This
	501	works OK but the accuracy at low F0 is affected by NW, the analysis window
	502	size used for the DFT of the input speech Sw[]. Also favours high F0 in
	503	the presence of background noise which causes periodic artifacts in the
	504	synthesised speech.
	505
	506	\---------------------------------------------------------------------------/
	507
	508	float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo)
	509	{
	510	float candidate_f0;
	511	float f0,best_f0; /* fundamental frequency */
	512	float e,e_min; /* MBE cost function */
	513	int i;
	514	#ifdef DUMP
	515	float e_hz[F0_MAX];
	516	#endif
	517	#if !defined(NDEBUG) \|\| defined(DUMP)
	518	int bin;
	519	#endif
	520	float f0_min, f0_max;
	521	float f0_start, f0_end;
	522
	523	f0_min = (float)SAMPLE_RATE/pmax;
	524	f0_max = (float)SAMPLE_RATE/pmin;
	525
	526	/* Now look for local maxima. Each local maxima is a candidate
	527	that we test using the MBE pitch estimation algotithm */
	528
	529	#ifdef DUMP
	530	for(i=0; i<F0_MAX; i++)
	531	e_hz[i] = -1;
	532	#endif
	533	e_min = 1E32;
	534	best_f0 = 50;
	535	for(i=PE_FFT_SIZEDEC/pmax; i<=PE_FFT_SIZEDEC/pmin; i++) {
	536	if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) {
	537
	538	/* local maxima found, lets test if it's big enough */
	539
	540	if (Fw[i].real > T*gmax) {
	541
	542	/* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */
	543
	544	candidate_f0 = (float)iSAMPLE_RATE/(PE_FFT_SIZEDEC);
	545	f0_start = candidate_f0-20;
	546	f0_end = candidate_f0+20;
	547	if (f0_start < f0_min) f0_start = f0_min;
	548	if (f0_end > f0_max) f0_end = f0_max;
	549
	550	for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
	551	e = test_candidate_mbe(Sw, W, f0);
	552	#if !defined(NDEBUG) \|\| defined(DUMP)
	553	bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX));
	554	#endif
	555	#ifdef DUMP
	556	e_hz[bin] = e;
	557	#endif
	558	if (e < e_min) {
	559	e_min = e;
	560	best_f0 = f0;
	561	}
	562	}
	563
	564	}
	565	}
	566	}
	567
	568	/* finally sample MBE cost function around previous pitch estimate
	569	(form of pitch tracking) */
	570
	571	candidate_f0 = prev_Wo SAMPLE_RATE/TWO_PI;
	572	f0_start = candidate_f0-20;
	573	f0_end = candidate_f0+20;
	574	if (f0_start < f0_min) f0_start = f0_min;
	575	if (f0_end > f0_max) f0_end = f0_max;
	576
	577	for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
	578	e = test_candidate_mbe(Sw, W, f0);
	579	#if !defined(NDEBUG) \|\| defined(DUMP)
	580	bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX));
	581	#endif
	582	#ifdef DUMP
	583	e_hz[bin] = e;
	584	#endif
	585	if (e < e_min) {
	586	e_min = e;
	587	best_f0 = f0;
	588	}
	589	}
	590
	591	#ifdef DUMP
	592	dump_e(e_hz);
	593	#endif
	594
	595	return best_f0;
	596	}
	597
	598	/---------------------------------------------------------------------------\
	599
	600	test_candidate_mbe()
	601
	602	Returns the error of the MBE cost function for the input f0.
	603
	604	Note: I think a lot of the operations below can be simplified as
	605	W[].imag = 0 and has been normalised such that den always equals 1.
	606
	607	\---------------------------------------------------------------------------/
	608
	609	float test_candidate_mbe(
	610	COMP Sw[],
	611	COMP W[],
	612	float f0
	613	)
	614	{
	615	COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */
	616	int l,al,bl,m; /* loop variables */
	617	COMP Am; /* amplitude sample for this band */
	618	int offset; /* centers Hw[] about current harmonic */
	619	float den; /* denominator of Am expression */
	620	float error; /* accumulated error between originl and synthesised */
	621	float Wo; /* current "test" fundamental freq. */
	622	int L;
	623
	624	L = floorf((SAMPLE_RATE/2.0)/f0);
	625	Wo = f0(2PI/SAMPLE_RATE);
	626
	627	error = 0.0;
	628
	629	/* Just test across the harmonics in the first 1000 Hz (L/4) */
	630
	631	for(l=1; l<L/4; l++) {
	632	Am.real = 0.0;
	633	Am.imag = 0.0;
	634	den = 0.0;
	635	al = ceilf((l - 0.5)WoFFT_ENC/TWO_PI);
	636	bl = ceilf((l + 0.5)WoFFT_ENC/TWO_PI);
	637
	638	/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
	639
	640	for(m=al; m<bl; m++) {
	641	offset = FFT_ENC/2 + m - lWoFFT_ENC/TWO_PI + 0.5;
	642	Am.real += Sw[m].realW[offset].real + Sw[m].imagW[offset].imag;
	643	Am.imag += Sw[m].imagW[offset].real - Sw[m].realW[offset].imag;
	644	den += W[offset].realW[offset].real + W[offset].imagW[offset].imag;
	645	}
	646
	647	Am.real = Am.real/den;
	648	Am.imag = Am.imag/den;
	649
	650	/* Determine error between estimated harmonic and original */
	651
	652	for(m=al; m<bl; m++) {
	653	offset = FFT_ENC/2 + m - lWoFFT_ENC/TWO_PI + 0.5;
	654	Sw_[m].real = Am.realW[offset].real - Am.imagW[offset].imag;
	655	Sw_[m].imag = Am.realW[offset].imag + Am.imagW[offset].real;
	656	error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real);
	657	error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag);
	658	}
	659	}
	660
	661	return error;
	662	}
	663
	664	#endif
	665
	666	/---------------------------------------------------------------------------\
	667
	668	FUNCTION....: fdmdv_16_to_8()
	669	AUTHOR......: David Rowe
	670	DATE CREATED: 9 May 2012
	671
	672	Changes the sample rate of a signal from 16 to 8 kHz.
	673
	674	n is the number of samples at the 8 kHz rate, there are FDMDV_OS*n
	675	samples at the 48 kHz rate. As above however a memory of
	676	FDMDV_OS_TAPS samples is reqd for in16k[] (see t16_8.c unit test as example).
	677
	678	Low pass filter the 16 kHz signal at 4 kHz using the same filter as
	679	the upsampler, then just output every FDMDV_OS-th filtered sample.
	680
	681	Note: this function copied from fdmdv.c, included in nlp.c as a convenience
	682	to avoid linking with another source file.
	683
	684	\---------------------------------------------------------------------------/
	685
	686	static void fdmdv_16_to_8(float out8k[], float in16k[], int n)
	687	{
	688	float acc;
	689	int i,j,k;
	690
	691	for(i=0, k=0; k<n; i+=FDMDV_OS, k++) {
	692	acc = 0.0;
	693	for(j=0; j<FDMDV_OS_TAPS_16K; j++)
	694	acc += fdmdv_os_filter[j]*in16k[i-j];
	695	out8k[k] = acc;
	696	}
	697
	698	/* update filter memory */
	699
	700	for(i=-FDMDV_OS_TAPS_16K; i<0; i++)
	701	in16k[i] = in16k[i + n*FDMDV_OS];
	702	}