stripdown of version 0.9

author: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
committer: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
commit: f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree: 45556e6104242d4702689760433d7321ae74ec17 /postfilter.c
1 files changed, 142 insertions, 0 deletions
diff --git a/postfilter.c b/postfilter.c
new file mode 100644
index 0000000..6542c7c
--- /dev/null
+++ b/postfilter.c
@@ -0,0 +1,142 @@
+/*---------------------------------------------------------------------------*\
+  FILE........: postfilter.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 13/09/09
+  Postfilter to improve sound quality for speech with high levels of
+  background noise.  Unlike mixed-excitation models requires no bits
+  to be transmitted to handle background noise.
+\*---------------------------------------------------------------------------*/
+/*
+  Copyright (C) 2009 David Rowe
+  All rights reserved.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "defines.h"
+#include "comp.h"
+#include "dump.h"
+#include "sine.h"
+#include "postfilter.h"
+/*---------------------------------------------------------------------------*\
+                                DEFINES
+\*---------------------------------------------------------------------------*/
+#define BG_THRESH 40.0     /* dB; only frames whose mean energy is below this update bg_est */
+#define BG_BETA    0.1     /* weight given to the newest frame in the bg_est running average */
+#define BG_MARGIN  6.0     /* dB; harmonics less than this far above the
+                              BG estimate are randomised.  Helped make bg
+                              noise less spiky (impulsive) for mmt1, but
+                              speech was perhaps a little rougher.
+                           */
+/*---------------------------------------------------------------------------*\
+  postfilter()
+  The post filter is designed to help with speech corrupted by
+  background noise.  The zero phase model tends to make speech with
+  background noise sound "clicky".  With high levels of background
+  noise the low level inter-formant parts of the spectrum will contain
+  noise rather than speech harmonics, so modelling them as voiced
+  (i.e. a continuous, non-random phase track) is inaccurate.
+  Some codecs (like MBE) have a mixed voicing model that breaks the
+  spectrum into voiced and unvoiced regions.  Several bits/frame
+  (5-12) are required to transmit the frequency selective voicing
+  information.  Mixed excitation also requires accurate voicing
+  estimation (parameter estimators always break occasionally under
+  exceptional conditions).
+  In our case we use a post filter approach which requires no
+  additional bits to be transmitted.  The decoder measures the average
+  level of the background noise during unvoiced frames.  If a harmonic
+  is less than this level it is made unvoiced by randomising its
+  phases.
+  This idea is rather experimental.  Some potential problems that may
+  happen:
+  1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
+     up to speech level?  This would be a bad thing.
+  2/ If background noise suddenly disappears from the source speech does
+     estimate drop quickly?  What if noise suddenly re-appears?
+  3/ Background noise with a non-flat spectrum.  Current algorithm just
+     considers spectrum as a whole, but this could be broken up into
+     bands, each with their own estimator.
+  4/ Males and females with the same level of background noise.  Check
+     performance the same.  Changing Wo affects width of each band, may
+     affect bg energy estimates.
+  5/ Not sure what happens during long periods of voiced speech
+     e.g. "sshhhhhhh"
+\*---------------------------------------------------------------------------*/
+void postfilter(
+  MODEL *model,
+  float *bg_est
+)
+{
+  int   harm;               /* harmonic index, runs 1..model->L          */
+  int   n_randomised = 0;   /* harmonics given a random phase this call  */
+  float level_db;           /* energy sum, then mean energy in dB        */
+  float amp_floor;          /* linear amplitude below which we randomise */
+  /* Mean harmonic energy in dB.  The accumulator starts from a tiny
+     positive value so the sum stays above zero before the log. */
+  level_db = 1E-12;
+  for (harm = 1; harm <= model->L; harm++) {
+      level_db += model->A[harm]*model->A[harm];
+  }
+  assert(level_db > 0.0);
+  level_db = 10.0*log10f(level_db/model->L);
+  /* Only unvoiced frames quieter than BG_THRESH feed the running
+     background estimate; louder frames are skipped so high level
+     speech does not pull the estimate up. */
+  if (!model->voiced && (level_db < BG_THRESH)) {
+      *bg_est = *bg_est*(1.0 - BG_BETA) + level_db*BG_BETA;
+  }
+  /* On voiced frames, every harmonic whose amplitude sits under the
+     background estimate plus BG_MARGIN has its phase replaced with a
+     random value, which makes that harmonic unvoiced. */
+  amp_floor = POW10F((*bg_est + BG_MARGIN)/20.0);
+  if (model->voiced) {
+      for (harm = 1; harm <= model->L; harm++) {
+          if (model->A[harm] < amp_floor) {
+              model->phi[harm] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand();
+              n_randomised++;
+          }
+      }
+  }
+#ifdef DUMP
+  /* third argument is the percentage of harmonics randomised */
+  dump_bg(level_db, *bg_est, 100.0*n_randomised/model->L);
+#endif
+}
author	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
committer	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
commit	f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree	45556e6104242d4702689760433d7321ae74ec17 /postfilter.c

diff --git a/postfilter.c b/postfilter.c new file mode 100644 index 0000000..6542c7c --- /dev/null +++ b/postfilter.c
@@ -0,0 +1,142 @@
	1	/*---------------------------------------------------------------------------*\
	2
	3	FILE........: postfilter.c
	4	AUTHOR......: David Rowe
	5	DATE CREATED: 13/09/09
	6
	7	Postfilter to improve sound quality for speech with high levels of
	8	background noise. Unlike mixed-excitation models requires no bits
	9	to be transmitted to handle background noise.
	10
	11	\*---------------------------------------------------------------------------*/
	12
	13	/*
	14	Copyright (C) 2009 David Rowe
	15
	16	All rights reserved.
	17
	18	This program is free software; you can redistribute it and/or modify
	19	it under the terms of the GNU Lesser General Public License version 2.1, as
	20	published by the Free Software Foundation. This program is
	21	distributed in the hope that it will be useful, but WITHOUT ANY
	22	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	23	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
	24	License for more details.
	25
	26	You should have received a copy of the GNU Lesser General Public License
	27	along with this program; if not, see <http://www.gnu.org/licenses/>.
	28	*/
	29
	30	#include <assert.h>
	31	#include <stdlib.h>
	32	#include <stdio.h>
	33	#include <math.h>
	34
	35	#include "defines.h"
	36	#include "comp.h"
	37	#include "dump.h"
	38	#include "sine.h"
	39	#include "postfilter.h"
	40
	41	/*---------------------------------------------------------------------------*\
	42
	43	DEFINES
	44
	45	\*---------------------------------------------------------------------------*/
	46
	47	#define BG_THRESH 40.0 /* only consider low levels signals for bg_est */
	48	#define BG_BETA 0.1 /* averaging filter constant */
	49	#define BG_MARGIN 6.0 /* harmonics this far above BG noise are
	50	randomised. Helped make bg noise less
	51	spikey (impulsive) for mmt1, but speech was
	52	perhaps a little rougher.
	53	*/
	54
	55	/*---------------------------------------------------------------------------*\
	56
	57	postfilter()
	58
	59	The post filter is designed to help with speech corrupted by
	60	background noise. The zero phase model tends to make speech with
	61	background noise sound "clicky". With high levels of background
	62	noise the low level inter-formant parts of the spectrum will contain
	63	noise rather than speech harmonics, so modelling them as voiced
	64	(i.e. a continuous, non-random phase track) is inaccurate.
	65
	66	Some codecs (like MBE) have a mixed voicing model that breaks the
	67	spectrum into voiced and unvoiced regions. Several bits/frame
	68	(5-12) are required to transmit the frequency selective voicing
	69	information. Mixed excitation also requires accurate voicing
	70	estimation (parameter estimators always break occasionally under
	71	exceptional conditions).
	72
	73	In our case we use a post filter approach which requires no
	74	additional bits to be transmitted. The decoder measures the average
	75	level of the background noise during unvoiced frames. If a harmonic
	76	is less than this level it is made unvoiced by randomising its
	77	phases.
	78
	79	This idea is rather experimental. Some potential problems that may
	80	happen:
	81
	82	1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
	83	up to speech level? This would be a bad thing.
	84
	85	2/ If background noise suddenly disappears from the source speech does
	86	estimate drop quickly? What if noise suddenly re-appears?
	87
	88	3/ Background noise with a non-flat spectrum. Current algorithm just
	89	considers spectrum as a whole, but this could be broken up into
	90	bands, each with their own estimator.
	91
	92	4/ Males and females with the same level of background noise. Check
	93	performance the same. Changing Wo affects width of each band, may
	94	affect bg energy estimates.
	95
	96	5/ Not sure what happens during long periods of voiced speech
	97	e.g. "sshhhhhhh"
	98
	99	\*---------------------------------------------------------------------------*/
	100
	101	void postfilter(
	102	MODEL *model,
	103	float *bg_est
	104	)
	105	{
	106	int m, uv;
	107	float e, thresh;
	108
	109	/* determine average energy across spectrum */
	110
	111	e = 1E-12;
	112	for(m=1; m<=model->L; m++)
	113	e += model->A[m]*model->A[m];
	114
	115	assert(e > 0.0);
	116	e = 10.0*log10f(e/model->L);
	117
	118	/* If beneath threshold, update bg estimate. The idea
	119	of the threshold is to prevent updating during high level
	120	speech. */
	121
	122	if ((e < BG_THRESH) && !model->voiced)
	123	*bg_est = *bg_est*(1.0 - BG_BETA) + e*BG_BETA;
	124
	125	/* now mess with phases during voiced frames to make any harmonics
	126	less than our background estimate unvoiced.
	127	*/
	128
	129	uv = 0;
	130	thresh = POW10F((*bg_est + BG_MARGIN)/20.0);
	131	if (model->voiced)
	132	for(m=1; m<=model->L; m++)
	133	if (model->A[m] < thresh) {
	134	model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand();
	135	uv++;
	136	}
	137
	138	#ifdef DUMP
	139	dump_bg(e, *bg_est, 100.0*uv/model->L);
	140	#endif
	141
	142	}