summaryrefslogtreecommitdiff
path: root/postfilter.c
diff options
context:
space:
mode:
authorerdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
committererdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
commitf02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree45556e6104242d4702689760433d7321ae74ec17 /postfilter.c
stripdown of version 0.9
Diffstat (limited to 'postfilter.c')
-rw-r--r--postfilter.c142
1 files changed, 142 insertions, 0 deletions
diff --git a/postfilter.c b/postfilter.c
new file mode 100644
index 0000000..6542c7c
--- /dev/null
+++ b/postfilter.c
@@ -0,0 +1,142 @@
1/*---------------------------------------------------------------------------*\
2
3 FILE........: postfilter.c
4 AUTHOR......: David Rowe
5 DATE CREATED: 13/09/09
6
7 Postfilter to improve sound quality for speech with high levels of
8 background noise. Unlike mixed-excitation models requires no bits
9 to be transmitted to handle background noise.
10
11\*---------------------------------------------------------------------------*/
12
13/*
14 Copyright (C) 2009 David Rowe
15
16 All rights reserved.
17
18 This program is free software; you can redistribute it and/or modify
19 it under the terms of the GNU Lesser General Public License version 2.1, as
20 published by the Free Software Foundation. This program is
21 distributed in the hope that it will be useful, but WITHOUT ANY
22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
24 License for more details.
25
26 You should have received a copy of the GNU Lesser General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
28*/
29
30#include <assert.h>
31#include <stdlib.h>
32#include <stdio.h>
33#include <math.h>
34
35#include "defines.h"
36#include "comp.h"
37#include "dump.h"
38#include "sine.h"
39#include "postfilter.h"
40
41/*---------------------------------------------------------------------------*\
42
43 DEFINES
44
45\*---------------------------------------------------------------------------*/
46
47#define BG_THRESH 40.0 /* frame energy (dB) must be below this for the frame to update bg_est */
48#define BG_BETA 0.1 /* averaging filter constant: weight given to the new frame energy in bg_est */
49#define BG_MARGIN 6.0 /* margin (dB) added to bg_est; harmonics below
50 bg_est + BG_MARGIN have their phase randomised.
51 Helped make bg noise less spiky (impulsive) for
52 mmt1, but speech was perhaps a little rougher.
53 */
54
55/*---------------------------------------------------------------------------*\
56
57 postfilter()
58
59 The post filter is designed to help with speech corrupted by
60 background noise. The zero phase model tends to make speech with
61 background noise sound "clicky". With high levels of background
62 noise the low level inter-formant parts of the spectrum will contain
63 noise rather than speech harmonics, so modelling them as voiced
64 (i.e. a continuous, non-random phase track) is inaccurate.
65
66 Some codecs (like MBE) have a mixed voicing model that breaks the
67 spectrum into voiced and unvoiced regions. Several bits/frame
68 (5-12) are required to transmit the frequency selective voicing
69 information. Mixed excitation also requires accurate voicing
70 estimation (parameter estimators always break occasionally under
71 exceptional conditions).
72
73 In our case we use a post filter approach which requires no
74 additional bits to be transmitted. The decoder measures the average
75 level of the background noise during unvoiced frames. If a harmonic
76 is less than this level it is made unvoiced by randomising its
77 phases.
78
79 This idea is rather experimental. Some potential problems that may
80 happen:
81
82 1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
83 up to speech level? This would be a bad thing.
84
85 2/ If background noise suddenly disappears from the source speech does
86 estimate drop quickly? What if noise suddenly re-appears?
87
88 3/ Background noise with a non-flat spectrum. Current algorithm just
89 considers spectrum as a whole, but this could be broken up into
90 bands, each with their own estimator.
91
92 4/ Males and females with the same level of background noise. Check
93 performance the same. Changing Wo affects width of each band, may
94 affect bg energy estimates.
95
96 5/ Not sure what happens during long periods of voiced speech
97 e.g. "sshhhhhhh"
98
99\*---------------------------------------------------------------------------*/
100
101void postfilter(
102 MODEL *model,
103 float *bg_est
104)
105{
106 int m, uv;
107 float e, thresh;
108
109 /* determine average energy across spectrum */
110
111 e = 1E-12;
112 for(m=1; m<=model->L; m++)
113 e += model->A[m]*model->A[m];
114
115 assert(e > 0.0);
116 e = 10.0*log10f(e/model->L);
117
118 /* If beneath threhold, update bg estimate. The idea
119 of the threshold is to prevent updating during high level
120 speech. */
121
122 if ((e < BG_THRESH) && !model->voiced)
123 *bg_est = *bg_est*(1.0 - BG_BETA) + e*BG_BETA;
124
125 /* now mess with phases during voiced frames to make any harmonics
126 less then our background estimate unvoiced.
127 */
128
129 uv = 0;
130 thresh = POW10F((*bg_est + BG_MARGIN)/20.0);
131 if (model->voiced)
132 for(m=1; m<=model->L; m++)
133 if (model->A[m] < thresh) {
134 model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand();
135 uv++;
136 }
137
138#ifdef DUMP
139 dump_bg(e, *bg_est, 100.0*uv/model->L);
140#endif
141
142}