summaryrefslogtreecommitdiff
path: root/phase.c
diff options
context:
space:
mode:
authorerdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
committererdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>2019-07-04 23:26:09 +0200
commitf02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree45556e6104242d4702689760433d7321ae74ec17 /phase.c
stripdown of version 0.9
Diffstat (limited to 'phase.c')
-rw-r--r--phase.c289
1 files changed, 289 insertions, 0 deletions
diff --git a/phase.c b/phase.c
new file mode 100644
index 0000000..e486613
--- /dev/null
+++ b/phase.c
@@ -0,0 +1,289 @@
1/*---------------------------------------------------------------------------*\
2
3 FILE........: phase.c
4 AUTHOR......: David Rowe
5 DATE CREATED: 1/2/09
6
7 Functions for modelling and synthesising phase.
8
9\*---------------------------------------------------------------------------*/
10
11/*
12 Copyright (C) 2009 David Rowe
13
14 All rights reserved.
15
16 This program is free software; you can redistribute it and/or modify
17 it under the terms of the GNU Lesser General Public License version 2.1, as
18 published by the Free Software Foundation. This program is
19 distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
22 License for more details.
23
24 You should have received a copy of the GNU Lesser General Public License
25 along with this program; if not,see <http://www.gnu.org/licenses/>.
26*/
27
28#include "defines.h"
29#include "phase.h"
30#include "kiss_fft.h"
31#include "comp.h"
32#include "comp_prim.h"
33#include "sine.h"
34
35#include <assert.h>
36#include <ctype.h>
37#include <math.h>
38#include <string.h>
39#include <stdlib.h>
40
41/*---------------------------------------------------------------------------*\
42
43 sample_phase()
44
45 Samples phase at centre of each harmonic from and array of FFT_ENC
46 DFT samples.
47
48\*---------------------------------------------------------------------------*/
49
50void sample_phase(MODEL *model,
51 COMP H[],
52 COMP A[] /* LPC analysis filter in freq domain */
53 )
54{
55 int m, b;
56 float r;
57
58 r = TWO_PI/(FFT_ENC);
59
60 /* Sample phase at harmonics */
61
62 for(m=1; m<=model->L; m++) {
63 b = (int)(m*model->Wo/r + 0.5);
64 H[m] = cconj(A[b]); /* synth filter 1/A is opposite phase to analysis filter */
65 }
66}
67
68
69/*---------------------------------------------------------------------------*\
70
71 phase_synth_zero_order()
72
73 Synthesises phases based on SNR and a rule based approach. No phase
74 parameters are required apart from the SNR (which can be reduced to a
75 1 bit V/UV decision per frame).
76
77 The phase of each harmonic is modelled as the phase of a synthesis
78 filter excited by an impulse. In many Codec 2 modes the synthesis
79 filter is a LPC filter. Unlike the first order model the position
80 of the impulse is not transmitted, so we create an excitation pulse
81 train using a rule based approach.
82
83 Consider a pulse train with a pulse starting time n=0, with pulses
84 repeated at a rate of Wo, the fundamental frequency. A pulse train
85 in the time domain is equivalent to harmonics in the frequency
86 domain. We can make an excitation pulse train using a sum of
87 sinsusoids:
88
89 for(m=1; m<=L; m++)
90 ex[n] = cos(m*Wo*n)
91
92 Note: the Octave script ../octave/phase.m is an example of this if
93 you would like to try making a pulse train.
94
95 The phase of each excitation harmonic is:
96
97 arg(E[m]) = mWo
98
99 where E[m] are the complex excitation (freq domain) samples,
100 arg(x), just returns the phase of a complex sample x.
101
102 As we don't transmit the pulse position for this model, we need to
103 synthesise it. Now the excitation pulses occur at a rate of Wo.
104 This means the phase of the first harmonic advances by N_SAMP samples
105 over a synthesis frame of N_SAMP samples. For example if Wo is pi/20
106 (200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
107 first harmonic would advance (pi/20)*80 = 4*pi or two complete
108 cycles.
109
110 We generate the excitation phase of the fundamental (first
111 harmonic):
112
113 arg[E[1]] = Wo*N_SAMP;
114
115 We then relate the phase of the m-th excitation harmonic to the
116 phase of the fundamental as:
117
118 arg(E[m]) = m*arg(E[1])
119
120 This E[m] then gets passed through the LPC synthesis filter to
121 determine the final harmonic phase.
122
123 Comparing to speech synthesised using original phases:
124
125 - Through headphones speech synthesised with this model is not as
126 good. Through a loudspeaker it is very close to original phases.
127
128 - If there are voicing errors, the speech can sound clicky or
129 staticy. If V speech is mistakenly declared UV, this model tends to
130 synthesise impulses or clicks, as there is usually very little shift or
131 dispersion through the LPC synthesis filter.
132
133 - When combined with LPC amplitude modelling there is an additional
134 drop in quality. I am not sure why, theory is interformant energy
135 is raised making any phase errors more obvious.
136
137 NOTES:
138
139 1/ This synthesis model is effectively the same as a simple LPC-10
140 vocoders, and yet sounds much better. Why? Conventional wisdom
141 (AMBE, MELP) says mixed voicing is required for high quality
142 speech.
143
144 2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
145 also from MIT) first described this zero phase model, I need to look
146 up the paper.
147
148 3/ Note that this approach could cause some discontinuities in
149 the phase at the edge of synthesis frames, as no attempt is made
150 to make sure that the phase tracks are continuous (the excitation
151 phases are continuous, but not the final phases after filtering
152 by the LPC spectra). Technically this is a bad thing. However
153 this may actually be a good thing, disturbing the phase tracks a
154 bit. More research needed, e.g. test a synthesis model that adds
155 a small delta-W to make phase tracks line up for voiced
156 harmonics.
157
158\*---------------------------------------------------------------------------*/
159
160void phase_synth_zero_order(
161 int n_samp,
162 MODEL *model,
163 float *ex_phase, /* excitation phase of fundamental */
164 COMP H[] /* L synthesis filter freq domain samples */
165
166)
167{
168 int m;
169 float new_phi;
170 COMP Ex[MAX_AMP+1]; /* excitation samples */
171 COMP A_[MAX_AMP+1]; /* synthesised harmonic samples */
172
173 /*
174 Update excitation fundamental phase track, this sets the position
175 of each pitch pulse during voiced speech. After much experiment
176 I found that using just this frame's Wo improved quality for UV
177 sounds compared to interpolating two frames Wo like this:
178
179 ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2;
180 */
181
182 ex_phase[0] += (model->Wo)*n_samp;
183 ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
184
185 for(m=1; m<=model->L; m++) {
186
187 /* generate excitation */
188
189 if (model->voiced) {
190
191 Ex[m].real = cosf(ex_phase[0]*m);
192 Ex[m].imag = sinf(ex_phase[0]*m);
193 }
194 else {
195
196 /* When a few samples were tested I found that LPC filter
197 phase is not needed in the unvoiced case, but no harm in
198 keeping it.
199 */
200 float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
201 Ex[m].real = cosf(phi);
202 Ex[m].imag = sinf(phi);
203 }
204
205 /* filter using LPC filter */
206
207 A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
208 A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
209
210 /* modify sinusoidal phase */
211
212 new_phi = atan2f(A_[m].imag, A_[m].real+1E-12);
213 model->phi[m] = new_phi;
214 }
215
216}
217
218
219/*---------------------------------------------------------------------------*\
220
221 FUNCTION....: mag_to_phase
222 AUTHOR......: David Rowe
223 DATE CREATED: Jan 2017
224
225 Algorithm for http://www.dsprelated.com/showcode/20.php ported to C. See
226 also Octave function mag_to_phase.m
227
228 Given a magnitude spectrum in dB, returns a minimum-phase phase
229 spectra.
230
231\*---------------------------------------------------------------------------*/
232
233void mag_to_phase(float phase[], /* Nfft/2+1 output phase samples in radians */
234 float Gdbfk[], /* Nfft/2+1 postive freq amplitudes samples in dB */
235 int Nfft,
236 codec2_fft_cfg fft_fwd_cfg,
237 codec2_fft_cfg fft_inv_cfg
238 )
239{
240 COMP Sdb[Nfft], c[Nfft], cf[Nfft], Cf[Nfft];
241 int Ns = Nfft/2+1;
242 int i;
243
244 /* install negative frequency components, 1/Nfft takes into
245 account kiss fft lack of scaling on ifft */
246
247 Sdb[0].real = Gdbfk[0];
248 Sdb[0].imag = 0.0;
249 for(i=1; i<Ns; i++) {
250 Sdb[i].real = Sdb[Nfft-i].real = Gdbfk[i];
251 Sdb[i].imag = Sdb[Nfft-i].imag = 0.0;
252 }
253
254 /* compute real cepstrum from log magnitude spectrum */
255
256 codec2_fft(fft_inv_cfg, Sdb, c);
257 for(i=0; i<Nfft; i++) {
258 c[i].real /= (float)Nfft;
259 c[i].imag /= (float)Nfft;
260 }
261
262 /* Fold cepstrum to reflect non-min-phase zeros inside unit circle */
263
264 cf[0] = c[0];
265 for(i=1; i<Ns-1; i++) {
266 cf[i] = cadd(c[i],c[Nfft-i]);
267 }
268 cf[Ns-1] = c[Ns-1];
269 for(i=Ns; i<Nfft; i++) {
270 cf[i].real = 0.0;
271 cf[i].imag = 0.0;
272 }
273
274 /* Cf = dB_magnitude + j * minimum_phase */
275
276 codec2_fft(fft_fwd_cfg, cf, Cf);
277
278 /* The maths says we are meant to be using log(x), not 20*log10(x),
279 so we need to scale the phase to account for this:
280 log(x) = 20*log10(x)/scale */
281
282 float scale = (20.0/logf(10.0));
283
284 for(i=0; i<Ns; i++) {
285 phase[i] = Cf[i].imag/scale;
286 }
287
288
289}