summaryrefslogtreecommitdiff
path: root/nlp.c
diff options
context:
space:
mode:
Diffstat (limited to 'nlp.c')
-rw-r--r--nlp.c183
1 files changed, 1 insertions, 182 deletions
diff --git a/nlp.c b/nlp.c
index 8c8d5f1..036f6be 100644
--- a/nlp.c
+++ b/nlp.c
@@ -53,7 +53,6 @@
53#define F0_MAX 500 53#define F0_MAX 500
54#define CNLP 0.3 /* post processor constant */ 54#define CNLP 0.3 /* post processor constant */
55#define NLP_NTAP 48 /* Decimation LPF order */ 55#define NLP_NTAP 48 /* Decimation LPF order */
56#undef POST_PROCESS_MBE /* choose post processor */
57 56
58/* 8 to 16 kHz sample rate conversion */ 57/* 8 to 16 kHz sample rate conversion */
59 58
@@ -132,10 +131,6 @@ typedef struct {
132 FILE *f; 131 FILE *f;
133} NLP; 132} NLP;
134 133
135#ifdef POST_PROCESS_MBE
136float test_candidate_mbe(COMP Sw[], COMP W[], float f0);
137float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo);
138#endif
139float post_process_sub_multiples(COMP Fw[], 134float post_process_sub_multiples(COMP Fw[],
140 int pmin, int pmax, float gmax, int gmax_bin, 135 int pmin, int pmax, float gmax, int gmax_bin,
141 float *prev_f0); 136 float *prev_f0);
@@ -258,7 +253,7 @@ float nlp(
258 int n, /* frames shift (no. new samples in Sn[]) */ 253 int n, /* frames shift (no. new samples in Sn[]) */
259 float *pitch, /* estimated pitch period in samples at current Fs */ 254 float *pitch, /* estimated pitch period in samples at current Fs */
260 COMP Sw[], /* Freq domain version of Sn[] */ 255 COMP Sw[], /* Freq domain version of Sn[] */
261 COMP W[], /* Freq domain window */ 256 float W[], /* Freq domain window */
262 float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ 257 float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */
263) 258)
264{ 259{
@@ -389,11 +384,7 @@ float nlp(
389 384
390 PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); 385 PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick");
391 386
392 #ifdef POST_PROCESS_MBE
393 best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0);
394 #else
395 best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); 387 best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0);
396 #endif
397 388
398 PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); 389 PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process");
399 390
@@ -491,178 +482,6 @@ float post_process_sub_multiples(COMP Fw[],
491 return best_f0; 482 return best_f0;
492} 483}
493 484
494#ifdef POST_PROCESS_MBE
495
496/*---------------------------------------------------------------------------*\
497
498 post_process_mbe()
499
500 Use the MBE pitch estimation algorithm to evaluate pitch candidates. This
501 works OK but the accuracy at low F0 is affected by NW, the analysis window
502 size used for the DFT of the input speech Sw[]. Also favours high F0 in
503 the presence of background noise which causes periodic artifacts in the
504 synthesised speech.
505
506\*---------------------------------------------------------------------------*/
507
508float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo)
509{
510 float candidate_f0;
511 float f0,best_f0; /* fundamental frequency */
512 float e,e_min; /* MBE cost function */
513 int i;
514 #ifdef DUMP
515 float e_hz[F0_MAX];
516 #endif
517 #if !defined(NDEBUG) || defined(DUMP)
518 int bin;
519 #endif
520 float f0_min, f0_max;
521 float f0_start, f0_end;
522
523 f0_min = (float)SAMPLE_RATE/pmax;
524 f0_max = (float)SAMPLE_RATE/pmin;
525
526 /* Now look for local maxima. Each local maximum is a candidate
527 that we test using the MBE pitch estimation algorithm */
528
529 #ifdef DUMP
530 for(i=0; i<F0_MAX; i++)
531 e_hz[i] = -1;
532 #endif
533 e_min = 1E32;
534 best_f0 = 50;
535 for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
536 if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) {
537
538 /* local maximum found, let's test if it's big enough */
539
540 if (Fw[i].real > T*gmax) {
541
542 /* OK, sample MBE cost function over +/- 20Hz range in 2.5Hz steps */
543
544 candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
545 f0_start = candidate_f0-20;
546 f0_end = candidate_f0+20;
547 if (f0_start < f0_min) f0_start = f0_min;
548 if (f0_end > f0_max) f0_end = f0_max;
549
550 for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
551 e = test_candidate_mbe(Sw, W, f0);
552 #if !defined(NDEBUG) || defined(DUMP)
553 bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX));
554 #endif
555 #ifdef DUMP
556 e_hz[bin] = e;
557 #endif
558 if (e < e_min) {
559 e_min = e;
560 best_f0 = f0;
561 }
562 }
563
564 }
565 }
566 }
567
568 /* finally sample MBE cost function around previous pitch estimate
569 (form of pitch tracking) */
570
571 candidate_f0 = *prev_Wo * SAMPLE_RATE/TWO_PI;
572 f0_start = candidate_f0-20;
573 f0_end = candidate_f0+20;
574 if (f0_start < f0_min) f0_start = f0_min;
575 if (f0_end > f0_max) f0_end = f0_max;
576
577 for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
578 e = test_candidate_mbe(Sw, W, f0);
579 #if !defined(NDEBUG) || defined(DUMP)
580 bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX));
581 #endif
582 #ifdef DUMP
583 e_hz[bin] = e;
584 #endif
585 if (e < e_min) {
586 e_min = e;
587 best_f0 = f0;
588 }
589 }
590
591 #ifdef DUMP
592 dump_e(e_hz);
593 #endif
594
595 return best_f0;
596}
597
598/*---------------------------------------------------------------------------*\
599
600 test_candidate_mbe()
601
602 Returns the error of the MBE cost function for the input f0.
603
604 Note: I think a lot of the operations below can be simplified as
605 W[].imag = 0 and has been normalised such that den always equals 1.
606
607\*---------------------------------------------------------------------------*/
608
609float test_candidate_mbe(
610 COMP Sw[],
611 COMP W[],
612 float f0
613)
614{
615 COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */
616 int l,al,bl,m; /* loop variables */
617 COMP Am; /* amplitude sample for this band */
618 int offset; /* centers Hw[] about current harmonic */
619 float den; /* denominator of Am expression */
620 float error; /* accumulated error between original and synthesised */
621 float Wo; /* current "test" fundamental freq. */
622 int L;
623
624 L = floorf((SAMPLE_RATE/2.0)/f0);
625 Wo = f0*(2*PI/SAMPLE_RATE);
626
627 error = 0.0;
628
629 /* Just test across the harmonics in the first 1000 Hz (L/4) */
630
631 for(l=1; l<L/4; l++) {
632 Am.real = 0.0;
633 Am.imag = 0.0;
634 den = 0.0;
635 al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
636 bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
637
638 /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
639
640 for(m=al; m<bl; m++) {
641 offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
642 Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
643 Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
644 den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
645 }
646
647 Am.real = Am.real/den;
648 Am.imag = Am.imag/den;
649
650 /* Determine error between estimated harmonic and original */
651
652 for(m=al; m<bl; m++) {
653 offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
654 Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
655 Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
656 error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real);
657 error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag);
658 }
659 }
660
661 return error;
662}
663
664#endif
665
666/*---------------------------------------------------------------------------*\ 485/*---------------------------------------------------------------------------*\
667 486
668 FUNCTION....: fdmdv_16_to_8() 487 FUNCTION....: fdmdv_16_to_8()