summaryrefslogtreecommitdiff
path: root/timestretch.c
blob: 76c3f2c5baa275de24d2a851a753beb41247fc95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>

// Global state shared by calc_convert_values() and process_frame().
// All lengths are counted in samples (shorts), not bytes; they are derived
// from the sample rate in calc_convert_values().

// Holds g_overlap samples. main() passes it to process_frame() as the
// "overlap" argument, i.e. the tail of the previous frame used for cross-fading.
static short * g_overlap_buffer;
// Table of g_corr_length weights; process_frame() multiplies the correlation
// found at candidate offset i by entry i before comparing candidates.
static short * g_overlap_heuristic;
// Number of samples that are cross-faded between two consecutive frames.
static size_t  g_overlap;

static size_t  g_skip;          // Per frame skip << 16, i.e. amount of samples consumed if not FRAME_LAST
static size_t  g_input_length;  // Minimal length, not everything is used
static size_t  g_output_length; // Exact length of output
static size_t  g_corr_length;   // Length of frame part correlation is attempted in

// 16.16 fixed-point accumulator: process_frame() keeps only the low 16 bits
// (the fractional sample position) between frames and adds g_skip per frame.
static size_t  g_offset;        // Offset into stream, lower bits

// Bit flags for the frame_flag argument of process_frame().
#define FRAME_FIRST 0x01
#define FRAME_LAST  0x02

/* some good default for mixing voice, values in milliseconds */
#define OVERLAP            10   // overlapping length (music default = 12 ms)
#define CORR_WINDOW        15   // overlapping correlation window length (music default = 28 ms)
#define OUTPUT_LEN         30   // one processing sequence length in milliseconds (music default = 82 ms)

// Copies C samples (shorts) from B to A. Built on memcpy(), so the two
// regions must not overlap.
#define sampcpy(A,B,C) memcpy((A),(B),(C)*sizeof(short))

// Returns the length of one output frame
static size_t calc_convert_values( int sample_rate, double tempo ) {
    unsigned int i;
    g_overlap = ( sample_rate * OVERLAP ) / 1000;

    free( g_overlap_buffer );
    g_overlap_buffer = malloc( sizeof(short) * g_overlap );
    g_offset = 0;

    g_output_length = (sample_rate * OUTPUT_LEN ) / 1000;
    if ( g_output_length < g_overlap)
        g_output_length = g_overlap;
    g_corr_length = ( sample_rate * CORR_WINDOW ) / 1000;

    free( g_overlap_heuristic );
    g_overlap_heuristic = malloc( sizeof(short) * g_corr_length );

    // boost middle of sequence by top of a flat parabola
    // slope stolen from soundtouch, precalc table
    for( i = 0; i < g_corr_length; ++i )
        g_overlap_heuristic[i] = (short)16384.0*(-(double)(i*i)/((double)(g_corr_length*g_corr_length))+(double)i/(double)g_corr_length+0.75);

    g_skip = (size_t)( tempo * (double)g_output_length * 65536.0 );
    g_input_length = g_corr_length + g_output_length + g_overlap;
    if( g_skip / 65536 > g_input_length )
        g_input_length = g_skip / 65536;

    return g_output_length;
}

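/* Example: tempo 1.5 with OUTPUT_LEN/CORR_WINDOW/OVERLAP = 30/15/10 ms => skip == 45 ms, out == 30 ms.
Assume the correlation search found an offset of 5 ms (first row, one character per millisecond).
Legend: # = skipped up to the found offset, O = cross-faded overlap, V = copied verbatim,
        m = remembered as overlap for the next frame, ? = not used by this frame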

  [#####OOOOOOOOOOVVVVVVVVVVVVVVVVVVVVmmmmmmmmmm?????????????????????????]
                                               ^--skip
  [###############OOOOOOOOOOVVVVVVVVVVVVVVVVVVVVmmmmmmmmmm???????????????]
  [                                            #####OOOOOOOOOOVVVVVVVVVVVVVVVVVVVVmmmmmmmmmm?????????????????????????]
*/

// Returns the amount of samples that can be discarded from begin of the input buffer
size_t process_frame( short *input, short *output, short *overlap, int frame_flag ) {
    int offset = 0;

    // The first frame needs to be copied verbatim, we do not have anything to mix, yet.
    if( frame_flag & FRAME_FIRST )
        sampcpy( output, input, g_output_length );
    else {
        int64_t acc, corr_max = 0;
        int i, j;

        // Scans for the best correlation value by testing each possible position
        for( i = 0; i < (int)g_corr_length; ++i ) {
            for( j = 0, acc = 0; j < (int)g_overlap; ++j )
                acc += input[i+j] * overlap[j];

            acc *= g_overlap_heuristic[i];
            if ( corr_max < acc ) {
                offset = i;
                corr_max = acc;
            }
        }
//printf( "%03d %016llX\n", offset, corr_max );

        // Cross fade end of last frame with begin of this frame
        for( i = 0, j = (int)g_overlap; i < (int)g_overlap ; ++i, --j )
            output[i] = ( j * overlap[i] + i * input[i+offset] ) / (int)g_overlap;

        // Copy rest of the input verbatim
        sampcpy( output + g_overlap, input + offset + g_overlap, g_output_length - g_overlap );
    }

    // On the last frame help connect the next frame from input seamlessly
    if( frame_flag & FRAME_LAST )
        return offset + g_output_length;

    // Remember end of this frame for next frame
    sampcpy( overlap, input + offset + g_output_length, g_overlap );

    // Remove the processed samples from the input buffer.
    g_offset &= 0xffff;
    g_offset += g_skip;
    return g_offset / 65536;
}

// Writes exactly len bytes from buf to fd, continuing after partial writes.
// Returns 0 on success and -1 if write() failed or made no progress.
static int write_fully( int fd, const void *buf, size_t len ) {
  const char *cursor = buf;

  while( len > 0 ) {
    ssize_t written = write( fd, cursor, len );
    if( written <= 0 )
        return -1;
    cursor += written;
    len    -= (size_t)written;
  }
  return 0;
}

// Time-stretches the raw 16 bit mono stream in "in.raw" (8000 Hz, tempo 1.25)
// and writes the result to "out.raw". When the input runs out, the samples
// still buffered are written out unprocessed.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if a file could not be opened, read
// or written.
int main( int args, char **argv ) {
  size_t    out_chunk_size = calc_convert_values( 8000, 1.25 );
  size_t    in_fill = 0;
  short     outbuf[ g_output_length ];
  short     inbuf [ g_input_length  ];
  int       fd_in;
  int       fd_out;
  int       first_frame = FRAME_FIRST;
  int       status = EXIT_FAILURE;
  (void)args; (void)argv;

  fd_in = open( "in.raw", O_RDONLY );
  if( fd_in < 0 ) {
    perror( "in.raw" );
    return EXIT_FAILURE;
  }

  // O_CREAT requires the third (mode) argument, otherwise the permissions of
  // a newly created file are indeterminate.
  fd_out = open( "out.raw", O_CREAT | O_WRONLY | O_TRUNC, 0644 );
  if( fd_out < 0 ) {
    perror( "out.raw" );
    close( fd_in );
    return EXIT_FAILURE;
  }

//  printf( "DEBUG: OL: %zd SWL: %zd SL: %zd SK: %zd ICM: %zd\n", g_overlap, g_output_length, g_corr_length, g_skip / 65536, g_input_length );

  while( 1 ) {
    size_t  processed;
    size_t  missing = ( g_input_length - in_fill ) * sizeof(short);
    // read() returns ssize_t; keeping it signed lets -1 be told apart from data.
    ssize_t fromfd  = read( fd_in, inbuf + in_fill, missing );

    if( fromfd < 0 ) {
        perror( "in.raw" );
        break;
    }
    in_fill += (size_t)fromfd / sizeof(short);

    // A short read is treated as end of input: flush what is buffered and stop.
    if( (size_t)fromfd != missing ) {
        if( write_fully( fd_out, inbuf, in_fill * sizeof(short) ) < 0 )
            perror( "out.raw" );
        else
            status = EXIT_SUCCESS;
        break;
    }

    // Do one cycle of processing and outputting
    processed = process_frame( inbuf, outbuf, g_overlap_buffer, first_frame );
    first_frame = 0;
    if( write_fully( fd_out, outbuf, out_chunk_size * sizeof(short) ) < 0 ) {
        perror( "out.raw" );
        break;
    }

    // Never drop more than is buffered; in_fill - processed must not wrap around.
    if( processed > in_fill )
        processed = in_fill;
    memmove( inbuf, inbuf + processed, ( in_fill - processed ) * sizeof(short) );
    in_fill -= processed;
  }

  close( fd_in );
  // A failing close() on the output can mean that buffered data was lost.
  if( close( fd_out ) < 0 ) {
    perror( "out.raw" );
    status = EXIT_FAILURE;
  }

  return status;
}