有什么方法可以让 SoX 只打印 wav 文件中的振幅值?

Any way I can get SoX to just print the amplitude values from a wav file?

我正在使用一行简单的代码:

sox vocal2.wav -t dat vocal2.dat

通过 SoX 将 wav 文件转换为显示特定时间位置振幅值的文本文件。输出数据还有一个 header 显示采样率和通道数。

我需要在 C++ 程序中使用这些振幅数据来确定输入的音高,但每次都必须先转换文件、把数据拖进 Excel、复制其中一列、保存为新的 .txt 文件之后才能使用,实在很麻烦。

有什么方法可以让 SoX 只在转换后的文件中输出振幅吗?

一个 wav 文件由一个普通的 header 加上一长串 16 位 pcm 样本组成。

下面是一些用来读取 wav 文件的代码,不过我不太确定它目前的开发完成度。

/* Format tag values; the loader compares WAV.format_tag against these. */
#define WAVE_FORMAT_PCM 0x01
#define WAVE_FORMAT_IEEE_FLOAT  0x03
#define WAVE_FORMAT_ALAW    0x06  /* 8-bit ITU-T G.711 A-law */
#define WAVE_FORMAT_MULAW   0x07 /* 8-bit ITU-T G.711 mu-law */
#define WAVE_FORMAT_EXTENSIBLE 0xFFFE


typedef struct
{
    short format_tag;
    short channels;
    short block_align;
    short bits_per_sample;
    unsigned long format_length;
    unsigned long sample_rate;
    unsigned long avg_bytes_sec;

    unsigned long data_size;
    unsigned char *sound_buffer;
} WAV;

/* Loading from a file name or an open stream, and releasing the result. */
WAV *loadwav(const char* filename, int *err);
WAV *floadwav(FILE *fp, int *err);
void killwav(WAV *wav);

/* File-local helpers: chunk readers, then IEEE 754 and little-endian decoders. */
static void readformatchunk(FILE *fp, WAV *wav, int *err);
static void readdatachunk(FILE *fp, WAV *wav, int *err);
static void readunknownchunk(FILE *fp, int *err);
static double freadieee754(FILE *fp, int bigendian);
static float freadieee754f(FILE *fp, int bigendian);
static int fget16le(FILE *fp);
static long fget32le(FILE *fp);

WAV *loadwav16stereo(const char *filename, int *err)
{

}
/*
 * Load a WAV file from disk.
 *  filename - path of the file, opened in binary mode
 *  err      - optional (may be NULL); receives 0 on success, -2 if the
 *             file cannot be opened, otherwise the code set by floadwav()
 * Returns the loaded WAV (release with killwav()) or 0 on failure.
 */
WAV *loadwav(const char* filename, int *err)
{
    WAV *answer;
    FILE *fp;
    int status = 0; /* local slot so a NULL err never reaches the parser */

    if (err)
        *err = 0;
    fp = fopen(filename, "rb");
    if (!fp)
    {
        if (err)
            *err = -2;
        return 0;
    }
    answer = floadwav(fp, &status);
    fclose(fp);
    if (err)
        *err = status;

    return answer;
}

WAV *floadwav(FILE *fp, int *err)
{
    short format_tag, channels, block_align, bits_per_sample;
    unsigned long format_length, sample_rate, avg_bytes_sec, i;

    unsigned char *sound_buffer;
    int data_size;

    WAV *answer = 0;
    unsigned char id[4];
    unsigned long size;

    if (err)
        *err = 0;

    answer = malloc(sizeof(WAV));
    if (!answer)
        goto out_of_memory;
    answer->sound_buffer = 0;

    fread(id, sizeof(unsigned char), 4, fp);

    if (strncmp(id, "RIFF", 4))
        goto parse_error;

    size = fget32le(fp);
    fread(id, sizeof(unsigned char), 4, fp);
    if (strncmp(id, "WAVE", 4))
        goto parse_error;

    while (1)
    {
        if (fread(id, sizeof(unsigned char), 4, fp) != 4)
            goto parse_error;
        if (!strncmp(id, "fmt ", 4))
        {
            readformatchunk(fp, answer, err);
            if (*err)
                goto parse_error;
        }
        else if (!strncmp(id, "data", 4))
        {
            readdatachunk(fp, answer, err);
            if (*err)
                goto parse_error;
            break;
        }
        else
        {
        }
    }


    return answer;

parse_error:
    if (err)
        *err = -3;
    killwav(answer);
    return 0;
out_of_memory:
    if (err)
        *err = -1;
    killwav(answer);
    return 0;
}

/*
 * Release a WAV and its sample buffer. Passing a null pointer does nothing.
 */
void killwav(WAV *wav)
{
    if (!wav)
        return;

    free(wav->sound_buffer);
    free(wav);
}

/* Return the channel count stored in the WAV. */
int wav_Nchannels(WAV *wav)
{
    int nchannels = wav->channels;

    return nchannels;
}

/* Return the sample rate stored in the WAV, converted to int. */
int wav_samplerate(WAV *wav)
{
    unsigned long rate = wav->sample_rate;

    return (int)rate;
}

/*
 * Scale a floating-point sample by SHRT_MAX and convert it to a short,
 * clamping out-of-range input and mapping NaN to 0 so the conversion can
 * never overflow.
 */
static short wav_sampletoshort(double sample)
{
    double scaled = sample * SHRT_MAX;

    if (scaled != scaled) /* NaN */
        return 0;
    if (scaled >= SHRT_MAX)
        return SHRT_MAX;
    if (scaled <= SHRT_MIN)
        return SHRT_MIN;
    return (short)scaled;
}

/*
 * Convert the samples of a loaded WAV to 16-bit values.
 *  wav      - the loaded file
 *  Nsamples - optional; receives the number of samples returned
 *             (0 on failure)
 * Supported inputs: 8- and 16-bit PCM, 32- and 64-bit IEEE float.
 * The values are signed 16-bit samples stored in an unsigned short array.
 * Returns a malloc()ed array the caller must free(), or 0 if the format is
 * unsupported, no sample data is present, or memory runs out.
 */
unsigned short *wav_samplesasshort(WAV *wav, long *Nsamples)
{
    unsigned short *answer = 0;
    int ispcm;
    int isfloat;
    int bits;
    long N;
    long i;

    if (Nsamples)
        *Nsamples = 0;
    if (!wav || !wav->sound_buffer)
        return 0;

    bits = wav->bits_per_sample;
    ispcm = wav->format_tag == WAVE_FORMAT_PCM && (bits == 8 || bits == 16);
    isfloat = wav->format_tag == WAVE_FORMAT_IEEE_FLOAT && (bits == 32 || bits == 64);
    if (!ispcm && !isfloat)
        return 0;

    N = (long)(wav->data_size / (unsigned long)(bits / 8));
    /* allocate at least one element so an empty data chunk is not reported as failure */
    answer = malloc((size_t)(N > 0 ? N : 1) * sizeof *answer);
    if (!answer)
        return 0;

    if (bits == 8)
    {
        /* 8-bit PCM is unsigned with its midpoint at 128 */
        for (i = 0; i < N; i++)
            answer[i] = (unsigned short)((wav->sound_buffer[i] - 128) * 256);
    }
    else if (bits == 16)
    {
        short *sbuffer = (short *)wav->sound_buffer;
        for (i = 0; i < N; i++)
            answer[i] = (unsigned short)sbuffer[i];
    }
    else if (bits == 32)
    {
        float *fbuffer = (float *)wav->sound_buffer;
        for (i = 0; i < N; i++)
            answer[i] = (unsigned short)wav_sampletoshort(fbuffer[i]);
    }
    else
    {
        double *dbuffer = (double *)wav->sound_buffer;
        for (i = 0; i < N; i++)
            answer[i] = (unsigned short)wav_sampletoshort(dbuffer[i]);
    }

    if (Nsamples)
        *Nsamples = N;
    return answer;
}

/*
 * Read a "fmt " chunk; the 4-byte chunk id has already been consumed.
 *  fp  - stream positioned at the chunk's length field
 *  wav - receives the format fields
 *  err - set to -3 if the chunk is too short or the stream ends early;
 *        left untouched on success
 * Only the 16 mandatory bytes are interpreted. Any extension bytes are
 * skipped using the chunk's own length, so a bad extension size cannot
 * desynchronise the parser.
 */
static void readformatchunk(FILE *fp, WAV *wav, int *err)
{
    short format_tag, channels, block_align, bits_per_sample;
    unsigned long format_length, sample_rate, avg_bytes_sec;
    unsigned long remaining;

    /* fget32le() sign-extends; mask back to the 32 bits actually stored */
    format_length = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    if (format_length < 16)
        goto parse_error;
    format_tag = (short)fget16le(fp);
    channels = (short)fget16le(fp);
    sample_rate = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    avg_bytes_sec = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    block_align = (short)fget16le(fp);
    bits_per_sample = (short)fget16le(fp);
    if (feof(fp) || ferror(fp))
        goto parse_error;

    /* rest of the chunk, plus the pad byte that follows an odd-sized chunk */
    remaining = format_length - 16 + (format_length & 1);
    while (remaining--)
    {
        if (fgetc(fp) == EOF)
            goto parse_error;
    }

    wav->format_tag = format_tag;
    wav->channels = channels;
    wav->block_align = block_align;
    wav->bits_per_sample = bits_per_sample;
    wav->format_length = format_length;
    wav->sample_rate = sample_rate;
    wav->avg_bytes_sec = avg_bytes_sec;

    return;
parse_error:
    *err = -3;
}

/*
 * Read a "data" chunk; the 4-byte chunk id has already been consumed.
 *  fp  - stream positioned at the chunk's length field
 *  wav - must already carry format_tag and bits_per_sample; receives
 *        data_size and sound_buffer
 *  err - set to -1 when out of memory, -3 when the stream ends before
 *        the announced amount of data; left untouched on success
 * Samples are decoded into host-order unsigned char, short, float or
 * double elements depending on the format. For any other format only
 * data_size is recorded and sound_buffer is left as it was.
 */
static void readdatachunk(FILE *fp, WAV *wav, int *err)
{
    unsigned long data_size;
    unsigned long count;
    unsigned long i;
    void *buffer = 0;

    /* fget32le() sign-extends; mask back to the 32 bits actually stored */
    data_size = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    if (feof(fp) || ferror(fp))
        goto parse_error;

    if (wav->format_tag == WAVE_FORMAT_PCM && wav->bits_per_sample == 8)
    {
        unsigned char *buff8;

        count = data_size;
        buffer = buff8 = malloc(count ? count : 1);
        if (!buffer)
            goto out_of_memory;
        for (i = 0; i < count; i++)
        {
            int c = fgetc(fp);
            if (c == EOF)
                goto parse_error;
            buff8[i] = (unsigned char)c;
        }
    }
    else if (wav->format_tag == WAVE_FORMAT_PCM && wav->bits_per_sample == 16)
    {
        short *buff16;

        count = data_size / 2;
        buffer = buff16 = malloc((count ? count : 1) * sizeof *buff16);
        if (!buffer)
            goto out_of_memory;
        for (i = 0; i < count; i++)
        {
            buff16[i] = (short)fget16le(fp);
            if (feof(fp) || ferror(fp))
                goto parse_error;
        }
    }
    else if (wav->format_tag == WAVE_FORMAT_IEEE_FLOAT && wav->bits_per_sample == 32)
    {
        float *bufffloat;

        count = data_size / 4;
        buffer = bufffloat = malloc((count ? count : 1) * sizeof *bufffloat);
        if (!buffer)
            goto out_of_memory;
        for (i = 0; i < count; i++)
        {
            bufffloat[i] = freadieee754f(fp, 0);
            if (feof(fp) || ferror(fp))
                goto parse_error;
        }
    }
    else if (wav->format_tag == WAVE_FORMAT_IEEE_FLOAT && wav->bits_per_sample == 64)
    {
        double *buffdouble;

        count = data_size / 8;
        /* sized from the element type: doubles, not floats */
        buffer = buffdouble = malloc((count ? count : 1) * sizeof *buffdouble);
        if (!buffer)
            goto out_of_memory;
        for (i = 0; i < count; i++)
        {
            buffdouble[i] = freadieee754(fp, 0);
            if (feof(fp) || ferror(fp))
                goto parse_error;
        }
    }

    wav->data_size = data_size;
    if (buffer)
        wav->sound_buffer = buffer;
    return;
parse_error:
    free(buffer);
    *err = -3;
    return;
out_of_memory:
    *err = -1;
    return;
}

/*
 * Skip a chunk whose id is not recognised; the 4-byte chunk id has
 * already been consumed.
 *  fp  - stream positioned at the chunk's length field
 *  err - set to -3 if the stream ends inside the chunk; left untouched
 *        on success
 * RIFF chunks are word-aligned, so an odd-sized payload is followed by
 * one pad byte that must be skipped as well.
 */
static void readunknownchunk(FILE *fp, int *err)
{
    unsigned long data_size;
    unsigned long i;

    /* fget32le() sign-extends; mask back to the 32 bits actually stored */
    data_size = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    for (i = 0; i < data_size; i++)
    {
        if (fgetc(fp) == EOF)
            goto parse_error;
    }
    /* a missing pad byte at end of file is tolerated */
    if (data_size & 1)
        (void)fgetc(fp);
    return;
parse_error:
    *err = -3;
    return;
}

/*
 * Report whether a WAV holds 16-bit, two-channel audio.
 * Returns 1 if so, 0 otherwise (including for a null pointer).
 */
static int wav_is_16bitstereo(WAV *wav)
{
    if (!wav)
        return 0;
    return wav->bits_per_sample == 16 && wav->channels == 2;
}

/*
 * Read a double from a stream in IEEE 754 format regardless of host
 * encoding: the 8 bytes are decoded arithmetically rather than copied.
 *  fp        - the stream
 *  bigendian - non-zero if the most significant byte comes first,
 *              zero if the least significant byte comes first
 * Returns the decoded value. Signed zeros, subnormals, infinities and
 * NaNs are passed through; NaN is also returned if the stream ends
 * before 8 bytes have been read.
 */
static double freadieee754(FILE *fp, int bigendian)
{
    unsigned char buff[8]; /* always held most significant byte first */
    const int expbits = 11;
    const int significandbits = 52;
    const int bias = (1 << (expbits - 1)) - 1;
    double fnorm = 0.0;
    double bitval = 0.5;
    int sign;
    int exponent;
    int c;
    int i;

    for (i = 0; i < 8; i++)
    {
        c = fgetc(fp);
        if (c == EOF)
            goto not_a_number;
        buff[bigendian ? i : 7 - i] = (unsigned char)c;
    }

    sign = (buff[0] & 0x80) ? -1 : 1;
    /* exponent in raw (biased) form */
    exponent = ((buff[0] & 0x7F) << 4) | (buff[1] >> 4);

    /* fraction bits: the top one is worth 0.5, each following bit half of that */
    for (i = significandbits - 1; i >= 0; i--)
    {
        if (buff[7 - i / 8] & (1 << (i % 8)))
            fnorm += bitval;
        bitval /= 2.0;
    }

    /* all-ones exponent: NaN if any fraction bit is set, else infinity */
    if (exponent == (1 << expbits) - 1)
    {
        if (fnorm != 0.0)
            goto not_a_number;
#ifdef INFINITY
        return sign == 1 ? INFINITY : -INFINITY;
#else
        return sign * HUGE_VAL;
#endif
    }

    /* zero and subnormals: no implicit leading 1, fixed minimum exponent */
    if (exponent == 0)
        return sign * ldexp(fnorm, 1 - bias);

    return sign * ldexp(fnorm + 1.0, exponent - bias);

not_a_number:
#ifdef NAN
    return NAN;
#else
    return sqrt(-1.0);
#endif
}


/*
 * Read a float from a stream in IEEE 754 single-precision format
 * regardless of host encoding: the 4 bytes are decoded arithmetically.
 *  fp        - the stream
 *  bigendian - non-zero if the most significant byte comes first,
 *              zero if the least significant byte comes first
 * Returns the decoded value. Signed zeros, subnormals, infinities and
 * NaNs are passed through; NaN is also returned if the stream ends
 * before 4 bytes have been read.
 */
static float freadieee754f(FILE *fp, int bigendian)
{
    const int expbits = 8;
    const int significandbits = 23;
    const int bias = (1 << (expbits - 1)) - 1;
    unsigned long bits = 0; /* the 32-bit pattern, most significant byte on top */
    unsigned long mask = 0x00400000UL;
    double fnorm = 0.0;
    double bitval = 0.5;
    int sign;
    int exponent;
    int c;
    int i;

    for (i = 0; i < 4; i++)
    {
        c = fgetc(fp);
        if (c == EOF)
            goto not_a_number;
        if (bigendian)
            bits = (bits << 8) | (unsigned long)c;
        else
            bits |= (unsigned long)c << (8 * i);
    }

    sign = (bits & 0x80000000UL) ? -1 : 1;
    /* exponent in raw (biased) form */
    exponent = (int)((bits >> significandbits) & 0xFF);

    /* fraction bits: the top one is worth 0.5, each following bit half of that */
    for (i = 0; i < significandbits; i++)
    {
        if (bits & mask)
            fnorm += bitval;
        bitval /= 2;
        mask >>= 1;
    }

    /* all-ones exponent: NaN if any fraction bit is set, else infinity */
    if (exponent == (1 << expbits) - 1)
    {
        if (fnorm != 0.0)
            goto not_a_number;
#ifdef INFINITY
        return sign == 1 ? INFINITY : -INFINITY;
#else
        return (float)(sign * HUGE_VAL);
#endif
    }

    /* zero and subnormals: no implicit leading 1, fixed minimum exponent */
    if (exponent == 0)
        return (float)(sign * ldexp(fnorm, 1 - bias));

    return (float)(sign * ldexp(fnorm + 1.0, exponent - bias));

not_a_number:
#ifdef NAN
    return NAN;
#else
    return (float)sqrt(-1.0);
#endif
}

/*
 * Read two bytes from the stream as a little-endian signed 16-bit value.
 * The first byte is the low byte; the second byte is sign-extended.
 */
static int fget16le(FILE *fp)
{
    int low = fgetc(fp);
    int high = fgetc(fp);
    int signed_high = (high ^ 128) - 128; /* 0..255 -> -128..127 */

    return signed_high * 256 + low;
}

/*
 * Read four bytes from the stream as a little-endian signed 32-bit value.
 * The first byte is the least significant; the last byte is sign-extended.
 */
static long fget32le(FILE *fp)
{
    int b0 = fgetc(fp);
    int b1 = fgetc(fp);
    int b2 = fgetc(fp);
    int b3 = fgetc(fp);
    int signed_top = (b3 ^ 128) - 128; /* 0..255 -> -128..127 */

    return signed_top * 256 * 256 * 256 + b2 * 256 * 256 + b1 * 256 + b0;
}


/*
 * Manual smoke test: load a hard-coded WAV file, print its format and
 * convert the samples to 16-bit.
 */
void wavfilemain(void)
{
    int err;
    /* backslashes must be doubled inside a C string literal */
    WAV *wav = loadwav("C:\\Users\\Malcolm\\Documents\\Visual Studio 2013\\Projects\\ANSIScratch\\ANSIScratch\\LaserBlast.wav", &err);
    unsigned short *samples;
    long N;

    printf("here %p\n", (void *)wav);
    if (!wav)
    {
        fprintf(stderr, "loadwav failed with error %d\n", err);
        return;
    }
    /* sample_rate is unsigned long, so it needs %lu rather than %d */
    printf("%lu fmt %d bits %d channels %d\n", wav->sample_rate, wav->format_tag, wav->bits_per_sample, wav->channels);
    samples = wav_samplesasshort(wav, &N);

    free(samples);
    killwav(wav);
}

如果您想要专门用于 C++ 的数据,使用 Libsndfile 之类的东西非常容易。它是一个相当成熟的 C 库,但带有一个方便的 C++ 包装器 (sndfile.hh)。

这里的示例用法摘自我最近写的东西,我需要轻松访问音频数据。

// Excerpt from inside a function that returns an int (see `return 1` below).
std::string infile_name = "/path/to/vocal2.wav";

// Open input file.
SndfileHandle infile_handle( infile_name );
// Give up if the handle is invalid or reports an error.
if( !infile_handle || infile_handle.error() != 0 )
{
    std::cerr << "Unable to read " << infile_name << std::endl;
    std::cerr << infile_handle.strError() << std::endl;
    return 1;
}

// Show file stats
int64_t in_frames = infile_handle.frames();
int in_channels = infile_handle.channels();
int in_samplerate = infile_handle.samplerate();
std::cerr << "Input file: " << infile_name << std::endl;
std::cerr << " * Frames      : " << std::setw(6) << in_frames << std::endl;
std::cerr << " * Channels    : " << std::setw(6) << in_channels << std::endl;
std::cerr << " * Sample Rate : " << std::setw(6) << in_samplerate << std::endl;

// Read audio data as float
// The buffer holds frames * channels values.
// NOTE(review): the return value of read() is not checked here — confirm against the libsndfile docs.
std::vector<float> in_data( in_frames * in_channels );
infile_handle.read( in_data.data(), in_data.size() );

如果你只想在命令行上使用 SoX 并获得文本输出,你可以这样做:

sox vocal2.wav -t f32 - | od -ve -An | more

这里我指定输出为原始 32 位浮点数,并通过 GNU od 来处理它。有点令人沮丧的是,您无法告诉 od 您想要多少列,但是您可以使用其他简单的工具来整理输出。如果您想要不同的样本编码,请查看 od 的联机帮助页。