从 base64 解码后的嘈杂音频剪辑

Noisy audio clip after decoding from base64

我用 base64 编码了 wav 文件(audioClipName.txt in Resources/Sounds)。

HERE IS THE SOURCE WAVE FILE

然后我尝试解码它,从中制作一个 AudioClip 并像这样播放:

/// <summary>
/// Loads a base64-encoded wav file from Resources, decodes it into float
/// samples and plays it on the first AudioSource found in the scene.
/// </summary>
public static void CreateAudioClip()
{
    string s = Resources.Load<TextAsset> ("Sounds/audioClipName").text;

    byte[] bytes = System.Convert.FromBase64String (s);
    float[] f = ConvertByteToFloat(bytes);

    // NOTE(review): f.Length is the total sample count across BOTH channels;
    // AudioClip.Create expects samples per channel — confirm against the clip.
    AudioClip audioClip = AudioClip.Create("testSound", f.Length, 2, 44100, false, false);
    audioClip.SetData(f, 0);

    // 'as' is a reserved C# keyword and cannot be used as an identifier;
    // the original line did not compile.
    AudioSource source = GameObject.FindObjectOfType<AudioSource> ();
    source.PlayOneShot (audioClip);
}

/// <summary>
/// Reinterprets a little-endian byte stream as IEEE-754 32-bit float samples.
/// </summary>
/// <param name="array">Raw bytes; every 4 consecutive bytes form one little-endian float.</param>
/// <returns>The decoded floats (array.Length / 4 entries; trailing remainder bytes are ignored).</returns>
private static float[] ConvertByteToFloat(byte[] array) 
{
    float[] floatArr = new float[array.Length / 4];

    for (int i = 0; i < floatArr.Length; i++) 
    {
        // The data is little-endian, so bytes must be swapped only when the
        // HOST is big-endian. The original reversed when the host was
        // little-endian, corrupting every sample on typical x86/ARM machines.
        if (!BitConverter.IsLittleEndian) 
            Array.Reverse(array, i * 4, 4);

        floatArr[i] = BitConverter.ToSingle(array, i * 4);
    }

    return floatArr;
}

一切正常,除了声音只是一种噪音。

我在 Stack Overflow 上找到了这个相关问题,但那里的答案并没有解决问题。

以下是有关 Unity3D 的 wav 文件的详细信息:

有人知道这里的问题是什么吗?

编辑

我写下了二进制文件,一个刚从 base64 解码后,第二个在最终转换后,并将其与原始二进制 wav 文件进行比较:

如您所见,文件已正确编码,因为只需对其进行解码并像这样写下文件:

// Load the base64 text asset and write the decoded bytes straight back to
// disk — verifies that the base64 round-trip itself is lossless.
string scat = Resources.Load<TextAsset> ("Sounds/test").text;

byte[] bcat = System.Convert.FromBase64String (scat);
System.IO.File.WriteAllBytes ("Assets/just_decoded.wav", bcat);

给出了同样的文件。所有文件的长度都相同。

但是最后一个是错误的,所以问题出在转换为float数组的地方。但是我不明白哪里出了问题。

编辑:

这里是写下final.wav的代码:

// Decode the base64 asset, run it through the (suspect) float conversion,
// then copy the floats back to raw bytes and dump them to disk so the
// result can be diffed against the original wav file.
string scat = Resources.Load<TextAsset> ("Sounds/test").text;

byte[] bcat = System.Convert.FromBase64String (scat);
float[] f = ConvertByteToFloat(bcat);

// Reinterpret the float array as raw bytes (4 bytes per float).
byte[] byteArray = new byte[f.Length * 4];
Buffer.BlockCopy(f, 0, byteArray, 0, byteArray.Length);

System.IO.File.WriteAllBytes ("Assets/final.wav", byteArray);

根据文档 here

The samples should be floats ranging from -1.0f to 1.0f (exceeding these limits will lead to artifacts and undefined behaviour). The sample count is determined by the length of the float array. Use offsetSamples to write into a random position in the clip. If the length from the offset is longer than the clip length, the write will wrap around and write the remaining samples from the start of the clip.

看来你遇到的正是这种效果(超出范围导致的失真)。所以我想你必须先规范化数组才能处理它。

因为你在unity中操作,我不确定你可以使用什么功能,所以我提供了一些基本的float数组扩展方法:

/// <summary>
/// Normalizes the values within this array in place so that the largest
/// absolute value becomes 1. Empty or all-zero arrays are left untouched
/// (the original divided by zero and filled the array with NaN).
/// </summary>
/// <param name="data">The array which holds the values to be normalized.</param>
static void Normalize(this float[] data)
{
    float max = float.MinValue;

    // Find the largest absolute value (the peak).
    for (int i = 0; i < data.Length; i++)
    {
        if (Math.Abs(data[i]) > max)
        {
            max = Math.Abs(data[i]);
        }
    }

    // Guard: nothing to scale for an empty array (max stays float.MinValue)
    // or an all-zero array (max == 0) — dividing would produce NaN/garbage.
    if (max <= 0f)
    {
        return;
    }

    // Divide all by max so every value lands in [-1, 1].
    for (int i = 0; i < data.Length; i++)
    {
        data[i] = data[i] / max;
    }
}

在像这样进一步处理数据之前使用此扩展方法:

// Decode, convert, then normalize into [-1, 1] before handing the samples
// to AudioClip.SetData — values outside that range cause artifacts.
byte[] bytes = System.Convert.FromBase64String (s);
float[] f = ConvertByteToFloat(bytes);

// Normalize the values before using them
f.Normalize();

AudioClip audioClip = AudioClip.Create("testSound", f.Length, 2, 44100, false, false);
audioClip.SetData(f, 0);

您尝试播放的波形文件 (meow.wav) 具有以下属性:

  • PCM
  • 2 个通道
  • 44100 赫兹
  • 有符号 16 位 little-endian

您的主要错误是,您正在解释二进制数据,就好像它已经代表了一个浮点数。这就是 BitConverter.ToSingle() 的作用。

但是您需要做的是:按照 wave 文件头中的指定,从每两个字节创建一个带符号的 16 位 little-endian 值,将其转换(cast)为浮点数,然后对其进行规范化。对于您的文件(16 位!),每 两个 字节构成一个样本,而不是 四个 字节。数据是 little-endian (s16le) 格式,因此只有当主机不是 little-endian 时,才需要交换字节序。

这将是更正后的转换函数:

/// <summary>
/// Converts raw s16le PCM bytes (two bytes per sample, signed 16-bit
/// little-endian, as declared in the wav header) into floats in [-1, 1].
/// </summary>
/// <param name="array">Raw sample bytes; every 2 bytes form one Int16 sample.</param>
/// <returns>Normalized float samples (array.Length / 2 entries).</returns>
private static float[] ConvertByteToFloat(byte[] array) {
    float[] floatArr = new float[array.Length / 2];

    for (int i = 0; i < floatArr.Length; i++) {
        // Divide by 32768f, not 32768.0: the double literal promoted the
        // expression to double, which does not implicitly convert to float
        // (compile error CS0266 in the original).
        // NOTE(review): BitConverter.ToInt16 uses host byte order — assumes a
        // little-endian host; swap bytes first on big-endian machines.
        floatArr[i] = BitConverter.ToInt16(array, i * 2) / 32768f;
    }

    return floatArr;
}

并且您应该跳过 wave-file 的 header(真实音频数据从偏移量 44 开始)。

对于干净的解决方案,您必须正确解释 Wave-header 并根据那里指定的内容调整您的操作(或者如果它包含不受支持的参数则退出)。 例如,必须注意样本格式(每个样本的位数和字节序)、采样率和通道数。

以下是一个干净解决方案的实现:

PcmHeader

/// <summary>
/// Parsed representation of a wav (RIFF/PCM) file header. Exposes the audio
/// format parameters and knows how to normalize raw integer samples.
/// </summary>
private readonly struct PcmHeader
{
    #region Public types & data

    public int    BitDepth         { get; }
    public int    AudioSampleSize  { get; }
    public int    AudioSampleCount { get; }
    public ushort Channels         { get; }
    public int    SampleRate       { get; }
    public int    AudioStartIndex  { get; }
    public int    ByteRate         { get; }
    public ushort BlockAlign       { get; }

    #endregion

    #region Constructors & Finalizer

    private PcmHeader(int bitDepth,
        int               audioSize,
        int               audioStartIndex,
        ushort            channels,
        int               sampleRate,
        int               byteRate,
        ushort            blockAlign)
    {
        BitDepth       = bitDepth;
        // Precompute the divisors used by NormalizeSample: a signed N-bit
        // sample spans [-2^(N-1), 2^(N-1) - 1].
        _negativeDepth = Mathf.Pow(2f, BitDepth - 1f);
        _positiveDepth = _negativeDepth - 1f;

        AudioSampleSize  = bitDepth / 8;
        AudioSampleCount = Mathf.FloorToInt(audioSize / (float)AudioSampleSize);
        AudioStartIndex  = audioStartIndex;

        Channels   = channels;
        SampleRate = sampleRate;
        ByteRate   = byteRate;
        BlockAlign = blockAlign;
    }

    #endregion

    #region Public Methods

    /// <summary>Parses a header from an in-memory wav file.</summary>
    public static PcmHeader FromBytes(byte[] pcmBytes)
    {
        using var memoryStream = new MemoryStream(pcmBytes);
        return FromStream(memoryStream);
    }

    /// <summary>
    /// Parses a wav header from a stream. Only uncompressed PCM and
    /// WaveFormatExtensible files are accepted.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// When the format code is neither PCM (1) nor WaveFormatExtensible (65534).
    /// </exception>
    public static PcmHeader FromStream(Stream pcmStream)
    {
        pcmStream.Position = SizeIndex;
        using BinaryReader reader = new BinaryReader(pcmStream);

        int    headerSize      = reader.ReadInt32();  // 16
        ushort audioFormatCode = reader.ReadUInt16(); // 20

        string audioFormat = GetAudioFormatFromCode(audioFormatCode);
        if (audioFormatCode != 1 && audioFormatCode != 65534)
        {
            // BUG FIX: the original tested 'audioFormatCode == 65534', which
            // rejected WaveFormatExtensible (a supported format) and let every
            // other compressed format fall through unchecked.
            // Only uncompressed PCM wav files are supported.
            throw new ArgumentOutOfRangeException(nameof(pcmStream),
                                                  $"Detected format code '{audioFormatCode}' {audioFormat}, but only PCM and WaveFormatExtensible uncompressed formats are currently supported.");
        }

        ushort channelCount = reader.ReadUInt16(); // 22
        int    sampleRate   = reader.ReadInt32();  // 24
        int    byteRate     = reader.ReadInt32();  // 28
        ushort blockAlign   = reader.ReadUInt16(); // 32
        ushort bitDepth     = reader.ReadUInt16(); //34

        // NOTE(review): jumps straight from the fmt chunk to the data chunk,
        // assuming no optional chunks (LIST, fact, ...) in between — confirm
        // for wav files produced by other encoders.
        pcmStream.Position = SizeIndex + headerSize + 2 * sizeof(int); // Header end index
        int audioSize = reader.ReadInt32();                            // Audio size index

        return new PcmHeader(bitDepth, audioSize, (int)pcmStream.Position, channelCount, sampleRate, byteRate, blockAlign); // audio start index
    }

    /// <summary>
    /// Scales a raw integer sample into [-1, 1], using the asymmetric signed
    /// range of the file's bit depth.
    /// </summary>
    public float NormalizeSample(float rawSample)
    {
        float sampleDepth = rawSample < 0 ? _negativeDepth : _positiveDepth;
        return rawSample / sampleDepth;
    }

    #endregion

    #region Private Methods

    /// <summary>Maps a wav format code to a human-readable name.</summary>
    private static string GetAudioFormatFromCode(ushort code)
    {
        switch (code)
        {
            case 1:     return "PCM";
            case 2:     return "ADPCM";
            case 3:     return "IEEE";
            case 7:     return "µ-law"; // was mojibake "?-law" in the original
            case 65534: return "WaveFormatExtensible";
            default:    throw new ArgumentOutOfRangeException(nameof(code), code, "Unknown wav code format.");
        }
    }

    #endregion

    #region Private types & Data

    // Byte offset of the fmt-chunk size field in a canonical wav header.
    private const int SizeIndex = 16;

    private readonly float _positiveDepth;
    private readonly float _negativeDepth;

    #endregion
}

PcmData

/// <summary>
/// Decoded PCM audio: normalized float samples plus the clip parameters
/// needed to create an AudioClip.
/// </summary>
private readonly struct PcmData
{
    #region Public types & data

    public float[] Value      { get; }
    public int     Length     { get; }
    public int     Channels   { get; }
    public int     SampleRate { get; }

    #endregion

    #region Constructors & Finalizer

    private PcmData(float[] value, int channels, int sampleRate)
    {
        Value      = value;
        Length     = value.Length;
        Channels   = channels;
        SampleRate = sampleRate;
    }

    #endregion

    #region Public Methods

    /// <summary>
    /// Parses a complete wav file and converts its samples to floats in [-1, 1].
    /// </summary>
    /// <param name="bytes">The wav file contents, header included.</param>
    /// <exception cref="ArgumentNullException">When <paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">When the bit depth is not 8, 16 or 32.</exception>
    public static PcmData FromBytes(byte[] bytes)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        PcmHeader pcmHeader = PcmHeader.FromBytes(bytes);
        if (pcmHeader.BitDepth != 16 && pcmHeader.BitDepth != 32 && pcmHeader.BitDepth != 8)
        {
            throw new ArgumentOutOfRangeException(nameof(pcmHeader.BitDepth), pcmHeader.BitDepth, "Supported values are: 8, 16, 32");
        }

        float[] samples = new float[pcmHeader.AudioSampleCount];
        for (int i = 0; i < samples.Length; ++i)
        {
            int   byteIndex = pcmHeader.AudioStartIndex + i * pcmHeader.AudioSampleSize;
            float rawSample;
            switch (pcmHeader.BitDepth)
            {
                case 8:
                    // BUG FIX: 8-bit PCM in wav files is UNSIGNED with a
                    // midpoint of 128; re-center so silence maps to 0 instead
                    // of ~+1 (the original fed the raw 0..255 byte in).
                    rawSample = bytes[byteIndex] - 128;
                    break;

                case 16:
                    rawSample = BitConverter.ToInt16(bytes, byteIndex);
                    break;

                case 32:
                    rawSample = BitConverter.ToInt32(bytes, byteIndex);
                    break;

                default: throw new ArgumentOutOfRangeException(nameof(pcmHeader.BitDepth), pcmHeader.BitDepth, "Supported values are: 8, 16, 32");
            }

            samples[i] = pcmHeader.NormalizeSample(rawSample); // normalize sample between [-1f, 1f]
        }

        return new PcmData(samples, pcmHeader.Channels, pcmHeader.SampleRate);
    }

    #endregion
}

用法

/// <summary>
/// Builds a playable <see cref="AudioClip"/> from raw wav (PCM) bytes.
/// </summary>
/// <param name="bytes">The complete wav file contents, header included.</param>
/// <param name="clipName">Name assigned to the created clip.</param>
/// <returns>An AudioClip filled with the decoded, normalized samples.</returns>
public static AudioClip FromPcmBytes(byte[] bytes, string clipName = "pcm")
{
    clipName.ThrowIfNullOrWhitespace(nameof(clipName));

    PcmData pcm = PcmData.FromBytes(bytes);
    AudioClip clip = AudioClip.Create(clipName, pcm.Length, pcm.Channels, pcm.SampleRate, false);
    clip.SetData(pcm.Value, 0);

    return clip;
}

请注意,AudioClip.Create 提供了 Read 和 SetPosition 回调的重载,以防您需要使用源 Stream 而不是字节块。