从 base64 解码后的嘈杂音频剪辑
Noisy audio clip after decoding from base64
我用 base64 编码了 wav 文件(audioClipName.txt in Resources/Sounds)。
然后我尝试解码它,从中制作一个 AudioClip 并像这样播放:
public static void CreateAudioClip()
{
    // Load the base64-encoded wav text asset and decode it back to raw bytes.
    string s = Resources.Load<TextAsset> ("Sounds/audioClipName").text;
    byte[] bytes = System.Convert.FromBase64String (s);
    float[] f = ConvertByteToFloat(bytes);
    // 2 channels @ 44100 Hz; the sample count equals the float array length.
    AudioClip audioClip = AudioClip.Create("testSound", f.Length, 2, 44100, false, false);
    audioClip.SetData(f, 0);
    // Bug fix: 'as' is a reserved C# keyword and cannot be used as an
    // identifier, so the original did not compile. Renamed to 'source'.
    AudioSource source = GameObject.FindObjectOfType<AudioSource> ();
    source.PlayOneShot (audioClip);
}
// Reinterprets each 4-byte group of 'array' as a little-endian IEEE-754
// float and returns the resulting sample array.
// NOTE(review): for a 16-bit PCM wav this interpretation is wrong by itself
// (the samples are Int16, not float) — this fix only repairs the endianness
// handling of the float interpretation.
private static float[] ConvertByteToFloat(byte[] array)
{
    float[] floatArr = new float[array.Length / 4];
    for (int i = 0; i < floatArr.Length; i++)
    {
        // Bug fix: the original reversed the bytes on LITTLE-endian hosts
        // (corrupting already-correct little-endian data) and mutated the
        // caller's array in place. Swap only on big-endian hosts, and swap
        // a private copy of the 4 bytes instead of the input.
        if (!BitConverter.IsLittleEndian)
        {
            byte[] chunk = new byte[4];
            Array.Copy(array, i * 4, chunk, 0, 4);
            Array.Reverse(chunk);
            floatArr[i] = BitConverter.ToSingle(chunk, 0);
        }
        else
        {
            floatArr[i] = BitConverter.ToSingle(array, i * 4);
        }
    }
    return floatArr;
}
一切正常,除了声音只是一种噪音。
我在 Stack Overflow 上找到了这个相关问题,但那里的答案并没有解决我的问题。
以下是有关 Unity3D 的 wav 文件的详细信息:
有人知道这里的问题是什么吗?
编辑
我写下了二进制文件,一个刚从 base64 解码后,第二个在最终转换后,并将其与原始二进制 wav 文件进行比较:
如您所见,文件已正确编码,因为只需对其进行解码并像这样写下文件:
// Round-trip check: decode the base64 text asset and dump the raw bytes
// back to disk. The output matches the original wav byte-for-byte, so the
// base64 encode/decode step is not the source of the noise.
string scat = Resources.Load<TextAsset> ("Sounds/test").text;
byte[] bcat = System.Convert.FromBase64String (scat);
System.IO.File.WriteAllBytes ("Assets/just_decoded.wav", bcat);
给了同样的文件。所有文件都有一定的长度。
但是最后一个是错误的,所以问题出在转换为float数组的地方。但是我不明白哪里出了问题。
编辑:
这里是写下final.wav的代码:
// Dump the float-converted data for comparison.
// NOTE(review): this writes raw IEEE-754 floats with no RIFF header, so
// "final.wav" is not a valid wav file even when the conversion is correct.
string scat = Resources.Load<TextAsset> ("Sounds/test").text;
byte[] bcat = System.Convert.FromBase64String (scat);
float[] f = ConvertByteToFloat(bcat);
byte[] byteArray = new byte[f.Length * 4];
Buffer.BlockCopy(f, 0, byteArray, 0, byteArray.Length);
System.IO.File.WriteAllBytes ("Assets/final.wav", byteArray);
根据文档 here,
The samples should be floats ranging from -1.0f to 1.0f (exceeding these limits will lead to artifacts and undefined behaviour). The sample count is determined by the length of the float array. Use offsetSamples to write into a random position in the clip. If the length from the offset is longer than the clip length, the write will wrap around and write the remaining samples from the start of the clip.
看来你遇到的正是这种效果。所以我想你必须先对数组进行归一化,然后才能处理它。
因为你在unity中操作,我不确定你可以使用什么功能,所以我提供了一些基本的float数组扩展方法:
/// <summary>
/// Normalizes the values within this array in place so the largest
/// magnitude becomes 1. Arrays that are empty or all-zero are left
/// unchanged. (Extension method: must live inside a static class.)
/// </summary>
/// <param name="data">The array which holds the values to be normalized.</param>
static void Normalize(this float[] data)
{
    // Find the largest absolute sample value.
    float max = 0f;
    for (int i = 0; i < data.Length; i++)
    {
        float abs = Math.Abs(data[i]);
        if (abs > max)
        {
            max = abs;
        }
    }
    // Bug fix: the original divided unconditionally, turning every sample
    // into NaN when the array was silent (all zeros). Skip the scale pass
    // when there is nothing to normalize.
    if (max <= 0f)
    {
        return;
    }
    // Divide all by max
    for (int i = 0; i < data.Length; i++)
    {
        data[i] = data[i] / max;
    }
}
在像这样进一步处理数据之前使用此扩展方法:
byte[] bytes = System.Convert.FromBase64String (s);
float[] f = ConvertByteToFloat(bytes);
// Normalize the values into [-1f, 1f] before handing them to Unity.
f.Normalize();
// 2 channels @ 44100 Hz, matching the source wav's properties.
AudioClip audioClip = AudioClip.Create("testSound", f.Length, 2, 44100, false, false);
audioClip.SetData(f, 0);
您尝试播放的波形文件 (meow.wav
) 具有以下属性:
- PCM
- 2 个通道
- 44100 赫兹
- 有符号 16 位 little-endian
您的主要错误是,您正在解释二进制数据,就好像它已经代表了一个浮点数。这就是 BitConverter.ToSingle()
的作用。
但是您需要做的是:按照 wave 文件头中的指定,从每两个字节构造一个有符号的 16 位 little-endian 值,将其转换(cast)为浮点数,然后对其进行归一化。对于您的文件(16 位!),每两个字节构成一个样本,而不是四个字节。数据是 little-endian (s16le),因此只有当主机字节序不是 little-endian 时才需要交换字节。
这将是更正后的转换函数:
// Converts signed 16-bit little-endian PCM bytes into floats in [-1f, 1f).
// NOTE: BitConverter.ToInt16 uses the host byte order, which matches the
// s16le data on little-endian machines — TODO confirm for big-endian hosts.
private static float[] ConvertByteToFloat(byte[] array) {
    float[] floatArr = new float[array.Length / 2];
    for (int i = 0; i < floatArr.Length; i++) {
        // Bug fix: 32768.0 is a double literal, so the division produced a
        // double that cannot be implicitly assigned to a float element
        // (compile error CS0266). Use a float literal instead.
        floatArr[i] = BitConverter.ToInt16(array, i * 2) / 32768f;
    }
    return floatArr;
}
并且您应该跳过 wave-file 的 header(真实音频数据从偏移量 44 开始)。
对于干净的解决方案,您必须正确解释 Wave-header 并根据那里指定的内容调整您的操作(或者如果它包含不受支持的参数则退出)。
例如,必须注意样本格式(每个样本的位数和字节序)、采样率和通道数。
下面是一个干净解决方案的实现:
PcmHeader
/// <summary>
/// Parsed RIFF/WAVE "fmt " chunk data plus the location and size of the
/// audio payload. Assumes the canonical layout where the data chunk
/// directly follows the fmt chunk — TODO confirm for wavs that carry extra
/// chunks (e.g. LIST/INFO), which this parser does not skip.
/// </summary>
private readonly struct PcmHeader
{
    #region Public types & data
    public int BitDepth { get; }
    public int AudioSampleSize { get; }   // bytes per sample
    public int AudioSampleCount { get; }
    public ushort Channels { get; }
    public int SampleRate { get; }
    public int AudioStartIndex { get; }   // byte offset of the first sample
    public int ByteRate { get; }
    public ushort BlockAlign { get; }
    #endregion
    #region Constructors & Finalizer
    private PcmHeader(int bitDepth,
                      int audioSize,
                      int audioStartIndex,
                      ushort channels,
                      int sampleRate,
                      int byteRate,
                      ushort blockAlign)
    {
        BitDepth = bitDepth;
        // Divisors used to map signed samples into [-1, 1]: the negative
        // range is 2^(bits-1), the positive range is one less.
        _negativeDepth = Mathf.Pow(2f, BitDepth - 1f);
        _positiveDepth = _negativeDepth - 1f;
        AudioSampleSize = bitDepth / 8;
        AudioSampleCount = Mathf.FloorToInt(audioSize / (float)AudioSampleSize);
        AudioStartIndex = audioStartIndex;
        Channels = channels;
        SampleRate = sampleRate;
        ByteRate = byteRate;
        BlockAlign = blockAlign;
    }
    #endregion
    #region Public Methods
    /// <summary>Parses a header from a complete wav byte array.</summary>
    public static PcmHeader FromBytes(byte[] pcmBytes)
    {
        using var memoryStream = new MemoryStream(pcmBytes);
        return FromStream(memoryStream);
    }
    /// <summary>
    /// Parses a header from a stream; on return the stream is positioned at
    /// the first audio byte.
    /// </summary>
    public static PcmHeader FromStream(Stream pcmStream)
    {
        pcmStream.Position = SizeIndex;
        using BinaryReader reader = new BinaryReader(pcmStream);
        int headerSize = reader.ReadInt32(); // 16
        ushort audioFormatCode = reader.ReadUInt16(); // 20
        string audioFormat = GetAudioFormatFromCode(audioFormatCode);
        // Bug fix: the original condition (code != 1 && code == 65534)
        // rejected exactly the supported WaveFormatExtensible format and
        // silently accepted compressed formats such as ADPCM. Accept only
        // PCM (1) and WaveFormatExtensible (65534).
        if (audioFormatCode != 1 && audioFormatCode != 65534)
        {
            // Only uncompressed PCM wav files are supported.
            throw new ArgumentOutOfRangeException(nameof(pcmStream),
                $"Detected format code '{audioFormatCode}' {audioFormat}, but only PCM and WaveFormatExtensible uncompressed formats are currently supported.");
        }
        ushort channelCount = reader.ReadUInt16(); // 22
        int sampleRate = reader.ReadInt32(); // 24
        int byteRate = reader.ReadInt32(); // 28
        ushort blockAlign = reader.ReadUInt16(); // 32
        ushort bitDepth = reader.ReadUInt16(); //34
        pcmStream.Position = SizeIndex + headerSize + 2 * sizeof(int); // Header end index
        int audioSize = reader.ReadInt32(); // Audio size index
        return new PcmHeader(bitDepth, audioSize, (int)pcmStream.Position, channelCount, sampleRate, byteRate, blockAlign); // audio start index
    }
    /// <summary>Scales a raw signed sample into the [-1f, 1f] range.</summary>
    public float NormalizeSample(float rawSample)
    {
        float sampleDepth = rawSample < 0 ? _negativeDepth : _positiveDepth;
        return rawSample / sampleDepth;
    }
    #endregion
    #region Private Methods
    private static string GetAudioFormatFromCode(ushort code)
    {
        switch (code)
        {
            case 1: return "PCM";
            case 2: return "ADPCM";
            case 3: return "IEEE";
            case 7: return "µ-law"; // fixed mojibake: was "?-law"
            case 65534: return "WaveFormatExtensible";
            default: throw new ArgumentOutOfRangeException(nameof(code), code, "Unknown wav code format.");
        }
    }
    #endregion
    #region Private types & Data
    private const int SizeIndex = 16; // byte offset of the fmt chunk size field
    private readonly float _positiveDepth;
    private readonly float _negativeDepth;
    #endregion
}
PcmData
/// <summary>
/// Decoded PCM samples plus the channel count and sample rate needed to
/// build an AudioClip.
/// </summary>
private readonly struct PcmData
{
    #region Public types & data
    public float[] Value { get; }
    public int Length { get; }
    public int Channels { get; }
    public int SampleRate { get; }
    #endregion
    #region Constructors & Finalizer
    private PcmData(float[] value, int channels, int sampleRate)
    {
        Value = value;
        Length = value.Length;
        Channels = channels;
        SampleRate = sampleRate;
    }
    #endregion
    #region Public Methods
    /// <summary>
    /// Parses a complete wav file and converts its samples into normalized
    /// floats in [-1f, 1f].
    /// </summary>
    /// <param name="bytes">Complete wav file contents, including header.</param>
    /// <exception cref="ArgumentNullException">bytes is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Unsupported bit depth.</exception>
    public static PcmData FromBytes(byte[] bytes)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }
        PcmHeader pcmHeader = PcmHeader.FromBytes(bytes);
        if (pcmHeader.BitDepth != 16 && pcmHeader.BitDepth != 32 && pcmHeader.BitDepth != 8)
        {
            throw new ArgumentOutOfRangeException(nameof(pcmHeader.BitDepth), pcmHeader.BitDepth, "Supported values are: 8, 16, 32");
        }
        float[] samples = new float[pcmHeader.AudioSampleCount];
        for (int i = 0; i < samples.Length; ++i)
        {
            int byteIndex = pcmHeader.AudioStartIndex + i * pcmHeader.AudioSampleSize;
            float rawSample;
            switch (pcmHeader.BitDepth)
            {
                case 8:
                    // Bug fix: 8-bit wav samples are UNSIGNED (0..255) with
                    // silence at 128. Re-center to -128..127 before
                    // normalizing; the original treated them as signed,
                    // leaving the whole waveform with a large DC offset.
                    rawSample = bytes[byteIndex] - 128;
                    break;
                case 16:
                    rawSample = BitConverter.ToInt16(bytes, byteIndex);
                    break;
                case 32:
                    // NOTE(review): assumes 32-bit *integer* PCM; a 32-bit
                    // IEEE-float wav (format code 3) would need
                    // BitConverter.ToSingle instead — TODO confirm inputs.
                    rawSample = BitConverter.ToInt32(bytes, byteIndex);
                    break;
                default: throw new ArgumentOutOfRangeException(nameof(pcmHeader.BitDepth), pcmHeader.BitDepth, "Supported values are: 8, 16, 32");
            }
            samples[i] = pcmHeader.NormalizeSample(rawSample); // normalize sample between [-1f, 1f]
        }
        return new PcmData(samples, pcmHeader.Channels, pcmHeader.SampleRate);
    }
    #endregion
}
用法
/// <summary>
/// Builds a playable AudioClip from complete wav (PCM) file bytes.
/// </summary>
/// <param name="bytes">Complete wav file contents, including the header.</param>
/// <param name="clipName">Name assigned to the created clip.</param>
/// <returns>The populated audio clip.</returns>
public static AudioClip FromPcmBytes(byte[] bytes, string clipName = "pcm")
{
    clipName.ThrowIfNullOrWhitespace(nameof(clipName));
    PcmData decoded = PcmData.FromBytes(bytes);
    AudioClip clip = AudioClip.Create(clipName,
                                      decoded.Length,
                                      decoded.Channels,
                                      decoded.SampleRate,
                                      false);
    clip.SetData(decoded.Value, 0);
    return clip;
}
请注意,AudioClip.Create
提供了 Read 和 SetPosition 回调的重载,以防您需要使用源 Stream
而不是字节块。
我用 base64 编码了 wav 文件(audioClipName.txt in Resources/Sounds)。
然后我尝试解码它,从中制作一个 AudioClip 并像这样播放:
public static void CreateAudioClip()
{
    // Load the base64-encoded wav text asset and decode it back to raw bytes.
    string s = Resources.Load<TextAsset> ("Sounds/audioClipName").text;
    byte[] bytes = System.Convert.FromBase64String (s);
    float[] f = ConvertByteToFloat(bytes);
    // 2 channels @ 44100 Hz; the sample count equals the float array length.
    AudioClip audioClip = AudioClip.Create("testSound", f.Length, 2, 44100, false, false);
    audioClip.SetData(f, 0);
    // Bug fix: 'as' is a reserved C# keyword and cannot be used as an
    // identifier, so the original did not compile. Renamed to 'source'.
    AudioSource source = GameObject.FindObjectOfType<AudioSource> ();
    source.PlayOneShot (audioClip);
}
// Reinterprets each 4-byte group of 'array' as a little-endian IEEE-754
// float and returns the resulting sample array.
// NOTE(review): for a 16-bit PCM wav this interpretation is wrong by itself
// (the samples are Int16, not float) — this fix only repairs the endianness
// handling of the float interpretation.
private static float[] ConvertByteToFloat(byte[] array)
{
    float[] floatArr = new float[array.Length / 4];
    for (int i = 0; i < floatArr.Length; i++)
    {
        // Bug fix: the original reversed the bytes on LITTLE-endian hosts
        // (corrupting already-correct little-endian data) and mutated the
        // caller's array in place. Swap only on big-endian hosts, and swap
        // a private copy of the 4 bytes instead of the input.
        if (!BitConverter.IsLittleEndian)
        {
            byte[] chunk = new byte[4];
            Array.Copy(array, i * 4, chunk, 0, 4);
            Array.Reverse(chunk);
            floatArr[i] = BitConverter.ToSingle(chunk, 0);
        }
        else
        {
            floatArr[i] = BitConverter.ToSingle(array, i * 4);
        }
    }
    return floatArr;
}
一切正常,除了声音只是一种噪音。
我在 Stack Overflow 上找到了这个相关问题,但那里的答案并没有解决我的问题。
以下是有关 Unity3D 的 wav 文件的详细信息:
有人知道这里的问题是什么吗?
编辑
我写下了二进制文件,一个刚从 base64 解码后,第二个在最终转换后,并将其与原始二进制 wav 文件进行比较:
如您所见,文件已正确编码,因为只需对其进行解码并像这样写下文件:
// Round-trip check: decode the base64 text asset and dump the raw bytes
// back to disk. The output matches the original wav byte-for-byte, so the
// base64 encode/decode step is not the source of the noise.
string scat = Resources.Load<TextAsset> ("Sounds/test").text;
byte[] bcat = System.Convert.FromBase64String (scat);
System.IO.File.WriteAllBytes ("Assets/just_decoded.wav", bcat);
给了同样的文件。所有文件都有一定的长度。
但是最后一个是错误的,所以问题出在转换为float数组的地方。但是我不明白哪里出了问题。
编辑:
这里是写下final.wav的代码:
// Dump the float-converted data for comparison.
// NOTE(review): this writes raw IEEE-754 floats with no RIFF header, so
// "final.wav" is not a valid wav file even when the conversion is correct.
string scat = Resources.Load<TextAsset> ("Sounds/test").text;
byte[] bcat = System.Convert.FromBase64String (scat);
float[] f = ConvertByteToFloat(bcat);
byte[] byteArray = new byte[f.Length * 4];
Buffer.BlockCopy(f, 0, byteArray, 0, byteArray.Length);
System.IO.File.WriteAllBytes ("Assets/final.wav", byteArray);
根据文档 here,
The samples should be floats ranging from -1.0f to 1.0f (exceeding these limits will lead to artifacts and undefined behaviour). The sample count is determined by the length of the float array. Use offsetSamples to write into a random position in the clip. If the length from the offset is longer than the clip length, the write will wrap around and write the remaining samples from the start of the clip.
看来你遇到的正是这种效果。所以我想你必须先对数组进行归一化,然后才能处理它。
因为你在unity中操作,我不确定你可以使用什么功能,所以我提供了一些基本的float数组扩展方法:
/// <summary>
/// Normalizes the values within this array in place so the largest
/// magnitude becomes 1. Arrays that are empty or all-zero are left
/// unchanged. (Extension method: must live inside a static class.)
/// </summary>
/// <param name="data">The array which holds the values to be normalized.</param>
static void Normalize(this float[] data)
{
    // Find the largest absolute sample value.
    float max = 0f;
    for (int i = 0; i < data.Length; i++)
    {
        float abs = Math.Abs(data[i]);
        if (abs > max)
        {
            max = abs;
        }
    }
    // Bug fix: the original divided unconditionally, turning every sample
    // into NaN when the array was silent (all zeros). Skip the scale pass
    // when there is nothing to normalize.
    if (max <= 0f)
    {
        return;
    }
    // Divide all by max
    for (int i = 0; i < data.Length; i++)
    {
        data[i] = data[i] / max;
    }
}
在像这样进一步处理数据之前使用此扩展方法:
byte[] bytes = System.Convert.FromBase64String (s);
float[] f = ConvertByteToFloat(bytes);
// Normalize the values into [-1f, 1f] before handing them to Unity.
f.Normalize();
// 2 channels @ 44100 Hz, matching the source wav's properties.
AudioClip audioClip = AudioClip.Create("testSound", f.Length, 2, 44100, false, false);
audioClip.SetData(f, 0);
您尝试播放的波形文件 (meow.wav
) 具有以下属性:
- PCM
- 2 个通道
- 44100 赫兹
- 有符号 16 位 little-endian
您的主要错误是,您正在解释二进制数据,就好像它已经代表了一个浮点数。这就是 BitConverter.ToSingle()
的作用。
但是您需要做的是:按照 wave 文件头中的指定,从每两个字节构造一个有符号的 16 位 little-endian 值,将其转换(cast)为浮点数,然后对其进行归一化。对于您的文件(16 位!),每两个字节构成一个样本,而不是四个字节。数据是 little-endian (s16le),因此只有当主机字节序不是 little-endian 时才需要交换字节。
这将是更正后的转换函数:
// Converts signed 16-bit little-endian PCM bytes into floats in [-1f, 1f).
// NOTE: BitConverter.ToInt16 uses the host byte order, which matches the
// s16le data on little-endian machines — TODO confirm for big-endian hosts.
private static float[] ConvertByteToFloat(byte[] array) {
    float[] floatArr = new float[array.Length / 2];
    for (int i = 0; i < floatArr.Length; i++) {
        // Bug fix: 32768.0 is a double literal, so the division produced a
        // double that cannot be implicitly assigned to a float element
        // (compile error CS0266). Use a float literal instead.
        floatArr[i] = BitConverter.ToInt16(array, i * 2) / 32768f;
    }
    return floatArr;
}
并且您应该跳过 wave-file 的 header(真实音频数据从偏移量 44 开始)。
对于干净的解决方案,您必须正确解释 Wave-header 并根据那里指定的内容调整您的操作(或者如果它包含不受支持的参数则退出)。 例如,必须注意样本格式(每个样本的位数和字节序)、采样率和通道数。
PcmHeader
/// <summary>
/// Parsed RIFF/WAVE "fmt " chunk data plus the location and size of the
/// audio payload. Assumes the canonical layout where the data chunk
/// directly follows the fmt chunk — TODO confirm for wavs that carry extra
/// chunks (e.g. LIST/INFO), which this parser does not skip.
/// </summary>
private readonly struct PcmHeader
{
    #region Public types & data
    public int BitDepth { get; }
    public int AudioSampleSize { get; }   // bytes per sample
    public int AudioSampleCount { get; }
    public ushort Channels { get; }
    public int SampleRate { get; }
    public int AudioStartIndex { get; }   // byte offset of the first sample
    public int ByteRate { get; }
    public ushort BlockAlign { get; }
    #endregion
    #region Constructors & Finalizer
    private PcmHeader(int bitDepth,
                      int audioSize,
                      int audioStartIndex,
                      ushort channels,
                      int sampleRate,
                      int byteRate,
                      ushort blockAlign)
    {
        BitDepth = bitDepth;
        // Divisors used to map signed samples into [-1, 1]: the negative
        // range is 2^(bits-1), the positive range is one less.
        _negativeDepth = Mathf.Pow(2f, BitDepth - 1f);
        _positiveDepth = _negativeDepth - 1f;
        AudioSampleSize = bitDepth / 8;
        AudioSampleCount = Mathf.FloorToInt(audioSize / (float)AudioSampleSize);
        AudioStartIndex = audioStartIndex;
        Channels = channels;
        SampleRate = sampleRate;
        ByteRate = byteRate;
        BlockAlign = blockAlign;
    }
    #endregion
    #region Public Methods
    /// <summary>Parses a header from a complete wav byte array.</summary>
    public static PcmHeader FromBytes(byte[] pcmBytes)
    {
        using var memoryStream = new MemoryStream(pcmBytes);
        return FromStream(memoryStream);
    }
    /// <summary>
    /// Parses a header from a stream; on return the stream is positioned at
    /// the first audio byte.
    /// </summary>
    public static PcmHeader FromStream(Stream pcmStream)
    {
        pcmStream.Position = SizeIndex;
        using BinaryReader reader = new BinaryReader(pcmStream);
        int headerSize = reader.ReadInt32(); // 16
        ushort audioFormatCode = reader.ReadUInt16(); // 20
        string audioFormat = GetAudioFormatFromCode(audioFormatCode);
        // Bug fix: the original condition (code != 1 && code == 65534)
        // rejected exactly the supported WaveFormatExtensible format and
        // silently accepted compressed formats such as ADPCM. Accept only
        // PCM (1) and WaveFormatExtensible (65534).
        if (audioFormatCode != 1 && audioFormatCode != 65534)
        {
            // Only uncompressed PCM wav files are supported.
            throw new ArgumentOutOfRangeException(nameof(pcmStream),
                $"Detected format code '{audioFormatCode}' {audioFormat}, but only PCM and WaveFormatExtensible uncompressed formats are currently supported.");
        }
        ushort channelCount = reader.ReadUInt16(); // 22
        int sampleRate = reader.ReadInt32(); // 24
        int byteRate = reader.ReadInt32(); // 28
        ushort blockAlign = reader.ReadUInt16(); // 32
        ushort bitDepth = reader.ReadUInt16(); //34
        pcmStream.Position = SizeIndex + headerSize + 2 * sizeof(int); // Header end index
        int audioSize = reader.ReadInt32(); // Audio size index
        return new PcmHeader(bitDepth, audioSize, (int)pcmStream.Position, channelCount, sampleRate, byteRate, blockAlign); // audio start index
    }
    /// <summary>Scales a raw signed sample into the [-1f, 1f] range.</summary>
    public float NormalizeSample(float rawSample)
    {
        float sampleDepth = rawSample < 0 ? _negativeDepth : _positiveDepth;
        return rawSample / sampleDepth;
    }
    #endregion
    #region Private Methods
    private static string GetAudioFormatFromCode(ushort code)
    {
        switch (code)
        {
            case 1: return "PCM";
            case 2: return "ADPCM";
            case 3: return "IEEE";
            case 7: return "µ-law"; // fixed mojibake: was "?-law"
            case 65534: return "WaveFormatExtensible";
            default: throw new ArgumentOutOfRangeException(nameof(code), code, "Unknown wav code format.");
        }
    }
    #endregion
    #region Private types & Data
    private const int SizeIndex = 16; // byte offset of the fmt chunk size field
    private readonly float _positiveDepth;
    private readonly float _negativeDepth;
    #endregion
}
PcmData
/// <summary>
/// Decoded PCM samples plus the channel count and sample rate needed to
/// build an AudioClip.
/// </summary>
private readonly struct PcmData
{
    #region Public types & data
    public float[] Value { get; }
    public int Length { get; }
    public int Channels { get; }
    public int SampleRate { get; }
    #endregion
    #region Constructors & Finalizer
    private PcmData(float[] value, int channels, int sampleRate)
    {
        Value = value;
        Length = value.Length;
        Channels = channels;
        SampleRate = sampleRate;
    }
    #endregion
    #region Public Methods
    /// <summary>
    /// Parses a complete wav file and converts its samples into normalized
    /// floats in [-1f, 1f].
    /// </summary>
    /// <param name="bytes">Complete wav file contents, including header.</param>
    /// <exception cref="ArgumentNullException">bytes is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Unsupported bit depth.</exception>
    public static PcmData FromBytes(byte[] bytes)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }
        PcmHeader pcmHeader = PcmHeader.FromBytes(bytes);
        if (pcmHeader.BitDepth != 16 && pcmHeader.BitDepth != 32 && pcmHeader.BitDepth != 8)
        {
            throw new ArgumentOutOfRangeException(nameof(pcmHeader.BitDepth), pcmHeader.BitDepth, "Supported values are: 8, 16, 32");
        }
        float[] samples = new float[pcmHeader.AudioSampleCount];
        for (int i = 0; i < samples.Length; ++i)
        {
            int byteIndex = pcmHeader.AudioStartIndex + i * pcmHeader.AudioSampleSize;
            float rawSample;
            switch (pcmHeader.BitDepth)
            {
                case 8:
                    // Bug fix: 8-bit wav samples are UNSIGNED (0..255) with
                    // silence at 128. Re-center to -128..127 before
                    // normalizing; the original treated them as signed,
                    // leaving the whole waveform with a large DC offset.
                    rawSample = bytes[byteIndex] - 128;
                    break;
                case 16:
                    rawSample = BitConverter.ToInt16(bytes, byteIndex);
                    break;
                case 32:
                    // NOTE(review): assumes 32-bit *integer* PCM; a 32-bit
                    // IEEE-float wav (format code 3) would need
                    // BitConverter.ToSingle instead — TODO confirm inputs.
                    rawSample = BitConverter.ToInt32(bytes, byteIndex);
                    break;
                default: throw new ArgumentOutOfRangeException(nameof(pcmHeader.BitDepth), pcmHeader.BitDepth, "Supported values are: 8, 16, 32");
            }
            samples[i] = pcmHeader.NormalizeSample(rawSample); // normalize sample between [-1f, 1f]
        }
        return new PcmData(samples, pcmHeader.Channels, pcmHeader.SampleRate);
    }
    #endregion
}
用法
/// <summary>
/// Builds a playable AudioClip from complete wav (PCM) file bytes.
/// </summary>
/// <param name="bytes">Complete wav file contents, including the header.</param>
/// <param name="clipName">Name assigned to the created clip.</param>
/// <returns>The populated audio clip.</returns>
public static AudioClip FromPcmBytes(byte[] bytes, string clipName = "pcm")
{
    clipName.ThrowIfNullOrWhitespace(nameof(clipName));
    PcmData decoded = PcmData.FromBytes(bytes);
    AudioClip clip = AudioClip.Create(clipName,
                                      decoded.Length,
                                      decoded.Channels,
                                      decoded.SampleRate,
                                      false);
    clip.SetData(decoded.Value, 0);
    return clip;
}
请注意,AudioClip.Create
提供了 Read 和 SetPosition 回调的重载,以防您需要使用源 Stream
而不是字节块。