How to convert 2 mono files into a single stereo file in iOS?
I'm trying to convert two CAF files into a single file locally. The two CAF files are mono streams, and ideally I'd like them to end up as one stereo file so that I have the microphone on one channel and the speaker on the other.
I originally started down the AVAssetTrack and AVMutableCompositionTrack route, but I couldn't solve the mixing problem: my merged file was a single mono stream with the two files interleaved. So I switched to the AVAudioEngine route.
My understanding is that I can pass my two files in as player (input) nodes, attach them to a mixer, and get a stereo mix from an output node. The output file has a stereo layout, but no audio data appears to be written to it; I can open it in Audacity and see the stereo layout, but nothing else. Putting a dispatch semaphore signal around the installTapOnBus call didn't help much either. Any insight would be appreciated, since Core Audio has been a challenge to understand.
// obtain path of microphone and speaker files
NSString *micPath = [[NSBundle mainBundle] pathForResource:@"microphone" ofType:@"caf"];
NSString *spkPath = [[NSBundle mainBundle] pathForResource:@"speaker" ofType:@"caf"];
NSURL *micURL = [NSURL fileURLWithPath:micPath];
NSURL *spkURL = [NSURL fileURLWithPath:spkPath];
// create engine
AVAudioEngine *engine = [[AVAudioEngine alloc] init];
AVAudioFormat *stereoFormat = [[AVAudioFormat alloc] initStandardFormatWithSampleRate:16000 channels:2];
AVAudioMixerNode *mainMixer = engine.mainMixerNode;
// create audio files
AVAudioFile *audioFile1 = [[AVAudioFile alloc] initForReading:micURL error:nil];
AVAudioFile *audioFile2 = [[AVAudioFile alloc] initForReading:spkURL error:nil];
// create player input nodes
AVAudioPlayerNode *apNode1 = [[AVAudioPlayerNode alloc] init];
AVAudioPlayerNode *apNode2 = [[AVAudioPlayerNode alloc] init];
// attach nodes to the engine
[engine attachNode:apNode1];
[engine attachNode:apNode2];
// connect player nodes to engine's main mixer
stereoFormat = [mainMixer outputFormatForBus:0];
[engine connect:apNode1 to:mainMixer fromBus:0 toBus:0 format:audioFile1.processingFormat];
[engine connect:apNode2 to:mainMixer fromBus:0 toBus:1 format:audioFile2.processingFormat];
[engine connect:mainMixer to:engine.outputNode format:stereoFormat];
// start the engine
NSError *error = nil;
if (![engine startAndReturnError:&error]) {
    NSLog(@"Engine failed to start.");
}
// create output file
NSString *mergedAudioFile = [[micPath stringByDeletingLastPathComponent] stringByAppendingPathComponent:@"merged.caf"];
[[NSFileManager defaultManager] removeItemAtPath:mergedAudioFile error:&error];
NSURL *mergedURL = [NSURL fileURLWithPath:mergedAudioFile];
AVAudioFile *outputFile = [[AVAudioFile alloc] initForWriting:mergedURL settings:[engine.inputNode inputFormatForBus:0].settings error:&error];
// write from buffer to output file
[mainMixer installTapOnBus:0 bufferSize:4096 format:[mainMixer outputFormatForBus:0] block:^(AVAudioPCMBuffer *buffer, AVAudioTime *when) {
    NSError *error;
    BOOL success;
    NSLog(@"Writing");
    if ((outputFile.length < audioFile1.length) || (outputFile.length < audioFile2.length)) {
        success = [outputFile writeFromBuffer:buffer error:&error];
        NSCAssert(success, @"error writing buffer data to file, %@", [error localizedDescription]);
        if (error) {
            NSLog(@"Error: %@", error);
        }
    }
    else {
        [mainMixer removeTapOnBus:0];
        NSLog(@"Done writing");
    }
}];
Doing this with ExtAudioFile involves three files and three buffers: two mono files for reading and one stereo file for writing. In a loop, each mono file reads a small segment of audio into its mono output buffer, which is then copied into the correct "half" of the stereo buffer. Once the stereo buffer is full of data, it is written to the output file; this repeats until both mono files have been read completely (writing zeros if one mono file is longer than the other). See the full example below.
The most problematic area for me was getting the file formats right; Core Audio requires very specific formats. Fortunately, AVAudioFormat exists to simplify creating some of the common ones.
Every audio file reader/writer has two formats: one that represents the format the data is stored in (file_format), and one that represents the format the reader/writer reads and writes in (client_format). The reader/writer has format converters built in, in case the two differ.
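As a quick aside (not part of the ExtAudioFile example that follows), the AVAudioFile class used in the question draws the same distinction: its fileFormat property describes the on-disk format, while processingFormat describes the format its read/write calls work with. A minimal illustration, reusing the micURL from the question's code:
// fileFormat = how the data is stored on disk; processingFormat = what
// readIntoBuffer:/writeFromBuffer: expect. AVAudioFile converts between them for you.
AVAudioFile *micFile = [[AVAudioFile alloc] initForReading:micURL error:nil];
NSLog(@"stored as: %@", micFile.fileFormat);
NSLog(@"read/written as: %@", micFile.processingFormat);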
Here's an example:
-(void)soTest{
    // This is the format the readers will output.
    AVAudioFormat *monoClientFormat = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 sampleRate:44100.0 channels:1 interleaved:NO];
    // This is the format the writer will take as input.
    AVAudioFormat *stereoClientFormat = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 sampleRate:44100.0 channels:2 interleaved:NO];
    // This is the format that will be written to storage. It must be interleaved.
    AVAudioFormat *stereoFileFormat = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 sampleRate:44100.0 channels:2 interleaved:YES];

    NSURL *leftURL = [NSBundle.mainBundle URLForResource:@"left" withExtension:@"wav"];
    NSURL *rightURL = [NSBundle.mainBundle URLForResource:@"right" withExtension:@"wav"];
    NSString *stereoPath = [documentsDir() stringByAppendingPathComponent:@"stereo.wav"];
    NSURL *stereoURL = [NSURL fileURLWithPath:stereoPath];

    ExtAudioFileRef leftReader;
    ExtAudioFileRef rightReader;
    ExtAudioFileRef stereoWriter;

    OSStatus status = 0;

    // Create the readers and the writer.
    status = ExtAudioFileOpenURL((__bridge CFURLRef)leftURL, &leftReader);
    if (status) printf("error %i", (int)status); // Every ExtAudioFile function returns a non-zero status on error. Only one is checked here to keep the example short, but you should check all of them.
    ExtAudioFileOpenURL((__bridge CFURLRef)rightURL, &rightReader);
    // Here the file format is set to stereo interleaved.
    ExtAudioFileCreateWithURL((__bridge CFURLRef)stereoURL, kAudioFileCAFType, stereoFileFormat.streamDescription, nil, kAudioFileFlags_EraseFile, &stereoWriter);

    // Set the client format for the readers and the writer.
    ExtAudioFileSetProperty(leftReader, kExtAudioFileProperty_ClientDataFormat, sizeof(AudioStreamBasicDescription), monoClientFormat.streamDescription);
    ExtAudioFileSetProperty(rightReader, kExtAudioFileProperty_ClientDataFormat, sizeof(AudioStreamBasicDescription), monoClientFormat.streamDescription);
    ExtAudioFileSetProperty(stereoWriter, kExtAudioFileProperty_ClientDataFormat, sizeof(AudioStreamBasicDescription), stereoClientFormat.streamDescription);

    int framesPerRead = 4096;
    int bufferSize = framesPerRead * sizeof(SInt16);

    // Allocate memory for the buffers.
    AudioBufferList *leftBuffer = createBufferList(bufferSize, 1);
    AudioBufferList *rightBuffer = createBufferList(bufferSize, 1);
    AudioBufferList *stereoBuffer = createBufferList(bufferSize, 2);

    // ExtAudioFileRead takes an ioNumberFrames argument: on input it's the number of frames you want, on output it's the number of frames you got. 0 means you're done.
    UInt32 leftFramesIO = framesPerRead;
    UInt32 rightFramesIO = framesPerRead;
    while (leftFramesIO || rightFramesIO) {
        if (leftFramesIO) {
            // Ask for a full buffer; on return leftFramesIO holds the number of frames actually read.
            leftFramesIO = framesPerRead;
            leftBuffer->mBuffers[0].mDataByteSize = framesPerRead * sizeof(SInt16);
            ExtAudioFileRead(leftReader, &leftFramesIO, leftBuffer);
            // If the read came up short (end of file), zero the unfilled tail so stale samples aren't written out.
            int framesRemaining = framesPerRead - leftFramesIO;
            if (framesRemaining) {
                memset(((SInt16 *)leftBuffer->mBuffers[0].mData) + leftFramesIO, 0, sizeof(SInt16) * framesRemaining);
            }
        }
        else {
            // Nothing left to read; this channel contributes silence.
            memset(leftBuffer->mBuffers[0].mData, 0, sizeof(SInt16) * framesPerRead);
        }
        if (rightFramesIO) {
            rightFramesIO = framesPerRead;
            rightBuffer->mBuffers[0].mDataByteSize = framesPerRead * sizeof(SInt16);
            ExtAudioFileRead(rightReader, &rightFramesIO, rightBuffer);
            int framesRemaining = framesPerRead - rightFramesIO;
            if (framesRemaining) {
                memset(((SInt16 *)rightBuffer->mBuffers[0].mData) + rightFramesIO, 0, sizeof(SInt16) * framesRemaining);
            }
        }
        else {
            memset(rightBuffer->mBuffers[0].mData, 0, sizeof(SInt16) * framesPerRead);
        }

        UInt32 stereoFrames = MAX(leftFramesIO, rightFramesIO);

        // Copy left into the first stereo channel and right into the second.
        memcpy(stereoBuffer->mBuffers[0].mData, leftBuffer->mBuffers[0].mData, sizeof(SInt16) * stereoFrames);
        memcpy(stereoBuffer->mBuffers[1].mData, rightBuffer->mBuffers[0].mData, sizeof(SInt16) * stereoFrames);

        // Write to file.
        stereoBuffer->mBuffers[0].mDataByteSize = stereoFrames * sizeof(SInt16);
        stereoBuffer->mBuffers[1].mDataByteSize = stereoFrames * sizeof(SInt16);
        ExtAudioFileWrite(stereoWriter, stereoFrames, stereoBuffer);
    }

    ExtAudioFileDispose(leftReader);
    ExtAudioFileDispose(rightReader);
    ExtAudioFileDispose(stereoWriter);

    freeBufferList(leftBuffer);
    freeBufferList(rightBuffer);
    freeBufferList(stereoBuffer);
}
AudioBufferList *createBufferList(int bufferSize, int numberBuffers){
    assert(bufferSize > 0 && numberBuffers > 0);
    int bufferlistByteSize = sizeof(AudioBufferList);
    bufferlistByteSize += sizeof(AudioBuffer) * (numberBuffers - 1);
    AudioBufferList *bufferList = malloc(bufferlistByteSize);
    bufferList->mNumberBuffers = numberBuffers;
    for (int i = 0; i < numberBuffers; i++) {
        bufferList->mBuffers[i].mNumberChannels = 1;
        bufferList->mBuffers[i].mData = malloc(bufferSize);
    }
    return bufferList;
}
void freeBufferList(AudioBufferList *bufferList){
    for (int i = 0; i < bufferList->mNumberBuffers; i++) {
        free(bufferList->mBuffers[i].mData);
    }
    free(bufferList);
}
NSString *documentsDir(){
    static NSString *path = NULL;
    if (!path) {
        path = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES).firstObject;
    }
    return path;
}
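To sanity-check the result, you can read the merged file back and confirm it really contains two channels of audio. This is just a quick verification sketch, not part of the original answer; it assumes the stereo file was written to the documentsDir() path used above:
// Hypothetical check: open the file that soTest wrote and inspect its on-disk format.
NSURL *checkURL = [NSURL fileURLWithPath:[documentsDir() stringByAppendingPathComponent:@"stereo.wav"]];
NSError *error = nil;
AVAudioFile *check = [[AVAudioFile alloc] initForReading:checkURL error:&error];
if (check) {
    // Expect channelCount == 2 and a non-zero frame count.
    NSLog(@"channels: %u, frames: %lld", check.fileFormat.channelCount, check.length);
} else {
    NSLog(@"Could not open merged file: %@", error);
}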