将 PCM 立体声文件编码为 M4A 立体声时出现问题
Problem while encoding PCM Stereo file to M4A stereo
我在从 PCM 编码 M4A 立体声文件时遇到问题。我的代码非常适用于 MONO 编码(意味着通道数 = 1),但对于立体声(通道数 = 2)它不起作用。
问题是编码器加倍了音频的持续时间。
I have logged the sample rate and channel, both are perfect i.e., 48.100kHz sample rate and 2 channel count.
下面是我的编码方式代码:
public boolean mediaMux(){
try {
File outputDirectory = new File(outFilePath);
if (!outputDirectory.exists()){
outputDirectory.mkdir();
}
File outputFile = new File(outputDirectory.getPath() , outFileName + ".m4a");
if (outputFile.exists()) outputFile.delete();
MediaMuxer mux = null;
mux = new MediaMuxer(outputFile.getAbsolutePath(), MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
MediaFormat outputFormat = MediaFormat.createAudioFormat(COMPRESSED_AUDIO_FILE_MIME_TYPE,
sampleRate, channel);
outputFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
outputFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitrate);
MediaCodec codec = MediaCodec.createEncoderByType(COMPRESSED_AUDIO_FILE_MIME_TYPE);
codec.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
codec.start();
ByteBuffer[] codecInputBuffers = codec.getInputBuffers(); // Note: Array of buffers
ByteBuffer[] codecOutputBuffers = codec.getOutputBuffers();
MediaCodec.BufferInfo outBuffInfo = new MediaCodec.BufferInfo();
byte[] tempBuffer = new byte[bufferSize];
double presentationTimeUs = 0;
int audioTrackIdx = 0;
int totalBytesRead = 0;
int percentComplete;
do {
int inputBufIndex = 0;
while (inputBufIndex != -1 && data.size() > 0) {
try {
Log.w("Read Log","Reading Data");
inputBufIndex = codec.dequeueInputBuffer(CODEC_TIMEOUT_IN_MS);
if (inputBufIndex >= 0) {
ByteBuffer dstBuf = codecInputBuffers[inputBufIndex];
dstBuf.clear();
byte[] a = data.remove();
int bytesRead = a.length;
//Log.w("DestBuffer Limit",dstBuf.limit() +"");
if (!hasMoreData) { // -1 implies EOS
codec.queueInputBuffer(inputBufIndex, 0, 0, (long) presentationTimeUs, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
} else {
totalBytesRead += bytesRead;
dstBuf.put(a, 0, bytesRead);
codec.queueInputBuffer(inputBufIndex, 0, bytesRead, (long) presentationTimeUs, 0);
presentationTimeUs = 1000000l * (totalBytesRead / 2) / sampleRate;
}
}
}catch (NoSuchElementException ex){
ex.printStackTrace();
}
}
// Drain audio
int outputBufIndex = 0;
while (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
Log.w("Write Log","Writing Data");
outputBufIndex = codec.dequeueOutputBuffer(outBuffInfo, CODEC_TIMEOUT_IN_MS);
if (outputBufIndex >= 0) {
ByteBuffer encodedData = codecOutputBuffers[outputBufIndex];
encodedData.position(outBuffInfo.offset);
encodedData.limit(outBuffInfo.offset + outBuffInfo.size);
if ((outBuffInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0 && outBuffInfo.size != 0) {
codec.releaseOutputBuffer(outputBufIndex, false);
} else {
mux.writeSampleData(audioTrackIdx, codecOutputBuffers[outputBufIndex], outBuffInfo);
codec.releaseOutputBuffer(outputBufIndex, false);
}
} else if (outputBufIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
outputFormat = codec.getOutputFormat();
Log.v(LOGTAG, "Output format changed - " + outputFormat);
audioTrackIdx = mux.addTrack(outputFormat);
mux.start();
} else if (outputBufIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
Log.e(LOGTAG, "Output buffers changed during encode!");
} else if (outputBufIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
// NO OP
} else {
Log.e(LOGTAG, "Unknown return code from dequeueOutputBuffer - " + outputBufIndex);
}
}
//percentComplete = (int) Math.round(((float) totalBytesRead / (float) inputFile.length()) * 100.0);
Log.v(LOGTAG, "Conversion % - " );
} while (outBuffInfo.flags != MediaCodec.BUFFER_FLAG_END_OF_STREAM && !mStop);
mux.stop();
mux.release();
Log.v(LOGTAG, "Compression done ...");
} catch (FileNotFoundException e) {
Log.e(LOGTAG, "File not found!", e);
return false;
} catch (IOException e) {
Log.e(LOGTAG, "IO exception!", e);
return false;
} catch (Exception e){
e.printStackTrace();
}
return true;
}
更新
录制音频代码chuck
recorder = new AudioRecord(
MediaRecorder.AudioSource.MIC, recordingSampleRate,
recordingChannels,
AudioFormat.ENCODING_PCM_16BIT, minBufferSize * 2);
if User selects mono, recordingChannels = AudioFormat.CHANNEL_IN_MONO
and for stereo recording channels = AudioFormat.CHANNEL_IN_STEREO
Thanks in advance
我觉得
presentationTimeUs = 1000000l * (totalBytesRead / 2) / sampleRate;
其实应该是
presentationTimeUs = 1000000l * (totalBytesRead / (2 * channel)) / sampleRate;
因为每个样本有 2 个字节每个通道,即每个立体声样本 4 个字节。
我在从 PCM 编码 M4A 立体声文件时遇到问题。我的代码非常适用于 MONO 编码(意味着通道数 = 1),但对于立体声(通道数 = 2)它不起作用。 问题是编码器加倍了音频的持续时间。
I have logged the sample rate and channel, both are perfect i.e., 48.100kHz sample rate and 2 channel count.
下面是我的编码方式代码:
public boolean mediaMux(){
try {
File outputDirectory = new File(outFilePath);
if (!outputDirectory.exists()){
outputDirectory.mkdir();
}
File outputFile = new File(outputDirectory.getPath() , outFileName + ".m4a");
if (outputFile.exists()) outputFile.delete();
MediaMuxer mux = null;
mux = new MediaMuxer(outputFile.getAbsolutePath(), MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
MediaFormat outputFormat = MediaFormat.createAudioFormat(COMPRESSED_AUDIO_FILE_MIME_TYPE,
sampleRate, channel);
outputFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
outputFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitrate);
MediaCodec codec = MediaCodec.createEncoderByType(COMPRESSED_AUDIO_FILE_MIME_TYPE);
codec.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
codec.start();
ByteBuffer[] codecInputBuffers = codec.getInputBuffers(); // Note: Array of buffers
ByteBuffer[] codecOutputBuffers = codec.getOutputBuffers();
MediaCodec.BufferInfo outBuffInfo = new MediaCodec.BufferInfo();
byte[] tempBuffer = new byte[bufferSize];
double presentationTimeUs = 0;
int audioTrackIdx = 0;
int totalBytesRead = 0;
int percentComplete;
do {
int inputBufIndex = 0;
while (inputBufIndex != -1 && data.size() > 0) {
try {
Log.w("Read Log","Reading Data");
inputBufIndex = codec.dequeueInputBuffer(CODEC_TIMEOUT_IN_MS);
if (inputBufIndex >= 0) {
ByteBuffer dstBuf = codecInputBuffers[inputBufIndex];
dstBuf.clear();
byte[] a = data.remove();
int bytesRead = a.length;
//Log.w("DestBuffer Limit",dstBuf.limit() +"");
if (!hasMoreData) { // -1 implies EOS
codec.queueInputBuffer(inputBufIndex, 0, 0, (long) presentationTimeUs, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
} else {
totalBytesRead += bytesRead;
dstBuf.put(a, 0, bytesRead);
codec.queueInputBuffer(inputBufIndex, 0, bytesRead, (long) presentationTimeUs, 0);
presentationTimeUs = 1000000l * (totalBytesRead / 2) / sampleRate;
}
}
}catch (NoSuchElementException ex){
ex.printStackTrace();
}
}
// Drain audio
int outputBufIndex = 0;
while (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
Log.w("Write Log","Writing Data");
outputBufIndex = codec.dequeueOutputBuffer(outBuffInfo, CODEC_TIMEOUT_IN_MS);
if (outputBufIndex >= 0) {
ByteBuffer encodedData = codecOutputBuffers[outputBufIndex];
encodedData.position(outBuffInfo.offset);
encodedData.limit(outBuffInfo.offset + outBuffInfo.size);
if ((outBuffInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0 && outBuffInfo.size != 0) {
codec.releaseOutputBuffer(outputBufIndex, false);
} else {
mux.writeSampleData(audioTrackIdx, codecOutputBuffers[outputBufIndex], outBuffInfo);
codec.releaseOutputBuffer(outputBufIndex, false);
}
} else if (outputBufIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
outputFormat = codec.getOutputFormat();
Log.v(LOGTAG, "Output format changed - " + outputFormat);
audioTrackIdx = mux.addTrack(outputFormat);
mux.start();
} else if (outputBufIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
Log.e(LOGTAG, "Output buffers changed during encode!");
} else if (outputBufIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
// NO OP
} else {
Log.e(LOGTAG, "Unknown return code from dequeueOutputBuffer - " + outputBufIndex);
}
}
//percentComplete = (int) Math.round(((float) totalBytesRead / (float) inputFile.length()) * 100.0);
Log.v(LOGTAG, "Conversion % - " );
} while (outBuffInfo.flags != MediaCodec.BUFFER_FLAG_END_OF_STREAM && !mStop);
mux.stop();
mux.release();
Log.v(LOGTAG, "Compression done ...");
} catch (FileNotFoundException e) {
Log.e(LOGTAG, "File not found!", e);
return false;
} catch (IOException e) {
Log.e(LOGTAG, "IO exception!", e);
return false;
} catch (Exception e){
e.printStackTrace();
}
return true;
}
更新 录制音频代码chuck
recorder = new AudioRecord(
MediaRecorder.AudioSource.MIC, recordingSampleRate,
recordingChannels,
AudioFormat.ENCODING_PCM_16BIT, minBufferSize * 2);
if User selects mono,
recordingChannels = AudioFormat.CHANNEL_IN_MONO
and for stereorecording channels = AudioFormat.CHANNEL_IN_STEREO
Thanks in advance
我觉得
presentationTimeUs = 1000000l * (totalBytesRead / 2) / sampleRate;
其实应该是
presentationTimeUs = 1000000l * (totalBytesRead / (2 * channel)) / sampleRate;
因为每个样本有 2 个字节每个通道,即每个立体声样本 4 个字节。