将 PCM 立体声文件编码为 M4A 立体声时出现问题

Problem while encoding PCM Stereo file to M4A stereo

我在从 PCM 编码 M4A 立体声文件时遇到问题。我的代码非常适用于 MONO 编码(意味着通道数 = 1),但对于立体声(通道数 = 2)它不起作用。 问题是编码器加倍了音频的持续时间。

I have logged the sample rate and channel, both are perfect i.e., 48.100kHz sample rate and 2 channel count.

下面是我的编码方式代码:

public boolean mediaMux(){
        try {

            File outputDirectory = new File(outFilePath);
            if (!outputDirectory.exists()){
                outputDirectory.mkdir();
            }
            File outputFile = new File(outputDirectory.getPath() , outFileName + ".m4a");
            if (outputFile.exists()) outputFile.delete();

            MediaMuxer mux = null;
            mux = new MediaMuxer(outputFile.getAbsolutePath(), MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);

            MediaFormat outputFormat = MediaFormat.createAudioFormat(COMPRESSED_AUDIO_FILE_MIME_TYPE,
                    sampleRate, channel);
            outputFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
            outputFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitrate);


            MediaCodec codec = MediaCodec.createEncoderByType(COMPRESSED_AUDIO_FILE_MIME_TYPE);
            codec.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
            codec.start();



            ByteBuffer[] codecInputBuffers = codec.getInputBuffers(); // Note: Array of buffers
            ByteBuffer[] codecOutputBuffers = codec.getOutputBuffers();

            MediaCodec.BufferInfo outBuffInfo = new MediaCodec.BufferInfo();

            byte[] tempBuffer = new byte[bufferSize];

            double presentationTimeUs = 0;
            int audioTrackIdx = 0;
            int totalBytesRead = 0;
            int percentComplete;

            do {

                int inputBufIndex = 0;
                while (inputBufIndex != -1 && data.size() > 0) {
                    try {
                        Log.w("Read Log","Reading Data");
                        inputBufIndex = codec.dequeueInputBuffer(CODEC_TIMEOUT_IN_MS);

                        if (inputBufIndex >= 0) {
                            ByteBuffer dstBuf = codecInputBuffers[inputBufIndex];
                            dstBuf.clear();

                            byte[] a = data.remove();

                            int bytesRead = a.length;
                            //Log.w("DestBuffer Limit",dstBuf.limit() +"");
                            if (!hasMoreData) { // -1 implies EOS

                                codec.queueInputBuffer(inputBufIndex, 0, 0, (long) presentationTimeUs, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
                            } else {
                                totalBytesRead += bytesRead;
                                dstBuf.put(a, 0, bytesRead);
                                codec.queueInputBuffer(inputBufIndex, 0, bytesRead, (long) presentationTimeUs, 0);
                                presentationTimeUs = 1000000l * (totalBytesRead / 2) / sampleRate;
                            }
                        }
                    }catch (NoSuchElementException ex){
                        ex.printStackTrace();
                    }
                }

                // Drain audio
                int outputBufIndex = 0;
                while (outputBufIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
                    Log.w("Write Log","Writing Data");
                    outputBufIndex = codec.dequeueOutputBuffer(outBuffInfo, CODEC_TIMEOUT_IN_MS);
                    if (outputBufIndex >= 0) {
                        ByteBuffer encodedData = codecOutputBuffers[outputBufIndex];
                        encodedData.position(outBuffInfo.offset);
                        encodedData.limit(outBuffInfo.offset + outBuffInfo.size);

                        if ((outBuffInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0 && outBuffInfo.size != 0) {
                            codec.releaseOutputBuffer(outputBufIndex, false);
                        } else {
                            mux.writeSampleData(audioTrackIdx, codecOutputBuffers[outputBufIndex], outBuffInfo);
                            codec.releaseOutputBuffer(outputBufIndex, false);
                        }
                    } else if (outputBufIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                        outputFormat = codec.getOutputFormat();
                        Log.v(LOGTAG, "Output format changed - " + outputFormat);
                        audioTrackIdx = mux.addTrack(outputFormat);
                        mux.start();
                    } else if (outputBufIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
                        Log.e(LOGTAG, "Output buffers changed during encode!");
                    } else if (outputBufIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
                        // NO OP
                    } else {
                        Log.e(LOGTAG, "Unknown return code from dequeueOutputBuffer - " + outputBufIndex);
                    }
                }
                //percentComplete = (int) Math.round(((float) totalBytesRead / (float) inputFile.length()) * 100.0);
                Log.v(LOGTAG, "Conversion % - " );
            } while (outBuffInfo.flags != MediaCodec.BUFFER_FLAG_END_OF_STREAM && !mStop);


            mux.stop();
            mux.release();
            Log.v(LOGTAG, "Compression done ...");
        } catch (FileNotFoundException e) {
            Log.e(LOGTAG, "File not found!", e);
            return false;
        } catch (IOException e) {
            Log.e(LOGTAG, "IO exception!", e);
            return false;
        } catch (Exception e){
            e.printStackTrace();
        }

        return true;
    }

更新 录制音频代码chuck

recorder = new AudioRecord(
                MediaRecorder.AudioSource.MIC, recordingSampleRate,
                recordingChannels,
                AudioFormat.ENCODING_PCM_16BIT, minBufferSize * 2);

if User selects mono, recordingChannels = AudioFormat.CHANNEL_IN_MONO and for stereo recording channels = AudioFormat.CHANNEL_IN_STEREO Thanks in advance

我觉得

presentationTimeUs = 1000000l * (totalBytesRead / 2) / sampleRate;

其实应该是

presentationTimeUs = 1000000l * (totalBytesRead / (2 * channel)) / sampleRate;

因为每个样本有 2 个字节每个通道,即每个立体声样本 4 个字节。