如何在 iOS 中编码 self-delimited 作品
How to encode self-delimited opus in iOS
我可以使用 AVAudioRecorder 录制作品,如下所示:
let opusRecordingSettings = [AVFormatIDKey: kAudioFormatOpus,
AVSampleRateKey: 16000.0,
AVNumberOfChannelsKey: 1] as [String: Any]
do {
try audioRecordingSession.setCategory(.playAndRecord, mode: .default)
try audioRecordingSession.setActive(true)
audioRecorder = try AVAudioRecorder(url: fileUrl(), settings: opusRecordingSettings)
audioRecorder.delegate = self
audioRecorder.prepareToRecord()
audioRecorder.record()
}
catch _ { }
// ... ... ...
现在我需要对作品进行如下编码:
| header | encoded opus data | header | encoded opus data | ... | ... |
每个header表示opus数据的大小(以字节为单位)
我猜的(如果我错了请纠正我):
AVAudioRecorder 将 opus 数据包连续并排放置(因为由
默认的 opus 数据包是 non-delimited) 在 .opus
文件中
录制完成
默认帧持续时间为 20 毫秒,因此 frameSize = 20 * 16 = 320
(因为我使用的是 16 kHz 采样率)
每个数据包只包含一帧,因此 packetSize == frameSize
我知道如果我能以某种方式遍历 opus 数据包,那么我就可以计算每个数据包的大小并将其附加为 header (dataChunk = header + encodedOpusData
)
我从以下方面获得了自定界作品的概念:ietf.org/Self-Delimiting-Framing 但不知道以下内容:
- 如何创建带分隔符的 opus 音频文件(由 header 分隔,其中 header 表示 opus 数据的大小(以字节为单位))
- 如何以及何时追加header(录制时或录制完成后?)
我能够使用 AVAudioRecorder
和 AVAssetReader
对 self-delimited 作品进行编码(完整解决方案:hovermind.com/ios/libopus-kit)
FYI, this is probably not elegant solution but other methods i.e.
tapping on InputNode of AudioEngine or using AudioQueue did not work
for me. If anyone finds elegant solution, can suggest an edit of this
answer
这是总结(代码如下):
使用AVAudioRecorder
录制线性PCM并保存录制的音频文件
即 temp.wav
使用AVAssetReader
读取录制的音频文件并提取PCM(不要使用Data(contentsOf: ...)
加载音频文件,音频文件中有metadata/header会引起一些噪音)
将 PCM 数据拆分为 x
字节块并对其进行循环(在我的例子中 x = 640
)
使用 OpusKit pod
将 x
字节 PCM 块编码为作品
计算编码作品的大小并附加为header
代码(Swift5,iOS13,Xcode11.3)
import UIKit
import MapKit
import MessageKit
import AVFoundation
import OpusKit
import os
class BasicChatViewController: ChatViewController {
override func viewDidLoad() {
super.viewDidLoad()
Logger.logIt(#function)
Logger.logIt("Initilizing opus lib kit")
OpusKit.shared.initialize(sampleRate: Opus.SAMPLE_RATE_DEFAULT,
numberOfChannels: Opus.CHANNEL_COUNT_DEFAULT,
packetSize: Opus.OPUS_ENCODER_BUFFER_SIZE,
encodeBlockSize: Opus.FRAME_SIZE_DEFAULT)
// configure record button here
}
//
// MARK - recording
//
var isRecording = false
var avAudioPlayer: AVAudioPlayer!
var audioRecorder: AVAudioRecorder!
@objc
func onTapRecordButton(sender: UIButton){
Logger.logIt(#function)
toggleRecording()
}
private func toggleRecording(){
Logger.logIt(#function)
Logger.logIt("isRecording: \(isRecording)")
if isRecording {
isRecording = false
stopRecording()
} else {
isRecording = true
checkPermissionAndStartRecording()
}
}
//
// END - recording
//
}
//
// Audio recording related extensions
//
extension BasicChatViewController: AVAudioRecorderDelegate {
private func checkPermissionAndStartRecording() {
Logger.logIt(#function)
AudioUtil.checkRecordingPermission() { isPermissionGranted in
Logger.logIt("isPermissionGranted: \(isPermissionGranted)")
if isPermissionGranted {
self.recordUsingAVAudioRecorder()
} else {
Logger.logIt("don't have permission to record")
}
}
}
private func setupRecorder() {
Logger.logIt(#function)
let tempAudioFileUrl = AudioUtil.TEMP_WAV_FILE
Logger.logIt("tempAudioFileUrl: \(tempAudioFileUrl)")
let linearPcmRecordingSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
Logger.logIt("RecordingSettings: \(linearPcmRecordingSettings)")
do {
startRecordingSession()
audioRecorder = try AVAudioRecorder(url: tempAudioFileUrl, settings: linearPcmRecordingSettings)
audioRecorder.delegate = self
//audioRecorder.isMeteringEnabled = true
audioRecorder.prepareToRecord()
}
catch {
Logger.logIt("\(error.localizedDescription)")
}
}
private func startRecording() {
Logger.logIt(#function)
if audioRecorder == nil {
setupRecorder()
}
audioRecorder.record()
}
private func stopRecording() {
Logger.logIt(#function)
guard audioRecorder != nil else {
return
}
audioRecorder.stop()
}
private func deleteTempAudioFile(){
Logger.logIt(#function)
guard audioRecorder != nil else {
return
}
if audioRecorder.isRecording {
return
}
// delete temporary audio file
let recordingDeleted = audioRecorder.deleteRecording()
if recordingDeleted {
Logger.logIt("temp (recorded) audio file deleted")
} else {
Logger.logIt("failed to delete temp (recorded) audio file")
}
}
private func startRecordingSession(){
Logger.logIt(#function)
do {
try AVAudioSession.sharedInstance().setCategory(.record, mode: .spokenAudio)
try AVAudioSession.sharedInstance().setActive(true)
} catch {
Logger.logIt("Failed to deactivate recording session")
}
}
private func stopRecordingSession(){
Logger.logIt(#function)
do {
try AVAudioSession.sharedInstance().setActive(false)
} catch {
Logger.logIt("Failed to deactivate recording session")
}
}
private func recordUsingAVAudioRecorder(){
Logger.logIt(#function)
setupRecorder()
startRecording()
}
private func encodeRecordedAudio(){
Logger.logIt(#function)
let pcmData = AudioUtil.extractPcmOnly(from: AudioUtil.TEMP_WAV_FILE)
if pcmData.count > 1 {
Logger.logIt("encoding pcm to self-delimited opus")
let encodedOpusData = AudioUtil.encodeToSelfDelimitedOpus(pcmData: pcmData, splitSize: PCM.SPLIT_CHUNK_SIZE_DEFAULT)
Logger.logIt("encoded opus: \(encodedOpusData)")
Logger.logIt("save encoded opus")
AudioUtil.saveAudio(to: AudioUtil.ENCODED_OPUS_FILE, audioData: encodedOpusData)
} else {
Logger.logIt("no data to encode")
}
deleteTempAudioFile()
stopRecordingSession()
}
func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
Logger.logIt(#function)
let finishedSuccessFully = flag
if finishedSuccessFully {
Logger.logIt("finished recording successfully")
encodeRecordedAudio()
} else {
Logger.logIt("recording failed - audio encoding error")
}
}
}
AudioUtil
import Foundation
import AVFoundation
import OpusKit
//
// Opus audio info.
//
public class OpusAudioInfo {
public static let `default` = OpusAudioInfo()
var channels: opus_int32
var headerSize: Int // bytes
var packetSize: opus_int32
var sampleRate: opus_int32 {
didSet {
packetSize = Int32(Opus.FRAME_DURATION_DEFAULT) * (sampleRate / 1000)
}
}
public init(sampleRate: opus_int32 = Opus.SAMPLE_RATE_16_KHZ,
channels: opus_int32 = Opus.CHANNEL_COUNT_DEFAULT,
headerSize: Int = 1) {
self.sampleRate = sampleRate
self.packetSize = Int32(Opus.FRAME_DURATION_DEFAULT) * (sampleRate / 1000)
self.channels = channels
self.headerSize = headerSize
}
}
//
// RAW PCM info.
//
public class PCMInfo {
public static let `default` = PCMInfo()
var sampleRate:Int32
var channels:Int16
var bitDepth:Int16
public init(sampleRate:Int32 = PCM.SAMPLE_RATE_16_KHZ,
channels:Int16 = Int16(PCM.MONO),
bitDepth:Int16 = Int16(PCM.BIT_DEPTH_DEFAULT)) {
self.sampleRate = sampleRate
self.channels = channels
self.bitDepth = bitDepth
}
}
//
// Utility class for audio related operations
//
public class AudioUtil {
private init(){}
//
// Default audio files url in document directory
//
public static let RAW_PCM_FILE = FileUtil.createFileUrl(for: "pcm.raw", in: FileUtil.DOCUMENTS_DIR)
public static let TEMP_WAV_FILE = FileUtil.createFileUrl(for: "wav.wav", in: FileUtil.DOCUMENTS_DIR)
public static let ENCODED_OPUS_FILE = FileUtil.createFileUrl(for: "encoded_opus_ios.opus", in: FileUtil.DOCUMENTS_DIR)
public static let DECODED_WAV_WITH_HEADER_FILE = FileUtil.createFileUrl(for: "decoded_wav_with_header.wav", in: FileUtil.DOCUMENTS_DIR)
/**
Creates fake wav header to play Linear PCM
AVAudioPlayer by default can not play Linear PCM, therefore we need to create a fake wav header
- parameter sampleRate: samples per second
- parameter channelCount: number of channels
- parameter bitDepth: bits per sample
- parameter pcmDataSizeInBytes: PCM data size in bytes
- returns : Data - wav header data
*/
public static func createWavHeader(sampleRate: Int32, channelCount: Int16, bitDepth: Int16, pcmDataSizeInBytes dataSize: Int32) -> Data {
/*
WAV header details: http://www.topherlee.com/software/pcm-tut-wavformat.html
Positions Sample Value Description
1 - 4 "RIFF" Marks the file as a riff file. Characters are each 1 byte long.
5 - 8 File size (integer) Size of the overall file - 8 bytes, in bytes (32-bit integer). Typically, you'd fill this in after creation.
9 -12 "WAVE" File Type Header. For our purposes, it always equals "WAVE".
13-16 "fmt " Format chunk marker. Includes trailing null
17-20 16 Length of format data as listed above
21-22 1 Type of format (1 is PCM) - 2 byte integer
23-24 2 Number of Channels - 2 byte integer
25-28 44100 Sample Rate - 32 byte integer. Common values are 44100 (CD), 48000 (DAT). Sample Rate = Number of Samples per second, or Hertz.
29-32 176400 (Sample Rate * BitsPerSample * Channels) / 8.
33-34 4 (BitsPerSample * Channels) / 8.1 - 8 bit mono2 - 8 bit stereo/16 bit mono4 - 16 bit stereo
35-36 16 Bits per sample
37-40 "data" "data" chunk header. Marks the beginning of the data section.
41-44 File size (data) Size of the data section.
Sample values are given above for a 16-bit stereo source.
An example in swift :
let WAV_HEADER: [Any] = [
"R","I","F","F",
0xFF,0xFF,0xFF,0x7F, // file size
"W","A","V","E",
"f","m","t"," ", // Chunk ID
0x10,0x00,0x00,0x00, // Chunk Size - length of format above
0x01,0x00, // Format Code: 1 is PCM, 3 is IEEE float
0x01,0x00, // Number of Channels (e.g. 2)
0x80,0xBB,0x00,0x00, // Samples per Second, Sample Rate (e.g. 48000)
0x00,0xDC,0x05,0x00, // Bytes per second, byte rate = sample rate * bits per sample * channels / 8
0x08,0x00, // Bytes per Sample Frame, block align = bits per sample * channels / 8
0x20,0x00, // bits per sample (16 for PCM, 32 for float)
"d","a","t","a",
0xFF,0xFF,0xFF,0x7F // size of data section
]
*/
let WAV_HEADER_SIZE:Int32 = 44
let FORMAT_CODE_PCM:Int16 = 1
let fileSize:Int32 = dataSize + WAV_HEADER_SIZE
let sampleRate:Int32 = sampleRate
let subChunkSize:Int32 = 16
let format:Int16 = FORMAT_CODE_PCM
let channels:Int16 = channelCount
let bitsPerSample:Int16 = bitDepth
let byteRate:Int32 = sampleRate * Int32(channels * bitsPerSample / 8)
let blockAlign: Int16 = (bitsPerSample * channels) / 8
let header = NSMutableData()
header.append([UInt8]("RIFF".utf8), length: 4)
header.append(byteArray(from: fileSize), length: 4)
//WAVE
header.append([UInt8]("WAVE".utf8), length: 4)
//FMT
header.append([UInt8]("fmt ".utf8), length: 4)
header.append(byteArray(from: subChunkSize), length: 4)
header.append(byteArray(from: format), length: 2)
header.append(byteArray(from: channels), length: 2)
header.append(byteArray(from: sampleRate), length: 4)
header.append(byteArray(from: byteRate), length: 4)
header.append(byteArray(from: blockAlign), length: 2)
header.append(byteArray(from: bitsPerSample), length: 2)
header.append([UInt8]("data".utf8), length: 4)
header.append(byteArray(from: dataSize), length: 4)
return header as Data
}
/**
Creates default wav header based on default PCM constants
- parameter dataSize: size of PCM data in bytes
- returns : Data - wav header data
*/
public static func createDefaultWavHeader(dataSize: Int32) -> Data {
return createWavHeader(sampleRate: PCM.SAMPLE_RATE_DEFAULT,
channelCount: Int16(PCM.CHANNEL_COUNT_DEFAULT),
bitDepth: Int16(PCM.BIT_DEPTH_DEFAULT),
pcmDataSizeInBytes: dataSize)
}
/**
Converts given value to byte array
- parameter value:FixedWidthInteger type
- returns: array of bytes
*/
public static func byteArray<T>(from value: T) -> [UInt8] where T: FixedWidthInteger {
// https://whosebug.com/a/56964191/4802664
// .littleEndian is required
return withUnsafeBytes(of: value.littleEndian) { Array([=11=]) }
}
/**
Generates wav audio data buffer from given header and raw PCM
- parameter wavHeader: a fake RIFF WAV header (appended to PCM)
- parameter pcmData: Linear PCM data
- returns: Data
*/
public static func generateWav(header wavHeader: Data, pcmData: Data) -> Data {
var wavData = Data()
wavData.append(wavHeader)
wavData.append(pcmData)
return wavData
}
/**
Checks permission for recording and invokes callback with flag
- parameter callback: clouser to invoked after checking permission
*/
public static func checkRecordingPermission(onPermissionChecked callback: @escaping(_ isPermissionGranted: Bool) -> Void) {
Logger.logIt(#function)
var isPermissionGranted = false
switch AVAudioSession.sharedInstance().recordPermission {
case .granted:
isPermissionGranted = true
break
case .denied:
isPermissionGranted = false
break
case .undetermined:
AVAudioSession.sharedInstance().requestRecordPermission({ (allowed) in
if allowed {
isPermissionGranted = true
} else {
isPermissionGranted = false
}
})
break
default:
isPermissionGranted = false
break
}
callback(isPermissionGranted)
}
/**
Saves given audio data to specified url
- parameter fileUri: file url where audio data will be saved
*/
public static func saveAudio(to fileUri: URL, audioData: Data) {
Logger.logIt(#function)
Logger.logIt("save to: \(fileUri)")
do {
try audioData.write(to: fileUri)
} catch {
Logger.logIt(error.localizedDescription)
}
}
/**
Encodes given PCM data into self delimited opus (`|header|data|header|data|...|`) using libopus
- parameter pcmData: Linear PCM data buffer (loaded from file or coming from AudioEngine tapping)
- parameter splitSize: size of chunk to split the given pcmData
- returns : encoded data (encoded as: `|header|data|header|data|...|`)
*/
public static func encodeToSelfDelimitedOpus(pcmData: Data, splitSize: Int) -> Data {
Logger.logIt(#function)
var encodedData = Data()
var readIndex = 0
var readStart = 0
var readEnd = 0
var pcmChunk: Data
var readCount = 1
let splitCount = (pcmData.count / splitSize)
Logger.logIt("split count: \(splitCount)")
var header: Data
while readCount <= splitCount {
readStart = readIndex
readEnd = readStart + splitSize
//
// to prevent index out of bound exception
// check readEnd index
//
if(readEnd >= pcmData.count){
readEnd = readStart + (pcmData.count - readIndex)
}
pcmChunk = pcmData[readStart..<readEnd]
//print("chunk: \(pcmChunk)")
if let encodedChunk = OpusKit.shared.encodeData(pcmChunk) {
//
// header is exactly one byte
// header indicates size of the encoded opus data
//
header = Data(from: encodedChunk.count)[0..<1]
//Logger.logIt("header: \([UInt8](header))")
encodedData.append(header)
encodedData.append(encodedChunk)
} else {
print("failed to encode at index: \(readStart)")
}
readIndex += splitSize
readCount += 1
}
//
// remaining data
//
//Logger.logIt("append remaining data")
pcmChunk = pcmData[readIndex..<pcmData.count]
if let encodedChunk = OpusKit.shared.encodeData(pcmChunk) {
header = Data(from: encodedChunk.count)[0..<1]
//Logger.logIt("header: \([UInt8](header))")
encodedData.append(header)
encodedData.append(encodedChunk)
} else {
print("failed to encode at index: \(readIndex)")
}
return encodedData
}
/**
Decodes given self delimited opus data to PCM
Custom opus is encoded as `|header|data|header|data|...|`
Loops over the data, reads data size from header and takes slice/chunk of given opus data based on data size from header. Then each chunk is decode using libopus
- parameter opusData: Encoded opus data buffer
- parameter headerSizeInBytes: size of header in bytes (default is 1)
- returns : decoded pcm data
*/
public static func decodeSelfDelimitedOpusToPcm(opusData: Data, headerSizeInBytes headerSize: Int = 1) -> Data {
var decodedData: Data = Data()
var headerData: Data
var opusChunkSizeFromHeader = 0
var readIndex = 0
var readStart = 0
var readEnd = 0
var extractedOpusChunk: Data
while readIndex < opusData.count {
headerData = opusData[readIndex..<(readIndex + headerSize)]
//Logger.logIt("headerData: \([UInt8](headerData))")
opusChunkSizeFromHeader = Int([UInt8](headerData)[0])
readStart = readIndex + headerSize
readEnd = readStart + opusChunkSizeFromHeader
extractedOpusChunk = opusData[readStart..<readEnd]
//Logger.logIt("extracted: \(extractedOpusChunk)")
if let decodedDataChunk = OpusKit.shared.decodeData(extractedOpusChunk) {
//Logger.logIt("decodedDataChunk: \(decodedDataChunk)")
decodedData.append(decodedDataChunk)
} else {
print("failed to decode at index: \(readStart)")
}
readIndex += (headerSize + opusChunkSizeFromHeader)
}
return decodedData
}
/**
Extracts PCM only from a audio file using AVAssetReader
Normally system will append some meta data while saving audio file with extension, and therefore we need to use AVAssetReader to get PCM only
- parameter fileUrl : audio file url
- returns: PCM Data
*/
public static func extractPcmOnly(from fileUrl: URL) -> Data {
let pcmOnly = NSMutableData()
do {
let asset = AVAsset(url: fileUrl)
let assetReader = try AVAssetReader(asset: asset)
let track = asset.tracks(withMediaType: AVMediaType.audio).first
let outputSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
let trackOutput = AVAssetReaderTrackOutput(track: track!, outputSettings: outputSettings)
assetReader.add(trackOutput)
assetReader.startReading()
Logger.logIt("reading data with AVAssetReader")
while assetReader.status == AVAssetReader.Status.reading {
if let sampleBufferRef = trackOutput.copyNextSampleBuffer() {
if let blockBufferRef = CMSampleBufferGetDataBuffer(sampleBufferRef) {
let bufferLength = CMBlockBufferGetDataLength(blockBufferRef)
let data = NSMutableData(length: bufferLength)
// func CMBlockBufferCopyDataBytes(_ theSourceBuffer: CMBlockBuffer, atOffset offsetToData: Int, dataLength: Int, destination: UnsafeMutableRawPointer) -> OSStatus
CMBlockBufferCopyDataBytes(blockBufferRef, atOffset: 0, dataLength: bufferLength, destination: data!.mutableBytes)
let samples = data!.mutableBytes.assumingMemoryBound(to: UInt16.self)
pcmOnly.append(samples, length: bufferLength)
CMSampleBufferInvalidate(sampleBufferRef)
}
} else {
Logger.logIt("failed to copy next")
}
}
} catch {
Logger.logIt(error.localizedDescription)
}
return pcmOnly as Data
}
}
额外的类
import Foundation
import AVFoundation
import OpusKit
public class Audio {
public static let SAMPLE_RATE_16_KHZ: opus_int32 = 16_000
public static let SAMPLE_RATE_8_KHZ: opus_int32 = 8_000
public static let SAMPLE_RATE_DEFAULT = SAMPLE_RATE_16_KHZ
public static let MONO:Int32 = 1
public static let CHANNEL_COUNT_DEFAULT:Int32 = MONO
public static let BIT_DEPTH_DEFAULT:Int32 = 16
public static let FRAME_DURATION_DEFAULT = 20 // milliseconds
// FRAME_SIZE = FRAME (duration in millisecond) * SAMPLE_RATE
public static let FRAME_SIZE_DEFAULT:Int32 = (SAMPLE_RATE_DEFAULT / 1000) * Int32(FRAME_DURATION_DEFAULT)
}
public class PCM: Audio {
public static let SPLIT_CHUNK_SIZE_DEFAULT:Int = Int(FRAME_SIZE_DEFAULT * (BIT_DEPTH_DEFAULT / 8))
}
public class WAV: Audio {
public static let HEADER_SIZE:Int32 = 44 // always 44 bytes
public static let WAV_HEADER_FORMAT_PCM:Int16 = 1
public static let WAV_HEADER_SUB_CHUNK_SIZE:Int32 = 16 // always 16
}
public class Opus: Audio {
public static let ENCODED_OUTPUT_MEMORY_SIZE_LIMIT:Int32 = 255 // Size of the allocated memory for the output payload
public static let OPUS_ENCODER_BUFFER_SIZE:Int32 = 1275 // ref: https://whosebug.com/a/55707654/4802664
}
public class PCMRecordingSetting {
private static let SAMPLE_RATE_16_KHZ = 16_000
private static let BIT_DEPTH_16 = 16
private static let CHANNEL_MONO = 1
public var sampleRate:Int = SAMPLE_RATE_16_KHZ {
willSet {
updateBitRate()
updateLinearPCMRecordingSettings()
}
}
public var channelCount:Int = CHANNEL_MONO {
willSet {
updateBitRate()
updateLinearPCMRecordingSettings()
}
}
public var bitDepth:Int = BIT_DEPTH_16 {
willSet {
updateBitRate()
updateLinearPCMRecordingSettings()
}
}
public private(set) var bitRate = SAMPLE_RATE_16_KHZ * BIT_DEPTH_16 * CHANNEL_MONO
private func updateBitRate(){
bitRate = sampleRate * bitDepth * channelCount
}
public static let LINEAR_PCM_DEFAULT = [
AVFormatIDKey: kAudioFormatLinearPCM,
AVSampleRateKey: SAMPLE_RATE_16_KHZ,
AVNumberOfChannelsKey: CHANNEL_MONO,
AVLinearPCMBitDepthKey: BIT_DEPTH_16,
AVLinearPCMIsFloatKey: false
] as [String : Any]
public var recordingSettings = LINEAR_PCM_DEFAULT
private func updateLinearPCMRecordingSettings(){
Logger.debug(#function)
recordingSettings = [
AVFormatIDKey: kAudioFormatLinearPCM,
AVSampleRateKey: sampleRate,
AVNumberOfChannelsKey: channelCount,
AVLinearPCMBitDepthKey: bitDepth,
AVLinearPCMIsFloatKey: false
] as [String : Any]
}
public init(sampleRate: Int, channelCount: Int, bitDepth: Int){
self.sampleRate = sampleRate
self.channelCount = channelCount
self.bitDepth = bitDepth
updateBitRate()
updateLinearPCMRecordingSettings()
}
public static let `default` = PCMRecordingSetting(sampleRate: SAMPLE_RATE_16_KHZ, channelCount: CHANNEL_MONO, bitDepth: BIT_DEPTH_16)
}
我可以使用 AVAudioRecorder 录制作品,如下所示:
let opusRecordingSettings = [AVFormatIDKey: kAudioFormatOpus,
AVSampleRateKey: 16000.0,
AVNumberOfChannelsKey: 1] as [String: Any]
do {
try audioRecordingSession.setCategory(.playAndRecord, mode: .default)
try audioRecordingSession.setActive(true)
audioRecorder = try AVAudioRecorder(url: fileUrl(), settings: opusRecordingSettings)
audioRecorder.delegate = self
audioRecorder.prepareToRecord()
audioRecorder.record()
}
catch _ { }
// ... ... ...
现在我需要对作品进行如下编码:
| header | encoded opus data | header | encoded opus data | ... | ... |
每个header表示opus数据的大小(以字节为单位)
我猜的(如果我错了请纠正我):
AVAudioRecorder 将 opus 数据包连续并排放置(因为由 默认的 opus 数据包是 non-delimited) 在
.opus
文件中 录制完成默认帧持续时间为 20 毫秒,因此
frameSize = 20 * 16 = 320
(因为我使用的是 16 kHz 采样率)每个数据包只包含一帧,因此
packetSize == frameSize
我知道如果我能以某种方式遍历 opus 数据包,那么我就可以计算每个数据包的大小并将其附加为 header (dataChunk = header + encodedOpusData
)
我从以下方面获得了自定界作品的概念:ietf.org/Self-Delimiting-Framing 但不知道以下内容:
- 如何创建带分隔符的 opus 音频文件(由 header 分隔,其中 header 表示 opus 数据的大小(以字节为单位))
- 如何以及何时追加header(录制时或录制完成后?)
我能够使用 AVAudioRecorder
和 AVAssetReader
对 self-delimited 作品进行编码(完整解决方案:hovermind.com/ios/libopus-kit)
FYI, this is probably not elegant solution but other methods i.e. tapping on InputNode of AudioEngine or using AudioQueue did not work for me. If anyone finds elegant solution, can suggest an edit of this answer
这是总结(代码如下):
使用
AVAudioRecorder
录制线性PCM并保存录制的音频文件 即temp.wav
使用
AVAssetReader
读取录制的音频文件并提取PCM(不要使用Data(contentsOf: ...)
加载音频文件,音频文件中有metadata/header会引起一些噪音)将 PCM 数据拆分为
x
字节块并对其进行循环(在我的例子中x = 640
)使用 OpusKit pod
将x
字节 PCM 块编码为作品计算编码作品的大小并附加为header
代码(Swift5,iOS13,Xcode11.3)
import UIKit
import MapKit
import MessageKit
import AVFoundation
import OpusKit
import os
class BasicChatViewController: ChatViewController {
override func viewDidLoad() {
super.viewDidLoad()
Logger.logIt(#function)
Logger.logIt("Initilizing opus lib kit")
OpusKit.shared.initialize(sampleRate: Opus.SAMPLE_RATE_DEFAULT,
numberOfChannels: Opus.CHANNEL_COUNT_DEFAULT,
packetSize: Opus.OPUS_ENCODER_BUFFER_SIZE,
encodeBlockSize: Opus.FRAME_SIZE_DEFAULT)
// configure record button here
}
//
// MARK - recording
//
var isRecording = false
var avAudioPlayer: AVAudioPlayer!
var audioRecorder: AVAudioRecorder!
@objc
func onTapRecordButton(sender: UIButton){
Logger.logIt(#function)
toggleRecording()
}
private func toggleRecording(){
Logger.logIt(#function)
Logger.logIt("isRecording: \(isRecording)")
if isRecording {
isRecording = false
stopRecording()
} else {
isRecording = true
checkPermissionAndStartRecording()
}
}
//
// END - recording
//
}
//
// Audio recording related extensions
//
extension BasicChatViewController: AVAudioRecorderDelegate {
private func checkPermissionAndStartRecording() {
Logger.logIt(#function)
AudioUtil.checkRecordingPermission() { isPermissionGranted in
Logger.logIt("isPermissionGranted: \(isPermissionGranted)")
if isPermissionGranted {
self.recordUsingAVAudioRecorder()
} else {
Logger.logIt("don't have permission to record")
}
}
}
private func setupRecorder() {
Logger.logIt(#function)
let tempAudioFileUrl = AudioUtil.TEMP_WAV_FILE
Logger.logIt("tempAudioFileUrl: \(tempAudioFileUrl)")
let linearPcmRecordingSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
Logger.logIt("RecordingSettings: \(linearPcmRecordingSettings)")
do {
startRecordingSession()
audioRecorder = try AVAudioRecorder(url: tempAudioFileUrl, settings: linearPcmRecordingSettings)
audioRecorder.delegate = self
//audioRecorder.isMeteringEnabled = true
audioRecorder.prepareToRecord()
}
catch {
Logger.logIt("\(error.localizedDescription)")
}
}
private func startRecording() {
Logger.logIt(#function)
if audioRecorder == nil {
setupRecorder()
}
audioRecorder.record()
}
private func stopRecording() {
Logger.logIt(#function)
guard audioRecorder != nil else {
return
}
audioRecorder.stop()
}
private func deleteTempAudioFile(){
Logger.logIt(#function)
guard audioRecorder != nil else {
return
}
if audioRecorder.isRecording {
return
}
// delete temporary audio file
let recordingDeleted = audioRecorder.deleteRecording()
if recordingDeleted {
Logger.logIt("temp (recorded) audio file deleted")
} else {
Logger.logIt("failed to delete temp (recorded) audio file")
}
}
private func startRecordingSession(){
Logger.logIt(#function)
do {
try AVAudioSession.sharedInstance().setCategory(.record, mode: .spokenAudio)
try AVAudioSession.sharedInstance().setActive(true)
} catch {
Logger.logIt("Failed to deactivate recording session")
}
}
private func stopRecordingSession(){
Logger.logIt(#function)
do {
try AVAudioSession.sharedInstance().setActive(false)
} catch {
Logger.logIt("Failed to deactivate recording session")
}
}
private func recordUsingAVAudioRecorder(){
Logger.logIt(#function)
setupRecorder()
startRecording()
}
private func encodeRecordedAudio(){
Logger.logIt(#function)
let pcmData = AudioUtil.extractPcmOnly(from: AudioUtil.TEMP_WAV_FILE)
if pcmData.count > 1 {
Logger.logIt("encoding pcm to self-delimited opus")
let encodedOpusData = AudioUtil.encodeToSelfDelimitedOpus(pcmData: pcmData, splitSize: PCM.SPLIT_CHUNK_SIZE_DEFAULT)
Logger.logIt("encoded opus: \(encodedOpusData)")
Logger.logIt("save encoded opus")
AudioUtil.saveAudio(to: AudioUtil.ENCODED_OPUS_FILE, audioData: encodedOpusData)
} else {
Logger.logIt("no data to encode")
}
deleteTempAudioFile()
stopRecordingSession()
}
func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
Logger.logIt(#function)
let finishedSuccessFully = flag
if finishedSuccessFully {
Logger.logIt("finished recording successfully")
encodeRecordedAudio()
} else {
Logger.logIt("recording failed - audio encoding error")
}
}
}
AudioUtil
import Foundation
import AVFoundation
import OpusKit
//
// Opus audio info.
//
public class OpusAudioInfo {
public static let `default` = OpusAudioInfo()
var channels: opus_int32
var headerSize: Int // bytes
var packetSize: opus_int32
var sampleRate: opus_int32 {
didSet {
packetSize = Int32(Opus.FRAME_DURATION_DEFAULT) * (sampleRate / 1000)
}
}
public init(sampleRate: opus_int32 = Opus.SAMPLE_RATE_16_KHZ,
channels: opus_int32 = Opus.CHANNEL_COUNT_DEFAULT,
headerSize: Int = 1) {
self.sampleRate = sampleRate
self.packetSize = Int32(Opus.FRAME_DURATION_DEFAULT) * (sampleRate / 1000)
self.channels = channels
self.headerSize = headerSize
}
}
//
// RAW PCM info.
//
public class PCMInfo {
public static let `default` = PCMInfo()
var sampleRate:Int32
var channels:Int16
var bitDepth:Int16
public init(sampleRate:Int32 = PCM.SAMPLE_RATE_16_KHZ,
channels:Int16 = Int16(PCM.MONO),
bitDepth:Int16 = Int16(PCM.BIT_DEPTH_DEFAULT)) {
self.sampleRate = sampleRate
self.channels = channels
self.bitDepth = bitDepth
}
}
//
// Utility class for audio related operations
//
public class AudioUtil {
private init(){}
//
// Default audio files url in document directory
//
public static let RAW_PCM_FILE = FileUtil.createFileUrl(for: "pcm.raw", in: FileUtil.DOCUMENTS_DIR)
public static let TEMP_WAV_FILE = FileUtil.createFileUrl(for: "wav.wav", in: FileUtil.DOCUMENTS_DIR)
public static let ENCODED_OPUS_FILE = FileUtil.createFileUrl(for: "encoded_opus_ios.opus", in: FileUtil.DOCUMENTS_DIR)
public static let DECODED_WAV_WITH_HEADER_FILE = FileUtil.createFileUrl(for: "decoded_wav_with_header.wav", in: FileUtil.DOCUMENTS_DIR)
/**
Creates fake wav header to play Linear PCM
AVAudioPlayer by default can not play Linear PCM, therefore we need to create a fake wav header
- parameter sampleRate: samples per second
- parameter channelCount: number of channels
- parameter bitDepth: bits per sample
- parameter pcmDataSizeInBytes: PCM data size in bytes
- returns : Data - wav header data
*/
public static func createWavHeader(sampleRate: Int32, channelCount: Int16, bitDepth: Int16, pcmDataSizeInBytes dataSize: Int32) -> Data {
/*
WAV header details: http://www.topherlee.com/software/pcm-tut-wavformat.html
Positions Sample Value Description
1 - 4 "RIFF" Marks the file as a riff file. Characters are each 1 byte long.
5 - 8 File size (integer) Size of the overall file - 8 bytes, in bytes (32-bit integer). Typically, you'd fill this in after creation.
9 -12 "WAVE" File Type Header. For our purposes, it always equals "WAVE".
13-16 "fmt " Format chunk marker. Includes trailing null
17-20 16 Length of format data as listed above
21-22 1 Type of format (1 is PCM) - 2 byte integer
23-24 2 Number of Channels - 2 byte integer
25-28 44100 Sample Rate - 32 byte integer. Common values are 44100 (CD), 48000 (DAT). Sample Rate = Number of Samples per second, or Hertz.
29-32 176400 (Sample Rate * BitsPerSample * Channels) / 8.
33-34 4 (BitsPerSample * Channels) / 8.1 - 8 bit mono2 - 8 bit stereo/16 bit mono4 - 16 bit stereo
35-36 16 Bits per sample
37-40 "data" "data" chunk header. Marks the beginning of the data section.
41-44 File size (data) Size of the data section.
Sample values are given above for a 16-bit stereo source.
An example in swift :
let WAV_HEADER: [Any] = [
"R","I","F","F",
0xFF,0xFF,0xFF,0x7F, // file size
"W","A","V","E",
"f","m","t"," ", // Chunk ID
0x10,0x00,0x00,0x00, // Chunk Size - length of format above
0x01,0x00, // Format Code: 1 is PCM, 3 is IEEE float
0x01,0x00, // Number of Channels (e.g. 2)
0x80,0xBB,0x00,0x00, // Samples per Second, Sample Rate (e.g. 48000)
0x00,0xDC,0x05,0x00, // Bytes per second, byte rate = sample rate * bits per sample * channels / 8
0x08,0x00, // Bytes per Sample Frame, block align = bits per sample * channels / 8
0x20,0x00, // bits per sample (16 for PCM, 32 for float)
"d","a","t","a",
0xFF,0xFF,0xFF,0x7F // size of data section
]
*/
let WAV_HEADER_SIZE:Int32 = 44
let FORMAT_CODE_PCM:Int16 = 1
let fileSize:Int32 = dataSize + WAV_HEADER_SIZE
let sampleRate:Int32 = sampleRate
let subChunkSize:Int32 = 16
let format:Int16 = FORMAT_CODE_PCM
let channels:Int16 = channelCount
let bitsPerSample:Int16 = bitDepth
let byteRate:Int32 = sampleRate * Int32(channels * bitsPerSample / 8)
let blockAlign: Int16 = (bitsPerSample * channels) / 8
let header = NSMutableData()
header.append([UInt8]("RIFF".utf8), length: 4)
header.append(byteArray(from: fileSize), length: 4)
//WAVE
header.append([UInt8]("WAVE".utf8), length: 4)
//FMT
header.append([UInt8]("fmt ".utf8), length: 4)
header.append(byteArray(from: subChunkSize), length: 4)
header.append(byteArray(from: format), length: 2)
header.append(byteArray(from: channels), length: 2)
header.append(byteArray(from: sampleRate), length: 4)
header.append(byteArray(from: byteRate), length: 4)
header.append(byteArray(from: blockAlign), length: 2)
header.append(byteArray(from: bitsPerSample), length: 2)
header.append([UInt8]("data".utf8), length: 4)
header.append(byteArray(from: dataSize), length: 4)
return header as Data
}
/**
Creates default wav header based on default PCM constants
- parameter dataSize: size of PCM data in bytes
- returns : Data - wav header data
*/
public static func createDefaultWavHeader(dataSize: Int32) -> Data {
return createWavHeader(sampleRate: PCM.SAMPLE_RATE_DEFAULT,
channelCount: Int16(PCM.CHANNEL_COUNT_DEFAULT),
bitDepth: Int16(PCM.BIT_DEPTH_DEFAULT),
pcmDataSizeInBytes: dataSize)
}
/**
Converts given value to byte array
- parameter value:FixedWidthInteger type
- returns: array of bytes
*/
public static func byteArray<T>(from value: T) -> [UInt8] where T: FixedWidthInteger {
// https://whosebug.com/a/56964191/4802664
// .littleEndian is required
return withUnsafeBytes(of: value.littleEndian) { Array([=11=]) }
}
/**
Generates wav audio data buffer from given header and raw PCM
- parameter wavHeader: a fake RIFF WAV header (appended to PCM)
- parameter pcmData: Linear PCM data
- returns: Data
*/
public static func generateWav(header wavHeader: Data, pcmData: Data) -> Data {
var wavData = Data()
wavData.append(wavHeader)
wavData.append(pcmData)
return wavData
}
/**
Checks permission for recording and invokes callback with flag
- parameter callback: clouser to invoked after checking permission
*/
public static func checkRecordingPermission(onPermissionChecked callback: @escaping(_ isPermissionGranted: Bool) -> Void) {
Logger.logIt(#function)
var isPermissionGranted = false
switch AVAudioSession.sharedInstance().recordPermission {
case .granted:
isPermissionGranted = true
break
case .denied:
isPermissionGranted = false
break
case .undetermined:
AVAudioSession.sharedInstance().requestRecordPermission({ (allowed) in
if allowed {
isPermissionGranted = true
} else {
isPermissionGranted = false
}
})
break
default:
isPermissionGranted = false
break
}
callback(isPermissionGranted)
}
/**
Saves given audio data to specified url
- parameter fileUri: file url where audio data will be saved
*/
public static func saveAudio(to fileUri: URL, audioData: Data) {
Logger.logIt(#function)
Logger.logIt("save to: \(fileUri)")
do {
try audioData.write(to: fileUri)
} catch {
Logger.logIt(error.localizedDescription)
}
}
/**
Encodes given PCM data into self delimited opus (`|header|data|header|data|...|`) using libopus
- parameter pcmData: Linear PCM data buffer (loaded from file or coming from AudioEngine tapping)
- parameter splitSize: size of chunk to split the given pcmData
- returns : encoded data (encoded as: `|header|data|header|data|...|`)
*/
public static func encodeToSelfDelimitedOpus(pcmData: Data, splitSize: Int) -> Data {
Logger.logIt(#function)
var encodedData = Data()
var readIndex = 0
var readStart = 0
var readEnd = 0
var pcmChunk: Data
var readCount = 1
let splitCount = (pcmData.count / splitSize)
Logger.logIt("split count: \(splitCount)")
var header: Data
while readCount <= splitCount {
readStart = readIndex
readEnd = readStart + splitSize
//
// to prevent index out of bound exception
// check readEnd index
//
if(readEnd >= pcmData.count){
readEnd = readStart + (pcmData.count - readIndex)
}
pcmChunk = pcmData[readStart..<readEnd]
//print("chunk: \(pcmChunk)")
if let encodedChunk = OpusKit.shared.encodeData(pcmChunk) {
//
// header is exactly one byte
// header indicates size of the encoded opus data
//
header = Data(from: encodedChunk.count)[0..<1]
//Logger.logIt("header: \([UInt8](header))")
encodedData.append(header)
encodedData.append(encodedChunk)
} else {
print("failed to encode at index: \(readStart)")
}
readIndex += splitSize
readCount += 1
}
//
// remaining data
//
//Logger.logIt("append remaining data")
pcmChunk = pcmData[readIndex..<pcmData.count]
if let encodedChunk = OpusKit.shared.encodeData(pcmChunk) {
header = Data(from: encodedChunk.count)[0..<1]
//Logger.logIt("header: \([UInt8](header))")
encodedData.append(header)
encodedData.append(encodedChunk)
} else {
print("failed to encode at index: \(readIndex)")
}
return encodedData
}
/**
Decodes given self delimited opus data to PCM
Custom opus is encoded as `|header|data|header|data|...|`
Loops over the data, reads data size from header and takes slice/chunk of given opus data based on data size from header. Then each chunk is decode using libopus
- parameter opusData: Encoded opus data buffer
- parameter headerSizeInBytes: size of header in bytes (default is 1)
- returns : decoded pcm data
*/
public static func decodeSelfDelimitedOpusToPcm(opusData: Data, headerSizeInBytes headerSize: Int = 1) -> Data {
var decodedData: Data = Data()
var headerData: Data
var opusChunkSizeFromHeader = 0
var readIndex = 0
var readStart = 0
var readEnd = 0
var extractedOpusChunk: Data
while readIndex < opusData.count {
headerData = opusData[readIndex..<(readIndex + headerSize)]
//Logger.logIt("headerData: \([UInt8](headerData))")
opusChunkSizeFromHeader = Int([UInt8](headerData)[0])
readStart = readIndex + headerSize
readEnd = readStart + opusChunkSizeFromHeader
extractedOpusChunk = opusData[readStart..<readEnd]
//Logger.logIt("extracted: \(extractedOpusChunk)")
if let decodedDataChunk = OpusKit.shared.decodeData(extractedOpusChunk) {
//Logger.logIt("decodedDataChunk: \(decodedDataChunk)")
decodedData.append(decodedDataChunk)
} else {
print("failed to decode at index: \(readStart)")
}
readIndex += (headerSize + opusChunkSizeFromHeader)
}
return decodedData
}
/**
Extracts PCM only from a audio file using AVAssetReader
Normally system will append some meta data while saving audio file with extension, and therefore we need to use AVAssetReader to get PCM only
- parameter fileUrl : audio file url
- returns: PCM Data
*/
public static func extractPcmOnly(from fileUrl: URL) -> Data {
let pcmOnly = NSMutableData()
do {
let asset = AVAsset(url: fileUrl)
let assetReader = try AVAssetReader(asset: asset)
let track = asset.tracks(withMediaType: AVMediaType.audio).first
let outputSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
let trackOutput = AVAssetReaderTrackOutput(track: track!, outputSettings: outputSettings)
assetReader.add(trackOutput)
assetReader.startReading()
Logger.logIt("reading data with AVAssetReader")
while assetReader.status == AVAssetReader.Status.reading {
if let sampleBufferRef = trackOutput.copyNextSampleBuffer() {
if let blockBufferRef = CMSampleBufferGetDataBuffer(sampleBufferRef) {
let bufferLength = CMBlockBufferGetDataLength(blockBufferRef)
let data = NSMutableData(length: bufferLength)
// func CMBlockBufferCopyDataBytes(_ theSourceBuffer: CMBlockBuffer, atOffset offsetToData: Int, dataLength: Int, destination: UnsafeMutableRawPointer) -> OSStatus
CMBlockBufferCopyDataBytes(blockBufferRef, atOffset: 0, dataLength: bufferLength, destination: data!.mutableBytes)
let samples = data!.mutableBytes.assumingMemoryBound(to: UInt16.self)
pcmOnly.append(samples, length: bufferLength)
CMSampleBufferInvalidate(sampleBufferRef)
}
} else {
Logger.logIt("failed to copy next")
}
}
} catch {
Logger.logIt(error.localizedDescription)
}
return pcmOnly as Data
}
}
额外的类
import Foundation
import AVFoundation
import OpusKit
public class Audio {
public static let SAMPLE_RATE_16_KHZ: opus_int32 = 16_000
public static let SAMPLE_RATE_8_KHZ: opus_int32 = 8_000
public static let SAMPLE_RATE_DEFAULT = SAMPLE_RATE_16_KHZ
public static let MONO:Int32 = 1
public static let CHANNEL_COUNT_DEFAULT:Int32 = MONO
public static let BIT_DEPTH_DEFAULT:Int32 = 16
public static let FRAME_DURATION_DEFAULT = 20 // milliseconds
// FRAME_SIZE = FRAME (duration in millisecond) * SAMPLE_RATE
public static let FRAME_SIZE_DEFAULT:Int32 = (SAMPLE_RATE_DEFAULT / 1000) * Int32(FRAME_DURATION_DEFAULT)
}
public class PCM: Audio {
public static let SPLIT_CHUNK_SIZE_DEFAULT:Int = Int(FRAME_SIZE_DEFAULT * (BIT_DEPTH_DEFAULT / 8))
}
public class WAV: Audio {
public static let HEADER_SIZE:Int32 = 44 // always 44 bytes
public static let WAV_HEADER_FORMAT_PCM:Int16 = 1
public static let WAV_HEADER_SUB_CHUNK_SIZE:Int32 = 16 // always 16
}
public class Opus: Audio {
public static let ENCODED_OUTPUT_MEMORY_SIZE_LIMIT:Int32 = 255 // Size of the allocated memory for the output payload
public static let OPUS_ENCODER_BUFFER_SIZE:Int32 = 1275 // ref: https://whosebug.com/a/55707654/4802664
}
public class PCMRecordingSetting {
private static let SAMPLE_RATE_16_KHZ = 16_000
private static let BIT_DEPTH_16 = 16
private static let CHANNEL_MONO = 1
public var sampleRate:Int = SAMPLE_RATE_16_KHZ {
willSet {
updateBitRate()
updateLinearPCMRecordingSettings()
}
}
public var channelCount:Int = CHANNEL_MONO {
willSet {
updateBitRate()
updateLinearPCMRecordingSettings()
}
}
public var bitDepth:Int = BIT_DEPTH_16 {
willSet {
updateBitRate()
updateLinearPCMRecordingSettings()
}
}
public private(set) var bitRate = SAMPLE_RATE_16_KHZ * BIT_DEPTH_16 * CHANNEL_MONO
private func updateBitRate(){
bitRate = sampleRate * bitDepth * channelCount
}
public static let LINEAR_PCM_DEFAULT = [
AVFormatIDKey: kAudioFormatLinearPCM,
AVSampleRateKey: SAMPLE_RATE_16_KHZ,
AVNumberOfChannelsKey: CHANNEL_MONO,
AVLinearPCMBitDepthKey: BIT_DEPTH_16,
AVLinearPCMIsFloatKey: false
] as [String : Any]
public var recordingSettings = LINEAR_PCM_DEFAULT
private func updateLinearPCMRecordingSettings(){
Logger.debug(#function)
recordingSettings = [
AVFormatIDKey: kAudioFormatLinearPCM,
AVSampleRateKey: sampleRate,
AVNumberOfChannelsKey: channelCount,
AVLinearPCMBitDepthKey: bitDepth,
AVLinearPCMIsFloatKey: false
] as [String : Any]
}
public init(sampleRate: Int, channelCount: Int, bitDepth: Int){
self.sampleRate = sampleRate
self.channelCount = channelCount
self.bitDepth = bitDepth
updateBitRate()
updateLinearPCMRecordingSettings()
}
public static let `default` = PCMRecordingSetting(sampleRate: SAMPLE_RATE_16_KHZ, channelCount: CHANNEL_MONO, bitDepth: BIT_DEPTH_16)
}