Swift FFT - 复数拆分(Complex split)问题
Swift FFT - Complex split issue
我正在尝试使用 Accelerate 框架对音频文件执行 FFT,以找出其中的频率。我的代码改编自这个问题(改编得可能不正确):
然而,'spectrum' 中的幅度要么是 '0',要么是 'inf' 或 'nan';打印拆分复数(split complex)的 'real' 和 'imag' 分量也得到类似的结果,这说明问题出在这里:'magnitude = sqrt(pow(real,2)+pow(imag,2))'。如果我说错了请纠正我,但我认为其余代码没有问题。
为什么我会得到这些结果?该如何修复(拆分后的分量应该是什么)?我哪里做错了?请注意,我对 FFT 和采样还非常陌生,也不清楚如何针对音频文件进行设置,因此非常感谢您的帮助。谢谢。
这是我使用的代码:
// get audio file
// Opens the bundled "foo.mp3" and allocates a PCM buffer with capacity for every frame in it.
// NOTE(review): no call in this snippet reads from audioFile into buffer, so the
// samples accessed below do not come from the file.
let fileURL:NSURL = NSBundle.mainBundle().URLForResource("foo", withExtension: "mp3")!
let audioFile = try! AVAudioFile(forReading: fileURL)
let fileFormat = audioFile.processingFormat
let frameCount = UInt32(audioFile.length)
let buffer = AVAudioPCMBuffer(PCMFormat: fileFormat, frameCapacity: frameCount)
let audioEngine = AVAudioEngine()
let playerNode = AVAudioPlayerNode()
// audioMixerNode is assigned without a declaration here; presumably declared elsewhere.
audioMixerNode = audioEngine.mainMixerNode
let bufferSize = Int(frameCount)
// channels is a one-element array holding the channel count, so channelCount
// below is that array's length (1), not the number of audio channels.
let channels: NSArray = [Int(buffer.format.channelCount)]
let channelCount = channels.count
// floats1 is not used again in this snippet.
let floats1 = [Int(buffer.frameLength)]
// Packs pairs of samples (index j and j + stride) into DSPComplex values, appending
// one inner array per loop iteration.
// channelSamples is not declared here; presumably an array of [DSPComplex] declared elsewhere.
for var i=0; i<channelCount; ++i {
channelSamples.append([])
let firstSample = buffer.format.interleaved ? i : i*bufferSize
for var j=firstSample; j<bufferSize; j+=buffer.stride*2 {
channelSamples[i].append(DSPComplex(real: buffer.floatChannelData.memory[j], imag: buffer.floatChannelData.memory[j+buffer.stride]))
}
}
// connect node
audioEngine.attachNode(playerNode)
audioEngine.connect(playerNode, to: audioMixerNode, format: playerNode.outputFormatForBus(0))
// Set up the transform
// log2n is rounded to the nearest integer, so 2^log2n can be larger or smaller than bufferSize.
let log2n = UInt(round(log2(Double(bufferSize))))
let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))
// Create the complex split value to hold the output of the transform
// why doesn't this work?
var realp = [Float](count: bufferSize/2, repeatedValue: 0)
var imagp = [Float](count: bufferSize/2, repeatedValue: 0)
var output = DSPSplitComplex(realp: &realp, imagp: &imagp)
// NOTE(review): channelSamples is an array of arrays (see the appends above), so this
// pointer refers to the outer array's elements rather than to contiguous DSPComplex values.
vDSP_ctoz(UnsafePointer(channelSamples), 2, &output, 1, UInt(bufferSize / 2))
// Do the fast Fourier forward transform
// The transform runs in place on output, using 2^log2n as its length.
vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))
// Convert the complex output to magnitude
// Two separate results are produced: vDSP_zvmags fills fft, and the loop below
// fills spectrum with sqrt(real^2 + imag^2) for each of the bufferSize/2 entries.
var fft = [Float](count:Int(bufferSize / 2), repeatedValue:0.0)
let bufferOver2: vDSP_Length = vDSP_Length(bufferSize / 2)
vDSP_zvmags(&output, 1, &fft, 1, bufferOver2)
var spectrum = [Float]()
for var i=0; i<bufferSize/2; ++i {
let imag = output.imagp[i]
let real = output.realp[i]
let magnitude = sqrt(pow(real,2)+pow(imag,2))
spectrum.append(magnitude) }
// Release the setup
vDSP_destroy_fftsetup(fftSetup)
您的代码有几个问题:
- 您没有读取音频文件的样本
- channelSamples 的打包方式不正确
- vDSP_fft_zrip 读取了超出数组末尾的内容:它需要 2^log2n 个样本
- vDSP_fft_zrip 的输出是打包(packed)格式,而您的计算假定它是未打包的
下面是 Swift 4 版本,其中针对第 3 点做了实际的修复:
// Computes the squared-magnitude spectrum (`fft`) of the first channel of the
// bundled "foo.mp3" using a packed real FFT from Accelerate.

// Load the bundled audio file. A missing resource or an unreadable file is a
// programmer error in this sample, hence the force-unwrap / try!.
let fileURL = Bundle.main.url(forResource: "foo", withExtension: "mp3")!
let audioFile = try! AVAudioFile(forReading: fileURL)
let frameCount = UInt32(audioFile.length)
// log2 is undefined for 0, and a 1-sample transform has no split-complex halves.
precondition(frameCount > 1, "audio file must contain at least two frames")

// vDSP_fft_zrip consumes exactly 2^log2n samples. Round the exponent UP so the
// power-of-two buffer always holds the whole file; rounding to nearest could
// pick a size smaller than frameCount and make the zero-fill range below trap.
let log2n = UInt(ceil(log2(Double(frameCount))))
let bufferSizePOT = Int(1 << log2n)
let halfSize = bufferSizePOT / 2

let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat,
                              frameCapacity: AVAudioFrameCount(bufferSizePOT))!
try! audioFile.read(into: buffer, frameCount: frameCount)

// Not sure if AVAudioPCMBuffer zero initialises extra frames, so when in doubt
// zero-pad everything past the frames that were actually read (frameLength).
let leftFrames = buffer.floatChannelData![0]
for i in Int(buffer.frameLength)..<bufferSizePOT {
    leftFrames[i] = 0
}

// Set up the transform
let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))!

// Split-complex storage for the packed real input / packed output, plus the
// squared-magnitude result.
var realp = [Float](repeating: 0, count: halfSize)
var imagp = [Float](repeating: 0, count: halfSize)
var fft = [Float](repeating: 0.0, count: halfSize)

// DSPSplitComplex stores raw pointers, so it is built inside
// withUnsafeMutableBufferPointer, where the array storage stays valid for the
// whole closure. Passing `&realp` to the initializer would only yield a
// pointer valid for the duration of that initializer call.
realp.withUnsafeMutableBufferPointer { realBuffer in
    imagp.withUnsafeMutableBufferPointer { imagBuffer in
        var output = DSPSplitComplex(realp: realBuffer.baseAddress!,
                                     imagp: imagBuffer.baseAddress!)

        // create packed real input: view the samples as (even, odd) pairs and
        // de-interleave them into the split-complex layout.
        leftFrames.withMemoryRebound(to: DSPComplex.self, capacity: halfSize) {
            vDSP_ctoz($0, 2, &output, 1, vDSP_Length(halfSize))
        }

        // Do the fast Fourier forward transform, packed input to packed output
        vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))

        // you can calculate magnitude squared here, with care
        // as the first result is wrong! read up on packed formats
        // (in the packed layout element 0 does not hold an ordinary complex bin).
        vDSP_zvmags(&output, 1, &fft, 1, vDSP_Length(halfSize))
    }
}

// Release the setup
vDSP_destroy_fftsetup(fftSetup)
我正在尝试使用 Accelerate 框架对音频文件执行 FFT,以找出其中的频率。我的代码改编自这个问题(改编得可能不正确):
然而,'spectrum' 中的幅度要么是 '0',要么是 'inf' 或 'nan';打印拆分复数(split complex)的 'real' 和 'imag' 分量也得到类似的结果,这说明问题出在这里:'magnitude = sqrt(pow(real,2)+pow(imag,2))'。如果我说错了请纠正我,但我认为其余代码没有问题。
为什么我会得到这些结果?该如何修复(拆分后的分量应该是什么)?我哪里做错了?请注意,我对 FFT 和采样还非常陌生,也不清楚如何针对音频文件进行设置,因此非常感谢您的帮助。谢谢。
这是我使用的代码:
// get audio file
// Opens the bundled "foo.mp3" and allocates a PCM buffer with capacity for every frame in it.
// NOTE(review): no call in this snippet reads from audioFile into buffer, so the
// samples accessed below do not come from the file.
let fileURL:NSURL = NSBundle.mainBundle().URLForResource("foo", withExtension: "mp3")!
let audioFile = try! AVAudioFile(forReading: fileURL)
let fileFormat = audioFile.processingFormat
let frameCount = UInt32(audioFile.length)
let buffer = AVAudioPCMBuffer(PCMFormat: fileFormat, frameCapacity: frameCount)
let audioEngine = AVAudioEngine()
let playerNode = AVAudioPlayerNode()
// audioMixerNode is assigned without a declaration here; presumably declared elsewhere.
audioMixerNode = audioEngine.mainMixerNode
let bufferSize = Int(frameCount)
// channels is a one-element array holding the channel count, so channelCount
// below is that array's length (1), not the number of audio channels.
let channels: NSArray = [Int(buffer.format.channelCount)]
let channelCount = channels.count
// floats1 is not used again in this snippet.
let floats1 = [Int(buffer.frameLength)]
// Packs pairs of samples (index j and j + stride) into DSPComplex values, appending
// one inner array per loop iteration.
// channelSamples is not declared here; presumably an array of [DSPComplex] declared elsewhere.
for var i=0; i<channelCount; ++i {
channelSamples.append([])
let firstSample = buffer.format.interleaved ? i : i*bufferSize
for var j=firstSample; j<bufferSize; j+=buffer.stride*2 {
channelSamples[i].append(DSPComplex(real: buffer.floatChannelData.memory[j], imag: buffer.floatChannelData.memory[j+buffer.stride]))
}
}
// connect node
audioEngine.attachNode(playerNode)
audioEngine.connect(playerNode, to: audioMixerNode, format: playerNode.outputFormatForBus(0))
// Set up the transform
// log2n is rounded to the nearest integer, so 2^log2n can be larger or smaller than bufferSize.
let log2n = UInt(round(log2(Double(bufferSize))))
let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))
// Create the complex split value to hold the output of the transform
// why doesn't this work?
var realp = [Float](count: bufferSize/2, repeatedValue: 0)
var imagp = [Float](count: bufferSize/2, repeatedValue: 0)
var output = DSPSplitComplex(realp: &realp, imagp: &imagp)
// NOTE(review): channelSamples is an array of arrays (see the appends above), so this
// pointer refers to the outer array's elements rather than to contiguous DSPComplex values.
vDSP_ctoz(UnsafePointer(channelSamples), 2, &output, 1, UInt(bufferSize / 2))
// Do the fast Fourier forward transform
// The transform runs in place on output, using 2^log2n as its length.
vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))
// Convert the complex output to magnitude
// Two separate results are produced: vDSP_zvmags fills fft, and the loop below
// fills spectrum with sqrt(real^2 + imag^2) for each of the bufferSize/2 entries.
var fft = [Float](count:Int(bufferSize / 2), repeatedValue:0.0)
let bufferOver2: vDSP_Length = vDSP_Length(bufferSize / 2)
vDSP_zvmags(&output, 1, &fft, 1, bufferOver2)
var spectrum = [Float]()
for var i=0; i<bufferSize/2; ++i {
let imag = output.imagp[i]
let real = output.realp[i]
let magnitude = sqrt(pow(real,2)+pow(imag,2))
spectrum.append(magnitude) }
// Release the setup
vDSP_destroy_fftsetup(fftSetup)
您的代码有几个问题:
- 您没有读取音频文件的样本
- channelSamples 的打包方式不正确
- vDSP_fft_zrip 读取了超出数组末尾的内容:它需要 2^log2n 个样本
- vDSP_fft_zrip 的输出是打包(packed)格式,而您的计算假定它是未打包的
下面是 Swift 4 版本,其中针对第 3 点做了实际的修复:
// Computes the squared-magnitude spectrum (`fft`) of the first channel of the
// bundled "foo.mp3" using a packed real FFT from Accelerate.

// Load the bundled audio file. A missing resource or an unreadable file is a
// programmer error in this sample, hence the force-unwrap / try!.
let fileURL = Bundle.main.url(forResource: "foo", withExtension: "mp3")!
let audioFile = try! AVAudioFile(forReading: fileURL)
let frameCount = UInt32(audioFile.length)
// log2 is undefined for 0, and a 1-sample transform has no split-complex halves.
precondition(frameCount > 1, "audio file must contain at least two frames")

// vDSP_fft_zrip consumes exactly 2^log2n samples. Round the exponent UP so the
// power-of-two buffer always holds the whole file; rounding to nearest could
// pick a size smaller than frameCount and make the zero-fill range below trap.
let log2n = UInt(ceil(log2(Double(frameCount))))
let bufferSizePOT = Int(1 << log2n)
let halfSize = bufferSizePOT / 2

let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat,
                              frameCapacity: AVAudioFrameCount(bufferSizePOT))!
try! audioFile.read(into: buffer, frameCount: frameCount)

// Not sure if AVAudioPCMBuffer zero initialises extra frames, so when in doubt
// zero-pad everything past the frames that were actually read (frameLength).
let leftFrames = buffer.floatChannelData![0]
for i in Int(buffer.frameLength)..<bufferSizePOT {
    leftFrames[i] = 0
}

// Set up the transform
let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))!

// Split-complex storage for the packed real input / packed output, plus the
// squared-magnitude result.
var realp = [Float](repeating: 0, count: halfSize)
var imagp = [Float](repeating: 0, count: halfSize)
var fft = [Float](repeating: 0.0, count: halfSize)

// DSPSplitComplex stores raw pointers, so it is built inside
// withUnsafeMutableBufferPointer, where the array storage stays valid for the
// whole closure. Passing `&realp` to the initializer would only yield a
// pointer valid for the duration of that initializer call.
realp.withUnsafeMutableBufferPointer { realBuffer in
    imagp.withUnsafeMutableBufferPointer { imagBuffer in
        var output = DSPSplitComplex(realp: realBuffer.baseAddress!,
                                     imagp: imagBuffer.baseAddress!)

        // create packed real input: view the samples as (even, odd) pairs and
        // de-interleave them into the split-complex layout.
        leftFrames.withMemoryRebound(to: DSPComplex.self, capacity: halfSize) {
            vDSP_ctoz($0, 2, &output, 1, vDSP_Length(halfSize))
        }

        // Do the fast Fourier forward transform, packed input to packed output
        vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))

        // you can calculate magnitude squared here, with care
        // as the first result is wrong! read up on packed formats
        // (in the packed layout element 0 does not hold an ordinary complex bin).
        vDSP_zvmags(&output, 1, &fft, 1, vDSP_Length(halfSize))
    }
}

// Release the setup
vDSP_destroy_fftsetup(fftSetup)