试图了解 AudioKit 中 AKFFTTap 的输出

Trying to understand the output of AKFFTTap in AudioKit

我正在尝试使用 AudioKit 构建一个应用程序来分析麦克风的输入并将传入的声音分成 3 个频率范围(低、中、高)及其振幅。

这是我的代码:

/// View controller that starts microphone capture and, while its view is on
/// screen, periodically prints the FFT tap's data to the console.
class ViewController: UIViewController {

    /// Microphone input node; created in `viewDidLoad`.
    var mic: AKMicrophone!
    /// Declared for amplitude tracking but never assigned or read in this class.
    var amplitude: AKAmplitudeTracker!
    /// FFT tap installed on `mic`; created in `viewDidLoad`.
    var fftTap: AKFFTTap?
    /// Repeating timer that polls `fftTap`; only scheduled while the view is visible.
    var timer:  Timer!

    /// Creates the microphone node and attaches the FFT tap to it.
    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view, typically from a nib.

        mic = AKMicrophone()
        fftTap = AKFFTTap.init(mic)

    }

    /// Starts AudioKit and the microphone, then schedules the polling timer.
    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)

        do {
            try AudioKit.start()
        } catch {
            AKLog("AudioKit did not start!")
        }

        mic.start()

        // viewDidAppear can run more than once; drop any earlier timer so
        // repeated appearances do not stack several timers printing at once.
        timer?.invalidate()

        // Capture self weakly: a repeating timer retains its block, so a strong
        // capture would keep this view controller alive for as long as it fires.
        timer = Timer.scheduledTimer(withTimeInterval: 0.01, repeats: true) { [weak self] _ in
            guard let self = self else { return }

            let fftData = self.fftTap?.fftData

            // Prints 0.0 for every index when no tap is available.
            for i in 0...256 {
                print(Double(fftData?[i] ?? 0.0))
            }
        }
    }

    /// Stops polling once the view is no longer visible.
    override func viewDidDisappear(_ animated: Bool) {
        super.viewDidDisappear(animated)

        timer?.invalidate()
        timer = nil
    }

}

但现在我不知道输出的实际含义是什么?

如何获得特定频率范围内的最大振幅?我需要同时得到全部三个范围的结果,所以我认为仅靠 Frequency-Tracker 是做不到的。

通过阅读有关 FFT 的文档,我了解到前 256 个 bin 表示某个频率的幅度。但我只找到了将这些值转换为绘图的 Matlab 绘图示例(这对我来说真的没有意义)。

我在 Google 上找到了帮助我解决问题的代码片段:

https://groups.google.com/forum/#!topic/comp.dsp/cZsS1ftN5oI

具体这部分:

/* do FFT (taken from NR [http://www.nr.com] but uses array of doubles) */
    four1(fftBuffer-1, FFT_SIZE, 1);

/* display 15 bins around the frequency of interest */
    for (long k = 80; k < 110; k += 2) {

    /* real */
        double re = fftBuffer[k];

    /* imaginary */
        double im = fftBuffer[k+1];

    /* get normalized bin magnitude */
        double normBinMag = 2.*sqrt(re*re + im*im) / FFT_SIZE;

    /* convert to dB value */
        double amplitude = 20. * log10( normBinMag );

    /* and display */
        printf("bin: %d,\tfreq: %f [Hz],\tmag: %f,\t ampl.: %f [dB]\n", \
               k/2, sampleRate*.5*(double)k/FFT_SIZE, normBinMag, amplitude);
    }
}

/* Program output:

bin: 40,    freq: 861.328125 [Hz],  mag: 0.000000,   ampl.: -182.347994 [dB]
bin: 41,    freq: 882.861328 [Hz],  mag: 0.000000,   ampl.: -180.895076 [dB]
bin: 42,    freq: 904.394531 [Hz],  mag: 0.000000,   ampl.: -179.201401 [dB]
bin: 43,    freq: 925.927734 [Hz],  mag: 0.000000,   ampl.: -177.156879 [dB]
bin: 44,    freq: 947.460938 [Hz],  mag: 0.000000,   ampl.: -174.555312 [dB]
bin: 45,    freq: 968.994141 [Hz],  mag: 0.000000,   ampl.: -170.934049 [dB]
bin: 46,    freq: 990.527344 [Hz],  mag: 0.000000,   ampl.: -164.817195 [dB]
bin: 47,    freq: 1012.060547 [Hz], mag: 1.000000,   ampl.: 0.000000 [dB]
bin: 48,    freq: 1033.593750 [Hz], mag: 0.000000,   ampl.: -164.633624 [dB]
bin: 49,    freq: 1055.126953 [Hz], mag: 0.000000,   ampl.: -170.566625 [dB]
bin: 50,    freq: 1076.660156 [Hz], mag: 0.000000,   ampl.: -174.003468 [dB]
bin: 51,    freq: 1098.193359 [Hz], mag: 0.000000,   ampl.: -176.419757 [dB]
bin: 52,    freq: 1119.726562 [Hz], mag: 0.000000,   ampl.: -178.277857 [dB]
bin: 53,    freq: 1141.259766 [Hz], mag: 0.000000,   ampl.: -179.783660 [dB]
bin: 54,    freq: 1162.792969 [Hz], mag: 0.000000,   ampl.: -181.046952 [dB]

*/

[编辑]

根据要求,这是 Swift 代码:

//
//  ViewController.swift
//

import AudioKit
import UIKit

/// View controller that starts microphone capture and, while its view is on
/// screen, periodically prints a per-bin frequency and dB amplitude derived
/// from the FFT tap's data.
class ViewController: UIViewController {

    /// Microphone input node; created in `viewDidLoad`.
    var mic: AKMicrophone!
    /// FFT tap installed on `mic`; created in `viewDidLoad`.
    var fftTap: AKFFTTap?
    /// Repeating timer that polls `fftTap`; only scheduled while the view is visible.
    var timer:  Timer!
    /// Number of FFT values read per poll; also used as the normalization divisor.
    let FFT_SIZE = 512
    /// Sample rate in Hz used to convert a bin index into a frequency.
    let sampleRate:double_t = 44100

    /// Creates the microphone node and attaches the FFT tap to it.
    override func viewDidLoad() {
        super.viewDidLoad()

        mic = AKMicrophone()

        fftTap = AKFFTTap.init(mic)

    }

    /// Starts AudioKit and the microphone, then schedules the polling timer.
    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)

        do {
            try AudioKit.start()
        } catch {
            AKLog("AudioKit did not start!")
        }

        mic.start()

        // viewDidAppear can run more than once; drop any earlier timer so
        // repeated appearances do not stack several timers printing at once.
        timer?.invalidate()

        // Capture self weakly: a repeating timer retains its block, so a strong
        // capture would keep this view controller alive for as long as it fires.
        timer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) { [weak self] _ in
            guard let self = self, let fftData = self.fftTap?.fftData else { return }

            let fftSize = Double(self.FFT_SIZE)
            // Never read past the end of the buffer, even if it is shorter than FFT_SIZE.
            let upperBound = min(self.FFT_SIZE, fftData.count) - 1

            // Step by 2 so each iteration consumes one (re, im) pair, as the
            // referenced C snippet does.
            // NOTE(review): this assumes fftData holds interleaved real/imaginary
            // values - confirm against AKFFTTap; if it already holds magnitudes,
            // read one value per bin instead.
            for i in stride(from: 0, to: upperBound, by: 2) {

                let re = Double(fftData[i])
                let im = Double(fftData[i + 1])

                // Normalized bin magnitude.
                let normBinMag = 2.0 * sqrt(re * re + im * im) / fftSize

                // Magnitude in dB; a magnitude of exactly 0 yields -infinity.
                let amplitude = 20.0 * log10(normBinMag)

                // Same bin-to-frequency formula as the referenced C snippet.
                let frequency = self.sampleRate * 0.5 * Double(i) / fftSize

                print("bin: \(i/2) \t freq: \(frequency)\t ampl.: \(amplitude)")
            }

            // Now do anything you like with the data.
            // Be aware, though, that the amplitude is a negative number:
            // the lower it is, the less input it represents.
            // In the author's tests the lowest value was around -260.
            // Look up how to map this negative dB value onto a positive scale
            // if you need one.
        }
    }

    /// Stops polling once the view is no longer visible.
    override func viewDidDisappear(_ animated: Bool) {
        super.viewDidDisappear(animated)

        timer?.invalidate()
        timer = nil
    }

}