Python for/while 循环

Question

今天我正在做一个项目：把呼入的电话通话转录成文字并保存到文本文件中。不过我对 Python 以及 Python 的循环还比较陌生。我想遍历 SQL Server 表中的某一列（所有电话通话的 OID），并把每一行依次送入我使用的 Azure Speech to Text 服务。这个问题已经困扰我好几天了，所以想来这里寻求一些帮助。

import azure.cognitiveservices.speech as speechsdk
import time
from os import path
from pydub import AudioSegment
import requests
import hashlib
import sys
import os.path
import pyodbc

# Connection settings for the SQL Server instance; every value is redacted
# to '*' in this listing and must be filled in before running.
server = "*"
driver = "*"
databaseName = "*"
username = "*"
password = "*"

# Asker's original attempt, kept verbatim: connect to the database, walk the
# result rows, download the recording for each OID, convert it from MP3 to
# WAV with pydub and transcribe the WAV with the Azure Speech SDK.
try:
    CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password

    conn = pyodbc.connect(CONNECTION_STRING)

    cursor = conn.cursor()
    storedproc = "* = *'"
    cursor.execute(storedproc)
    row = cursor.fetchone()
    while row:
        # NOTE(review): `array` is rebuilt for every row, so it only ever
        # holds the single OID taken from column index 1 of the current row.
        array = [(int(row[1]))]
        row = cursor.fetchone()

        # With a one-element list this inner loop runs exactly once and
        # simply copies that element into OID.
        i = 0
        while i<len(array):
            OID = (array[i])
            i = i + 1
            print(OID)


        # The API key sent with the download request is the SHA-256 hex
        # digest of the OID followed by a (redacted) suffix.
        string = f"{OID}*"
        encoded = string.encode()
        result = hashlib.sha256(encoded)
        resultHash = (result.hexdigest())

        Telefoongesprek = requests.get(f"*{OID}", headers={f"api-key":f"{resultHash}"})



        # NOTE(review): the response body is written without checking the
        # HTTP status, so whatever the server returned ends up in the .mp3.
        with open("Telefoongesprek.mp3", "wb") as f:
            f.write(Telefoongesprek.content)

        src = "Telefoongesprek.mp3"
        dst = "Telefoongesprek.wav"

        # Convert the downloaded MP3 to WAV; the same two file names are
        # reused (overwritten) on every iteration.
        sound = AudioSegment.from_file(src)
        sound.export(dst, format="wav")


        # NOTE(review): this function is re-defined on every pass through
        # the enclosing while loop.
        def speech_recognize_continuous_from_file():
            speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
            speech_config.speech_recognition_language = "nl-NL"
            audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek.wav")

            speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

            # Flag flipped by stop_cb; polled by the wait loop below.
            done = False

            def stop_cb(evt):
                print('CLOSING on {}'.format(evt))
                nonlocal done
                done = True

            all_results = []
            def handle_final_result(evt):
                all_results.append(evt.result.text)
            # NOTE(review): the same handler is attached to four events;
            # session_started / session_stopped events presumably carry no
            # `.result` attribute - confirm against the Speech SDK docs.
            speech_recognizer.recognized.connect(handle_final_result)
            speech_recognizer.session_started.connect(handle_final_result)
            speech_recognizer.session_stopped.connect(handle_final_result)
            speech_recognizer.canceled.connect(handle_final_result)
            speech_recognizer.session_stopped.connect(stop_cb)
            speech_recognizer.canceled.connect(stop_cb)

            # Recognition runs in the background; poll every half second
            # until stop_cb has set `done`.
            speech_recognizer.start_continuous_recognition()
            while not done:
                time.sleep(.5)

            speech_recognizer.stop_continuous_recognition()

            print(all_results)

            # The transcript is stored as the str() of the result list.
            telefoongesprek = str(all_results)
            # NOTE(review): this is not a raw string, so `\U` is parsed as the
            # start of a unicode escape and `\t` as a tab character.
            filename = f"C:\Users\Beau\Contact-verkeer\contact-verkeer\telefoon\STT Transcriptions\Telefoongesprek#{OID}.txt"
            file = open(filename, "w")
            file.write(telefoongesprek)
            file.close()


        speech_recognize_continuous_from_file()
        # NOTE(review): these cleanup statements are indented inside the
        # `while row:` body, so they run at the end of the first iteration;
        # the `cursor.fetchone()` call on the next pass then refers to a
        # name that has already been deleted.
        cursor.close()
        del cursor

        conn.close()

# Any failure above (database, HTTP, conversion, transcription) is reported
# as a single printed line.
except Exception as e:
    print("Error: %s" % e)

各个部分单独运行时都没有问题，但我不知道应该把循环放在哪里，也不知道应该使用哪一种循环（for 还是 while）。目前我尝试的是遍历一个数组，但我觉得这种做法并不正确。

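错误信息：解码失败。ffmpeg 返回错误代码：1 [mp3 @ 000001cb8c57e0o0] 无法读取帧大小：无法定位到 1073。（英文原文大致为：Decoding failed. ffmpeg returned error code: 1 … Failed to read frame size: Could not seek to 1073.）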

我很确定这意味着我的 azure 函数找不到 mp3 文件，这意味着“Mp3 到 Wav”转换不起作用。

提前致谢！

Answer 1

如果我没有理解错，你有一个数据库，其中保存了大量电话通话的详细信息。每一行中的某个字段值用于获取对应的 mp3 文件，而你希望用 Azure 对其中每一个 mp3 文件做语音转文本。

所以你可以通过两种方式做到这一点：

遍历数据库中的所有行并将所有关联文件创建到本地磁盘中的一个文件夹中，并将 OID 作为您的文件名。
然后编写另一个循环来遍历此文件夹并将要转录的文件发送到 Azure Speech to Text 服务。

另一种方法是像您展示的那样在一个循环中完成所有操作，这需要进行一些更正。

循环的部分说清楚之后，我们再来看语音转文本的部分。Azure 支持直接发送压缩格式的音频进行转录，也就是说，你其实不需要先把 mp3 转换成 wav 文件。

请查看下面修改后的代码并进行更改：

# code snippet borrowed from azure samples
def speech_recognize_continuous_from_file(filename, oid=None):
    """Transcribe an MP3 file with Azure Speech and save the transcript.

    The audio is handed to the SDK in its compressed form through a pull
    stream, so no MP3-to-WAV conversion is needed beforehand.
    NOTE(review): compressed input presumably needs GStreamer installed on
    the machine - confirm against the Azure Speech documentation.

    Args:
        filename: Path of the MP3 file to transcribe.
        oid: Identifier used in the transcript's file name. When omitted,
            the module-level ``OID`` set by the calling loop is used, which
            is what this function always did before the parameter existed.

    Returns:
        None. The recognized phrases are printed and written, as the
        ``str()`` of a list, to ``Telefoongesprek#<oid>.txt``.
    """
    if oid is None:
        # Resolve the identifier up front so a missing global fails before
        # any audio is sent to the service, not after transcription.
        oid = OID

    class BinaryFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
        """Feed the raw bytes of a file to the SDK whenever it asks."""

        def __init__(self, filename: str):
            super().__init__()
            self._file_h = open(filename, "rb")

        def read(self, buffer: memoryview) -> int:
            try:
                frames = self._file_h.read(buffer.nbytes)
                buffer[:len(frames)] = frames
                # Returning 0 (empty read) signals end of stream.
                return len(frames)
            except Exception as ex:
                print('Exception in `read`: {}'.format(ex))
                raise

        def close(self) -> None:
            print('closing file')
            try:
                self._file_h.close()
            except Exception as ex:
                print('Exception in `close`: {}'.format(ex))
                raise

    # Describe the stream as MP3 so the service decodes it itself.
    compressed_format = speechsdk.audio.AudioStreamFormat(
        compressed_stream_format=speechsdk.AudioStreamContainerFormat.MP3)
    callback = BinaryFileReaderCallback(filename=filename)
    stream = speechsdk.audio.PullAudioInputStream(
        stream_format=compressed_format, pull_stream_callback=callback)

    speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
    speech_config.speech_recognition_language = "nl-NL"
    audio_config = speechsdk.audio.AudioConfig(stream=stream)

    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config)

    done = False

    def stop_cb(evt):
        print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    all_results = []

    def handle_final_result(evt):
        all_results.append(evt.result.text)

    # Only `recognized` events carry a final result. The session events have
    # no `.result`, so the result handler must not be attached to them.
    speech_recognizer.recognized.connect(handle_final_result)
    # Either of these marks the end of the audio (or a failure).
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    try:
        # Recognition runs on SDK threads; poll until a stop event arrives.
        while not done:
            time.sleep(.5)
    finally:
        # Stop the recognizer even if the wait is interrupted.
        speech_recognizer.stop_continuous_recognition()

    print(all_results)

    telefoongesprek = str(all_results)
    # Raw string: in a normal literal `\U` is an invalid unicode escape
    # (SyntaxError) and `\t` would silently become a tab. A separate name
    # keeps the `filename` argument from being overwritten.
    transcript_path = rf"C:\Users\Beau\Contact-verkeer\contact-verkeer\telefoon\STT Transcriptions\Telefoongesprek#{oid}.txt"
    # Explicit UTF-8 so any recognized character can be written regardless
    # of the platform's default encoding.
    with open(transcript_path, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(telefoongesprek)

# Driver: fetch every phone-call row, download its recording as MP3 and
# transcribe it. The connection objects start as None so the `finally`
# clause can tell whether there is anything to close.
conn = None
cursor = None
try:
    CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password

    conn = pyodbc.connect(CONNECTION_STRING)
    cursor = conn.cursor()
    storedproc = "* = *'"
    cursor.execute(storedproc)

    # A pyodbc cursor is iterable: each pass yields the next row, and the
    # loop ends when the result set is exhausted.
    for row in cursor:
        # Must stay a module-level name called OID: the transcription
        # function reads it to build the transcript's file name.
        OID = int(row[1])
        print(OID)

        # The API key is the SHA-256 hex digest of the OID plus a suffix.
        resultHash = hashlib.sha256(f"{OID}*".encode()).hexdigest()

        telefoongesprek_response = requests.get(f"*{OID}", headers={"api-key": resultHash})
        # Fail here on an HTTP error instead of saving an error page as
        # "mp3" and getting an obscure decoding failure later.
        telefoongesprek_response.raise_for_status()

        # save the file to local disk as mp3 (overwritten for every call)
        with open("Telefoongesprek.mp3", "wb") as f:
            f.write(telefoongesprek_response.content)

        # do the speech to text on the mp3 file
        speech_recognize_continuous_from_file(f.name)
except Exception as e:
    print("Error: %s" % e)
finally:
    # Release the database resources once, after all rows are processed,
    # and also when an error interrupted the loop.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()

由于我这边没有数据库连接，所以没有完整测试过这段代码。请根据你自己的用例自行修改；如果有任何问题，请告诉我。

Python for/while 循环

Python for/while loop

python

azure

speech-to-text