Google 云存储 WAV 文件上传 - “404 Requested Entity was not found”
Google cloud storage WAV file upload - “404 Requested Entity was not found”
我的代码引用了粘贴在 google 网站上的代码:https://cloud.google.com/storage/docs/uploading-objects
我正在尝试制作一个 python 程序来记录麦克风单声道音频,从中创建一个 WAV 文件,然后将其上传到 GCS 进行分析。我坚持的部分是上传到 GC 部分。我不知道应该替换什么,因为我什至不知道如何找到该文件路径。但是,我知道 mybucket 的名称是什么。它是 "gcspeechstorage"(我做到了)。此外,将文件上传到存储桶的代码块对我来说非常模糊,我现在意识到 Google 的样板代码对我不起作用。我收到 "google.api_core.exceptions.NotFound: 404 requested entity was not found"
错误。
如果有什么方法可以解决这个问题,那么我可以上传一个 1 分钟以上的剪辑并对其进行分析,那就太好了。我的 NLTK 工作正常。
我将 gcs_uri 定义为等于 os.path.join('gs://<gcspeechstorage>/<file_path_inside_bucket>')
,但我知道这只是部分完成。我不知道如何完成第二个论点。老实说,我什至不确定代码的顺序是否正确。
import pyaudio
import wave
import pprint
import argparse
import datetime
import io
import json
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from google.cloud import storage
import sys
from oauth2client.service_account import ServiceAccountCredentials
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* recording")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/Dave/Desktop/mizu/Project Mizu-7e2ecd8c5804.json'
bucket_name = "C:/Users/Dave/Desktop/mizu/output.wav"
source_file_name = "gcspeechstorage"
destination_blob_name = "output.wav"
gcs_uri = "gs://gcspeechstorage/output.wav"
def create_bucket(bucket_name):
"""Creates a new bucket."""
storage_client = storage.Client()
bucket = storage_client.create_bucket(bucket_name)
print('Bucket {} created'.format(bucket.name))
def upload_blob(bucket_name, source_file_name, destination_blob_name):
"""Uploads a file to the bucket."""
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
print('File {} uploaded to {}.'.format(
source_file_name,
destination_blob_name))
# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding= 'LINEAR16',
sample_rate_hertz=44100,
language_code='en-US')
operation = client.long_running_recognize(config, audio)
print('Waiting for operation to complete...')
response = operation.result(timeout=90)
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in response.results:
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
transcribedSpeechFile = open('speechToAnalyze.txt', 'a+') # this is where a text file is made with the transcribed speech
transcribedSpeechFile.write(format(result.alternatives[0].transcript))
transcribedSpeechFile.close()
print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]
if __name__ == '__main__':
transcribe_gcs(gcs_uri)
audio_rec = open('speechToAnalyze.txt', 'r')
sid = SentimentIntensityAnalyzer()
for sentence in audio_rec:
ss = sid.polarity_scores(sentence)
for k in ss:
print('{0}: {1}, '.format(k, ss[k]), end='')
print()
预期结果:将WAV文件上传到GCS,然后检索它转录,然后分析情绪。
实际结果:录制音频,然后崩溃,出现上述 404 错误。
错误:
Traceback (most recent call last):
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 57, in error_remapped_callable
return callable_(*args, **kwargs)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 565, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 467, in _end_unary_response_blocking
raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
status = StatusCode.NOT_FOUND
details = "Requested entity was not found."
debug_error_string = "{"created":"@1562714798.427000000","description":"Error received from peer ipv6:[2607:f8b0:4000:804::200a]:443","file":"src/core/lib/surface/call.cc","file_line":1052,"grpc_message":"Requested entity was not found.","grpc_status":5}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 100, in <module>
transcribe_gcs('C:/Users/Dave/Desktop/mizu/output.wav')
File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 79, in transcribe_gcs
operation = client.long_running_recognize(config, audio)
File "C:\Users\Dave\AppData\Local\Programs\Python\Python37\lib\site-packages\google\cloud\speech_v1\gapic\speech_client.py", line 326, in long_running_recognize
request, retry=retry, timeout=timeout, metadata=metadata
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\gapic_v1\method.py", line 143, in __call__
return wrapped_func(*args, **kwargs)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 273, in retry_wrapped_func
on_error=on_error,
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 182, in retry_target
return target()
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 59, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.NotFound: 404 Requested entity was not found
现在,您似乎正在将 gcs_uri C:/Users/Dave/Desktop/mizu/output.wav
传递给您的应用,但那是本地文件,而不是 GCS 中的对象。您需要先将 wav 文件上传到您的 GCS 存储桶,然后您需要在调用 long_running_recognize
时引用该对象。试试像这样的东西:
upload_blob("C:/Users/Dave/Desktop/mizu/output.wav", "gcspeechstorage", "output.wav")
transcribe_gcs("gs://gcspeechstorage/output.wav")
我的代码引用了粘贴在 google 网站上的代码:https://cloud.google.com/storage/docs/uploading-objects
我正在尝试制作一个 python 程序来记录麦克风单声道音频,从中创建一个 WAV 文件,然后将其上传到 GCS 进行分析。我坚持的部分是上传到 GC 部分。我不知道应该替换什么,因为我什至不知道如何找到该文件路径。但是,我知道 mybucket 的名称是什么。它是 "gcspeechstorage"(我做到了)。此外,将文件上传到存储桶的代码块对我来说非常模糊,我现在意识到 Google 的样板代码对我不起作用。我收到 "google.api_core.exceptions.NotFound: 404 requested entity was not found"
错误。
如果有什么方法可以解决这个问题,那么我可以上传一个 1 分钟以上的剪辑并对其进行分析,那就太好了。我的 NLTK 工作正常。
我将 gcs_uri 定义为等于 os.path.join('gs://<gcspeechstorage>/<file_path_inside_bucket>')
,但我知道这只是部分完成。我不知道如何完成第二个论点。老实说,我什至不确定代码的顺序是否正确。
import pyaudio
import wave
import pprint
import argparse
import datetime
import io
import json
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from google.cloud import storage
import sys
from oauth2client.service_account import ServiceAccountCredentials
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* recording")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/Dave/Desktop/mizu/Project Mizu-7e2ecd8c5804.json'
bucket_name = "C:/Users/Dave/Desktop/mizu/output.wav"
source_file_name = "gcspeechstorage"
destination_blob_name = "output.wav"
gcs_uri = "gs://gcspeechstorage/output.wav"
def create_bucket(bucket_name):
"""Creates a new bucket."""
storage_client = storage.Client()
bucket = storage_client.create_bucket(bucket_name)
print('Bucket {} created'.format(bucket.name))
def upload_blob(bucket_name, source_file_name, destination_blob_name):
"""Uploads a file to the bucket."""
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
print('File {} uploaded to {}.'.format(
source_file_name,
destination_blob_name))
# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding= 'LINEAR16',
sample_rate_hertz=44100,
language_code='en-US')
operation = client.long_running_recognize(config, audio)
print('Waiting for operation to complete...')
response = operation.result(timeout=90)
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in response.results:
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
transcribedSpeechFile = open('speechToAnalyze.txt', 'a+') # this is where a text file is made with the transcribed speech
transcribedSpeechFile.write(format(result.alternatives[0].transcript))
transcribedSpeechFile.close()
print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]
if __name__ == '__main__':
transcribe_gcs(gcs_uri)
audio_rec = open('speechToAnalyze.txt', 'r')
sid = SentimentIntensityAnalyzer()
for sentence in audio_rec:
ss = sid.polarity_scores(sentence)
for k in ss:
print('{0}: {1}, '.format(k, ss[k]), end='')
print()
预期结果:将WAV文件上传到GCS,然后检索它转录,然后分析情绪。
实际结果:录制音频,然后崩溃,出现上述 404 错误。
错误:
Traceback (most recent call last):
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 57, in error_remapped_callable
return callable_(*args, **kwargs)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 565, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 467, in _end_unary_response_blocking
raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
status = StatusCode.NOT_FOUND
details = "Requested entity was not found."
debug_error_string = "{"created":"@1562714798.427000000","description":"Error received from peer ipv6:[2607:f8b0:4000:804::200a]:443","file":"src/core/lib/surface/call.cc","file_line":1052,"grpc_message":"Requested entity was not found.","grpc_status":5}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 100, in <module>
transcribe_gcs('C:/Users/Dave/Desktop/mizu/output.wav')
File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 79, in transcribe_gcs
operation = client.long_running_recognize(config, audio)
File "C:\Users\Dave\AppData\Local\Programs\Python\Python37\lib\site-packages\google\cloud\speech_v1\gapic\speech_client.py", line 326, in long_running_recognize
request, retry=retry, timeout=timeout, metadata=metadata
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\gapic_v1\method.py", line 143, in __call__
return wrapped_func(*args, **kwargs)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 273, in retry_wrapped_func
on_error=on_error,
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 182, in retry_target
return target()
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 59, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.NotFound: 404 Requested entity was not found
现在,您似乎正在将 gcs_uri C:/Users/Dave/Desktop/mizu/output.wav
传递给您的应用,但那是本地文件,而不是 GCS 中的对象。您需要先将 wav 文件上传到您的 GCS 存储桶,然后您需要在调用 long_running_recognize
时引用该对象。试试像这样的东西:
upload_blob("C:/Users/Dave/Desktop/mizu/output.wav", "gcspeechstorage", "output.wav")
transcribe_gcs("gs://gcspeechstorage/output.wav")