AttributeError: 'Namespace' object has no attribute 'project'

I am trying to reuse code copied from https://www.opsguru.io/post/solution-walkthrough-visualizing-daily-cloud-spend-on-gcp-using-gke-dataflow-bigquery-and-grafana . I am not very familiar with Python, so I'm asking for help here. I'm trying to copy GCP BigQuery data to Postgres.

I made some modifications to the code, and, either because of my mistakes or the code itself, I'm getting some errors.

Here is my code:

import uuid
import argparse
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions, GoogleCloudOptions, WorkerOptions
from beam_nuggets.io import relational_db
from apache_beam.io.gcp import bigquery


parser = argparse.ArgumentParser()
args = parser.parse_args()
project = args.project("project", help="Enter Project ID")
job_name = args.job_name + str(uuid.uuid4())
bigquery_source = args.bigquery_source
postgresql_user = args.postgresql_user
postgresql_password = args.postgresql_password
postgresql_host = args.postgresql_host
postgresql_port = args.postgresql_port
postgresql_db = args.postgresql_db
postgresql_table = args.postgresql_table
staging_location = args.staging_location
temp_location = args.temp_location
subnetwork = args.subnetwork
 
options = PipelineOptions(
    flags=["--requirements_file", "/opt/python/requirements.txt"])
# For Cloud execution, set the Cloud Platform project, job_name,
# staging location, temp_location and specify DataflowRunner.
 
google_cloud_options = options.view_as(GoogleCloudOptions)
google_cloud_options.project = project
google_cloud_options.job_name = job_name
google_cloud_options.staging_location = staging_location
google_cloud_options.temp_location = temp_location
google_cloud_options.region = "europe-west4"
worker_options = options.view_as(WorkerOptions)
worker_options.zone = "europe-west4-a"
worker_options.subnetwork = subnetwork
worker_options.max_num_workers = 20


 
options.view_as(StandardOptions).runner = 'DataflowRunner'

start_date = define_start_date()
with beam.Pipeline(options=options) as p:
    rows = p | 'QueryTableStdSQL' >> beam.io.Read(beam.io.BigQuerySource(
        query = 'SELECT \
        billing_account_id, \
        service.id as service_id, \
        service.description as service_description, \
        sku.id as sku_id, \
        sku.description as sku_description, \
        usage_start_time, \
        usage_end_time, \
        project.id as project_id, \
        project.name as project_description, \
        TO_JSON_STRING(project.labels) \
        as project_labels, \
        project.ancestry_numbers \
        as project_ancestry_numbers, \
        TO_JSON_STRING(labels) as labels, \
        TO_JSON_STRING(system_labels) as system_labels, \
        location.location as location_location, \
        location.country as location_country, \
        location.region as location_region, \
        location.zone as location_zone, \
        export_time, \
        cost, \
        currency, \
        currency_conversion_rate, \
        usage.amount as usage_amount, \
        usage.unit as usage_unit, \
        usage.amount_in_pricing_units as \
        usage_amount_in_pricing_units, \
        usage.pricing_unit as usage_pricing_unit, \
        TO_JSON_STRING(credits) as credits, \
        invoice.month as invoice_month cost_type \
        FROM `' + project + '.' + bigquery_source + '` \
        WHERE export_time >= "' + start_date + '"', use_standard_sql=True))
    source_config = relational_db.SourceConfiguration(
        drivername='postgresql+pg8000',
        host=postgresql_host,
        port=postgresql_port,
        username=postgresql_user,
        password=postgresql_password,
        database=postgresql_db,
        create_if_missing=True,
    )
    table_config = relational_db.TableConfiguration(
        name=postgresql_table,
        create_if_missing=True
    )
    rows | 'Writing to DB' >> relational_db.Write(
        source_config=source_config,
        table_config=table_config
    )

When I run the program, I get the following error:

bq-to-sql.py: error: unrecognized arguments: --project xxxxx --job_name bq-to-sql-job --bigquery_source xxxxxxxx --postgresql_user xxxxx --postgresql_password xxxxx --postgresql_host xx.xx.xx.xx --postgresql_port 5432 --postgresql_db xxxx --postgresql_table xxxx --staging_location gs://xxxxx-staging --temp_location gs://xxxxx-temp --subnetwork regions/europe-west4/subnetworks/xxxx

argparse needs to be configured. Argparse works like magic, but it does need that configuration. Between the line parser = argparse.ArgumentParser() and the line args = parser.parse_args() you need these lines:
parser.add_argument("--project")
parser.add_argument("--job_name")
parser.add_argument("--bigquery_source")
parser.add_argument("--postgresql_user")
parser.add_argument("--postgresql_password")
parser.add_argument("--postgresql_host")
parser.add_argument("--postgresql_port")
parser.add_argument("--postgresql_db")
parser.add_argument("--postgresql_table")
parser.add_argument("--staging_location")
parser.add_argument("--temp_location")
parser.add_argument("--subnetwork")

Argparse is a useful library. I would recommend adding many of the available options to these add_argument calls.
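For example, here is a minimal sketch of what the declarations could look like; the help texts, required flags and the int type for the port are illustrative assumptions on my part, not taken from the original post. It also shows how the parsed values are then read back as plain attributes of the Namespace object:

import uuid
import argparse

parser = argparse.ArgumentParser(description="Copy BigQuery billing data to PostgreSQL")
# Every flag passed on the command line must be declared here first
parser.add_argument("--project", required=True, help="GCP project ID")
parser.add_argument("--job_name", required=True, help="Dataflow job name prefix")
parser.add_argument("--bigquery_source", required=True, help="Source table, e.g. dataset.table")
parser.add_argument("--postgresql_port", type=int, default=5432, help="PostgreSQL port")
# ... declare the remaining --postgresql_*, --staging_location, --temp_location
# and --subnetwork flags in the same way
args = parser.parse_args()

# The parsed values are attributes on the Namespace, not callables:
project = args.project                      # not args.project("project", help=...)
job_name = args.job_name + str(uuid.uuid4())
postgresql_port = args.postgresql_port

Once every flag is declared this way, the "unrecognized arguments" error goes away, and args.project exists, so the line project = args.project("project", help="Enter Project ID") in your script should simply read project = args.project.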