使用 write_pandas 时出现“Table does not exist”(表不存在)错误
Table Does Not Exist Error When Using write_pandas
我遇到了一个非常令人沮丧的问题。这篇帖子的底部是我编写的一个函数,用于 (1) 在 Snowflake 中创建一个 table,以及 (2) 将 DataFrame 存入该 table。
table 的创建工作正常。问题具体出现在 write_pandas 这段代码中:
# Upload the DataFrame `df` into `table_name` inside `database`.`schema`
# over the open connection `conn`; this is the call reported as failing.
write_pandas(
conn=conn,
df=df,
table_name=table_name,
database=database,
schema=schema
)
我不断收到错误消息,指出我创建的 table“不存在”,原因是名称的写法对不上。例如,在数据库中,table 被创建为 DATABASE.SCHEMA.TABLE,但错误消息显示 'DATABASE.SCHEMA."TABLE"' does not exist
我知道这是一个简单的问题,但我暂时卡住了。任何帮助将不胜感激。
from datetime import datetime, timedelta, date
from airflow import DAG
from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator
from sqlalchemy import create_engine
import requests
from pandas.io.json import json_normalize
import numpy as np
from sqlalchemy.types import Integer, Text, String, DateTime
from IPython.display import display, HTML
from flatten_json import flatten
from snowflake.connector import connect
from snowflake.connector.pandas_tools import write_pandas
from airflow.operators.python_operator import PythonOperator
import os
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
def create_store_snowflake(df: "pd.DataFrame", table: str) -> None:
    """Create (or replace) a Snowflake table matching ``df`` and load ``df`` into it.

    Column names are upper-cased and stripped of ``-``, ``,`` and ``/``. Each
    column's Snowflake type is derived from its pandas dtype, falling back to
    ``varchar(16777216)`` for dtypes without an explicit mapping.

    Args:
        df: Data to store. The caller's frame is not modified; a renamed
            copy is uploaded.
        table: Target table name. It is upper-cased before use so that the
            unquoted name in ``CREATE TABLE`` and the double-quoted name that
            ``write_pandas`` generates refer to the same object.

    Raises:
        ValueError: If ``df`` has no columns, because no valid
            ``CREATE TABLE`` statement can be built.
    """
    # pandas dtype name -> Snowflake column type.
    snowflake_types = {
        "int": "int",
        "int64": "int",
        "object": "varchar(16777216)",
        "datetime64[ns]": "datetime",
        "float64": "float8",
        "bool": "boolean",
    }
    fallback_type = "varchar(16777216)"

    #quick transforms
    df = df.rename(columns=str.upper)
    # regex=True must be explicit: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the characters in place.
    df.columns = df.columns.str.replace('[-,/]', '', regex=True)

    if df.columns.empty:
        raise ValueError("df must have at least one column to create a table")

    #Define the table name, schema, and database you want to write to
    #Note: the schema and database need to already exist in Snowflake
    # Snowflake stores unquoted identifiers in upper case, while write_pandas
    # double-quotes (and therefore case-matches) the names it is given.
    # Normalising to upper case keeps both statements pointing at one object.
    table_name = table.upper()
    schema = 'schema'.upper()
    database = 'database'.upper()

    # Pair names with dtypes positionally so duplicate column names cannot
    # confuse the lookup; quote each name so spaces and reserved words work.
    column_defs = []
    for column, dtype in zip(df.columns, df.dtypes):
        quoted_column = '"' + column.replace('"', '""') + '"'
        column_type = snowflake_types.get(dtype.name, fallback_type)
        column_defs.append(f"{quoted_column} {column_type}")

    #Create the SQL statement to create or replace the table
    create_tbl_statement = (
        f"CREATE OR REPLACE TABLE {database}.{schema}.{table_name} (\n"
        + ",\n".join(column_defs)
        + ")"
    )

    #Connect to Snowflake using the required user
    # NOTE(review): credentials are placeholders; load real values from a
    # secrets store or environment variables instead of hard-coding them.
    conn = connect(
        user="user",
        password="password",
        account="account",
        role="role",
        database=database,
        schema=schema,
    )
    try:
        #Execute the SQL statement to create the table
        conn.cursor().execute(create_tbl_statement)
        print(f"{table_name} created!")

        #write df to created table
        write_pandas(
            conn=conn,
            df=df,
            table_name=table_name,
            database=database,
            schema=schema,
        )
        print(df.shape[0],f"rows written to {table_name} in Snowflake")
    finally:
        # Always release the session, even when the create or upload fails.
        conn.close()
只需确保表名是大写的,因为 Snowflake 会把未加引号的标识符一律存储为大写 ::face-palm::。也就是说,不能写成 create_store_snowflake(df,'mynewtable'),而必须写成 create_store_snowflake(df,'MYNEWTABLE')
当 table 标识符在创建时用双引号 (") 包裹时,以下规则适用:
# Start of the CREATE statement; the table name is concatenated as-is,
# with no double quotes added around it.
create_tbl_statement= "CREATE OR REPLACE TABLE " + database + "." + schema + "." + table_name
Delimited identifiers (i.e. identifiers enclosed in double quotes) are case-sensitive and can start with and contain any valid characters
Important
If an object is created using a double-quoted identifier, when referenced in a query or any other SQL statement, the identifier must be specified exactly as created, including the double quotes. Failure to include the quotes might result in an Object does not exist error (or similar type of error).
我遇到了一个非常令人沮丧的问题。这篇帖子的底部是我编写的一个函数,用于 (1) 在 Snowflake 中创建一个 table,以及 (2) 将 DataFrame 存入该 table。
table 的创建工作正常。问题具体出现在 write_pandas 这段代码中:
# Upload the DataFrame `df` into `table_name` inside `database`.`schema`
# over the open connection `conn`; this is the call reported as failing.
write_pandas(
conn=conn,
df=df,
table_name=table_name,
database=database,
schema=schema
)
我不断收到错误消息,指出我创建的 table“不存在”,原因是名称的写法对不上。例如,在数据库中,table 被创建为 DATABASE.SCHEMA.TABLE,但错误消息显示 'DATABASE.SCHEMA."TABLE"' does not exist
我知道这是一个简单的问题,但我暂时卡住了。任何帮助将不胜感激。
from datetime import datetime, timedelta, date
from airflow import DAG
from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator
from sqlalchemy import create_engine
import requests
from pandas.io.json import json_normalize
import numpy as np
from sqlalchemy.types import Integer, Text, String, DateTime
from IPython.display import display, HTML
from flatten_json import flatten
from snowflake.connector import connect
from snowflake.connector.pandas_tools import write_pandas
from airflow.operators.python_operator import PythonOperator
import os
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
def create_store_snowflake(df: "pd.DataFrame", table: str) -> None:
    """Create (or replace) a Snowflake table matching ``df`` and load ``df`` into it.

    Column names are upper-cased and stripped of ``-``, ``,`` and ``/``. Each
    column's Snowflake type is derived from its pandas dtype, falling back to
    ``varchar(16777216)`` for dtypes without an explicit mapping.

    Args:
        df: Data to store. The caller's frame is not modified; a renamed
            copy is uploaded.
        table: Target table name. It is upper-cased before use so that the
            unquoted name in ``CREATE TABLE`` and the double-quoted name that
            ``write_pandas`` generates refer to the same object.

    Raises:
        ValueError: If ``df`` has no columns, because no valid
            ``CREATE TABLE`` statement can be built.
    """
    # pandas dtype name -> Snowflake column type.
    snowflake_types = {
        "int": "int",
        "int64": "int",
        "object": "varchar(16777216)",
        "datetime64[ns]": "datetime",
        "float64": "float8",
        "bool": "boolean",
    }
    fallback_type = "varchar(16777216)"

    #quick transforms
    df = df.rename(columns=str.upper)
    # regex=True must be explicit: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the characters in place.
    df.columns = df.columns.str.replace('[-,/]', '', regex=True)

    if df.columns.empty:
        raise ValueError("df must have at least one column to create a table")

    #Define the table name, schema, and database you want to write to
    #Note: the schema and database need to already exist in Snowflake
    # Snowflake stores unquoted identifiers in upper case, while write_pandas
    # double-quotes (and therefore case-matches) the names it is given.
    # Normalising to upper case keeps both statements pointing at one object.
    table_name = table.upper()
    schema = 'schema'.upper()
    database = 'database'.upper()

    # Pair names with dtypes positionally so duplicate column names cannot
    # confuse the lookup; quote each name so spaces and reserved words work.
    column_defs = []
    for column, dtype in zip(df.columns, df.dtypes):
        quoted_column = '"' + column.replace('"', '""') + '"'
        column_type = snowflake_types.get(dtype.name, fallback_type)
        column_defs.append(f"{quoted_column} {column_type}")

    #Create the SQL statement to create or replace the table
    create_tbl_statement = (
        f"CREATE OR REPLACE TABLE {database}.{schema}.{table_name} (\n"
        + ",\n".join(column_defs)
        + ")"
    )

    #Connect to Snowflake using the required user
    # NOTE(review): credentials are placeholders; load real values from a
    # secrets store or environment variables instead of hard-coding them.
    conn = connect(
        user="user",
        password="password",
        account="account",
        role="role",
        database=database,
        schema=schema,
    )
    try:
        #Execute the SQL statement to create the table
        conn.cursor().execute(create_tbl_statement)
        print(f"{table_name} created!")

        #write df to created table
        write_pandas(
            conn=conn,
            df=df,
            table_name=table_name,
            database=database,
            schema=schema,
        )
        print(df.shape[0],f"rows written to {table_name} in Snowflake")
    finally:
        # Always release the session, even when the create or upload fails.
        conn.close()
只需确保表名是大写的,因为 Snowflake 会把未加引号的标识符一律存储为大写 ::face-palm::。也就是说,不能写成 create_store_snowflake(df,'mynewtable'),而必须写成 create_store_snowflake(df,'MYNEWTABLE')
当 table 标识符在创建时用双引号 (") 包裹时,以下规则适用:
# Start of the CREATE statement; the table name is concatenated as-is,
# with no double quotes added around it.
create_tbl_statement= "CREATE OR REPLACE TABLE " + database + "." + schema + "." + table_name
Delimited identifiers (i.e. identifiers enclosed in double quotes) are case-sensitive and can start with and contain any valid characters
Important
If an object is created using a double-quoted identifier, when referenced in a query or any other SQL statement, the identifier must be specified exactly as created, including the double quotes. Failure to include the quotes might result in an Object does not exist error (or similar type of error).