用于 neo4j 的 Python 加载脚本返回 ValueError
Python loading script for neo4j returns ValueError
我对编程比较陌生(从商业分析专业学生转行做数据分析师),正在测试一个 Python 脚本:逐行迭代 CSV,并为每一行构造一条 Cypher 查询以加载到 neo4j -
import pandas as pd
from neo4j import GraphDatabase
# Lift the display width limit for column contents when printing.
# NOTE(review): newer pandas versions expect None rather than -1 here — confirm against the installed version.
pd.set_option('display.max_colwidth', -1)
# neo4j credentials
uri= "bolt://localhost:7687"
userName= "neo4j"
password= "password"
# Load columns 0-5 of the CSV; header=0 takes the column names from the first row.
df = pd.read_csv('C://Users/ABC/Documents/Test/Test/lineage_stored_procedure_dedup.csv',
sep=',', index_col=None, header=0,usecols=[0,1,2,3,4,5])
# NOTE: the result of str.replace is not assigned back, so df.columns is left unchanged.
df.columns.str.replace(' ', '')
graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))
with graphDB_Driver.session() as graphDB_Session:
# NOTE: iterrows() yields (index, row) tuples; `row` is never used in the loop body.
for row in df.iterrows():
# BUG: df['...'] selects the whole column, so `cq` is a pandas Series rather than a str.
cq = 'merge (p:Program{programName:"'+df['Parent_Procedure']+'"}) set p.type = "'+df['Parent_Object_Type']+'"'
# run() evaluates `if not statement:` on that Series, which raises
# "ValueError: The truth value of a Series is ambiguous".
res = graphDB_Session.run(cq)
graphDB_Driver.close()
我收到以下错误 -
Traceback (most recent call last):
File "<ipython-input-91-01ba397763e3>", line 1, in <module>
runfile('C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py', wdir='C:/Users/ABC/Documents/Test/Test')
File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py", line 35, in <module>
res = graphDB_Session.run(cq)
File "C:\Users\ABC\Anaconda3\lib\site-packages\neo4j\__init__.py", line 429, in run
if not statement:
File "C:\Users\ABC\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1555, in __nonzero__
self.__class__.__name__
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
我明白这个错误意味着我没有按照 pandas 文档使用正确的位运算符。但我不明白应该在代码中的哪个位置使用它?感谢任何帮助。谢谢。
# Connect to the neo4j database server.
graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))

# The query texts are fixed; per-row values are sent as Cypher parameters
# ($name, $type, ...) instead of being concatenated into the query string,
# so a value containing a double quote cannot break or alter the query.
# NOTE(review): assumes the referenced columns hold plain strings with no
# missing values — confirm against the CSV.
cq1 = 'merge (p:Program{programName: $name}) set p.type = $type'
cq2 = cq1  # called procedures are merged with the same node pattern
cq3 = (
    'match (p1:Program{programName: $parent}) '
    'match (p2:Program{programName: $called}) '
    'merge (p1)-[:CALLS_TO]->(p2)'
)

try:
    with graphDB_Driver.session() as graphDB_Session:
        # CREATE NODES (:Program{Parent_Procedure}) set property 'type' = Parent_Object_Type
        # iterrows() yields (index, row); `row` is the Series for one CSV line.
        for index, row in df.iterrows():
            # Execute the Cypher query for this row.
            res1 = graphDB_Session.run(
                cq1,
                {"name": row["Parent_Procedure"], "type": row["Parent_Object_Type"]},
            )
            print(res1)

        # CREATE NODES (:Program{Called_Procedure}) set property 'type' = Called_Object_Type
        for index, row in df.iterrows():
            res2 = graphDB_Session.run(
                cq2,
                {"name": row["Called_Procedure"], "type": row["Called_Object_Type"]},
            )
            print(res2)

        # Create relationship - (Parent_Procedure)-[:CALLS_TO]->(Called_Procedure)
        # Runs after both node passes so that both endpoints already exist.
        for index, row in df.iterrows():
            res3 = graphDB_Session.run(
                cq3,
                {"parent": row["Parent_Procedure"], "called": row["Called_Procedure"]},
            )
            print(res3)
finally:
    # Release the driver's connections even if one of the queries fails.
    graphDB_Driver.close()
我对编程比较陌生(从商业分析专业学生转行做数据分析师),正在测试一个 Python 脚本:逐行迭代 CSV,并为每一行构造一条 Cypher 查询以加载到 neo4j -
import pandas as pd
from neo4j import GraphDatabase
# Lift the display width limit for column contents when printing.
# NOTE(review): newer pandas versions expect None rather than -1 here — confirm against the installed version.
pd.set_option('display.max_colwidth', -1)
# neo4j credentials
uri= "bolt://localhost:7687"
userName= "neo4j"
password= "password"
# Load columns 0-5 of the CSV; header=0 takes the column names from the first row.
df = pd.read_csv('C://Users/ABC/Documents/Test/Test/lineage_stored_procedure_dedup.csv',
sep=',', index_col=None, header=0,usecols=[0,1,2,3,4,5])
# NOTE: the result of str.replace is not assigned back, so df.columns is left unchanged.
df.columns.str.replace(' ', '')
graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))
with graphDB_Driver.session() as graphDB_Session:
# NOTE: iterrows() yields (index, row) tuples; `row` is never used in the loop body.
for row in df.iterrows():
# BUG: df['...'] selects the whole column, so `cq` is a pandas Series rather than a str.
cq = 'merge (p:Program{programName:"'+df['Parent_Procedure']+'"}) set p.type = "'+df['Parent_Object_Type']+'"'
# run() evaluates `if not statement:` on that Series, which raises
# "ValueError: The truth value of a Series is ambiguous".
res = graphDB_Session.run(cq)
graphDB_Driver.close()
我收到以下错误 -
Traceback (most recent call last):
File "<ipython-input-91-01ba397763e3>", line 1, in <module>
runfile('C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py', wdir='C:/Users/ABC/Documents/Test/Test')
File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py", line 35, in <module>
res = graphDB_Session.run(cq)
File "C:\Users\ABC\Anaconda3\lib\site-packages\neo4j\__init__.py", line 429, in run
if not statement:
File "C:\Users\ABC\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1555, in __nonzero__
self.__class__.__name__
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
我明白这个错误意味着我没有按照 pandas 文档使用正确的位运算符。但我不明白应该在代码中的哪个位置使用它?感谢任何帮助。谢谢。
# Connect to the neo4j database server.
graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))

# The query texts are fixed; per-row values are sent as Cypher parameters
# ($name, $type, ...) instead of being concatenated into the query string,
# so a value containing a double quote cannot break or alter the query.
# NOTE(review): assumes the referenced columns hold plain strings with no
# missing values — confirm against the CSV.
cq1 = 'merge (p:Program{programName: $name}) set p.type = $type'
cq2 = cq1  # called procedures are merged with the same node pattern
cq3 = (
    'match (p1:Program{programName: $parent}) '
    'match (p2:Program{programName: $called}) '
    'merge (p1)-[:CALLS_TO]->(p2)'
)

try:
    with graphDB_Driver.session() as graphDB_Session:
        # CREATE NODES (:Program{Parent_Procedure}) set property 'type' = Parent_Object_Type
        # iterrows() yields (index, row); `row` is the Series for one CSV line.
        for index, row in df.iterrows():
            # Execute the Cypher query for this row.
            res1 = graphDB_Session.run(
                cq1,
                {"name": row["Parent_Procedure"], "type": row["Parent_Object_Type"]},
            )
            print(res1)

        # CREATE NODES (:Program{Called_Procedure}) set property 'type' = Called_Object_Type
        for index, row in df.iterrows():
            res2 = graphDB_Session.run(
                cq2,
                {"name": row["Called_Procedure"], "type": row["Called_Object_Type"]},
            )
            print(res2)

        # Create relationship - (Parent_Procedure)-[:CALLS_TO]->(Called_Procedure)
        # Runs after both node passes so that both endpoints already exist.
        for index, row in df.iterrows():
            res3 = graphDB_Session.run(
                cq3,
                {"parent": row["Parent_Procedure"], "called": row["Called_Procedure"]},
            )
            print(res3)
finally:
    # Release the driver's connections even if one of the queries fails.
    graphDB_Driver.close()