Unable to identify cause of: ValueError: Must have equal len keys and value when setting with an iterable
Unable to identify cause of: ValueError: Must have equal len keys and value when setting with an iterable
背景:
我有一个脚本,每天API调用财务数据,returns数据作为JSON 对象,在对 df
进行一些操作之前将其保存为 pandas df
,最后在上传到系统之前保存为 .csv
。
问题:
我的脚本一直运行良好,直到最近可能有新数据进入 JSON 对象,我现在收到以下 ValueError
-
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/3701323957.py in <module>
13
14 if __name__ == "__main__":
---> 15 main()
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/3701323957.py in main()
1 # Function that writes Exceptions Report and API Response as a consolidated .xlsx file.
2 def main():
----> 3 financial_accounts_df = dataframe_transformation()
4
5 # Writing dataframe to .csv
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/203167952.py in dataframe_transformation()
19 # Step 3 - remove the parent rows, leaving only children
20 rows_to_remove = financial_accounts_df['FinServ__SourceSystemId__c'].isin(financial_accounts_df['Addepar_Direct_Owner_ID__c'])
---> 21 financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
22 'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
23 financial_accounts_df = financial_accounts_df[~rows_to_remove]
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
714
715 iloc = self if self.name == "iloc" else self.obj.iloc
--> 716 iloc._setitem_with_indexer(indexer, value, self.name)
717
718 def _validate_key(self, key, axis: int):
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer(self, indexer, value, name)
1686 if take_split_path:
1687 # We have to operate column-wise
-> 1688 self._setitem_with_indexer_split_path(indexer, value, name)
1689 else:
1690 self._setitem_single_block(indexer, value, name)
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
1741 return self._setitem_with_indexer((pi, info_axis[0]), value[0])
1742
-> 1743 raise ValueError(
1744 "Must have equal len keys and value "
1745 "when setting with an iterable"
ValueError: Must have equal len keys and value when setting with an iterable
脚本: 我删除了调用 API 的函数,而是编写了一个函数来加载下面提供的示例数据集并模拟 ValueError
我收到了。注意:并非所有库依赖项都被使用:
# Importing depedencies
from configparser import ConfigParser
import datetime as date
import datetime as dt
import datetime
from datetime import timedelta
from datetime import date
import itertools
import pandas as pd
from pandas import json_normalize
import requests as requests
from requests.auth import HTTPBasicAuth
import time
import json
import jsonpath_ng as jp
import enlighten
import numpy as np
# Function to read API response / JSON Object
def response_writer():
with open('test_not_working.json') as f:
api_response = json.load(f)
return api_response
# api_response = response_writer()
api_response = response_writer()
# Set an auto_id_field which appears later as "json_path" in pandas dataframe columns
jp.jsonpath.auto_id_field = 'json_path'
def unpack_response():
# Create a dataframe from JSON response
expr = jp.parse('$..children.[*].json_path')
data = [{'json_path': m.value, **m.datum.value} for m in expr.find(api_response)]
df = pd.json_normalize(data).sort_values('json_path', ignore_index=True)
# Append a portfolio column
df['portfolio'] = df.loc[df.json_path.str.contains(r'total\.children\.\[\d+]$'), 'name']
df['portfolio'].fillna(method='ffill', inplace=True)
# Mapping column headers appropriately from the JSON
trans = {'columns.' + c['key']: c['display_name'] for c in api_response['meta']['columns']}
cols = ['json_path', 'name', 'FinServ__SourceSystemId__c', 'Addepar_Direct_Owner_ID__c', 'FinServ__FinancialAccountNumber__c', 'FinServ__OpenDate__c', 'FinServ__CloseDate__c', 'Display_Name__c',
'JP_Custodian__c', 'Online_Status__c', 'Custodian_Account_Name__c', 'Management_Style__c', 'Portfolio_Type__c', 'Advisory_Firm__c', 'FinServ__Balance__c', 'Target_Cash__c', 'Target_Cash_Notes__c']
df = df.rename(columns=trans)[cols]
# Then renaming those that could be phrased better.
df.rename(columns={'name': 'Name'}, inplace=True)
return df
# Function that takes df and performs varios manipulation, before saving in dataframe
def dataframe_transformation():
# Calling function that returns both dataframes
financial_accounts_df = unpack_response()
# Limiting character length of Name column
financial_accounts_df['Name'] = financial_accounts_df['Name'].str[:80]
# Removing Directly Owned Rows
financial_accounts_df = financial_accounts_df[financial_accounts_df['Name'].str.contains("Directly Owned")==False]
# Changing 'Holding Account' name to 'Name'
financial_accounts_df = financial_accounts_df.rename(columns={'Holding Account': 'Name'})
# Creating RecordTypeID Column and setting initial value of '0123h000000FPFjAAO'
financial_accounts_df['RecordTypeID'] = '0123h000000FPFjAAO'
# Step 1 - Search for 'Addepar_Direct_Owner_ID__c' values in 'FinServ__SourceSystemId__c' column.
# Step 2 - for rows where Step 1 is true, mirror 'Addepar_Direct_Owner_ID__c' value to match.
# Step 3 - remove the parent rows, leaving only children
rows_to_remove = financial_accounts_df['FinServ__SourceSystemId__c'].isin(financial_accounts_df['Addepar_Direct_Owner_ID__c'])
financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
financial_accounts_df = financial_accounts_df[~rows_to_remove]
# Duplicating the 'Addepar_Direct_Owner_ID__c' column and renaming duplication 'FinServ__PrimaryOwner__c'
financial_accounts_df['FinServ__PrimaryOwner__c'] = financial_accounts_df['Addepar_Direct_Owner_ID__c']
# Moving position of newly created column to index position 3 for ease of comparison
first_column = financial_accounts_df.pop('FinServ__PrimaryOwner__c')
financial_accounts_df.insert(3, 'FinServ__PrimaryOwner__c', first_column)
# Dropping the first df column / json_path
financial_accounts_df = financial_accounts_df.iloc[: , 1:]
return financial_accounts_df
# Function that writes dataframe to csv file
def main():
financial_accounts_df = dataframe_transformation()
# Writing dataframe to .csv
timestr = datetime.datetime.now().strftime("%Y-%m-%d")
filename = 'financial_accounts_'+timestr+'.csv'
# financial_accounts_df.to_csv(filename, encoding='utf-8')
financial_accounts_df.to_csv(filename, index=False)
print(f' Filename:',filename)
if __name__ == "__main__":
main()
识别问题的挑战:
理论上返回的数据将是相似的,唯一的变化是进入 JSON 对象的额外数据行.
我担心的是,我想不出一个解决方案可以让我确定是哪一行数据(总共 7000 行)导致了这个问题/哪一行数据显示了我的代码是(在某种程度上有缺陷)。
我感谢我提供的信息很简单,但是有人对我如何隔离 pandas 中有问题的数据行有什么建议吗df
and/or 知道我的代码可能存在缺陷并导致此问题吗?
示例数据:
这里有 2 个示例,一个适用于函数,另一个不适用。我希望这有助于帮助我对问题进行三角测量 -
示例 1(工作) - 这会毫无问题地运行函数:
{
"meta": {
"columns": [
{
"key": "node_id",
"display_name": "FinServ__SourceSystemId__c",
"output_type": "Word"
},
{
"key": "direct_owner_id",
"display_name": "Addepar_Direct_Owner_ID__c",
"output_type": "Word"
},
{
"key": "bottom_level_holding_account_number",
"display_name": "FinServ__FinancialAccountNumber__c",
"output_type": "Word"
},
{
"key": "_custom_account_open_date_425913",
"display_name": "FinServ__OpenDate__c",
"output_type": "Date"
},
{
"key": "_custom_close_date_411160",
"display_name": "FinServ__CloseDate__c",
"output_type": "Date"
},
{
"key": "display_name",
"display_name": "Display_Name__c",
"output_type": "Word"
},
{
"key": "_custom_jp_custodian_305769",
"display_name": "JP_Custodian__c",
"output_type": "Word"
},
{
"key": "online_status",
"display_name": "Online_Status__c",
"output_type": "Word"
},
{
"key": "_custom_custodian_account_name_487351",
"display_name": "Custodian_Account_Name__c",
"output_type": "Word"
},
{
"key": "_custom_management_style_295599",
"display_name": "Management_Style__c",
"output_type": "Word"
},
{
"key": "_custom_portfolio_type_295600",
"display_name": "Portfolio_Type__c",
"output_type": "Word"
},
{
"key": "_custom_advisor_302684",
"display_name": "Advisory_Firm__c",
"output_type": "Word"
},
{
"key": "_custom_test1_679151",
"display_name": "FinServ__Balance__c",
"output_type": "Number"
},
{
"key": "_custom_new_target_cash_balance_gwl_453547",
"display_name": "Target_Cash__c",
"output_type": "Number"
},
{
"key": "_custom_target_cash_notes_341522",
"display_name": "Target_Cash_Notes__c",
"output_type": "Word"
}
],
"groupings": [
{
"key": "holding_account",
"display_name": "Holding Account"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"online_status": null,
"_custom_test1_679151": null,
"direct_owner_id": null,
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": null,
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": null
},
"children": [
{
"entity_id": 10663945,
"name": "10 Laverockbank LLC Hold (668168788)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 5045.08,
"direct_owner_id": "10710095",
"_custom_account_open_date_425913": "2021-05-14",
"display_name": "10 Madison LLC Hold",
"_custom_custodian_account_name_487351": "10 MADISON LLC | &HOLDING | LLC",
"_custom_portfolio_type_295600": "Cash Management",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "668168788",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "Holding",
"_custom_target_cash_notes_341522": null,
"node_id": "10663945"
},
"children": []
}
]
}
}
},
"included": []
}
示例 2 (ValueError
) - 这会创建一个 ValueError
并且是 1/2 打 children
抛出的示例ValueError
:
{
"meta": {
"columns": [
{
"key": "node_id",
"display_name": "FinServ__SourceSystemId__c",
"output_type": "Word"
},
{
"key": "direct_owner_id",
"display_name": "Addepar_Direct_Owner_ID__c",
"output_type": "Word"
},
{
"key": "bottom_level_holding_account_number",
"display_name": "FinServ__FinancialAccountNumber__c",
"output_type": "Word"
},
{
"key": "_custom_account_open_date_425913",
"display_name": "FinServ__OpenDate__c",
"output_type": "Date"
},
{
"key": "_custom_close_date_411160",
"display_name": "FinServ__CloseDate__c",
"output_type": "Date"
},
{
"key": "display_name",
"display_name": "Display_Name__c",
"output_type": "Word"
},
{
"key": "_custom_jp_custodian_305769",
"display_name": "JP_Custodian__c",
"output_type": "Word"
},
{
"key": "online_status",
"display_name": "Online_Status__c",
"output_type": "Word"
},
{
"key": "_custom_custodian_account_name_487351",
"display_name": "Custodian_Account_Name__c",
"output_type": "Word"
},
{
"key": "_custom_management_style_295599",
"display_name": "Management_Style__c",
"output_type": "Word"
},
{
"key": "_custom_portfolio_type_295600",
"display_name": "Portfolio_Type__c",
"output_type": "Word"
},
{
"key": "_custom_advisor_302684",
"display_name": "Advisory_Firm__c",
"output_type": "Word"
},
{
"key": "_custom_test1_679151",
"display_name": "FinServ__Balance__c",
"output_type": "Number"
},
{
"key": "_custom_new_target_cash_balance_gwl_453547",
"display_name": "Target_Cash__c",
"output_type": "Number"
},
{
"key": "_custom_target_cash_notes_341522",
"display_name": "Target_Cash_Notes__c",
"output_type": "Word"
}
],
"groupings": [
{
"key": "holding_account",
"display_name": "Holding Account"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"online_status": null,
"_custom_test1_679151": null,
"direct_owner_id": null,
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": null,
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": null
},
"children": [
{
"entity_id": 15425904,
"name": " WF Rev Tr US Eq 2 Wrapper (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Offline",
"_custom_test1_679151": 99.86,
"direct_owner_id": "7400472",
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": "15425904"
},
"children": [
{
"entity_id": 13845019,
"name": "WF Rev Tr US Eq 2 (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 99.86,
"direct_owner_id": "15425904",
"_custom_account_open_date_425913": null,
"display_name": "WF Rev Tr US Eq 2",
"_custom_custodian_account_name_487351": "RYAN HUDSON REVOCABLE TRUST",
"_custom_portfolio_type_295600": "Core Portfolio: Liquid",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "US Core Tax-Loss Harvesting",
"_custom_target_cash_notes_341522": null,
"node_id": "13845019"
},
"children": []
},
{
"entity_id": 15425937,
"name": "WF Rev Tr US Eq 2 Non-Discretionary (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Offline",
"_custom_test1_679151": 0,
"direct_owner_id": "15425904",
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": "E79508009",
"_custom_portfolio_type_295600": "L.I.F.E. Assets",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "N/A - Client Directed",
"_custom_jp_custodian_305769": "",
"_custom_management_style_295599": "External",
"_custom_target_cash_notes_341522": null,
"node_id": "15425937"
},
"children": []
}
]
},
{
"entity_id": 10663945,
"name": "10 Laverockbank LLC Hold (668168788)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 5045.08,
"direct_owner_id": "10710095",
"_custom_account_open_date_425913": "2021-05-14",
"display_name": "10 Madison LLC Hold",
"_custom_custodian_account_name_487351": "10 MADISON LLC | &HOLDING | LLC",
"_custom_portfolio_type_295600": "Cash Management",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "668168788",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "Holding",
"_custom_target_cash_notes_341522": null,
"node_id": "10663945"
},
"children": []
}
]
}
}
},
"included": []
}
看了你的代码大约half-hour,结构和语法太复杂,这对你来说不是一个好的编码习惯。
我会尽量理解你想做什么。
这里有一些建议给你。
如果你下载的json数据超大,json path
不是很好的解析方式,会消耗大量内存,或者,如果是来自 requests
的 HTTP 响应,json path
是可以的,因为通常返回的数据不是那么大。
固定列的顺序并重命名不是合适的方法。因为您无法确保每次的顺序都不相同,尤其是对于 dict
对象的键。更不用说从服务器返回的内容,这意味着您无法控制内容。
针对你的问题
financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
第一个索引financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values)
是[True, False, False, False]
第二个索引 rows_to_remove
是 [False, True, True, False]
是你的值错误异常的主要原因。
而且,我不知道你将如何处理嵌套 children
元素,但你必须确保每个索引的 Ture
数量相等。
背景:
我有一个脚本,每天API调用财务数据,returns数据作为JSON 对象,在对 df
进行一些操作之前将其保存为 pandas df
,最后在上传到系统之前保存为 .csv
。
问题:
我的脚本一直运行良好,直到最近可能有新数据进入 JSON 对象,我现在收到以下 ValueError
-
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/3701323957.py in <module>
13
14 if __name__ == "__main__":
---> 15 main()
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/3701323957.py in main()
1 # Function that writes Exceptions Report and API Response as a consolidated .xlsx file.
2 def main():
----> 3 financial_accounts_df = dataframe_transformation()
4
5 # Writing dataframe to .csv
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/203167952.py in dataframe_transformation()
19 # Step 3 - remove the parent rows, leaving only children
20 rows_to_remove = financial_accounts_df['FinServ__SourceSystemId__c'].isin(financial_accounts_df['Addepar_Direct_Owner_ID__c'])
---> 21 financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
22 'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
23 financial_accounts_df = financial_accounts_df[~rows_to_remove]
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
714
715 iloc = self if self.name == "iloc" else self.obj.iloc
--> 716 iloc._setitem_with_indexer(indexer, value, self.name)
717
718 def _validate_key(self, key, axis: int):
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer(self, indexer, value, name)
1686 if take_split_path:
1687 # We have to operate column-wise
-> 1688 self._setitem_with_indexer_split_path(indexer, value, name)
1689 else:
1690 self._setitem_single_block(indexer, value, name)
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
1741 return self._setitem_with_indexer((pi, info_axis[0]), value[0])
1742
-> 1743 raise ValueError(
1744 "Must have equal len keys and value "
1745 "when setting with an iterable"
ValueError: Must have equal len keys and value when setting with an iterable
脚本: 我删除了调用 API 的函数,而是编写了一个函数来加载下面提供的示例数据集并模拟 ValueError
我收到了。注意:并非所有库依赖项都被使用:
# Importing depedencies
from configparser import ConfigParser
import datetime as date
import datetime as dt
import datetime
from datetime import timedelta
from datetime import date
import itertools
import pandas as pd
from pandas import json_normalize
import requests as requests
from requests.auth import HTTPBasicAuth
import time
import json
import jsonpath_ng as jp
import enlighten
import numpy as np
# Function to read API response / JSON Object
def response_writer():
with open('test_not_working.json') as f:
api_response = json.load(f)
return api_response
# api_response = response_writer()
api_response = response_writer()
# Set an auto_id_field which appears later as "json_path" in pandas dataframe columns
jp.jsonpath.auto_id_field = 'json_path'
def unpack_response():
# Create a dataframe from JSON response
expr = jp.parse('$..children.[*].json_path')
data = [{'json_path': m.value, **m.datum.value} for m in expr.find(api_response)]
df = pd.json_normalize(data).sort_values('json_path', ignore_index=True)
# Append a portfolio column
df['portfolio'] = df.loc[df.json_path.str.contains(r'total\.children\.\[\d+]$'), 'name']
df['portfolio'].fillna(method='ffill', inplace=True)
# Mapping column headers appropriately from the JSON
trans = {'columns.' + c['key']: c['display_name'] for c in api_response['meta']['columns']}
cols = ['json_path', 'name', 'FinServ__SourceSystemId__c', 'Addepar_Direct_Owner_ID__c', 'FinServ__FinancialAccountNumber__c', 'FinServ__OpenDate__c', 'FinServ__CloseDate__c', 'Display_Name__c',
'JP_Custodian__c', 'Online_Status__c', 'Custodian_Account_Name__c', 'Management_Style__c', 'Portfolio_Type__c', 'Advisory_Firm__c', 'FinServ__Balance__c', 'Target_Cash__c', 'Target_Cash_Notes__c']
df = df.rename(columns=trans)[cols]
# Then renaming those that could be phrased better.
df.rename(columns={'name': 'Name'}, inplace=True)
return df
# Function that takes df and performs varios manipulation, before saving in dataframe
def dataframe_transformation():
# Calling function that returns both dataframes
financial_accounts_df = unpack_response()
# Limiting character length of Name column
financial_accounts_df['Name'] = financial_accounts_df['Name'].str[:80]
# Removing Directly Owned Rows
financial_accounts_df = financial_accounts_df[financial_accounts_df['Name'].str.contains("Directly Owned")==False]
# Changing 'Holding Account' name to 'Name'
financial_accounts_df = financial_accounts_df.rename(columns={'Holding Account': 'Name'})
# Creating RecordTypeID Column and setting initial value of '0123h000000FPFjAAO'
financial_accounts_df['RecordTypeID'] = '0123h000000FPFjAAO'
# Step 1 - Search for 'Addepar_Direct_Owner_ID__c' values in 'FinServ__SourceSystemId__c' column.
# Step 2 - for rows where Step 1 is true, mirror 'Addepar_Direct_Owner_ID__c' value to match.
# Step 3 - remove the parent rows, leaving only children
rows_to_remove = financial_accounts_df['FinServ__SourceSystemId__c'].isin(financial_accounts_df['Addepar_Direct_Owner_ID__c'])
financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
financial_accounts_df = financial_accounts_df[~rows_to_remove]
# Duplicating the 'Addepar_Direct_Owner_ID__c' column and renaming duplication 'FinServ__PrimaryOwner__c'
financial_accounts_df['FinServ__PrimaryOwner__c'] = financial_accounts_df['Addepar_Direct_Owner_ID__c']
# Moving position of newly created column to index position 3 for ease of comparison
first_column = financial_accounts_df.pop('FinServ__PrimaryOwner__c')
financial_accounts_df.insert(3, 'FinServ__PrimaryOwner__c', first_column)
# Dropping the first df column / json_path
financial_accounts_df = financial_accounts_df.iloc[: , 1:]
return financial_accounts_df
# Function that writes dataframe to csv file
def main():
financial_accounts_df = dataframe_transformation()
# Writing dataframe to .csv
timestr = datetime.datetime.now().strftime("%Y-%m-%d")
filename = 'financial_accounts_'+timestr+'.csv'
# financial_accounts_df.to_csv(filename, encoding='utf-8')
financial_accounts_df.to_csv(filename, index=False)
print(f' Filename:',filename)
if __name__ == "__main__":
main()
识别问题的挑战:
理论上返回的数据将是相似的,唯一的变化是进入 JSON 对象的额外数据行.
我担心的是,我想不出一个解决方案可以让我确定是哪一行数据(总共 7000 行)导致了这个问题/哪一行数据显示了我的代码是(在某种程度上有缺陷)。
我感谢我提供的信息很简单,但是有人对我如何隔离 pandas 中有问题的数据行有什么建议吗df
and/or 知道我的代码可能存在缺陷并导致此问题吗?
示例数据:
这里有 2 个示例,一个适用于函数,另一个不适用。我希望这有助于帮助我对问题进行三角测量 -
示例 1(工作) - 这会毫无问题地运行函数:
{
"meta": {
"columns": [
{
"key": "node_id",
"display_name": "FinServ__SourceSystemId__c",
"output_type": "Word"
},
{
"key": "direct_owner_id",
"display_name": "Addepar_Direct_Owner_ID__c",
"output_type": "Word"
},
{
"key": "bottom_level_holding_account_number",
"display_name": "FinServ__FinancialAccountNumber__c",
"output_type": "Word"
},
{
"key": "_custom_account_open_date_425913",
"display_name": "FinServ__OpenDate__c",
"output_type": "Date"
},
{
"key": "_custom_close_date_411160",
"display_name": "FinServ__CloseDate__c",
"output_type": "Date"
},
{
"key": "display_name",
"display_name": "Display_Name__c",
"output_type": "Word"
},
{
"key": "_custom_jp_custodian_305769",
"display_name": "JP_Custodian__c",
"output_type": "Word"
},
{
"key": "online_status",
"display_name": "Online_Status__c",
"output_type": "Word"
},
{
"key": "_custom_custodian_account_name_487351",
"display_name": "Custodian_Account_Name__c",
"output_type": "Word"
},
{
"key": "_custom_management_style_295599",
"display_name": "Management_Style__c",
"output_type": "Word"
},
{
"key": "_custom_portfolio_type_295600",
"display_name": "Portfolio_Type__c",
"output_type": "Word"
},
{
"key": "_custom_advisor_302684",
"display_name": "Advisory_Firm__c",
"output_type": "Word"
},
{
"key": "_custom_test1_679151",
"display_name": "FinServ__Balance__c",
"output_type": "Number"
},
{
"key": "_custom_new_target_cash_balance_gwl_453547",
"display_name": "Target_Cash__c",
"output_type": "Number"
},
{
"key": "_custom_target_cash_notes_341522",
"display_name": "Target_Cash_Notes__c",
"output_type": "Word"
}
],
"groupings": [
{
"key": "holding_account",
"display_name": "Holding Account"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"online_status": null,
"_custom_test1_679151": null,
"direct_owner_id": null,
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": null,
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": null
},
"children": [
{
"entity_id": 10663945,
"name": "10 Laverockbank LLC Hold (668168788)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 5045.08,
"direct_owner_id": "10710095",
"_custom_account_open_date_425913": "2021-05-14",
"display_name": "10 Madison LLC Hold",
"_custom_custodian_account_name_487351": "10 MADISON LLC | &HOLDING | LLC",
"_custom_portfolio_type_295600": "Cash Management",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "668168788",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "Holding",
"_custom_target_cash_notes_341522": null,
"node_id": "10663945"
},
"children": []
}
]
}
}
},
"included": []
}
示例 2 (ValueError
) - 这会创建一个 ValueError
并且是 1/2 打 children
抛出的示例ValueError
:
{
"meta": {
"columns": [
{
"key": "node_id",
"display_name": "FinServ__SourceSystemId__c",
"output_type": "Word"
},
{
"key": "direct_owner_id",
"display_name": "Addepar_Direct_Owner_ID__c",
"output_type": "Word"
},
{
"key": "bottom_level_holding_account_number",
"display_name": "FinServ__FinancialAccountNumber__c",
"output_type": "Word"
},
{
"key": "_custom_account_open_date_425913",
"display_name": "FinServ__OpenDate__c",
"output_type": "Date"
},
{
"key": "_custom_close_date_411160",
"display_name": "FinServ__CloseDate__c",
"output_type": "Date"
},
{
"key": "display_name",
"display_name": "Display_Name__c",
"output_type": "Word"
},
{
"key": "_custom_jp_custodian_305769",
"display_name": "JP_Custodian__c",
"output_type": "Word"
},
{
"key": "online_status",
"display_name": "Online_Status__c",
"output_type": "Word"
},
{
"key": "_custom_custodian_account_name_487351",
"display_name": "Custodian_Account_Name__c",
"output_type": "Word"
},
{
"key": "_custom_management_style_295599",
"display_name": "Management_Style__c",
"output_type": "Word"
},
{
"key": "_custom_portfolio_type_295600",
"display_name": "Portfolio_Type__c",
"output_type": "Word"
},
{
"key": "_custom_advisor_302684",
"display_name": "Advisory_Firm__c",
"output_type": "Word"
},
{
"key": "_custom_test1_679151",
"display_name": "FinServ__Balance__c",
"output_type": "Number"
},
{
"key": "_custom_new_target_cash_balance_gwl_453547",
"display_name": "Target_Cash__c",
"output_type": "Number"
},
{
"key": "_custom_target_cash_notes_341522",
"display_name": "Target_Cash_Notes__c",
"output_type": "Word"
}
],
"groupings": [
{
"key": "holding_account",
"display_name": "Holding Account"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"online_status": null,
"_custom_test1_679151": null,
"direct_owner_id": null,
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": null,
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": null
},
"children": [
{
"entity_id": 15425904,
"name": " WF Rev Tr US Eq 2 Wrapper (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Offline",
"_custom_test1_679151": 99.86,
"direct_owner_id": "7400472",
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": "15425904"
},
"children": [
{
"entity_id": 13845019,
"name": "WF Rev Tr US Eq 2 (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 99.86,
"direct_owner_id": "15425904",
"_custom_account_open_date_425913": null,
"display_name": "WF Rev Tr US Eq 2",
"_custom_custodian_account_name_487351": "RYAN HUDSON REVOCABLE TRUST",
"_custom_portfolio_type_295600": "Core Portfolio: Liquid",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "US Core Tax-Loss Harvesting",
"_custom_target_cash_notes_341522": null,
"node_id": "13845019"
},
"children": []
},
{
"entity_id": 15425937,
"name": "WF Rev Tr US Eq 2 Non-Discretionary (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Offline",
"_custom_test1_679151": 0,
"direct_owner_id": "15425904",
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": "E79508009",
"_custom_portfolio_type_295600": "L.I.F.E. Assets",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "N/A - Client Directed",
"_custom_jp_custodian_305769": "",
"_custom_management_style_295599": "External",
"_custom_target_cash_notes_341522": null,
"node_id": "15425937"
},
"children": []
}
]
},
{
"entity_id": 10663945,
"name": "10 Laverockbank LLC Hold (668168788)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 5045.08,
"direct_owner_id": "10710095",
"_custom_account_open_date_425913": "2021-05-14",
"display_name": "10 Madison LLC Hold",
"_custom_custodian_account_name_487351": "10 MADISON LLC | &HOLDING | LLC",
"_custom_portfolio_type_295600": "Cash Management",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "668168788",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "Holding",
"_custom_target_cash_notes_341522": null,
"node_id": "10663945"
},
"children": []
}
]
}
}
},
"included": []
}
看了你的代码大约half-hour,结构和语法太复杂,这对你来说不是一个好的编码习惯。
我会尽量理解你想做什么。
这里有一些建议给你。
如果你下载的json数据超大,
json path
不是很好的解析方式,会消耗大量内存,或者,如果是来自requests
的 HTTP 响应,json path
是可以的,因为通常返回的数据不是那么大。固定列的顺序并重命名不是合适的方法。因为您无法确保每次的顺序都不相同,尤其是对于
dict
对象的键。更不用说从服务器返回的内容,这意味着您无法控制内容。针对你的问题
financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
第一个索引financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values)
是[True, False, False, False]
第二个索引 rows_to_remove
是 [False, True, True, False]
是你的值错误异常的主要原因。
而且,我不知道你将如何处理嵌套 children
元素,但你必须确保每个索引的 Ture
数量相等。