如何根据 python 中不同文件的变量名添加值

Question

我有两个不同的文件：第一个文件包含 'InstanceType' 列以及其他列；第二个文件也有同名的 'InstanceType' 列，另外还包含与各实例类型对应的其他属性列。我想以 'InstanceType' 列为依据，把第二个文件中的数据作为新列添加到第一个文件的数据中。这是我的代码：

# Read the spreadsheet from in-memory bytes into a DataFrame.
# NOTE(review): `uploaded` is presumably the dict returned by a notebook file upload — confirm.
df=pd.read_excel(io.BytesIO(uploaded['spot-prices-2021-05-16.xlsx']))
# Bare expression: in a notebook cell this displays the DataFrame.
df

输入：

    AvailabilityZone    InstanceType    ProductDescription  SpotPrice   ymd_hms(Timestamp)
0   us-east-1f           r5a.4xlarge    Windows             1.0210     2021-05-16 21:14:12
1   us-east-1c           r5a.4xlarge    Windows             1.0210     2021-05-16 21:14:12
2   us-east-1b           r5a.4xlarge    Windows             1.0210     2021-05-16 21:14:12
3   us-east-1a           r5a.4xlarge    Windows             1.0210     2021-05-16 21:14:12
4   us-east-1d           p3.8xlarge  Red Hat Enterprise Linux   3.8020  2021-05-16 21:14:07

第一个文件 csv

第二个文件输入

InstanceType    vCPU    GPUs    Memory (GiB)    Baseline Performance / vCPU CPU Credits Earned
0   t4g.nano    2       NaN     0.5             0.05      6.0           NaN
1   t4g.micro   2       NaN     1.0             0.10      12.0          NaN 
2   t4g.small   2       NaN     2.0             0.20      24.0          NaN 
3   t4g.medium  2       NaN     4.0             0.20      24.0          NaN 
4   t4g.large   2       NaN     8.0             0.30      36.0          NaN

# Outer-join df and df2 on the shared 'InstanceType' column; str('outer') evaluates to plain 'outer'.
df3=pd.merge(df,df2, on='InstanceType', how= str('outer'))

输出：

AvailabilityZone    InstanceType    ProductDescription  SpotPrice   ymd_hms(Timestamp)  vCPU    GPUs    Memory (GiB)    Baseline Performance / vCPU CPU Credits Earned / Hr GPU Mem (Gib)   

0        us-east-1f   r5a.4xlarge   Windows             1.0210      2021-05-16 21:14:12   NaN   NaN     NaN               NaN              NaN

预期结果：应根据 'InstanceType' 从第二个文件中取出对应的所有值，并显示在每一行中（而不是 NaN）。

提前致谢。

Answer 1

你的数据有问题：你在评论中给出的 CSV 文件是用分号（;）分隔的，而不是默认的逗号（,）。

另外，建议使用 pandas.read_csv：即使你的数据是 Excel 工作表，也可以先用 to_csv 方法将其转换成 CSV，然后再作为 CSV 读取。

我假设您已经阅读了两个 CSV 文件，所以这是我的解决方案：

import pandas as pd

# The spot-price export is ';'-separated, so the delimiter must be given explicitly;
# the instance table is read with the default ',' separator.
df1 = pd.read_csv('spot-prices-2021-05-16.csv', delimiter=';')
df2 = pd.read_csv('InstanceAWS.csv')
# Outer join on the shared 'InstanceType' column keeps the rows of both tables.
df_merged = pd.merge(df1, df2, on='InstanceType', how='outer')

# head is a method: it has to be called, otherwise print() shows the bound
# method object instead of the first rows.
print(df_merged.head())

Answer 2

我已经实现了你想要的效果。你可以将输出文件导入 pandas，它会准确显示你想要的内容。请先生成“combined.csv”，然后再运行导入代码。

这是要导入的代码：

import pandas as pd

# Load the generated combined.csv into a DataFrame and print it.
df = pd.read_csv('combined.csv')

print(df)

下面是生成“combined.csv”的代码。运行大约需要 30 秒。请确保两个源 CSV 文件与代码位于同一文件夹中。

import re
import os
from contextlib import redirect_stdout
import io
def enforceWhitelist(temp5): # function to ensure only the characters we want are allowed
    """Return ``temp5`` with every non-whitelisted character removed.

    Only ASCII letters, digits and the characters ``_ : . -`` are kept;
    everything else (spaces, quotes, brackets, separators, newlines, ...)
    is dropped.

    Args:
        temp5: The text to sanitise.

    Returns:
        The sanitised string, which may be empty.
    """
    # Raw string: in a normal literal "\-" is an invalid escape sequence that
    # newer Python versions warn about. The ranges cover exactly the
    # characters the pattern previously spelled out one by one.
    return re.sub(r'[^a-zA-Z0-9_:.\-]+', '', temp5) #regex pattern for whitelisting
def getInfo(temp9): #function to return the details from the 2nd(sub) csv as string
    """Return the row of the 2nd(sub) csv that contains ``temp9``, as CSV text.

    Reads the module-level ``listnamesY`` (a list of columns, each column a
    list of cell strings). Rows are scanned in order and the first row with a
    cell exactly equal to ``temp9`` wins. Matching whole cells, rather than
    searching the joined row text, keeps an empty or partial key such as
    ``"t4g"`` from picking up an unrelated row.

    Args:
        temp9: The cell value to look up (the instance type).

    Returns:
        The matching row's cells joined with ``,`` and followed by a trailing
        ``,``. If no row matches: ``temp9`` followed by one ``NOT_FOUND``
        entry per remaining column, in the same comma-terminated format.
    """
    # zip(*columns) walks the table row by row; it yields nothing when there
    # are no columns, so an empty table falls through to the fallback.
    for row in zip(*listnamesY):
        if temp9 in row:
            return ",".join(row) + ","
    return temp9 + "," + "NOT_FOUND," * (len(listnamesY) - 1)
# Build "combined.csv": every data row of the 1st(main) csv (';'-separated),
# with its InstanceType cell replaced by the details getInfo() returns from the
# 2nd(sub) csv (','-separated). All names and values pass through
# enforceWhitelist() first.
#
# The columns are kept in plain lists. Earlier revisions created one variable
# per column with exec(), which ran text derived from the input files as code
# and failed for column names that are not valid identifiers.
with open("spot-prices-2021-05-16.csv") as main_in:
    tempmain = main_in.readlines() #all the lines of the original 1st(main) csv
with open("InstanceAWS.csv") as sub_in:
    tempsub = sub_in.readlines() #all the lines of the 2nd(sub) csv
if not tempmain or not tempsub:
    raise ValueError("both source CSV files need at least a header line")

# Split every line once up front instead of re-splitting it for each cell.
main_cells = [line.split(";") for line in tempmain]
sub_cells = [line.split(",") for line in tempsub]

# Column names, sanitised and tagged with "_1" (main) / "_2" (sub).
listnames1 = [enforceWhitelist(cell + "_1") for cell in main_cells[0]]
listnames2 = [enforceWhitelist(cell + "_2") for cell in sub_cells[0]]
# Actual columns: one list of sanitised cell values per header column.
# A data line with fewer cells than the header raises IndexError here.
listnamesX = [
    [enforceWhitelist(row[col]) for row in main_cells[1:]]
    for col in range(len(main_cells[0]))
]
listnamesY = [
    [enforceWhitelist(row[col]) for row in sub_cells[1:]]
    for col in range(len(sub_cells[0]))
]

# name[:-2] drops the "_1"/"_2" tag again.
is_key_column = [name[:-2] == "InstanceType" for name in listnames1]

# Header: the InstanceType column is replaced by all sub column names.
# "No." is prepended because the first column has no name.
header = "No."
for name, is_key in zip(listnames1, is_key_column):
    if is_key:
        header += "".join(sub_name[:-2] + "," for sub_name in listnames2)
    else:
        header += name[:-2] + ","
output_lines = [header]

# getInfo() only reads listnamesY, which is not modified below, so each
# distinct value needs to be looked up only once.
info_cache = {}
for row in zip(*listnamesX):
    pieces = []
    for value, is_key in zip(row, is_key_column):
        if is_key:
            if value not in info_cache:
                info_cache[value] = getInfo(value)
            pieces.append(info_cache[value]) # already ends with ","
        else:
            pieces.append(value + ",")
    output_lines.append("".join(pieces))
# The file has always ended with one empty line; keep the output identical.
output_lines.append("")

# Opening with "w" truncates an existing file, so no separate delete is needed.
# line[:-1] drops the trailing "," of each line.
with open("combined.csv", "w") as f2:
    f2.writelines(line[:-1] + "\n" for line in output_lines) #write to output csv file

如何根据 python 中不同文件的变量名添加值

How to add values based on variable name from different files in python

python

merge

pandas