使用循环在 Pandas 中创建掩码
Create a mask in Pandas with a loop
# Draw 20 random rows from `fips` (defined elsewhere; presumably a pandas DataFrame -- confirm).
rand_samp = fips.sample(n=20)
# Renumber the sampled rows in place.
rand_samp.reset_index(inplace=True)
print(rand_samp)
# Plain Python list of the sampled CountyID values.
countys = rand_samp['CountyID'].to_list()
# Row mask over `temp`: True where CountyID equals any one of the sampled IDs.
# Spelled out as 20 explicit comparisons (countys[0] .. countys[19]), so it is
# tied to the sample size chosen above and must be edited by hand if n changes.
mask = ((temp['CountyID'] == countys[0]) | (temp['CountyID'] == countys[1]) | (temp['CountyID'] == countys[2]) | (temp['CountyID'] == countys[3]) | (temp['CountyID'] == countys[4]) | (temp['CountyID'] == countys[5]) | (temp['CountyID'] == countys[6]) | (temp['CountyID'] == countys[7]) | (temp['CountyID'] == countys[8]) | (temp['CountyID'] == countys[9]) | (temp['CountyID'] == countys[10]) | (temp['CountyID'] == countys[11]) | (temp['CountyID'] == countys[12]) | (temp['CountyID'] == countys[13]) | (temp['CountyID'] == countys[14]) | (temp['CountyID'] == countys[15]) | (temp['CountyID'] == countys[16]) | (temp['CountyID'] == countys[17]) | (temp['CountyID'] == countys[18]) | (temp['CountyID'] == countys[19]))
# Keep only the rows flagged by the mask, then renumber them in place.
temp_rand = temp[mask]
temp_rand.reset_index(inplace=True)
# Bare expression: shows the frame in a notebook/REPL; does nothing when run as a script.
temp_rand
我想找一种更好的方法来为数据集创建掩码，而不必逐条手写每一个比较语句。我尝试过用 for 循环遍历列表，再把结果追加到新的数据集中，但有没有其他方法？
isin()
就是您要找的。
import pandas as pd

# Toy frame: one row per country with a made-up GDP figure.
temp = pd.DataFrame(
    {
        'CountyID': ['USA', 'Poland', 'Austria'],
        'GDP': [1, 8, 3],
    }
)
# Values to keep; 'Germany' does not occur in the frame and simply matches nothing.
countys = ['Germany', 'Austria', 'Poland']

# Element-wise membership test: True where the row's CountyID is in `countys`.
mask = temp['CountyID'].isin(countys)
print(mask)

# Boolean indexing keeps only the rows flagged True (original index labels are kept).
temp_rand = temp.loc[mask]
输出:
0 False
1 True
2 True
Name: CountyID, dtype: bool
# Draw 20 random rows from `fips` (defined elsewhere; presumably a pandas DataFrame -- confirm).
rand_samp = fips.sample(n=20)
# Renumber the sampled rows in place.
rand_samp.reset_index(inplace=True)
print(rand_samp)
# Plain Python list of the sampled CountyID values.
countys = rand_samp['CountyID'].to_list()
# Row mask over `temp`: True where CountyID equals any one of the sampled IDs.
# Spelled out as 20 explicit comparisons (countys[0] .. countys[19]), so it is
# tied to the sample size chosen above and must be edited by hand if n changes.
mask = ((temp['CountyID'] == countys[0]) | (temp['CountyID'] == countys[1]) | (temp['CountyID'] == countys[2]) | (temp['CountyID'] == countys[3]) | (temp['CountyID'] == countys[4]) | (temp['CountyID'] == countys[5]) | (temp['CountyID'] == countys[6]) | (temp['CountyID'] == countys[7]) | (temp['CountyID'] == countys[8]) | (temp['CountyID'] == countys[9]) | (temp['CountyID'] == countys[10]) | (temp['CountyID'] == countys[11]) | (temp['CountyID'] == countys[12]) | (temp['CountyID'] == countys[13]) | (temp['CountyID'] == countys[14]) | (temp['CountyID'] == countys[15]) | (temp['CountyID'] == countys[16]) | (temp['CountyID'] == countys[17]) | (temp['CountyID'] == countys[18]) | (temp['CountyID'] == countys[19]))
# Keep only the rows flagged by the mask, then renumber them in place.
temp_rand = temp[mask]
temp_rand.reset_index(inplace=True)
# Bare expression: shows the frame in a notebook/REPL; does nothing when run as a script.
temp_rand
我想找一种更好的方法来为数据集创建掩码，而不必逐条手写每一个比较语句。我尝试过用 for 循环遍历列表，再把结果追加到新的数据集中，但有没有其他方法？
isin()
就是您要找的。
import pandas as pd

# Small demonstration frame: three countries, each with an illustrative GDP value.
temp = pd.DataFrame(
    {
        'CountyID': ['USA', 'Poland', 'Austria'],
        'GDP': [1, 8, 3],
    }
)
# Lookup values; entries missing from the frame (here 'Germany') match no row.
countys = ['Germany', 'Austria', 'Poland']

# One vectorised membership check replaces a chain of `==` comparisons joined by `|`.
mask = temp['CountyID'].isin(countys)
print(mask)

# Select the rows where the mask is True; their original index labels are retained.
temp_rand = temp.loc[mask]
输出:
0 False
1 True
2 True
Name: CountyID, dtype: bool