根据条件从两个表创建一个新列
Create a new column from two tables based on a condition
medicament_similar 表中有一列 id_questions_similar,每一行是一个问题 ID 列表;每个问题在另一张表(active_questions)中都对应若干 speciality_id。我需要针对每一行,收集与其 id_questions_similar 中的问题 ID 相匹配的所有 speciality_id,组成一个新列表,并放到 medicament_similar 表的新列中。
# Lookup table: one row per question id together with its speciality ids.
data1 = dict(
    id_questions=[1, 2, 3],
    speciality_id=[[77, 66], [111, 122, 133], [1245]],
)
active_questions = pd.DataFrame(data=data1)
active_questions  # bare expression: shows the frame when run in a notebook/REPL cell

# One row per medicament, each holding the ids of its similar questions.
data2 = dict(
    id_medicament=[1, 2, 3],
    id_questions_similar=[[1, 2], [1, 3], [3]],
)
medicament_similar = pd.DataFrame(data=data2)
medicament_similar  # bare expression: shows the frame when run in a notebook/REPL cell
我构造这个示例是为了更好地说明问题(实际的两张表都超过 10000 行)。
我希望得到这个结果(我手动完成的):
我试过了,但没有结果:
def get_specialities(data1, data2):
    """Collect the speciality ids of every question listed for each medicament.

    Args:
        data1: Mapping or DataFrame with the parallel columns
            ``'id_questions'`` (question ids) and ``'speciality_id'``
            (one list of speciality ids per question).
        data2: Mapping or DataFrame with the column
            ``'id_questions_similar'`` (one list of question ids per row).

    Returns:
        A list with one entry per row of ``data2['id_questions_similar']``.
        Each entry is a flat list of the speciality ids of that row's
        questions, in the order the question ids appear in the row.
        Question ids that are absent from ``data1`` contribute nothing.
    """
    # Build the id -> speciality-list lookup once instead of rescanning
    # data1 for every question id (if an id repeats, the last entry wins).
    specialities_by_question = dict(
        zip(data1['id_questions'], data1['speciality_id'])
    )

    result = []
    for question_ids in data2['id_questions_similar']:
        # A fresh accumulator per row; resetting it inside the innermost
        # loop is what made the earlier attempt lose its results.
        drug_speciality = []
        for question_id in question_ids:
            drug_speciality.extend(specialities_by_question.get(question_id, ()))
        result.append(drug_speciality)
    return result
# The sample dictionaries are named data1 / data2 (no underscore).
print(get_specialities(data1, data2))
我是这样做的。也许可以完全用 pandas 来完成,并且更高效……?
对于每个 i,在 data2['id_questions_similar'][i] 中找出同时出现在 data1['id_questions'] 里的问题 ID(即匹配项)。
根据这些匹配项,求出它们在 data1['id_questions'] 中的索引,再取出 data1['speciality_id'] 中对应位置的值。
import pandas as pd
# Sample lookup data: question ids and, at the same position, their speciality ids.
data1 = {
    'id_questions': [1, 2, 3],
    'speciality_id': [[77, 66], [111, 122, 133], [1245]],
}
active_questions = pd.DataFrame(data1)

# Sample medicament data: each row lists the ids of its similar questions.
data2 = {
    'id_medicament': [1, 2, 3],
    'id_questions_similar': [[1, 2], [1, 3], [3]],
}
questions = pd.DataFrame(data2)

# Build the id -> speciality-list lookup once, so each id is resolved with a
# dict access instead of a list scan. setdefault keeps the FIRST occurrence
# of a repeated id, which is what list.index() would have returned.
specialities_by_question = {}
for question_id, speciality_ids in zip(data1['id_questions'], data1['speciality_id']):
    specialities_by_question.setdefault(question_id, speciality_ids)

speciality_column = []
for similar_ids in data2['id_questions_similar']:
    # Keep only the ids that actually exist in the lookup table.
    matched_ids = [qid for qid in similar_ids if qid in specialities_by_question]
    print(matched_ids)
    matched = [specialities_by_question[qid] for qid in matched_ids]
    if len(matched) == 1:
        # A single match is stored as the bare speciality list.
        speciality_column.append(matched[0])
    else:
        # Several matches give a list of speciality lists; no match gives an
        # empty list rather than a leftover placeholder integer.
        speciality_column.append(matched)

questions['speciality_id'] = speciality_column
print(questions)
下面我做了一个更优化的版本
import more_itertools as mit
import pandas as pd
# Sample lookup data: question ids and, at the same position, their speciality ids.
data1 = {
    'id_questions': [1, 2, 3],
    'speciality_id': [[77, 66], [111, 122, 133], [1245]],
}
active_questions = pd.DataFrame(data1)

# Sample medicament data: each row lists the ids of its similar questions.
data2 = {
    'id_medicament': [1, 2, 3],
    'id_questions_similar': [[1, 2], [1, 3], [3]],
}
questions = pd.DataFrame(data2)

# Pull both lookup columns out by name once; selecting by name does not
# depend on the column order of active_questions.
question_ids = active_questions['id_questions'].tolist()
speciality_lists = active_questions['speciality_id'].tolist()

speciality = []
for similar_ids in questions['id_questions_similar']:
    wanted = set(similar_ids)  # constant-time membership test per question
    # Matches are collected in the row order of active_questions.
    matched = [
        speciality_ids
        for question_id, speciality_ids in zip(question_ids, speciality_lists)
        if question_id in wanted
    ]
    # A single match is stored as the bare speciality list; several matches
    # as a list of lists; no match as an empty list (instead of failing on
    # an out-of-range index).
    speciality.append(matched[0] if len(matched) == 1 else matched)

questions['speciality_id'] = speciality
print(questions)
medicament_similar 表中有一列 id_questions_similar,每一行是一个问题 ID 列表;每个问题在另一张表(active_questions)中都对应若干 speciality_id。我需要针对每一行,收集与其 id_questions_similar 中的问题 ID 相匹配的所有 speciality_id,组成一个新列表,并放到 medicament_similar 表的新列中。
data1 = {'id_questions': [1,2,3],
'speciality_id': [[77, 66],[111, 122,133],[1245]]
}
active_questions = pd.DataFrame(data1)
active_questions
data2 = {'id_medicament': [1, 2,3],
'id_questions_similar': [[1, 2],[1,3], [3]]
}
medicament_similar = pd.DataFrame(data2)
medicament_similar
我构造这个示例是为了更好地说明问题(实际的两张表都超过 10000 行)。我希望得到这个结果(我手动完成的):
我试过了,但没有结果:
def get_specialities(data1, data2):
    """Collect the speciality ids of every question listed for each medicament.

    Args:
        data1: Mapping or DataFrame with the parallel columns
            ``'id_questions'`` (question ids) and ``'speciality_id'``
            (one list of speciality ids per question).
        data2: Mapping or DataFrame with the column
            ``'id_questions_similar'`` (one list of question ids per row).

    Returns:
        A list with one entry per row of ``data2['id_questions_similar']``.
        Each entry is a flat list of the speciality ids of that row's
        questions, in the order the question ids appear in the row.
        Question ids that are absent from ``data1`` contribute nothing.
    """
    # Build the id -> speciality-list lookup once instead of rescanning
    # data1 for every question id (if an id repeats, the last entry wins).
    specialities_by_question = dict(
        zip(data1['id_questions'], data1['speciality_id'])
    )

    result = []
    for question_ids in data2['id_questions_similar']:
        # A fresh accumulator per row; resetting it inside the innermost
        # loop is what made the earlier attempt lose its results.
        drug_speciality = []
        for question_id in question_ids:
            drug_speciality.extend(specialities_by_question.get(question_id, ()))
        result.append(drug_speciality)
    return result
# The sample dictionaries are named data1 / data2 (no underscore).
print(get_specialities(data1, data2))
我是这样做的。也许可以完全用 pandas 来完成,并且更高效……? 对于每个 i,在 data2['id_questions_similar'][i] 中找出同时出现在 data1['id_questions'] 里的问题 ID(即匹配项)。根据这些匹配项,求出它们在 data1['id_questions'] 中的索引,再取出 data1['speciality_id'] 中对应位置的值。
import pandas as pd
# Sample lookup data: question ids and, at the same position, their speciality ids.
data1 = {
    'id_questions': [1, 2, 3],
    'speciality_id': [[77, 66], [111, 122, 133], [1245]],
}
active_questions = pd.DataFrame(data1)

# Sample medicament data: each row lists the ids of its similar questions.
data2 = {
    'id_medicament': [1, 2, 3],
    'id_questions_similar': [[1, 2], [1, 3], [3]],
}
questions = pd.DataFrame(data2)

# Build the id -> speciality-list lookup once, so each id is resolved with a
# dict access instead of a list scan. setdefault keeps the FIRST occurrence
# of a repeated id, which is what list.index() would have returned.
specialities_by_question = {}
for question_id, speciality_ids in zip(data1['id_questions'], data1['speciality_id']):
    specialities_by_question.setdefault(question_id, speciality_ids)

speciality_column = []
for similar_ids in data2['id_questions_similar']:
    # Keep only the ids that actually exist in the lookup table.
    matched_ids = [qid for qid in similar_ids if qid in specialities_by_question]
    print(matched_ids)
    matched = [specialities_by_question[qid] for qid in matched_ids]
    if len(matched) == 1:
        # A single match is stored as the bare speciality list.
        speciality_column.append(matched[0])
    else:
        # Several matches give a list of speciality lists; no match gives an
        # empty list rather than a leftover placeholder integer.
        speciality_column.append(matched)

questions['speciality_id'] = speciality_column
print(questions)
下面我做了一个更优化的版本
import more_itertools as mit
import pandas as pd
# Sample lookup data: question ids and, at the same position, their speciality ids.
data1 = {
    'id_questions': [1, 2, 3],
    'speciality_id': [[77, 66], [111, 122, 133], [1245]],
}
active_questions = pd.DataFrame(data1)

# Sample medicament data: each row lists the ids of its similar questions.
data2 = {
    'id_medicament': [1, 2, 3],
    'id_questions_similar': [[1, 2], [1, 3], [3]],
}
questions = pd.DataFrame(data2)

# Pull both lookup columns out by name once; selecting by name does not
# depend on the column order of active_questions.
question_ids = active_questions['id_questions'].tolist()
speciality_lists = active_questions['speciality_id'].tolist()

speciality = []
for similar_ids in questions['id_questions_similar']:
    wanted = set(similar_ids)  # constant-time membership test per question
    # Matches are collected in the row order of active_questions.
    matched = [
        speciality_ids
        for question_id, speciality_ids in zip(question_ids, speciality_lists)
        if question_id in wanted
    ]
    # A single match is stored as the bare speciality list; several matches
    # as a list of lists; no match as an empty list (instead of failing on
    # an out-of-range index).
    speciality.append(matched[0] if len(matched) == 1 else matched)

questions['speciality_id'] = speciality
print(questions)