无法对行进行分类
Not able to categorize the rows
循环没有按预期工作。我想根据 if 条件对各行进行分组,但是运行代码之后,所有行都被归到了同一个类别 'global governance'。
# create a for loop to re-group as per the values mentioned above
# NOTE(review): this is the loop as posted in the question, shown here without
# indentation. It iterates over the values of data['Area'], but every branch
# assigns a scalar to data['final group'], which sets the whole column rather
# than the current row -- so the last iteration that performs an assignment
# decides the value seen in every row.
for values in data['Area']:
# `|` is the bitwise-or operator; it only behaves like `or` here because each
# comparison is parenthesised.
if ((values=='corporate governance') | (values=='accounting') | (values== 'auditing') | (values=='manufacturing') | (values=='dao challenges')):
data['final group']='corporate governance'
elif (values=='blockchain governance' or values=='bct standards' or values== 'blockchain governance' or
values=='blockchain hashing' or values=='developers incentive' or values=='smart contract' or values=='Smart contract' ):
data['final group']='Blockchain Technology Governance'
elif (values=='Block chain and legal compliance' or values=='Decision Rights' or values=='Dispute resolution' or
values=='law' or values=='judicature'):
data['final group']='Judicature'
elif (values == 'Blockchain and Trust'):
data['final group']='Trust'
elif ((values=='Foreign Aid') | (values=='Global Commons governance') | (values=='global Commons governance')):
data['final group']='global governance'
elif (values=='Public governance' or values=='Rural governance' or values=='Usage of bct in governance' or values=='public governance' or
values=='Smart city' or values=='smart city' or values=='Rural governance' or values=='national records'):
data['final group']='governance'
elif (values=='Music' or values=='Waste management' or values=='blockchian and healthcare' or values== 'forest management' or
values=='healthcare' or
values=='Resource governance'):
data['final group']='Resource governance'
elif (values=='scientific publication' or values=='Academic Governance'):
data['final group']='Academia'
elif(values=='SDG' or values== 'carbon trading' or values=='environmental governance'):
data['final group']='Sustainable developemnt'
elif(values=='data governance' or values=='organisational data governance'):
data['final group']='Data governance'
elif(values=='digital platform governance' or values =='e commerce' or
values=='platform governance' or values=='product management'):
data['final group']='platform governance'
elif(values=='Supplychain'):
# NOTE(review): `==` compares instead of assigning, so this branch never
# sets the column.
data['final group']=='Supplychain'
对我之前的评论展开说明:您的循环确实遍历了 df 的 Area 列中的每一个值;但是,每次给 final group 列赋值时,赋的是整列,而不是当前这一行。因此,最终每一行的 final group 都由最后一次执行到的赋值决定(也就是通常取决于数据框最后一行的 Area)。建议改用类似 .apply() 的方式:
def determine_final_group(area):
    """Return the consolidated group name for a single ``Area`` value.

    Written to be applied element-wise (e.g. via ``Series.apply``).
    Returns ``None`` when ``area`` matches none of the branches below.
    """
    if area in ['corporate governance', 'accounting', 'auditing', 'manufacturing', 'dao challenges']:
        return 'corporate governance'
    elif area in ['blockchain governance', 'bct standards', 'blockchain hashing', 'developers incentive', 'smart contract', 'Smart contract']:
        return 'Blockchain Technology Governance'
    elif area in ['Block chain and legal compliance', 'Decision Rights', 'Dispute resolution', 'law', 'judicature']:
        return 'Judicature'
    elif area == 'Blockchain and Trust':
        return 'Trust'
    elif area in ['Foreign Aid', 'Global Commons governance', 'global Commons governance']:
        # The test must name ``area``: a bare non-empty list is always truthy
        # and would classify every remaining value as 'global governance'.
        return 'global governance'
    #continue the elif logic using this pattern...
    return None
data['final group'] = data['Area'].apply(determine_final_group)
当您像 data['final group']='corporate governance' 这样赋值时,它会给整列赋值,而不仅仅是当前这一行。
- 把这段逻辑放进一个函数中,
- 用 return 返回最终分组(而不是赋值),
- 删除 for 循环,
- 改用 .map。
def get_final_group(values):
    """Return the final group for a single ``Area`` value.

    Intended for use with ``Series.map``. Values not handled by the branches
    written so far fall through and yield ``None``.
    """
    if values == 'corporate governance' or values == 'accounting' or values == 'auditing' or values == 'manufacturing' or values == 'dao challenges':
        return 'corporate governance'
    if (values == 'blockchain governance' or values == 'bct standards' or
            values == 'blockchain hashing' or values == 'developers incentive' or
            values == 'smart contract' or values == 'Smart contract'):
        return 'Blockchain Technology Governance'
    ...  # placeholder: the remaining groups follow the same pattern
df['final_group'] = df['Area'].map(get_final_group)
说明和改进:
- 因为每个分支都直接 return 一个值,所以不需要 elif。
- 不要用 | 来连接条件——那是按位或(bit-wise or)运算符;逻辑判断应使用 or。
- 不要使用过多的括号,那样会影响可读性。
- 您可以重构 if 使其更易读,例如:
if area in ('corporate governance',
'accounting',
'auditing',
'manufacturing',
'dao challenges'
):
return 'corporate governance'
# and so on
- 更好的是,使用字典来映射结果值:
# Lookup table mapping each raw ``Area`` value to its final group name.
# Each key appears once; a repeated key in a dict literal is silently
# overwritten by the later entry.
final_group = {
    'corporate governance': 'corporate governance',
    'accounting': 'corporate governance',
    'auditing': 'corporate governance',
    'manufacturing': 'corporate governance',
    'dao challenges': 'corporate governance',
    'blockchain governance': 'Blockchain Technology Governance',
    'bct standards': 'Blockchain Technology Governance',
    'blockchain hashing': 'Blockchain Technology Governance',
    'developers incentive': 'Blockchain Technology Governance',
    'smart contract': 'Blockchain Technology Governance',
    'Smart contract': 'Blockchain Technology Governance',
    # and so on
}
df['final_group'] = df['Area'].map(final_group)
- 而且由于这变得非常冗长,您可以反转它并将每个目标用作字典键,并且值将是所有可能的值,作为列表或元组:
# Each final group name mapped to the raw ``Area`` values that belong to it.
final_group_categories = {
    'corporate governance': (
        'corporate governance',
        'accounting',
        'auditing',
        'manufacturing',
        'dao challenges',
    ),
    'Blockchain Technology Governance': (
        'blockchain governance',
        'bct standards',
        'blockchain hashing',
        'developers incentive',
        'smart contract',
        'Smart contract',
    ),
    # etc.
}
# then reverse it to use in the map function:
final_group = {}
for group_target, invals in final_group_categories.items():
    for inval in invals:
        final_group[inval] = group_target
# same thing as a dict comprehension:
final_group = {inval: group_target
               for group_target, invals in final_group_categories.items()
               for inval in invals
               }
df['final_group'] = df['Area'].map(final_group)
- 先把值统一转成小写再做匹配,这样就不必同时列出 'smart contract' 和 'Smart contract' 这样仅大小写不同的重复项(注意:此时字典中的键也必须全部是小写):
df['final_group'] = df['Area'].str.lower().map(final_group)
循环没有按预期工作。我想根据 if 条件对各行进行分组,但是运行代码之后,所有行都被归到了同一个类别 'global governance'。
# create a for loop to re-group as per the values mentioned above
# NOTE(review): this is the loop as posted in the question, shown here without
# indentation. It iterates over the values of data['Area'], but every branch
# assigns a scalar to data['final group'], which sets the whole column rather
# than the current row -- so the last iteration that performs an assignment
# decides the value seen in every row.
for values in data['Area']:
# `|` is the bitwise-or operator; it only behaves like `or` here because each
# comparison is parenthesised.
if ((values=='corporate governance') | (values=='accounting') | (values== 'auditing') | (values=='manufacturing') | (values=='dao challenges')):
data['final group']='corporate governance'
elif (values=='blockchain governance' or values=='bct standards' or values== 'blockchain governance' or
values=='blockchain hashing' or values=='developers incentive' or values=='smart contract' or values=='Smart contract' ):
data['final group']='Blockchain Technology Governance'
elif (values=='Block chain and legal compliance' or values=='Decision Rights' or values=='Dispute resolution' or
values=='law' or values=='judicature'):
data['final group']='Judicature'
elif (values == 'Blockchain and Trust'):
data['final group']='Trust'
elif ((values=='Foreign Aid') | (values=='Global Commons governance') | (values=='global Commons governance')):
data['final group']='global governance'
elif (values=='Public governance' or values=='Rural governance' or values=='Usage of bct in governance' or values=='public governance' or
values=='Smart city' or values=='smart city' or values=='Rural governance' or values=='national records'):
data['final group']='governance'
elif (values=='Music' or values=='Waste management' or values=='blockchian and healthcare' or values== 'forest management' or
values=='healthcare' or
values=='Resource governance'):
data['final group']='Resource governance'
elif (values=='scientific publication' or values=='Academic Governance'):
data['final group']='Academia'
elif(values=='SDG' or values== 'carbon trading' or values=='environmental governance'):
data['final group']='Sustainable developemnt'
elif(values=='data governance' or values=='organisational data governance'):
data['final group']='Data governance'
elif(values=='digital platform governance' or values =='e commerce' or
values=='platform governance' or values=='product management'):
data['final group']='platform governance'
elif(values=='Supplychain'):
# NOTE(review): `==` compares instead of assigning, so this branch never
# sets the column.
data['final group']=='Supplychain'
对我之前的评论展开说明:您的循环确实遍历了 df 的 Area 列中的每一个值;但是,每次给 final group 列赋值时,赋的是整列,而不是当前这一行。因此,最终每一行的 final group 都由最后一次执行到的赋值决定(也就是通常取决于数据框最后一行的 Area)。建议改用类似 .apply() 的方式:
def determine_final_group(area):
    """Return the consolidated group name for a single ``Area`` value.

    Written to be applied element-wise (e.g. via ``Series.apply``).
    Returns ``None`` when ``area`` matches none of the branches below.
    """
    if area in ['corporate governance', 'accounting', 'auditing', 'manufacturing', 'dao challenges']:
        return 'corporate governance'
    elif area in ['blockchain governance', 'bct standards', 'blockchain hashing', 'developers incentive', 'smart contract', 'Smart contract']:
        return 'Blockchain Technology Governance'
    elif area in ['Block chain and legal compliance', 'Decision Rights', 'Dispute resolution', 'law', 'judicature']:
        return 'Judicature'
    elif area == 'Blockchain and Trust':
        return 'Trust'
    elif area in ['Foreign Aid', 'Global Commons governance', 'global Commons governance']:
        # The test must name ``area``: a bare non-empty list is always truthy
        # and would classify every remaining value as 'global governance'.
        return 'global governance'
    #continue the elif logic using this pattern...
    return None
data['final group'] = data['Area'].apply(determine_final_group)
当您像 data['final group']='corporate governance' 这样赋值时,它会给整列赋值,而不仅仅是当前这一行。
- 把这段逻辑放进一个函数中,
- 用 return 返回最终分组(而不是赋值),
- 删除 for 循环,
- 改用 .map。
def get_final_group(values):
    """Return the final group for a single ``Area`` value.

    Intended for use with ``Series.map``. Values not handled by the branches
    written so far fall through and yield ``None``.
    """
    if values == 'corporate governance' or values == 'accounting' or values == 'auditing' or values == 'manufacturing' or values == 'dao challenges':
        return 'corporate governance'
    if (values == 'blockchain governance' or values == 'bct standards' or
            values == 'blockchain hashing' or values == 'developers incentive' or
            values == 'smart contract' or values == 'Smart contract'):
        return 'Blockchain Technology Governance'
    ...  # placeholder: the remaining groups follow the same pattern
df['final_group'] = df['Area'].map(get_final_group)
说明和改进:
- 因为每个分支都直接 return 一个值,所以不需要 elif。
- 不要用 | 来连接条件——那是按位或(bit-wise or)运算符;逻辑判断应使用 or。
- 不要使用过多的括号,那样会影响可读性。
- 您可以重构 if 使其更易读,例如:
if area in ('corporate governance',
            'accounting',
            'auditing',
            'manufacturing',
            'dao challenges'
            ):
    return 'corporate governance'
# and so on
- 更好的是,使用字典来映射结果值:
# Lookup table mapping each raw ``Area`` value to its final group name.
# Each key appears once; a repeated key in a dict literal is silently
# overwritten by the later entry.
final_group = {
    'corporate governance': 'corporate governance',
    'accounting': 'corporate governance',
    'auditing': 'corporate governance',
    'manufacturing': 'corporate governance',
    'dao challenges': 'corporate governance',
    'blockchain governance': 'Blockchain Technology Governance',
    'bct standards': 'Blockchain Technology Governance',
    'blockchain hashing': 'Blockchain Technology Governance',
    'developers incentive': 'Blockchain Technology Governance',
    'smart contract': 'Blockchain Technology Governance',
    'Smart contract': 'Blockchain Technology Governance',
    # and so on
}
# Areas missing from the table come out as NaN after the map.
df['final_group'] = df['Area'].map(final_group)
- 而且由于这变得非常冗长,您可以反转它并将每个目标用作字典键,并且值将是所有可能的值,作为列表或元组:
# Each final group name mapped to the raw ``Area`` values that belong to it.
final_group_categories = {
    'corporate governance': (
        'corporate governance',
        'accounting',
        'auditing',
        'manufacturing',
        'dao challenges',
    ),
    'Blockchain Technology Governance': (
        'blockchain governance',
        'bct standards',
        'blockchain hashing',
        'developers incentive',
        'smart contract',
        'Smart contract',
    ),
    # etc.
}
# then reverse it to use in the map function:
final_group = {}
for group_target, invals in final_group_categories.items():
    for inval in invals:
        final_group[inval] = group_target
# same thing as a dict comprehension:
final_group = {inval: group_target
               for group_target, invals in final_group_categories.items()
               for inval in invals
               }
df['final_group'] = df['Area'].map(final_group)
- 先把值统一转成小写再做匹配,这样就不必同时列出 'smart contract' 和 'Smart contract' 这样仅大小写不同的重复项(注意:此时字典中的键也必须全部是小写):
df['final_group'] = df['Area'].str.lower().map(final_group)