如何使用 BigQuery 计算几何平均值,同时考虑样本中每个项目的权重?
How to calculate the geometric mean, taking into account the weight for each item in the sample using BigQuery?
我知道如何参照这篇帖子(this post),使用 EXP(AVG(LN(x)))
计算几何平均值。这非常有帮助。
现在我需要计算几何平均值,同时考虑样本中每个项目的权重。
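代数表达式如下(加权几何平均值,其中 $x_i$ 为各项的值,$w_i$ 为对应的权重):
$$\bar{x} = \left(\prod_{i=1}^{n} x_i^{w_i}\right)^{1/\sum_{i=1}^{n} w_i}$$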
所以我的问题是如何在 BigQuery 中计算它?
另外,对于如何考虑每个项目的权重,有什么建议吗?
示例数据
-- Sample data from the question: each array element is a JSON string that
-- carries a weight ("subs_sum") and a value ("division").
-- JSON_EXTRACT_SCALAR yields STRING values, so both struct fields are
-- strings here; they must be cast before any arithmetic.
SELECT
  STRUCT(
    JSON_EXTRACT_SCALAR(json_row, '$.subs_sum') AS subs,
    JSON_EXTRACT_SCALAR(json_row, '$.division') AS division
  ) AS mass
FROM UNNEST(
[
'''{
"subs_sum": "188292",
"division": "0.7708596151869399"
}''',
'''{
"subs_sum": "1182",
"division": "0.8344408128719736"
}''',
'''{
"subs_sum": "142559",
"division": "0.9539818702339475"
}''',
'''{
"subs_sum": "14047",
"division": "0.7836811141666864"
}''',
'''{
"subs_sum": "70344",
"division": "0.7724158684628387"
}''',
'''{
"subs_sum": "101516",
"division": "0.8676896770665041"
}''',
'''{
"subs_sum": "12459",
"division": "0.8029440607145902"
}''',
'''{
"subs_sum": "26070",
"division": "0.9793106723267602"
}''',
'''{
"subs_sum": "151959",
"division": "0.839048212451375"
}''',
'''{
"subs_sum": "5234",
"division": "0.684263034290403"
}'''
]
) AS json_row
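你问题中的公式等价于下面这个公式(其中 $x_i$ 为各项的值,$w_i$ 为对应的权重):
$$\bar{x} = \exp\left(\frac{\sum_{i=1}^{n} w_i \ln x_i}{\sum_{i=1}^{n} w_i}\right)$$
可以很容易地按照下面的示例进行编码: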
-- Weighted geometric mean: exp( sum(w * ln(x)) / sum(w) ),
-- with weight w = mass.subs and value x = mass.division.
SELECT
  EXP(
    SUM(mass.subs * LN(mass.division))
    / SUM(mass.subs)
  )
FROM data
如果应用于您问题中的示例数据
-- Step 1 (CTE `data`): parse each sample JSON string into a STRUCT named
-- `mass`, casting the weight (subs) and the value (division) to FLOAT64.
-- Step 2: compute the weighted geometric mean
--   exp( sum(w * ln(x)) / sum(w) )  with w = mass.subs, x = mass.division.
WITH data AS (
  SELECT
    STRUCT(
      CAST(JSON_EXTRACT_SCALAR(json_row, '$.subs_sum') AS FLOAT64) AS subs,
      CAST(JSON_EXTRACT_SCALAR(json_row, '$.division') AS FLOAT64) AS division
    ) AS mass
  FROM UNNEST([
    '{"subs_sum": "188292","division": "0.7708596151869399"}',
    '{"subs_sum": "1182","division": "0.8344408128719736"}',
    '{"subs_sum": "142559","division": "0.9539818702339475"}',
    '{"subs_sum": "14047","division": "0.7836811141666864"}',
    '{"subs_sum": "70344","division": "0.7724158684628387"}',
    '{"subs_sum": "101516","division": "0.8676896770665041"}',
    '{"subs_sum": "12459","division": "0.8029440607145902"}',
    '{"subs_sum": "26070","division": "0.9793106723267602"}',
    '{"subs_sum": "151959","division": "0.839048212451375"}',
    '{"subs_sum": "5234","division": "0.684263034290403"}'
  ]) AS json_row
)
SELECT
  EXP(
    SUM(mass.subs * LN(mass.division))
    / SUM(mass.subs)
  )
FROM data
输出是一个单行单列的结果,即加权几何平均值,约为 0.84。
我知道如何参照这篇帖子(this post),使用 EXP(AVG(LN(x)))
计算几何平均值。这非常有帮助。
现在我需要计算几何平均值,同时考虑样本中每个项目的权重。
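代数表达式如下(加权几何平均值,其中 $x_i$ 为各项的值,$w_i$ 为对应的权重):
$$\bar{x} = \left(\prod_{i=1}^{n} x_i^{w_i}\right)^{1/\sum_{i=1}^{n} w_i}$$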
所以我的问题是:如何在 BigQuery 中计算它?另外,对于如何考虑每个项目的权重,有什么建议吗?
示例数据
-- Sample data from the question: each array element is a JSON string that
-- carries a weight ("subs_sum") and a value ("division").
-- JSON_EXTRACT_SCALAR yields STRING values, so both struct fields are
-- strings here; they must be cast before any arithmetic.
SELECT
  STRUCT(
    JSON_EXTRACT_SCALAR(json_row, '$.subs_sum') AS subs,
    JSON_EXTRACT_SCALAR(json_row, '$.division') AS division
  ) AS mass
FROM UNNEST(
[
'''{
"subs_sum": "188292",
"division": "0.7708596151869399"
}''',
'''{
"subs_sum": "1182",
"division": "0.8344408128719736"
}''',
'''{
"subs_sum": "142559",
"division": "0.9539818702339475"
}''',
'''{
"subs_sum": "14047",
"division": "0.7836811141666864"
}''',
'''{
"subs_sum": "70344",
"division": "0.7724158684628387"
}''',
'''{
"subs_sum": "101516",
"division": "0.8676896770665041"
}''',
'''{
"subs_sum": "12459",
"division": "0.8029440607145902"
}''',
'''{
"subs_sum": "26070",
"division": "0.9793106723267602"
}''',
'''{
"subs_sum": "151959",
"division": "0.839048212451375"
}''',
'''{
"subs_sum": "5234",
"division": "0.684263034290403"
}'''
]
) AS json_row
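你问题中的公式等价于下面这个公式(其中 $x_i$ 为各项的值,$w_i$ 为对应的权重):
$$\bar{x} = \exp\left(\frac{\sum_{i=1}^{n} w_i \ln x_i}{\sum_{i=1}^{n} w_i}\right)$$
可以很容易地按照下面的示例进行编码: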
-- Weighted geometric mean: exp( sum(w * ln(x)) / sum(w) ),
-- with weight w = mass.subs and value x = mass.division.
SELECT
  EXP(
    SUM(mass.subs * LN(mass.division))
    / SUM(mass.subs)
  )
FROM data
如果应用于您问题中的示例数据
-- Step 1 (CTE `data`): parse each sample JSON string into a STRUCT named
-- `mass`, casting the weight (subs) and the value (division) to FLOAT64.
-- Step 2: compute the weighted geometric mean
--   exp( sum(w * ln(x)) / sum(w) )  with w = mass.subs, x = mass.division.
WITH data AS (
  SELECT
    STRUCT(
      CAST(JSON_EXTRACT_SCALAR(json_row, '$.subs_sum') AS FLOAT64) AS subs,
      CAST(JSON_EXTRACT_SCALAR(json_row, '$.division') AS FLOAT64) AS division
    ) AS mass
  FROM UNNEST([
    '{"subs_sum": "188292","division": "0.7708596151869399"}',
    '{"subs_sum": "1182","division": "0.8344408128719736"}',
    '{"subs_sum": "142559","division": "0.9539818702339475"}',
    '{"subs_sum": "14047","division": "0.7836811141666864"}',
    '{"subs_sum": "70344","division": "0.7724158684628387"}',
    '{"subs_sum": "101516","division": "0.8676896770665041"}',
    '{"subs_sum": "12459","division": "0.8029440607145902"}',
    '{"subs_sum": "26070","division": "0.9793106723267602"}',
    '{"subs_sum": "151959","division": "0.839048212451375"}',
    '{"subs_sum": "5234","division": "0.684263034290403"}'
  ]) AS json_row
)
SELECT
  EXP(
    SUM(mass.subs * LN(mass.division))
    / SUM(mass.subs)
  )
FROM data
输出是一个单行单列的结果,即加权几何平均值,约为 0.84。