oracle SQL: stats_crosstab 卡方检验返回 0
oracle SQL: stats_crosstab chi square test returning 0
我正在执行以下查询(简化):
-- Chi-square statistics for Person vs. Category, computed directly on tbl.
-- Only the Person and Category columns are passed to STATS_CROSSTAB,
-- so every row of tbl is treated as a single input row.
select
    stats_crosstab(Person, Category, 'CHISQ_DF')  as DF,           -- degrees of freedom
    stats_crosstab(Person, Category, 'CHISQ_OBS') as CHI_SQUARED,  -- observed chi-square value
    stats_crosstab(Person, Category, 'CHISQ_SIG') as CHI_SIG       -- significance of the observed value
from tbl
其中 tbl 的内容如下:
Person Category Count
Person1 Category1 7
Person1 Category2 1
Person1 Category3 1
Person1 Category4 39
Person2 Category1 6
Person2 Category2 2
Person2 Category3 1
Person2 Category4 35
Person3 Category1 7
Person3 Category2 1
Person3 Category3 1
Person3 Category4 14
这是我得到的结果:
DF:6(正确)
CHI_SQUARED: 0(不正确)
CHI_SIG: 1(不正确)
我手动计算了结果,答案应该是:
DF:6(正确)
CHI_SQUARED: 4.5(预期值)
CHI_SIG:0.87(预期值)
导致问题的原因是什么?我从另一个例子知道这应该是可能的。
为什么您的查询不起作用:您似乎认为计数已被考虑在内,但事实并非如此。您只在 Person 和 Category 列上调用了函数;Count 列被完全忽略。
如果您想获得正确的统计信息,您需要将每一行重复 "Count" 次。您可以在子查询中执行此操作。如何让它发挥作用:
-- Chi-square test of independence between person and category.
-- STATS_CROSSTAB is called on (person, category) only, so each input row is
-- one observation. The pre-aggregated counts in tbl.cnt are therefore expanded
-- into cnt identical rows before the statistics are computed.
with tbl ( person, category, cnt ) as (
    -- Sample data: one row per (person, category) cell with its observed count.
    select 'Person1', 'Category1',  7 from dual union all
    select 'Person1', 'Category2',  1 from dual union all
    select 'Person1', 'Category3',  1 from dual union all
    select 'Person1', 'Category4', 39 from dual union all
    select 'Person2', 'Category1',  6 from dual union all
    select 'Person2', 'Category2',  2 from dual union all
    select 'Person2', 'Category3',  1 from dual union all
    select 'Person2', 'Category4', 35 from dual union all
    select 'Person3', 'Category1',  7 from dual union all
    select 'Person3', 'Category2',  1 from dual union all
    select 'Person3', 'Category3',  1 from dual union all
    select 'Person3', 'Category4', 14 from dual
),
observations ( person, category ) as (
    -- Row multiplier: emit each (person, category) row cnt times.
    select person, category
    from tbl
    -- Root rows are returned regardless of the CONNECT BY condition, so without
    -- this filter a cell with cnt = 0 (or NULL) would still count as one observation.
    start with cnt >= 1
    connect by level <= cnt
           -- Keep every chain on its own source row.
           -- Assumes (person, category) is unique in tbl; duplicates would branch the hierarchy.
           and prior person = person
           and prior category = category
           -- A fresh value per row keeps Oracle from flagging the self-reference as a CONNECT BY loop.
           and prior sys_guid() is not null
)
select
    stats_crosstab(person, category, 'CHISQ_DF')  as df,           -- degrees of freedom
    stats_crosstab(person, category, 'CHISQ_OBS') as chi_squared,  -- observed chi-square value
    stats_crosstab(person, category, 'CHISQ_SIG') as chi_sig       -- significance of the observed value
from observations;
DF CHI_SQUARE CHI_SIG
-- ---------------------------------------- -------------------
6 4.55259268824609733700642791551882460974 0.60233419532255972
我正在执行以下查询(简化):
-- Chi-square statistics for Person vs. Category, computed directly on tbl.
-- Only the Person and Category columns are passed to STATS_CROSSTAB,
-- so every row of tbl is treated as a single input row.
select
    stats_crosstab(Person, Category, 'CHISQ_DF')  as DF,           -- degrees of freedom
    stats_crosstab(Person, Category, 'CHISQ_OBS') as CHI_SQUARED,  -- observed chi-square value
    stats_crosstab(Person, Category, 'CHISQ_SIG') as CHI_SIG       -- significance of the observed value
from tbl
其中 tbl 的内容如下:
Person Category Count
Person1 Category1 7
Person1 Category2 1
Person1 Category3 1
Person1 Category4 39
Person2 Category1 6
Person2 Category2 2
Person2 Category3 1
Person2 Category4 35
Person3 Category1 7
Person3 Category2 1
Person3 Category3 1
Person3 Category4 14
这是我得到的结果: DF:6(正确) CHI_SQUARED: 0(不正确) CHI_SIG: 1(不正确)
我手动计算了结果,答案应该是: DF:6(正确) CHI_SQUARED: 4.5(预期值) CHI_SIG:0.87(预期值)
导致问题的原因是什么?我从另一个例子知道这应该是可能的。
为什么您的查询不起作用:您似乎认为计数已被考虑在内,但事实并非如此。您只在 Person 和 Category 列上调用了函数;Count 列被完全忽略。
如果您想获得正确的统计信息,您需要将每一行重复 "Count" 次。您可以在子查询中执行此操作。如何让它发挥作用:
-- Chi-square test of independence between person and category.
-- STATS_CROSSTAB is called on (person, category) only, so each input row is
-- one observation. The pre-aggregated counts in tbl.cnt are therefore expanded
-- into cnt identical rows before the statistics are computed.
with tbl ( person, category, cnt ) as (
    -- Sample data: one row per (person, category) cell with its observed count.
    select 'Person1', 'Category1',  7 from dual union all
    select 'Person1', 'Category2',  1 from dual union all
    select 'Person1', 'Category3',  1 from dual union all
    select 'Person1', 'Category4', 39 from dual union all
    select 'Person2', 'Category1',  6 from dual union all
    select 'Person2', 'Category2',  2 from dual union all
    select 'Person2', 'Category3',  1 from dual union all
    select 'Person2', 'Category4', 35 from dual union all
    select 'Person3', 'Category1',  7 from dual union all
    select 'Person3', 'Category2',  1 from dual union all
    select 'Person3', 'Category3',  1 from dual union all
    select 'Person3', 'Category4', 14 from dual
),
observations ( person, category ) as (
    -- Row multiplier: emit each (person, category) row cnt times.
    select person, category
    from tbl
    -- Root rows are returned regardless of the CONNECT BY condition, so without
    -- this filter a cell with cnt = 0 (or NULL) would still count as one observation.
    start with cnt >= 1
    connect by level <= cnt
           -- Keep every chain on its own source row.
           -- Assumes (person, category) is unique in tbl; duplicates would branch the hierarchy.
           and prior person = person
           and prior category = category
           -- A fresh value per row keeps Oracle from flagging the self-reference as a CONNECT BY loop.
           and prior sys_guid() is not null
)
select
    stats_crosstab(person, category, 'CHISQ_DF')  as df,           -- degrees of freedom
    stats_crosstab(person, category, 'CHISQ_OBS') as chi_squared,  -- observed chi-square value
    stats_crosstab(person, category, 'CHISQ_SIG') as chi_sig       -- significance of the observed value
from observations;
DF CHI_SQUARE CHI_SIG
-- ---------------------------------------- -------------------
6 4.55259268824609733700642791551882460974 0.60233419532255972