为多列分别取最常出现的值(众数)

Taking the most commonly occurring (modal) value for several columns

我正在清理一批社会人口统计信息记录:同一个人的信息随着时间推移被多次记录,但记录质量不佳且前后不一致。我想为每个人取最常出现的值(众数)。

一种方法是按 id 进行分区,然后计算每个值出现的次数,保留每个 id 的最高计数:

 -- Rebuild the table holding the modal ethnic_group for every id.
 -- TABLE is a reserved keyword in T-SQL, so the name must be bracket-quoted;
 -- the existence check lets the script also run when the table is not there yet.
 IF OBJECT_ID(N'dbo.[table]', N'U') IS NOT NULL
     DROP TABLE dbo.[table];

 WITH ranked AS (
     -- One row per (id, ethnic_group) with its frequency.
     -- COUNT(column) skips NULLs, so a NULL group gets a count of 0.
     SELECT
         [id],
         [ethnic_group],
         COUNT([ethnic_group]) AS ct,
         ROW_NUMBER() OVER (
             PARTITION BY [id]
             -- The value itself breaks ties, so the chosen mode is repeatable.
             ORDER BY COUNT([ethnic_group]) DESC, [ethnic_group]
         ) AS rn
     FROM dbo.mytable
     GROUP BY [id], [ethnic_group]
 )
 -- rn = 1 keeps exactly one row per id, so DISTINCT is unnecessary, and an
 -- ORDER BY here would not control the order in which the new table is read.
 SELECT
     [id],
     [ethnic_group] AS [ethnic_mode],
     ct
 INTO dbo.[table]
 FROM ranked
 WHERE rn = 1;

但我想对几个变量(种族群体、收入群体等)执行此操作。

如何在一条语句中选出多个变量的众数,并把结果插入同一张表(而不是为每个变量各建一张表)?

下面的 table 举例说明了我想做的事情:

 -- Example data: several observations per person (id). The *_mode columns
 -- show the per-id modal value that the query is expected to produce.
 -- The existence check lets the script run even when the table is absent.
 IF OBJECT_ID(N'mytable', N'U') IS NOT NULL
     DROP TABLE mytable;

 -- id repeats across rows on purpose, so it cannot carry a PRIMARY KEY.
 CREATE TABLE mytable(
     id           VARCHAR(2)  NOT NULL
    ,ethnic_group VARCHAR(12) NOT NULL
    ,ethnic_mode  VARCHAR(11) NOT NULL
    ,income       VARCHAR(6)  NOT NULL
    ,income_mode  VARCHAR(11) NOT NULL
 );

 -- Only real observations are loaded; the column-header line of the listing
 -- is not data and would otherwise show up as a bogus person with id 'id'.
 INSERT INTO mytable(id,ethnic_group,ethnic_mode,income,income_mode) VALUES
     ('1','white','white','middle','middle')
    ,('1','white','white','middle','middle')
    ,('1','mixed','white','high','middle')
    ,('2','asian','asian','middle','middle')
    ,('2','mixed','asian','middle','middle')
    ,('2','asian','asian','middle','middle');

我会使用子查询在 1 个插入语句中完成此操作。

下面是一个基于您示例中表结构的例子:

/* Source data: one row per observation, so the same id may appear many times. */
DECLARE @source_table TABLE (
    id           VARCHAR(2)  NOT NULL,
    ethnic_group VARCHAR(12) NULL,
    ethnic_mode  VARCHAR(11) NULL,
    income       VARCHAR(6)  NULL,
    income_mode  VARCHAR(11) NULL
);

/* Result set: the primary key on id guarantees at most one row per id. */
DECLARE @destination_table TABLE (
    id           VARCHAR(2)  NOT NULL PRIMARY KEY,
    ethnic_group VARCHAR(12) NULL,
    income       VARCHAR(6)  NULL
);

/*
 * Load the sample observations in a single statement.
 * ids 1 and 2 repeat with fully populated rows; id 3 has mostly NULL values.
 */
INSERT INTO @source_table (id, ethnic_group, ethnic_mode, income, income_mode)
VALUES
    ('1', 'white', 'white', 'middle', 'middle'),
    ('1', 'white', 'white', 'middle', 'middle'),
    ('1', 'mixed', 'white', 'high',   'middle'),
    ('2', 'asian', 'asian', 'middle', 'middle'),
    ('2', 'mixed', 'asian', 'middle', 'middle'),
    ('2', 'asian', 'asian', 'middle', 'middle'),
    ('3', 'asian', NULL,    NULL,     NULL),
    ('3', NULL,    NULL,    'middle', NULL),
    ('3', NULL,    NULL,    NULL,     NULL);

/*
 * Collapse the source rows to one row per id, taking the most frequent
 * value (the mode) of each column independently via a correlated subquery.
 *
 * - NULLs are excluded before counting, so a missing value can never be
 *   reported as the mode just because it was missing most often. When every
 *   value for an id is NULL the subquery returns no row and the column is NULL.
 * - Ties on frequency are broken by the value itself (ascending), which makes
 *   the result repeatable; TOP with a non-unique ORDER BY may otherwise pick
 *   any of the tied values.
 */
INSERT INTO @destination_table
        (
          id
        , ethnic_group
        , income
        )
SELECT st.id
    , (
        SELECT TOP (1) sub_st.ethnic_group
        FROM @source_table sub_st
        WHERE sub_st.id = st.id
          AND sub_st.ethnic_group IS NOT NULL
        GROUP BY sub_st.ethnic_group
        ORDER BY COUNT(*) DESC, sub_st.ethnic_group
    ) AS ethnic_group
    , (
        SELECT TOP (1) sub_st.income
        FROM @source_table sub_st
        WHERE sub_st.id = st.id
          AND sub_st.income IS NOT NULL
        GROUP BY sub_st.income
        ORDER BY COUNT(*) DESC, sub_st.income
    ) AS income
FROM @source_table st
/* One output row per id; this is what satisfies the destination primary key. */
GROUP BY st.id;


/* Show the collapsed result; each id appears exactly once. */
SELECT
    dt.id,
    dt.ethnic_group,
    dt.income
FROM @destination_table AS dt;