尝试使用 MySQL 创建一列,用于保存同一 table 中另一列的赛季至今平均值
trying to create column with season-to-date averages of values of another column in same table using MySQL
我正在尝试编写 MySQL 代码,在我的 table "starting_pitcher_stats" 中创建一个列(平均 park_factor),用来保存另一列 (park_factor) 的赛季内(赛季至今)平均值。我希望这个赛季内平均值按投手和日期分组。
理想情况下,table 看起来像这样:
pitcher park_fac avg_park_fac date
aased001 94 94 1977-07-31
aased001 100 97 1977-08-06
aased001 108 100.666 1977-08-11
aased001 108 102.5 1977-08-16
aased001 96 101.2 1977-08-21
aased001 108 102.33 1977-08-26
aased001 108 103.14 1977-08-31
aased001 104 103.25 1977-09-05
aased001 108 103.77 1977-09-10
aased001 92 102.6 1977-09-16
aased001 106 102.9 1977-09-22
aased001 108 103.33 1977-09-27
我使用的代码是:
-- Asker's original attempt. Rows are grouped per (pitcher, date, game number),
-- so AVG() only spans the rows inside each such group; in the sample output
-- below it therefore equals full_park_factor itself.
SELECT
    Starting_Pitcher,
    full_park_factor,
    AVG(full_park_factor),
    Game_Date
FROM starting_pitcher_stats
GROUP BY
    Starting_Pitcher,
    Game_Date,
    Game_Number
...结果 table 的样本如下所示:
pitcher park_fac avg_park_fac date
aased001 94 94.0000 1977-07-31
aased001 100 100.0000 1977-08-06
aased001 108 108.0000 1977-08-11
aased001 108 108.0000 1977-08-16
aased001 96 96.0000 1977-08-21
aased001 108 108.0000 1977-08-26
aased001 108 108.0000 1977-08-31
aased001 104 104.0000 1977-09-05
aased001 108 108.0000 1977-09-10
aased001 92 92.0000 1977-09-16
aased001 106 106.0000 1977-09-22
aased001 108 108.0000 1977-09-27
有人可以帮忙吗?
在此先感谢您的帮助。
李
您需要把您的 table 与它自身做连接(JOIN),使每一行都关联到同一投手此前(含当场比赛)的所有结果。
我不太确定您是如何定义赛季的,但假设它是按日历年定义的,则以下查询会产生所需的输出。
-- Season-to-date average of full_park_factor for every start.
-- Each row `a` is joined to every row `b` of the same pitcher in the same
-- calendar year that was played on an earlier date, or on the same date with a
-- game number no greater than a's, so the current start is part of its own average.
SELECT
    a.Starting_Pitcher,
    a.full_park_factor,
    AVG(b.full_park_factor),
    a.Game_Date,
    a.Game_Number
FROM starting_pitcher_stats a
INNER JOIN starting_pitcher_stats b
    ON a.Starting_Pitcher = b.Starting_Pitcher
    AND YEAR(b.Game_Date) = YEAR(a.Game_Date)
    AND (
        b.Game_Date < a.Game_Date
        OR (b.Game_Date = a.Game_Date AND b.Game_Number <= a.Game_Number)
    )
-- a.full_park_factor is grouped as well so the statement is accepted under
-- ONLY_FULL_GROUP_BY even if (pitcher, date, game number) is not a declared
-- unique key; when that triple is unique the result is unchanged.
GROUP BY
    a.Starting_Pitcher,
    a.Game_Date,
    a.Game_Number,
    a.full_park_factor
-- Explicit ordering: GROUP BY does not guarantee row order (MySQL 8.0 dropped
-- the implicit sort), and the running average only reads naturally in date order.
ORDER BY
    a.Starting_Pitcher,
    a.Game_Date,
    a.Game_Number;
您似乎想用此计算结果更新 table 中的一列。这可以通过触发器来实时实现(每当您插入或更新数据时,由触发器更新该列),也可以改用视图来实现。
-- View returning every start together with the pitcher's season-to-date
-- (calendar-year) average of full_park_factor, the current start included.
-- NOTE(review): the average column keeps its expression text as its name;
-- consider adding an alias if no consumer depends on the current name.
CREATE VIEW starting_pitcher_stats_with_average AS
SELECT
    a.Starting_Pitcher,
    a.full_park_factor,
    AVG(b.full_park_factor),
    a.Game_Date,
    a.Game_Number
FROM starting_pitcher_stats a
INNER JOIN starting_pitcher_stats b
    ON a.Starting_Pitcher = b.Starting_Pitcher
    AND YEAR(b.Game_Date) = YEAR(a.Game_Date)
    AND (
        b.Game_Date < a.Game_Date
        OR (b.Game_Date = a.Game_Date AND b.Game_Number <= a.Game_Number)
    )
-- a.full_park_factor is grouped as well so the view definition is accepted
-- under ONLY_FULL_GROUP_BY even if (pitcher, date, game number) is not a
-- declared unique key; when that triple is unique the rows are unchanged.
GROUP BY
    a.Starting_Pitcher,
    a.Game_Date,
    a.Game_Number,
    a.full_park_factor;
在您自己的回答中,您创建了一个存储过程来一次性更新 table 中所有记录的平均值列,因此您可能并不需要在插入数据时就更新该列,而只是希望能够按需为所有行补上平均值。在这种情况下,您可以编写一个 UPDATE 语句,把上面的 SELECT 查询作为子查询。因为 MySQL 不允许在 UPDATE 的目标和它的子查询中直接使用同一个 table,所以您必须把子查询再包在另一个 SELECT 中,这样 MySQL 会先根据查询结果生成临时 table。
-- Backfill std_F_parkfactor on every row (intentionally no outer WHERE) with
-- the pitcher's season-to-date average of full_park_factor.
-- The aggregate sits inside a derived table because MySQL rejects a subquery
-- that reads the UPDATE target table directly.
UPDATE starting_pitcher_stats AS target
SET target.std_F_parkfactor = (
    SELECT running.std_F_parkfactor
    FROM (
        SELECT
            cur.Starting_Pitcher,
            cur.Game_Date,
            cur.Game_Number,
            AVG(prior.full_park_factor) AS std_F_parkfactor
        FROM starting_pitcher_stats AS cur
        INNER JOIN starting_pitcher_stats AS prior
            ON prior.Starting_Pitcher = cur.Starting_Pitcher
            AND YEAR(prior.Game_Date) = YEAR(cur.Game_Date)
            AND (
                prior.Game_Date < cur.Game_Date
                OR (
                    prior.Game_Date = cur.Game_Date
                    AND prior.Game_Number <= cur.Game_Number
                )
            )
        GROUP BY
            cur.Starting_Pitcher,
            cur.Game_Date,
            cur.Game_Number
    ) AS running
    -- Correlate the materialised averages back to the row being updated.
    WHERE running.Starting_Pitcher = target.Starting_Pitcher
        AND running.Game_Date = target.Game_Date
        AND running.Game_Number = target.Game_Number
);
更新:下面是一种使用存储过程、把给定赛季的赛季至今(赛季内)球场因子(park factor)平均值存入某一列的方法;在本例中,它是同一 table 中另一列值的平均值。它基本上是用累计值除以 row_number 来计算平均值,row_number 对应于计算该变量时已循环处理的行数。如果您已经收集好数据,只想一次性更新或很少更新,这种方法是可行的;但正如 Matt Raines 所暗示的那样,它可能需要比他建议的方法更频繁地运行。如果 table 至少每天都会加入新一天的比赛结果,我认为使用他的方法会更省力。如果其中有可以删去的内容,请告诉我:
-- std_park_factor_avg: recompute starting_pitcher_stats.std_F_parkfactor for
-- every row as the running (season-to-date) mean of full_park_factor.
-- The running mean restarts whenever the pitcher or the calendar year of
-- Game_Date changes. Takes no parameters and returns no result set; its only
-- effect is the UPDATE of std_F_parkfactor.
DROP PROCEDURE IF EXISTS std_park_factor_avg;
DELIMITER $$
CREATE PROCEDURE std_park_factor_avg()
BEGIN
    -- Values of the row currently fetched from the cursor.
    DECLARE pit_id CHAR(10);
    DECLARE lgID CHAR(2);
    DECLARE YEARID INT;
    DECLARE gdate DATE;
    DECLARE seq INT;
    DECLARE F_park_factor INT;
    -- Running totals for the current pitcher-season.
    DECLARE accum_F_parkfactor REAL DEFAULT 0.0;
    DECLARE accum_row_number INT DEFAULT 0;
    DECLARE accum_avg_F_parkfactor REAL DEFAULT NULL;
    -- Control-break keys: pitcher and calendar year of the previous row.
    DECLARE prev_pit_id CHAR(10) DEFAULT NULL;
    DECLARE prev_year INT DEFAULT NULL;
    DECLARE end_of_cursor BOOLEAN DEFAULT FALSE;
    -- Explicit ORDER BY: the running mean is only correct when each pitcher's
    -- starts arrive in chronological order, which GROUP BY does not guarantee.
    DECLARE c1 CURSOR FOR
        SELECT Starting_Pitcher, lg_ID, YEAR_ID, Game_Date, Game_Number, full_park_factor
        FROM starting_pitcher_stats
        ORDER BY Starting_Pitcher, Game_Date, Game_Number;
    DECLARE CONTINUE HANDLER FOR NOT FOUND
        SET end_of_cursor := TRUE;

    OPEN c1;
    fetch_loop: LOOP
        FETCH c1 INTO pit_id, lgID, YEARID, gdate, seq, F_park_factor;
        IF end_of_cursor THEN
            LEAVE fetch_loop;
        END IF;

        -- Control break: start a fresh running mean for a new pitcher or a new
        -- calendar year. <=> is NULL-safe, so the very first row (previous keys
        -- still NULL) also triggers the reset.
        IF NOT (pit_id <=> prev_pit_id) OR NOT (YEAR(gdate) <=> prev_year) THEN
            SET accum_F_parkfactor := 0.0;
            SET accum_row_number := 0;
            SET prev_pit_id := pit_id;
            SET prev_year := YEAR(gdate);
        END IF;

        -- Skip NULL park factors, as AVG() does, so one missing value does not
        -- turn the rest of the season's averages into NULL.
        IF F_park_factor IS NOT NULL THEN
            SET accum_F_parkfactor := accum_F_parkfactor + F_park_factor;
            SET accum_row_number := accum_row_number + 1;
        END IF;

        -- NULLIF yields NULL instead of a division by zero when no non-NULL
        -- value has been seen yet in this pitcher-season.
        SET accum_avg_F_parkfactor := accum_F_parkfactor / NULLIF(accum_row_number, 0);

        UPDATE starting_pitcher_stats
        SET std_F_parkfactor = accum_avg_F_parkfactor
        WHERE Starting_Pitcher = pit_id
            AND lg_ID = lgID
            AND YEAR_ID = YEARID
            AND Game_Date = gdate
            AND Game_Number = seq;
    END LOOP;
    CLOSE c1;
END
$$
DELIMITER ;
我正在尝试编写 MySQL 代码,在我的 table "starting_pitcher_stats" 中创建一个列(平均 park_factor),用来保存另一列 (park_factor) 的赛季内(赛季至今)平均值。我希望这个赛季内平均值按投手和日期分组。
理想情况下,table 看起来像这样:
pitcher park_fac avg_park_fac date
aased001 94 94 1977-07-31
aased001 100 97 1977-08-06
aased001 108 100.666 1977-08-11
aased001 108 102.5 1977-08-16
aased001 96 101.2 1977-08-21
aased001 108 102.33 1977-08-26
aased001 108 103.14 1977-08-31
aased001 104 103.25 1977-09-05
aased001 108 103.77 1977-09-10
aased001 92 102.6 1977-09-16
aased001 106 102.9 1977-09-22
aased001 108 103.33 1977-09-27
我使用的代码是:
-- Asker's original attempt. Rows are grouped per (pitcher, date, game number),
-- so AVG() only spans the rows inside each such group; in the sample output
-- below it therefore equals full_park_factor itself.
SELECT
    Starting_Pitcher,
    full_park_factor,
    AVG(full_park_factor),
    Game_Date
FROM starting_pitcher_stats
GROUP BY
    Starting_Pitcher,
    Game_Date,
    Game_Number
...结果 table 的样本如下所示:
pitcher park_fac avg_park_fac date
aased001 94 94.0000 1977-07-31
aased001 100 100.0000 1977-08-06
aased001 108 108.0000 1977-08-11
aased001 108 108.0000 1977-08-16
aased001 96 96.0000 1977-08-21
aased001 108 108.0000 1977-08-26
aased001 108 108.0000 1977-08-31
aased001 104 104.0000 1977-09-05
aased001 108 108.0000 1977-09-10
aased001 92 92.0000 1977-09-16
aased001 106 106.0000 1977-09-22
aased001 108 108.0000 1977-09-27
有人可以帮忙吗?
在此先感谢您的帮助。 李
您需要把您的 table 与它自身做连接(JOIN),使每一行都关联到同一投手此前(含当场比赛)的所有结果。
我不太确定您是如何定义赛季的,但假设它是按日历年定义的,则以下查询会产生所需的输出。
-- Season-to-date average of full_park_factor for every start.
-- Each row `a` is joined to every row `b` of the same pitcher in the same
-- calendar year that was played on an earlier date, or on the same date with a
-- game number no greater than a's, so the current start is part of its own average.
SELECT
    a.Starting_Pitcher,
    a.full_park_factor,
    AVG(b.full_park_factor),
    a.Game_Date,
    a.Game_Number
FROM starting_pitcher_stats a
INNER JOIN starting_pitcher_stats b
    ON a.Starting_Pitcher = b.Starting_Pitcher
    AND YEAR(b.Game_Date) = YEAR(a.Game_Date)
    AND (
        b.Game_Date < a.Game_Date
        OR (b.Game_Date = a.Game_Date AND b.Game_Number <= a.Game_Number)
    )
-- a.full_park_factor is grouped as well so the statement is accepted under
-- ONLY_FULL_GROUP_BY even if (pitcher, date, game number) is not a declared
-- unique key; when that triple is unique the result is unchanged.
GROUP BY
    a.Starting_Pitcher,
    a.Game_Date,
    a.Game_Number,
    a.full_park_factor
-- Explicit ordering: GROUP BY does not guarantee row order (MySQL 8.0 dropped
-- the implicit sort), and the running average only reads naturally in date order.
ORDER BY
    a.Starting_Pitcher,
    a.Game_Date,
    a.Game_Number;
您似乎想用此计算结果更新 table 中的一列。这可以通过触发器来实时实现(每当您插入或更新数据时,由触发器更新该列),也可以改用视图来实现。
-- View returning every start together with the pitcher's season-to-date
-- (calendar-year) average of full_park_factor, the current start included.
-- NOTE(review): the average column keeps its expression text as its name;
-- consider adding an alias if no consumer depends on the current name.
CREATE VIEW starting_pitcher_stats_with_average AS
SELECT
    a.Starting_Pitcher,
    a.full_park_factor,
    AVG(b.full_park_factor),
    a.Game_Date,
    a.Game_Number
FROM starting_pitcher_stats a
INNER JOIN starting_pitcher_stats b
    ON a.Starting_Pitcher = b.Starting_Pitcher
    AND YEAR(b.Game_Date) = YEAR(a.Game_Date)
    AND (
        b.Game_Date < a.Game_Date
        OR (b.Game_Date = a.Game_Date AND b.Game_Number <= a.Game_Number)
    )
-- a.full_park_factor is grouped as well so the view definition is accepted
-- under ONLY_FULL_GROUP_BY even if (pitcher, date, game number) is not a
-- declared unique key; when that triple is unique the rows are unchanged.
GROUP BY
    a.Starting_Pitcher,
    a.Game_Date,
    a.Game_Number,
    a.full_park_factor;
在您自己的回答中,您创建了一个存储过程来一次性更新 table 中所有记录的平均值列,因此您可能并不需要在插入数据时就更新该列,而只是希望能够按需为所有行补上平均值。在这种情况下,您可以编写一个 UPDATE 语句,把上面的 SELECT 查询作为子查询。因为 MySQL 不允许在 UPDATE 的目标和它的子查询中直接使用同一个 table,所以您必须把子查询再包在另一个 SELECT 中,这样 MySQL 会先根据查询结果生成临时 table。
-- Backfill std_F_parkfactor on every row (intentionally no outer WHERE) with
-- the pitcher's season-to-date average of full_park_factor.
-- The aggregate sits inside a derived table because MySQL rejects a subquery
-- that reads the UPDATE target table directly.
UPDATE starting_pitcher_stats AS target
SET target.std_F_parkfactor = (
    SELECT running.std_F_parkfactor
    FROM (
        SELECT
            cur.Starting_Pitcher,
            cur.Game_Date,
            cur.Game_Number,
            AVG(prior.full_park_factor) AS std_F_parkfactor
        FROM starting_pitcher_stats AS cur
        INNER JOIN starting_pitcher_stats AS prior
            ON prior.Starting_Pitcher = cur.Starting_Pitcher
            AND YEAR(prior.Game_Date) = YEAR(cur.Game_Date)
            AND (
                prior.Game_Date < cur.Game_Date
                OR (
                    prior.Game_Date = cur.Game_Date
                    AND prior.Game_Number <= cur.Game_Number
                )
            )
        GROUP BY
            cur.Starting_Pitcher,
            cur.Game_Date,
            cur.Game_Number
    ) AS running
    -- Correlate the materialised averages back to the row being updated.
    WHERE running.Starting_Pitcher = target.Starting_Pitcher
        AND running.Game_Date = target.Game_Date
        AND running.Game_Number = target.Game_Number
);
更新:下面是一种使用存储过程、把给定赛季的赛季至今(赛季内)球场因子(park factor)平均值存入某一列的方法;在本例中,它是同一 table 中另一列值的平均值。它基本上是用累计值除以 row_number 来计算平均值,row_number 对应于计算该变量时已循环处理的行数。如果您已经收集好数据,只想一次性更新或很少更新,这种方法是可行的;但正如 Matt Raines 所暗示的那样,它可能需要比他建议的方法更频繁地运行。如果 table 至少每天都会加入新一天的比赛结果,我认为使用他的方法会更省力。如果其中有可以删去的内容,请告诉我:
-- std_park_factor_avg: recompute starting_pitcher_stats.std_F_parkfactor for
-- every row as the running (season-to-date) mean of full_park_factor.
-- The running mean restarts whenever the pitcher or the calendar year of
-- Game_Date changes. Takes no parameters and returns no result set; its only
-- effect is the UPDATE of std_F_parkfactor.
DROP PROCEDURE IF EXISTS std_park_factor_avg;
DELIMITER $$
CREATE PROCEDURE std_park_factor_avg()
BEGIN
    -- Values of the row currently fetched from the cursor.
    DECLARE pit_id CHAR(10);
    DECLARE lgID CHAR(2);
    DECLARE YEARID INT;
    DECLARE gdate DATE;
    DECLARE seq INT;
    DECLARE F_park_factor INT;
    -- Running totals for the current pitcher-season.
    DECLARE accum_F_parkfactor REAL DEFAULT 0.0;
    DECLARE accum_row_number INT DEFAULT 0;
    DECLARE accum_avg_F_parkfactor REAL DEFAULT NULL;
    -- Control-break keys: pitcher and calendar year of the previous row.
    DECLARE prev_pit_id CHAR(10) DEFAULT NULL;
    DECLARE prev_year INT DEFAULT NULL;
    DECLARE end_of_cursor BOOLEAN DEFAULT FALSE;
    -- Explicit ORDER BY: the running mean is only correct when each pitcher's
    -- starts arrive in chronological order, which GROUP BY does not guarantee.
    DECLARE c1 CURSOR FOR
        SELECT Starting_Pitcher, lg_ID, YEAR_ID, Game_Date, Game_Number, full_park_factor
        FROM starting_pitcher_stats
        ORDER BY Starting_Pitcher, Game_Date, Game_Number;
    DECLARE CONTINUE HANDLER FOR NOT FOUND
        SET end_of_cursor := TRUE;

    OPEN c1;
    fetch_loop: LOOP
        FETCH c1 INTO pit_id, lgID, YEARID, gdate, seq, F_park_factor;
        IF end_of_cursor THEN
            LEAVE fetch_loop;
        END IF;

        -- Control break: start a fresh running mean for a new pitcher or a new
        -- calendar year. <=> is NULL-safe, so the very first row (previous keys
        -- still NULL) also triggers the reset.
        IF NOT (pit_id <=> prev_pit_id) OR NOT (YEAR(gdate) <=> prev_year) THEN
            SET accum_F_parkfactor := 0.0;
            SET accum_row_number := 0;
            SET prev_pit_id := pit_id;
            SET prev_year := YEAR(gdate);
        END IF;

        -- Skip NULL park factors, as AVG() does, so one missing value does not
        -- turn the rest of the season's averages into NULL.
        IF F_park_factor IS NOT NULL THEN
            SET accum_F_parkfactor := accum_F_parkfactor + F_park_factor;
            SET accum_row_number := accum_row_number + 1;
        END IF;

        -- NULLIF yields NULL instead of a division by zero when no non-NULL
        -- value has been seen yet in this pitcher-season.
        SET accum_avg_F_parkfactor := accum_F_parkfactor / NULLIF(accum_row_number, 0);

        UPDATE starting_pitcher_stats
        SET std_F_parkfactor = accum_avg_F_parkfactor
        WHERE Starting_Pitcher = pit_id
            AND lg_ID = lgID
            AND YEAR_ID = YEARID
            AND Game_Date = gdate
            AND Game_Number = seq;
    END LOOP;
    CLOSE c1;
END
$$
DELIMITER ;