将数据从一个 table 分块插入另一个 table - MySQL
Insert data from one table to another table in chunks - MySQL
我一直在尝试将数据从一个 table 中加载到一个基于过滤器的新 table 中,该 table 有 1 亿多行。我尝试使用下面的程序加载,但由于某种原因,它只是坐在那里没有结果或 activity 在 processlist.
我一直在尝试根据更新日期插入数据,但最终要么每次 运行 插入相同的数据到 PK 违规中,要么没有更新。
请告知将此数据加载到新 table 中的最佳方法。谢谢
USE `mydb`;
DROP procedure IF EXISTS `insert_data`;
DELIMITER $$
USE `ods`$$
CREATE PROCEDURE `insert_data` ()
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE limitSize INT DEFAULT 2000;
DECLARE maxupdate datetime;
SET maxupdate = (SELECT MAX(updated) FROM test_data);
WHILE i <= maxId DO
START TRANSACTION;
INSERT INTO test_arc
SELECT * FROM test_data
WHERE check_time>='2022-02-05' and updated> i
ORDER BY updated ASC
LIMIT limitSize;
COMMIT;
SET i = i + limitSize;
END WHILE;
END$$
DELIMITER;
--My Tables:
--Actual Table
CREATE TABLE test_data (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)
);
--Archive Table
CREATE TABLE test_arc (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)
);
交易你真的不知道,这就是我删除它的原因,其余的由 LIMIT 和 OFFSET 控制,你需要在块中进行这样的部分更新
CREATE TABLE test_arc (id int,check_time datetime, updated datetime)
CREATE TABLE test_data (id int,check_time datetime, updated Datetime)
INSERT INTO test_data VALUES (1,NOW() - INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(2,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
,(3,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(4,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
,(5,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),
(6,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(7,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
,(8,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
,(9,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(10,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
SELECT * FROM test_data
id | check_time | updated
-: | :------------------ | :------------------
1 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
2 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
3 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
4 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
5 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
6 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
7 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
8 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
9 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
10 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
CREATE PROCEDURE `insert_data` ()
BEGIN
DECLARE i BIGINT DEFAULT 0;
DECLARE limitSize INT DEFAULT 5;
DECLARE maxnumber BIGINT;
SET maxnumber = (SELECT COUNT(*) FROM test_data) + 1;
WHILE i <= maxnumber DO
INSERT INTO test_arc
SELECT id ,check_time, NOW() FROM test_data
WHERE check_time>='2022-02-05'
ORDER BY updated ASC
LIMIT i, limitSize
ON DUPLICATE KEY UPDATE id =VALUES(id), check_time=VALUES(check_time),updated = Values(updated ); SET i = i + limitSize;
END WHILE;
END
✓
CALL insert_data();
SELECT * FROM test_arc
id | check_time | updated
-: | :------------------ | :------------------
1 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
2 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
3 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
4 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
5 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
6 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
7 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
8 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
9 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
10 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
db<>fiddle here
我一直在尝试将数据从一个 table 中加载到一个基于过滤器的新 table 中,该 table 有 1 亿多行。我尝试使用下面的程序加载,但由于某种原因,它只是坐在那里没有结果或 activity 在 processlist.
我一直在尝试根据更新日期插入数据,但最终要么每次 运行 插入相同的数据到 PK 违规中,要么没有更新。
请告知将此数据加载到新 table 中的最佳方法。谢谢
USE `mydb`;
DROP procedure IF EXISTS `insert_data`;
DELIMITER $$
USE `ods`$$
CREATE PROCEDURE `insert_data` ()
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE limitSize INT DEFAULT 2000;
DECLARE maxupdate datetime;
SET maxupdate = (SELECT MAX(updated) FROM test_data);
WHILE i <= maxId DO
START TRANSACTION;
INSERT INTO test_arc
SELECT * FROM test_data
WHERE check_time>='2022-02-05' and updated> i
ORDER BY updated ASC
LIMIT limitSize;
COMMIT;
SET i = i + limitSize;
END WHILE;
END$$
DELIMITER;
--My Tables:
--Actual Table
CREATE TABLE test_data (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)
);
--Archive Table
CREATE TABLE test_arc (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)
);
交易你真的不知道,这就是我删除它的原因,其余的由 LIMIT 和 OFFSET 控制,你需要在块中进行这样的部分更新
CREATE TABLE test_arc (id int,check_time datetime, updated datetime)
CREATE TABLE test_data (id int,check_time datetime, updated Datetime)
INSERT INTO test_data VALUES (1,NOW() - INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(2,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(3,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(4,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(5,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY), (6,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(7,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(8,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(9,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(10,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
SELECT * FROM test_data
id | check_time | updated -: | :------------------ | :------------------ 1 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 2 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 3 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 4 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 5 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 6 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 7 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 8 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 9 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 10 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
CREATE PROCEDURE `insert_data` () BEGIN DECLARE i BIGINT DEFAULT 0; DECLARE limitSize INT DEFAULT 5; DECLARE maxnumber BIGINT; SET maxnumber = (SELECT COUNT(*) FROM test_data) + 1; WHILE i <= maxnumber DO INSERT INTO test_arc SELECT id ,check_time, NOW() FROM test_data WHERE check_time>='2022-02-05' ORDER BY updated ASC LIMIT i, limitSize ON DUPLICATE KEY UPDATE id =VALUES(id), check_time=VALUES(check_time),updated = Values(updated ); SET i = i + limitSize; END WHILE; END
✓
CALL insert_data();
SELECT * FROM test_arc
id | check_time | updated -: | :------------------ | :------------------ 1 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 2 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 3 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 4 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 5 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 6 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 7 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 8 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 9 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 10 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
db<>fiddle here