使用 MySQL 从 IMDB 数据库查询
Querying from IMDB Database using MySQL
我写了一个 SQL 查询来回答以下问题:
Find all the actors that made more movies with Yash Chopra than any other director in the IMBD database.
示例架构:
person
(pid *
,name
);
m_cast
(mid *
,pid *
);
m_director
(mid*
,pid*
);
* = (component of) PRIMARY KEY
以下是我的查询:
WITH common_actors AS
(SELECT A.actor_id as actors, B.director_id as director_id, B.movies as movies_with_director,
B.director_id as yash_chops_id, B.movies as movies_with_yash_chops FROM
(SELECT M_Cast.PID as actor_id, M_Director.PID as director_id, COUNT(*) as movies from M_Cast
left join M_Director
ON M_Cast.MID = M_Director.MID
GROUP BY actor_id, director_id) A
JOIN
(SELECT M_Cast.PID as actor_id, M_Director.PID as director_id, COUNT(*) as movies from M_Cast
left join M_Director
ON M_Cast.MID = M_Director.MID
GROUP BY actor_id, director_id
)B
ON A.actor_id = B.actor_id
WHERE B.director_id in (SELECT PID FROM Person WHERE Name LIKE
'%Yash%Chopra%'))
SELECT distinct actors as actor_id, movies_with_yash_chops as total_movies FROM common_actors
WHERE actors NOT IN (SELECT actors FROM common_actors WHERE movies_with_director > movies_with_yash_chops)
由此得到的结果长度为:430行。然而,获得的结果应该是 243 行的长度。任何人都可以建议我在查询中出错的地方吗?我的做法对吗?
示例结果:
Actor name
0 Sharib Hashmi
1 Kulbir Badesron
2 Gurdas Maan
3 Parikshat Sahni
...
242 Ramlal Shyamlal
提前致谢!
考虑以下因素:
DROP TABLE IF EXISTS person;
CREATE TABLE person
(person_id SERIAL PRIMARY KEY
,name VARCHAR(20) NOT NULL UNIQUE
);
DROP TABLE IF EXISTS movie;
CREATE TABLE movie
(movie_id SERIAL PRIMARY KEY
,title VARCHAR(50) NOT NULL UNIQUE
);
DROP TABLE IF EXISTS m_cast;
CREATE TABLE m_cast
(movie_id INT NOT NULL
,person_id INT NOT NULL
,PRIMARY KEY(movie_id,person_id)
);
DROP TABLE IF EXISTS m_director;
CREATE TABLE m_director
(movie_id INT NOT NULL
,person_id INT NOT NULL
,PRIMARY KEY(movie_id,person_id)
);
INSERT INTO person (name) VALUES
('Steven Feelberg'),
('Manly Kubrick'),
('Alfred Spatchcock'),
('Fred Pitt'),
('Raphael DiMaggio'),
('Bill Smith');
INSERT INTO movie VALUES
(1,'Feelberg\'s Movie with Fred & Raph'),
(2,'Feelberg and Fred Ride Again'),
(3,'Kubrick shoots DiMaggio'),
(4,'Kubrick\'s Movie with Bill Smith'),
(5,'Spatchcock Presents Bill Smith');
INSERT INTO m_director VALUES
(1,1),
(2,1),
(3,2),
(4,2),
(5,3);
INSERT INTO m_cast VALUES
(1,4),
(1,5),
(2,4),
(3,5),
(4,6),
(5,6);
我收录了电影 table 只是为了方便参考。它与实际问题无关。
另请注意,此模型假设演员只列出一次,无论他们是否在给定电影中扮演多个角色。
以下查询询问 'how often have each actor and director worked together'...
演员是任何电影演员的任何人。
导演是指担任任何电影导演的任何人。
SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director;
+-------------------+-------------------+-------+
| actor | director | total |
+-------------------+-------------------+-------+
| Fred Pitt | Alfred Spatchcock | 0 |
| Fred Pitt | Manly Kubrick | 0 |
| Fred Pitt | Steven Feelberg | 2 |
| Raphael DiMaggio | Alfred Spatchcock | 0 |
| Raphael DiMaggio | Manly Kubrick | 1 |
| Raphael DiMaggio | Steven Feelberg | 1 |
| Bill Smith | Alfred Spatchcock | 1 |
| Bill Smith | Manly Kubrick | 1 |
| Bill Smith | Steven Feelberg | 0 |
+-------------------+-------------------+-------+
通过观察可以看出:
- 与其他导演相比,唯一与 Feelberg 合作更频繁的演员是 Fred Pritt
- 拉斐尔·迪卡普里奥和比尔·史密斯与两位导演(尽管导演不同)的合作频率相同
编辑:虽然我并不是认真地提倡将此作为解决方案,但以下内容只是为了证明上面提供的内核确实是您解决问题所需要的一切...
SELECT x.*
FROM
( SELECT a.*
FROM
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) a
LEFT
JOIN
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) b
ON b.actor = a.actor
AND b.director <> a.director
AND b.total > a.total
WHERE b.actor IS NULL
) x
LEFT JOIN
( SELECT a.*
FROM
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) a
LEFT
JOIN
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) b
ON b.actor = a.actor
AND b.director <> a.director
AND b.total > a.total
WHERE b.actor IS NULL
) y
ON y.actor = x.actor AND y.director <> x.director
WHERE y.actor IS NULL;
+-----------+-----------------+-------+
| actor | director | total |
+-----------+-----------------+-------+
| Fred Pitt | Steven Feelberg | 2 |
+-----------+-----------------+-------+
这个 returns 每个演员的名单,以及与他们合作最频繁的导演。在这种情况下,由于 Bill Smith 和 Raphael DiMaggio 最常与两位导演平等合作,因此他们被排除在结果之外。
您的问题的答案很简单,就是 select 从此列表中将 Yash Chopra 列为导演的所有行。
我写了一个 SQL 查询来回答以下问题:
Find all the actors that made more movies with Yash Chopra than any other director in the IMBD database.
示例架构:
person
(pid *
,name
);
m_cast
(mid *
,pid *
);
m_director
(mid*
,pid*
);
* = (component of) PRIMARY KEY
以下是我的查询:
WITH common_actors AS
(SELECT A.actor_id as actors, B.director_id as director_id, B.movies as movies_with_director,
B.director_id as yash_chops_id, B.movies as movies_with_yash_chops FROM
(SELECT M_Cast.PID as actor_id, M_Director.PID as director_id, COUNT(*) as movies from M_Cast
left join M_Director
ON M_Cast.MID = M_Director.MID
GROUP BY actor_id, director_id) A
JOIN
(SELECT M_Cast.PID as actor_id, M_Director.PID as director_id, COUNT(*) as movies from M_Cast
left join M_Director
ON M_Cast.MID = M_Director.MID
GROUP BY actor_id, director_id
)B
ON A.actor_id = B.actor_id
WHERE B.director_id in (SELECT PID FROM Person WHERE Name LIKE
'%Yash%Chopra%'))
SELECT distinct actors as actor_id, movies_with_yash_chops as total_movies FROM common_actors
WHERE actors NOT IN (SELECT actors FROM common_actors WHERE movies_with_director > movies_with_yash_chops)
由此得到的结果长度为:430行。然而,获得的结果应该是 243 行的长度。任何人都可以建议我在查询中出错的地方吗?我的做法对吗?
示例结果:
Actor name
0 Sharib Hashmi
1 Kulbir Badesron
2 Gurdas Maan
3 Parikshat Sahni
...
242 Ramlal Shyamlal
提前致谢!
考虑以下因素:
DROP TABLE IF EXISTS person;
CREATE TABLE person
(person_id SERIAL PRIMARY KEY
,name VARCHAR(20) NOT NULL UNIQUE
);
DROP TABLE IF EXISTS movie;
CREATE TABLE movie
(movie_id SERIAL PRIMARY KEY
,title VARCHAR(50) NOT NULL UNIQUE
);
DROP TABLE IF EXISTS m_cast;
CREATE TABLE m_cast
(movie_id INT NOT NULL
,person_id INT NOT NULL
,PRIMARY KEY(movie_id,person_id)
);
DROP TABLE IF EXISTS m_director;
CREATE TABLE m_director
(movie_id INT NOT NULL
,person_id INT NOT NULL
,PRIMARY KEY(movie_id,person_id)
);
INSERT INTO person (name) VALUES
('Steven Feelberg'),
('Manly Kubrick'),
('Alfred Spatchcock'),
('Fred Pitt'),
('Raphael DiMaggio'),
('Bill Smith');
INSERT INTO movie VALUES
(1,'Feelberg\'s Movie with Fred & Raph'),
(2,'Feelberg and Fred Ride Again'),
(3,'Kubrick shoots DiMaggio'),
(4,'Kubrick\'s Movie with Bill Smith'),
(5,'Spatchcock Presents Bill Smith');
INSERT INTO m_director VALUES
(1,1),
(2,1),
(3,2),
(4,2),
(5,3);
INSERT INTO m_cast VALUES
(1,4),
(1,5),
(2,4),
(3,5),
(4,6),
(5,6);
我收录了电影 table 只是为了方便参考。它与实际问题无关。 另请注意,此模型假设演员只列出一次,无论他们是否在给定电影中扮演多个角色。
以下查询询问 'how often have each actor and director worked together'...
演员是任何电影演员的任何人。 导演是指担任任何电影导演的任何人。
SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director;
+-------------------+-------------------+-------+
| actor | director | total |
+-------------------+-------------------+-------+
| Fred Pitt | Alfred Spatchcock | 0 |
| Fred Pitt | Manly Kubrick | 0 |
| Fred Pitt | Steven Feelberg | 2 |
| Raphael DiMaggio | Alfred Spatchcock | 0 |
| Raphael DiMaggio | Manly Kubrick | 1 |
| Raphael DiMaggio | Steven Feelberg | 1 |
| Bill Smith | Alfred Spatchcock | 1 |
| Bill Smith | Manly Kubrick | 1 |
| Bill Smith | Steven Feelberg | 0 |
+-------------------+-------------------+-------+
通过观察可以看出:
- 与其他导演相比,唯一与 Feelberg 合作更频繁的演员是 Fred Pritt
- 拉斐尔·迪卡普里奥和比尔·史密斯与两位导演(尽管导演不同)的合作频率相同
编辑:虽然我并不是认真地提倡将此作为解决方案,但以下内容只是为了证明上面提供的内核确实是您解决问题所需要的一切...
SELECT x.*
FROM
( SELECT a.*
FROM
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) a
LEFT
JOIN
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) b
ON b.actor = a.actor
AND b.director <> a.director
AND b.total > a.total
WHERE b.actor IS NULL
) x
LEFT JOIN
( SELECT a.*
FROM
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) a
LEFT
JOIN
( SELECT a.name actor
, d.name director
, COUNT(DISTINCT ma.movie_id) total
FROM person d
JOIN m_director md
ON md.person_id = d.person_id
JOIN person a
LEFT
JOIN m_cast ma
ON ma.person_id = a.person_id
AND ma.movie_id = md.movie_id
JOIN m_cast x
ON x.person_id = a.person_id
GROUP
BY actor
, director
) b
ON b.actor = a.actor
AND b.director <> a.director
AND b.total > a.total
WHERE b.actor IS NULL
) y
ON y.actor = x.actor AND y.director <> x.director
WHERE y.actor IS NULL;
+-----------+-----------------+-------+
| actor | director | total |
+-----------+-----------------+-------+
| Fred Pitt | Steven Feelberg | 2 |
+-----------+-----------------+-------+
这个 returns 每个演员的名单,以及与他们合作最频繁的导演。在这种情况下,由于 Bill Smith 和 Raphael DiMaggio 最常与两位导演平等合作,因此他们被排除在结果之外。
您的问题的答案很简单,就是 select 从此列表中将 Yash Chopra 列为导演的所有行。