使用初始查询合并跨多个数据库的查询?
Use an initial query to merge queries across multiple databases?
使用 Data Explorer (SEDE),我想找出哪些用户在 Stack Overflow 上的声望超过 200000,然后查找他们在其他 Stack Exchange 站点上拥有的所有帐户的详细信息。
这是为列表提供此阈值的查询:
-- Baseline list: every user on the current site whose reputation is above 200000.
SELECT
    u.Id,
    u.Reputation,
    u.AccountId
FROM Users AS u
WHERE u.Reputation > 200000
AccountId
是在所有 Stack Exchange 站点之间通用的键。
我找到了 this query for aggregating across SEDE databases,但是如何让它基于前一个(基准)查询的动态结果来运行呢?
这是我想要的输出类型:
id_so, reputation_so, accountid, other_stackexchange_site_name, reputation_othersite, number_of_answers_other_site, number_of_questions_other_site
1, 250000, 23, serverfault, 500, 5, 1
1, 250000, 23, superuser, 120, 1, 0
2, 300000, 21, serverfault, 300, 3, 2
2, 300000, 21, webmasters, 230, 1, 1
3, 350000, 20, NA, NA, NA, NA
#the case with id 3 has an SO profile with reputation but it has no other profile in other Stack Exchange site
要基于初始查询运行跨数据库的非平凡查询:
- 找出所有数据库中的公共键。在这种情况下,它是
AccountId
(这是用户的 Stack-Exchange-wide Id)。
创建初始查询,把该键写入一个临时表。在这种情况下:
-- Temp table holding the network-wide AccountId of each user picked by the initial query.
CREATE TABLE #UsersOfInterest (
    AccountId INT
);

-- Seed it with every user of the current site whose reputation is above 200000.
INSERT INTO #UsersOfInterest (AccountId)
SELECT  usr.AccountId
FROM    Users AS usr
WHERE   usr.Reputation > 200000;
- 创建另一个 temp table 来保存最终结果(见下文)。
确定要在每个站点上运行、用于获取所需信息的查询。例如:
-- Per-site lookup for one account: the user's profile columns plus the number of
-- posts they own with PostTypeId 1 (numQst) and PostTypeId 2 (numAns).
-- ##seAccntId## is a SEDE parameter supplied when the query is run.
SELECT
    usr.AccountId,
    usr.DisplayName,
    usr.Reputation,
    usr.Id,
    (
        SELECT COUNT(p.Id)
        FROM Posts AS p
        WHERE p.OwnerUserId = usr.Id
          AND p.PostTypeId = 1
    ) AS numQst,
    (
        SELECT COUNT(p.Id)
        FROM Posts AS p
        WHERE p.OwnerUserId = usr.Id
          AND p.PostTypeId = 2
    ) AS numAns
FROM Users AS usr
WHERE usr.AccountId = ##seAccntId##
使用系统查询获取相应的数据库列表。对于 Data Explorer (SEDE),可以使用如下查询:
-- Names of the databases that contain a dbo.PostNotices user table.
-- The CASE yields NULL for any database whose state is not ONLINE,
-- so only online databases can pass the IS NOT NULL filter.
SELECT  db.name
FROM    sys.databases AS db
WHERE   CASE
            WHEN db.state_desc = 'ONLINE'
                THEN OBJECT_ID (QUOTENAME (db.name) + '.[dbo].[PostNotices]', 'U')
        END IS NOT NULL
基于上述查询创建游标,并用它逐个遍历这些数据库。
对于每个数据库:
- 构建一个查询字符串,把第 4 步查询的结果写入第 3 步的临时表。
- 使用 sp_executesql 运行该查询字符串。
游标遍历完成后,对第 3 步中的临时表执行最终查询。
请参阅 this other answer,了解用于查询所有 Stack Exchange 站点的工作模板。
把这些步骤组合起来,就得到下面的查询,您可以在 SEDE 上在线运行它:
-- MinMasterSiteRep: User's must have this much rep on whichever site this query is run against
-- MinRep: User's must have this much rep on all other sites

-- Seed table with one row per qualifying user of the site the query is run against.
-- AccountId is the key later joined against each site's Users table;
-- UserId and Reputation are that user's values on the seeding site.
CREATE TABLE #UsersOfInterest (
    AccountId       INT NOT NULL
    , Reputation    INT
    , UserId        INT
    , PRIMARY KEY (AccountId)
)

-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO #UsersOfInterest (AccountId, Reputation, UserId)
SELECT      u.AccountId, u.Reputation, u.Id
FROM        Users u
WHERE       u.Reputation > ##MinMasterSiteRep:INT?200000##
            -- A row without an AccountId cannot be matched on other sites and would violate
            -- the NOT NULL primary key above, aborting the whole script.
            -- (Assumes Users.AccountId is nullable on SEDE; the filter is harmless if it is not.)
    AND     u.AccountId IS NOT NULL
-- Result accumulator: the per-site dynamic SQL appends its rows here,
-- and the final SELECT of the script reads from it.
CREATE TABLE #AllSiteResults (
    [Master Rep]        INT,            -- reputation on the site the script is run against
    [Mstr UsrId]        NVARCHAR(777),  -- text value built from the seeding site's user id
    AccountId           NVARCHAR(777),  -- text value built from the account id (stored as text, not INT)
    [Site name]         NVARCHAR(777),
    [Username on site]  NVARCHAR(777),
    [Rep]               INT,            -- reputation on the listed site
    [# Ans]             INT,
    [# Qst]             INT
);
-- Working variables filled by each cursor fetch, plus the dynamic SQL text.
DECLARE @seDbName       AS NVARCHAR(777)    -- database name as listed in sys.databases
DECLARE @seSiteURL      AS NVARCHAR(777)    -- base URL computed for the site
DECLARE @sitePrettyName AS NVARCHAR(777)    -- database name minus 'StackExchange.', dots turned into spaces
DECLARE @seSiteQuery    AS NVARCHAR(max)    -- per-site statement text

-- Cursor yielding (pretty site name, database name, site URL) for each site database.
DECLARE seSites_crsr CURSOR FOR
WITH dbsAndDomainNames AS (
    -- One row per database: its name and the name's dot-separated pieces re-joined
    -- in reverse order (e.g. 'A.B' becomes 'B.A'), used below to form a host name.
    SELECT      dbL.dbName
                -- WITHIN GROUP states the concatenation order explicitly; the ORDER BY of a
                -- derived table does not guarantee the order in which STRING_AGG sees rows.
                , STRING_AGG (dbL.domainPieces, '.') WITHIN GROUP (ORDER BY dbL.[rowN] DESC) AS siteDomain
    FROM (
        SELECT      name    AS dbName
                    , value AS domainPieces
                    -- Numbers the pieces in the order the rows arrive from STRING_SPLIT.
                    -- NOTE(review): STRING_SPLIT's output order is not documented as guaranteed;
                    -- consider its ordinal output if the server version supports it.
                    , row_number () OVER (ORDER BY (SELECT 0)) AS [rowN]
        FROM        sys.databases
        CROSS APPLY STRING_SPLIT (name, '.')
        WHERE       CASE WHEN state_desc = 'ONLINE'
                        THEN OBJECT_ID (QUOTENAME (name) + '.[dbo].[PostNotices]', 'U') -- Pick a table unique to SE data
                    END
                    IS NOT NULL
    ) AS dbL
    GROUP BY    dbL.dbName
)
SELECT      REPLACE (REPLACE (dadn.dbName, 'StackExchange.', ''), '.', ' ' ) AS [Site Name]
            , dadn.dbName
            , CASE -- See https://meta.stackexchange.com/q/215071
                WHEN dadn.dbName = 'StackExchange.Mathoverflow.Meta'
                    THEN 'https://meta.mathoverflow.net/'
                -- Some AVP/Audio/Video/Sound kerfuffle?
                WHEN dadn.dbName = 'StackExchange.Audio'
                    THEN 'https://video.stackexchange.com/'
                -- Ditto
                WHEN dadn.dbName = 'StackExchange.Audio.Meta'
                    THEN 'https://video.meta.stackexchange.com/'
                -- Normal site
                ELSE 'https://' + LOWER (siteDomain) + '.com/'
            END AS siteURL
FROM        dbsAndDomainNames dadn
-- Keeps 'StackExchange.Meta' and drops every other database whose name contains 'Meta'.
-- The two '.Meta' CASE branches above therefore never match a row that passes this filter.
WHERE       (dadn.dbName = 'StackExchange.Meta' OR dadn.dbName NOT LIKE '%Meta%')
-- Step through cursor
-- For each site database: switch to it, find the users of interest by AccountId,
-- and append one result row per matching user to #AllSiteResults.
OPEN seSites_crsr
FETCH NEXT FROM seSites_crsr INTO @sitePrettyName, @seDbName, @seSiteURL
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Only the database name is spliced into the text (identifiers cannot be bound),
    -- and it goes through QUOTENAME so any bracket in the name is escaped.
    -- The site name and URL are passed as sp_executesql parameters rather than being
    -- concatenated as literals, so a quote character in either cannot break the statement.
    -- The 'url|label' values are presumably rendered as hyperlinks by SEDE.
    SET @seSiteQuery = N'
        USE ' + QUOTENAME (@seDbName) + N'
        INSERT INTO #AllSiteResults
                    ([Master Rep], [Mstr UsrId], AccountId, [Site name], [Username on site], [Rep], [# Ans], [# Qst])
        SELECT
                    uoi.Reputation AS [Master Rep]
                    , ''site://u/'' + CAST(uoi.UserId AS NVARCHAR(88)) + ''|'' + CAST(uoi.UserId AS NVARCHAR(88)) AS [Mstr UsrId]
                    , [AccountId] = ''https://stackexchange.com/users/'' + CAST(u.AccountId AS NVARCHAR(88)) + ''?tab=accounts|'' + CAST(u.AccountId AS NVARCHAR(88))
                    , @sitePrettyName AS [Site name]
                    , @seSiteURL + ''u/'' + CAST(u.Id AS NVARCHAR(88)) + ''|'' + u.DisplayName AS [Username on site]
                    , u.Reputation AS [Rep]
                    , (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 2) AS [# Ans]
                    , (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 1) AS [# Qst]
        FROM        #UsersOfInterest uoi
        INNER JOIN  Users u ON uoi.AccountId = u.AccountId
        WHERE       u.Reputation > ##MinRep:INT?200##
    '
    EXEC sp_executesql
            @seSiteQuery
            , N'@sitePrettyName NVARCHAR(777), @seSiteURL NVARCHAR(777)'
            , @sitePrettyName = @sitePrettyName
            , @seSiteURL      = @seSiteURL

    FETCH NEXT FROM seSites_crsr INTO @sitePrettyName, @seDbName, @seSiteURL
END
CLOSE seSites_crsr
DEALLOCATE seSites_crsr
-- Final report: highest master-site reputation first, then by AccountId
-- (a text column, so it sorts as a string), then by reputation on each listed site.
SELECT      res.[Master Rep]
            , res.[Mstr UsrId]
            , res.AccountId
            , res.[Site name]
            , res.[Username on site]
            , res.[Rep]
            , res.[# Ans]
            , res.[# Qst]
FROM        #AllSiteResults AS res
ORDER BY    res.[Master Rep] DESC
            , res.AccountId
            , res.[Rep] DESC
它给出的结果如下:
-- 其中蓝色的值是超链接。
请注意,用户在某个站点上必须拥有至少 200 声望,该站点才算 "significant"(重要)。这也是该站点被收录进 Stack Exchange flair(个人资料徽章)所需的声望值。
使用 Data Explorer (SEDE),我想找出哪些用户在 Stack Overflow 上的声望超过 200000,然后查找他们在其他 Stack Exchange 站点上拥有的所有帐户的详细信息。
这是为列表提供此阈值的查询:
-- Baseline list: every user on the current site whose reputation is above 200000.
SELECT
    u.Id,
    u.Reputation,
    u.AccountId
FROM Users AS u
WHERE u.Reputation > 200000
AccountId
是在所有 Stack Exchange 站点之间通用的键。
我找到了 this query for aggregating across SEDE databases,但是如何让它基于前一个(基准)查询的动态结果来运行呢?
这是我想要的输出类型:
id_so, reputation_so, accountid, other_stackexchange_site_name, reputation_othersite, number_of_answers_other_site, number_of_questions_other_site
1, 250000, 23, serverfault, 500, 5, 1
1, 250000, 23, superuser, 120, 1, 0
2, 300000, 21, serverfault, 300, 3, 2
2, 300000, 21, webmasters, 230, 1, 1
3, 350000, 20, NA, NA, NA, NA
#the case with id 3 has an SO profile with reputation but it has no other profile in other Stack Exchange site
要基于初始查询运行跨数据库的非平凡查询:
- 找出所有数据库中的公共键。在这种情况下,它是
AccountId
(这是用户在整个 Stack Exchange 网络中通用的 Id)。 创建初始查询,把该键写入一个临时表。在这种情况下:
-- Temp table holding the network-wide AccountId of each user picked by the initial query.
CREATE TABLE #UsersOfInterest (
    AccountId INT
);

-- Seed it with every user of the current site whose reputation is above 200000.
INSERT INTO #UsersOfInterest (AccountId)
SELECT  usr.AccountId
FROM    Users AS usr
WHERE   usr.Reputation > 200000;
- 创建另一个 temp table 来保存最终结果(见下文)。
确定要在每个站点上运行、用于获取所需信息的查询。例如:
-- Per-site lookup for one account: the user's profile columns plus the number of
-- posts they own with PostTypeId 1 (numQst) and PostTypeId 2 (numAns).
-- ##seAccntId## is a SEDE parameter supplied when the query is run.
SELECT
    usr.AccountId,
    usr.DisplayName,
    usr.Reputation,
    usr.Id,
    (
        SELECT COUNT(p.Id)
        FROM Posts AS p
        WHERE p.OwnerUserId = usr.Id
          AND p.PostTypeId = 1
    ) AS numQst,
    (
        SELECT COUNT(p.Id)
        FROM Posts AS p
        WHERE p.OwnerUserId = usr.Id
          AND p.PostTypeId = 2
    ) AS numAns
FROM Users AS usr
WHERE usr.AccountId = ##seAccntId##
使用系统查询获取相应的数据库列表。对于 Data Explorer (SEDE),可以使用如下查询:
-- Names of the databases that contain a dbo.PostNotices user table.
-- The CASE yields NULL for any database whose state is not ONLINE,
-- so only online databases can pass the IS NOT NULL filter.
SELECT  db.name
FROM    sys.databases AS db
WHERE   CASE
            WHEN db.state_desc = 'ONLINE'
                THEN OBJECT_ID (QUOTENAME (db.name) + '.[dbo].[PostNotices]', 'U')
        END IS NOT NULL
基于上述查询创建游标,并用它逐个遍历这些数据库。
对于每个数据库:
- 构建一个查询字符串,把第 4 步查询的结果写入第 3 步的临时表。
- 使用 sp_executesql 运行该查询字符串。
游标遍历完成后,对第 3 步中的临时表执行最终查询。
请参阅 this other answer,了解用于查询所有 Stack Exchange 站点的工作模板。
把这些步骤组合起来,就得到下面的查询,您可以在 SEDE 上在线运行它:
-- MinMasterSiteRep: User's must have this much rep on whichever site this query is run against
-- MinRep: User's must have this much rep on all other sites

-- Seed table with one row per qualifying user of the site the query is run against.
-- AccountId is the key later joined against each site's Users table;
-- UserId and Reputation are that user's values on the seeding site.
CREATE TABLE #UsersOfInterest (
    AccountId       INT NOT NULL
    , Reputation    INT
    , UserId        INT
    , PRIMARY KEY (AccountId)
)

-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO #UsersOfInterest (AccountId, Reputation, UserId)
SELECT      u.AccountId, u.Reputation, u.Id
FROM        Users u
WHERE       u.Reputation > ##MinMasterSiteRep:INT?200000##
            -- A row without an AccountId cannot be matched on other sites and would violate
            -- the NOT NULL primary key above, aborting the whole script.
            -- (Assumes Users.AccountId is nullable on SEDE; the filter is harmless if it is not.)
    AND     u.AccountId IS NOT NULL
-- Result accumulator: the per-site dynamic SQL appends its rows here,
-- and the final SELECT of the script reads from it.
CREATE TABLE #AllSiteResults (
    [Master Rep]        INT,            -- reputation on the site the script is run against
    [Mstr UsrId]        NVARCHAR(777),  -- text value built from the seeding site's user id
    AccountId           NVARCHAR(777),  -- text value built from the account id (stored as text, not INT)
    [Site name]         NVARCHAR(777),
    [Username on site]  NVARCHAR(777),
    [Rep]               INT,            -- reputation on the listed site
    [# Ans]             INT,
    [# Qst]             INT
);
-- Working variables filled by each cursor fetch, plus the dynamic SQL text.
DECLARE @seDbName       AS NVARCHAR(777)    -- database name as listed in sys.databases
DECLARE @seSiteURL      AS NVARCHAR(777)    -- base URL computed for the site
DECLARE @sitePrettyName AS NVARCHAR(777)    -- database name minus 'StackExchange.', dots turned into spaces
DECLARE @seSiteQuery    AS NVARCHAR(max)    -- per-site statement text

-- Cursor yielding (pretty site name, database name, site URL) for each site database.
DECLARE seSites_crsr CURSOR FOR
WITH dbsAndDomainNames AS (
    -- One row per database: its name and the name's dot-separated pieces re-joined
    -- in reverse order (e.g. 'A.B' becomes 'B.A'), used below to form a host name.
    SELECT      dbL.dbName
                -- WITHIN GROUP states the concatenation order explicitly; the ORDER BY of a
                -- derived table does not guarantee the order in which STRING_AGG sees rows.
                , STRING_AGG (dbL.domainPieces, '.') WITHIN GROUP (ORDER BY dbL.[rowN] DESC) AS siteDomain
    FROM (
        SELECT      name    AS dbName
                    , value AS domainPieces
                    -- Numbers the pieces in the order the rows arrive from STRING_SPLIT.
                    -- NOTE(review): STRING_SPLIT's output order is not documented as guaranteed;
                    -- consider its ordinal output if the server version supports it.
                    , row_number () OVER (ORDER BY (SELECT 0)) AS [rowN]
        FROM        sys.databases
        CROSS APPLY STRING_SPLIT (name, '.')
        WHERE       CASE WHEN state_desc = 'ONLINE'
                        THEN OBJECT_ID (QUOTENAME (name) + '.[dbo].[PostNotices]', 'U') -- Pick a table unique to SE data
                    END
                    IS NOT NULL
    ) AS dbL
    GROUP BY    dbL.dbName
)
SELECT      REPLACE (REPLACE (dadn.dbName, 'StackExchange.', ''), '.', ' ' ) AS [Site Name]
            , dadn.dbName
            , CASE -- See https://meta.stackexchange.com/q/215071
                WHEN dadn.dbName = 'StackExchange.Mathoverflow.Meta'
                    THEN 'https://meta.mathoverflow.net/'
                -- Some AVP/Audio/Video/Sound kerfuffle?
                WHEN dadn.dbName = 'StackExchange.Audio'
                    THEN 'https://video.stackexchange.com/'
                -- Ditto
                WHEN dadn.dbName = 'StackExchange.Audio.Meta'
                    THEN 'https://video.meta.stackexchange.com/'
                -- Normal site
                ELSE 'https://' + LOWER (siteDomain) + '.com/'
            END AS siteURL
FROM        dbsAndDomainNames dadn
-- Keeps 'StackExchange.Meta' and drops every other database whose name contains 'Meta'.
-- The two '.Meta' CASE branches above therefore never match a row that passes this filter.
WHERE       (dadn.dbName = 'StackExchange.Meta' OR dadn.dbName NOT LIKE '%Meta%')
-- Step through cursor
-- For each site database: switch to it, find the users of interest by AccountId,
-- and append one result row per matching user to #AllSiteResults.
OPEN seSites_crsr
FETCH NEXT FROM seSites_crsr INTO @sitePrettyName, @seDbName, @seSiteURL
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Only the database name is spliced into the text (identifiers cannot be bound),
    -- and it goes through QUOTENAME so any bracket in the name is escaped.
    -- The site name and URL are passed as sp_executesql parameters rather than being
    -- concatenated as literals, so a quote character in either cannot break the statement.
    -- The 'url|label' values are presumably rendered as hyperlinks by SEDE.
    SET @seSiteQuery = N'
        USE ' + QUOTENAME (@seDbName) + N'
        INSERT INTO #AllSiteResults
                    ([Master Rep], [Mstr UsrId], AccountId, [Site name], [Username on site], [Rep], [# Ans], [# Qst])
        SELECT
                    uoi.Reputation AS [Master Rep]
                    , ''site://u/'' + CAST(uoi.UserId AS NVARCHAR(88)) + ''|'' + CAST(uoi.UserId AS NVARCHAR(88)) AS [Mstr UsrId]
                    , [AccountId] = ''https://stackexchange.com/users/'' + CAST(u.AccountId AS NVARCHAR(88)) + ''?tab=accounts|'' + CAST(u.AccountId AS NVARCHAR(88))
                    , @sitePrettyName AS [Site name]
                    , @seSiteURL + ''u/'' + CAST(u.Id AS NVARCHAR(88)) + ''|'' + u.DisplayName AS [Username on site]
                    , u.Reputation AS [Rep]
                    , (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 2) AS [# Ans]
                    , (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 1) AS [# Qst]
        FROM        #UsersOfInterest uoi
        INNER JOIN  Users u ON uoi.AccountId = u.AccountId
        WHERE       u.Reputation > ##MinRep:INT?200##
    '
    EXEC sp_executesql
            @seSiteQuery
            , N'@sitePrettyName NVARCHAR(777), @seSiteURL NVARCHAR(777)'
            , @sitePrettyName = @sitePrettyName
            , @seSiteURL      = @seSiteURL

    FETCH NEXT FROM seSites_crsr INTO @sitePrettyName, @seDbName, @seSiteURL
END
CLOSE seSites_crsr
DEALLOCATE seSites_crsr
-- Final report: highest master-site reputation first, then by AccountId
-- (a text column, so it sorts as a string), then by reputation on each listed site.
SELECT      res.[Master Rep]
            , res.[Mstr UsrId]
            , res.AccountId
            , res.[Site name]
            , res.[Username on site]
            , res.[Rep]
            , res.[# Ans]
            , res.[# Qst]
FROM        #AllSiteResults AS res
ORDER BY    res.[Master Rep] DESC
            , res.AccountId
            , res.[Rep] DESC
它给出的结果如下:
-- 其中蓝色的值是超链接。
请注意,用户在某个站点上必须拥有至少 200 声望,该站点才算 "significant"(重要)。这也是该站点被收录进 Stack Exchange flair(个人资料徽章)所需的声望值。