使用 self JOIN 将 SQL 翻译成 SPARQL
Translating SQL with self JOIN into SPARQL
我正在尝试将 SQL 查询转换为 SPARQL,我已经接近了,但我似乎不太明白如何通过 SQL 查询的自连接确实。
这里是SQL(问:计算Whosebug用户的接受答案百分比):
SELECT
( Count(a.Id) / (SELECT Count(*) FROM posts WHERE OwnerUserId = 45 AND PostTypeId = 2) * 100) AS AcceptedPercentage
FROM
posts q
INNER JOIN
posts a ON q.AcceptedAnswerId = a.Id
WHERE
a.OwnerUserId = 45
AND
a.PostTypeId = 2;
这是我目前在 SPARQL 中得到的:
SELECT
(count(?answers) AS ?totalAnswers)
(count(?acceptedAnswers) AS ?totalAcceptedAnswers)
((count(?acceptedAnswers)/count(?answers))*100 AS ?acceptedPercentage)
WHERE {
{
#all answers
?answers a vocab:posts .
#user 45
?answers vocab:posts_OwnerUserId 45 .
#accepted answers
?answers vocab:posts_PostTypeId 2 .
#set the answers id variable
?answers vocab:posts_Id ?answerId .
} UNION {
#all answers
?acceptedAnswers a vocab:posts .
#user 45
?acceptedAnswers vocab:posts_OwnerUserId 45 .
#is type answer
?acceptedAnswers vocab:posts_PostTypeId 2 .
#accepted answer ID matches the overal ID of the question
?acceptedAnswers vocab:posts_AcceptedAnswerId ?acceptedAnswerId
FILTER( ?answerId = ?acceptedAnswerId )
}
}
这是典型的 post 中包含的数据(其中 vocab:posts_PostTypeId=2 表示 "answer"):
vocab:posts_AcceptedAnswerId 104 -
vocab:posts_AnswerCount 11 -
vocab:posts_Body "here's the body of the question" -
vocab:posts_CommentCount 0 -
vocab:posts_CreationDate "2009-04-30T07:48:06"^^xsd:dateTime -
vocab:posts_FavoriteCount 11 -
vocab:posts_Id 3 -
vocab:posts_LastActivityDate "2009-06-05T04:01:09"^^xsd:dateTime -
vocab:posts_LastEditDate "2009-04-30T08:05:02"^^xsd:dateTime -
vocab:posts_LastEditorUserId 22 -
vocab:posts_OwnerUserId 22 -
vocab:posts_PostTypeId 1 -
vocab:posts_Score 21 -
vocab:posts_Tags "<unix><package-management><server-management>" -
vocab:posts_Title "Best practices for keeping UNIX packages up to date?" -
vocab:posts_ViewCount 548 -
rdf:type vocab:posts -
rdfs:label "posts #3"
这将如何在 SPARQL 中执行?
一些数据
如果我们有一些示例数据可以使用,那就更容易了。根据您使用的属性(尽管为了便于阅读我稍微更改了名称),这里有一些示例数据描述了十个答案,其中六个被接受:
@prefix : < .
:answer0 a :Post ; :hasId 0 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question0 a :Post ; :hasAcceptedAnswer 0.
:answer1 a :Post ; :hasId 1 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question1 a :Post ; :hasAcceptedAnswer 1.
:answer2 a :Post ; :hasId 2 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question2 a :Post ; :hasAcceptedAnswer 2.
:answer3 a :Post ; :hasId 3 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question3 a :Post ; :hasAcceptedAnswer 3.
:answer4 a :Post ; :hasId 4 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question4 a :Post ; :hasAcceptedAnswer 4.
:answer5 a :Post ; :hasId 5 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question5 a :Post ; :hasAcceptedAnswer 5.
:answer6 a :Post ; :hasId 6 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question6 a :Post ; :hasAcceptedAnswer 96.
:answer7 a :Post ; :hasId 7 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question7 a :Post ; :hasAcceptedAnswer 97.
:answer8 a :Post ; :hasId 8 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question8 a :Post ; :hasAcceptedAnswer 98.
:answer9 a :Post ; :hasId 9 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question9 a :Post ; :hasAcceptedAnswer 99.
有点压缩,但是每一行都有这样的数据,一问一答:
:answer9 a :Post ;
:hasId 9 ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
:question9 a :Post ;
:hasAcceptedAnswer 99 .
一些 SPARQL
现在我们可以使用这样的查询来获得我们正在寻找的结果:
prefix : <
select (count(?answer) as ?nanswers)
(count(?question) as ?naccepted)
(100*?naccepted/?nanswers as ?percentAccepted)
where {
?answer a :Post ;
:hasId ?id ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
optional {
?question a :Post ;
:hasAcceptedAnswer ?id .
}
}
------------------------------------------
| nanswers | naccepted | percentAccepted |
==========================================
| 10 | 6 | 60.0 |
------------------------------------------
关于SQL
值得注意的是,在 SPARQL 查询中,我使用了可选的,所以我得到的“行”如下:
-------------------------
| answer | question |
=========================
| :answer0 | :question0 |
| :answer1 | :question1 |
| :answer2 | :question2 |
| :answer3 | :question3 |
| :answer4 | :question4 |
| :answer5 | :question5 |
| :answer6 | |
| :answer7 | |
| :answer8 | |
| :answer9 | |
-------------------------
这实际上也是您可以在 SQL 版本中执行的操作,只是不能使用 inner join。我发现 Jeff Atwood's A Visual Explanation of SQL Joins 在这里很有用。您实际上是在寻找 left outer join:
例如,this query on the Stack Exchange Data Explorer 计算我的答案被接受的百分比(但 UserId 是一个参数,因此您可以轻松更改它):
select count(a.id) as nAnswers,
count(q.id) as nAccepted,
100.0*count(q.id)/count(a.id) as rate
from Posts a
left outer join Posts q
on q.AcceptedAnswerId = a.id
where a.OwnerUserId = ##UserId##
and a.PostTypeId = 2
鉴于此,我们可以进行更多的逐行比较。可选项负责 左外连接 ,并且隐含它基于哪些变量;这是他们共有的任何变量。在这种情况下,这只是 ?id.
select (count(?answer) as ?nanswers)
(count(?question) as ?naccepted)
(100*?naccepted/?nanswers as ?percentAccepted)
where {
?answer a :Post ; #-- from Posts a
:hasId ?id ;
:hasOwnerUserId 45 ; #-- where a.OwnerUserId = 45
:hasPostTypeId 2 . #-- and a.PostTypeId = 2
optional { #-- left outer join on a.id
?question a :Post ; #-- from Posts q
:hasAcceptedAnswer ?id .
}
}
不过,实际上也可以翻译使用子查询的原始 SQL 查询。它看起来更像这样:
prefix : <
select ?nAnswers ?nAccepted (100*?nAccepted/?nAnswers as ?percentAccepted)
where {
{
select (count(?answer) as ?nAnswers) where {
?answer a :Post ;
:hasId ?id ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
}
}
{
select (count(?answer) as ?nAccepted) where {
?answer a :Post ;
:hasId ?id ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
?question a :Post ;
:hasAcceptedAnswer ?id .
}
}
}
------------------------------------------
| nAnswers | nAccepted | percentAccepted |
==========================================
| 10 | 6 | 60.0 |
------------------------------------------
我正在尝试将 SQL 查询转换为 SPARQL,我已经接近了,但我似乎不太明白如何通过 SQL 查询的自连接确实。
这里是SQL(问:计算Whosebug用户的接受答案百分比):
SELECT
( Count(a.Id) / (SELECT Count(*) FROM posts WHERE OwnerUserId = 45 AND PostTypeId = 2) * 100) AS AcceptedPercentage
FROM
posts q
INNER JOIN
posts a ON q.AcceptedAnswerId = a.Id
WHERE
a.OwnerUserId = 45
AND
a.PostTypeId = 2;
这是我目前在 SPARQL 中得到的:
SELECT
(count(?answers) AS ?totalAnswers)
(count(?acceptedAnswers) AS ?totalAcceptedAnswers)
((count(?acceptedAnswers)/count(?answers))*100 AS ?acceptedPercentage)
WHERE {
{
#all answers
?answers a vocab:posts .
#user 45
?answers vocab:posts_OwnerUserId 45 .
#accepted answers
?answers vocab:posts_PostTypeId 2 .
#set the answers id variable
?answers vocab:posts_Id ?answerId .
} UNION {
#all answers
?acceptedAnswers a vocab:posts .
#user 45
?acceptedAnswers vocab:posts_OwnerUserId 45 .
#is type answer
?acceptedAnswers vocab:posts_PostTypeId 2 .
#accepted answer ID matches the overal ID of the question
?acceptedAnswers vocab:posts_AcceptedAnswerId ?acceptedAnswerId
FILTER( ?answerId = ?acceptedAnswerId )
}
}
这是典型的 post 中包含的数据(其中 vocab:posts_PostTypeId=2 表示 "answer"):
vocab:posts_AcceptedAnswerId 104 -
vocab:posts_AnswerCount 11 -
vocab:posts_Body "here's the body of the question" -
vocab:posts_CommentCount 0 -
vocab:posts_CreationDate "2009-04-30T07:48:06"^^xsd:dateTime -
vocab:posts_FavoriteCount 11 -
vocab:posts_Id 3 -
vocab:posts_LastActivityDate "2009-06-05T04:01:09"^^xsd:dateTime -
vocab:posts_LastEditDate "2009-04-30T08:05:02"^^xsd:dateTime -
vocab:posts_LastEditorUserId 22 -
vocab:posts_OwnerUserId 22 -
vocab:posts_PostTypeId 1 -
vocab:posts_Score 21 -
vocab:posts_Tags "<unix><package-management><server-management>" -
vocab:posts_Title "Best practices for keeping UNIX packages up to date?" -
vocab:posts_ViewCount 548 -
rdf:type vocab:posts -
rdfs:label "posts #3"
这将如何在 SPARQL 中执行?
一些数据
如果我们有一些示例数据可以使用,那就更容易了。根据您使用的属性(尽管为了便于阅读我稍微更改了名称),这里有一些示例数据描述了十个答案,其中六个被接受:
@prefix : < .
:answer0 a :Post ; :hasId 0 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question0 a :Post ; :hasAcceptedAnswer 0.
:answer1 a :Post ; :hasId 1 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question1 a :Post ; :hasAcceptedAnswer 1.
:answer2 a :Post ; :hasId 2 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question2 a :Post ; :hasAcceptedAnswer 2.
:answer3 a :Post ; :hasId 3 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question3 a :Post ; :hasAcceptedAnswer 3.
:answer4 a :Post ; :hasId 4 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question4 a :Post ; :hasAcceptedAnswer 4.
:answer5 a :Post ; :hasId 5 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question5 a :Post ; :hasAcceptedAnswer 5.
:answer6 a :Post ; :hasId 6 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question6 a :Post ; :hasAcceptedAnswer 96.
:answer7 a :Post ; :hasId 7 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question7 a :Post ; :hasAcceptedAnswer 97.
:answer8 a :Post ; :hasId 8 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question8 a :Post ; :hasAcceptedAnswer 98.
:answer9 a :Post ; :hasId 9 ; :hasOwnerUserId 45 ; :hasPostTypeId 2 . :question9 a :Post ; :hasAcceptedAnswer 99.
有点压缩,但是每一行都有这样的数据,一问一答:
:answer9 a :Post ;
:hasId 9 ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
:question9 a :Post ;
:hasAcceptedAnswer 99 .
一些 SPARQL
现在我们可以使用这样的查询来获得我们正在寻找的结果:
prefix : <
select (count(?answer) as ?nanswers)
(count(?question) as ?naccepted)
(100*?naccepted/?nanswers as ?percentAccepted)
where {
?answer a :Post ;
:hasId ?id ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
optional {
?question a :Post ;
:hasAcceptedAnswer ?id .
}
}
------------------------------------------
| nanswers | naccepted | percentAccepted |
==========================================
| 10 | 6 | 60.0 |
------------------------------------------
关于SQL
值得注意的是,在 SPARQL 查询中,我使用了可选的,所以我得到的“行”如下:
-------------------------
| answer | question |
=========================
| :answer0 | :question0 |
| :answer1 | :question1 |
| :answer2 | :question2 |
| :answer3 | :question3 |
| :answer4 | :question4 |
| :answer5 | :question5 |
| :answer6 | |
| :answer7 | |
| :answer8 | |
| :answer9 | |
-------------------------
这实际上也是您可以在 SQL 版本中执行的操作,只是不能使用 inner join。我发现 Jeff Atwood's A Visual Explanation of SQL Joins 在这里很有用。您实际上是在寻找 left outer join:
例如,this query on the Stack Exchange Data Explorer 计算我的答案被接受的百分比(但 UserId 是一个参数,因此您可以轻松更改它):
select count(a.id) as nAnswers,
count(q.id) as nAccepted,
100.0*count(q.id)/count(a.id) as rate
from Posts a
left outer join Posts q
on q.AcceptedAnswerId = a.id
where a.OwnerUserId = ##UserId##
and a.PostTypeId = 2
鉴于此,我们可以进行更多的逐行比较。可选项负责 左外连接 ,并且隐含它基于哪些变量;这是他们共有的任何变量。在这种情况下,这只是 ?id.
select (count(?answer) as ?nanswers)
(count(?question) as ?naccepted)
(100*?naccepted/?nanswers as ?percentAccepted)
where {
?answer a :Post ; #-- from Posts a
:hasId ?id ;
:hasOwnerUserId 45 ; #-- where a.OwnerUserId = 45
:hasPostTypeId 2 . #-- and a.PostTypeId = 2
optional { #-- left outer join on a.id
?question a :Post ; #-- from Posts q
:hasAcceptedAnswer ?id .
}
}
不过,实际上也可以翻译使用子查询的原始 SQL 查询。它看起来更像这样:
prefix : <
select ?nAnswers ?nAccepted (100*?nAccepted/?nAnswers as ?percentAccepted)
where {
{
select (count(?answer) as ?nAnswers) where {
?answer a :Post ;
:hasId ?id ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
}
}
{
select (count(?answer) as ?nAccepted) where {
?answer a :Post ;
:hasId ?id ;
:hasOwnerUserId 45 ;
:hasPostTypeId 2 .
?question a :Post ;
:hasAcceptedAnswer ?id .
}
}
}
------------------------------------------
| nAnswers | nAccepted | percentAccepted |
==========================================
| 10 | 6 | 60.0 |
------------------------------------------