Sparql 查询获取超时异常
Sparql query getting timeout exception
当我在 Java 中使用此查询时,会出现 HTTP 504 异常,因为我正在从 DBpedia Live 获取数据。
但是如果我删除 influenced
或 influencedBy
或 Paradigm
或其中任何一个,那么它就可以正常工作。由此我明白是时间或内存方面出了问题;另一个问题是它现在太慢了。但这些字段我全都想要。我现在该怎么办?
# Lists every resource typed dbo:ProgrammingLanguage that has an English
# rdfs:label, together with its optional abstract, thumbnail, influence links,
# owl:sameAs links and paradigm labels. Multi-valued properties are folded
# into one delimited string per group.
# NOTE(review): no PREFIX declarations are given; presumably the endpoint
# predefines rdf:, rdfs:, owl:, dbo: and dbp: -- confirm before running elsewhere.
SELECT
    ?pl
    ?pl_label
    ?abstract
    ?_thumbnail
    (GROUP_CONCAT(DISTINCT ?_influenced_label;   SEPARATOR="; ") AS ?influenced)
    (GROUP_CONCAT(DISTINCT ?_influencedBy_label; SEPARATOR="; ") AS ?influencedBy)
    # The next two aggregates have no DISTINCT, so repeated values are kept.
    (GROUP_CONCAT(?_sameAs;         SEPARATOR=", ") AS ?sameAs)
    (GROUP_CONCAT(?_paradigm_label; SEPARATOR=", ") AS ?paradigm)
WHERE {
    ?pl rdf:type dbo:ProgrammingLanguage .

    # English abstract, when one exists.
    OPTIONAL {
        ?pl dbo:abstract ?abstract .
        FILTER (LANG(?abstract) = 'en')
    }

    # Required English label of the language itself.
    ?pl rdfs:label ?pl_label .
    FILTER (LANG(?pl_label) = 'en')

    # English labels of the resources this language influenced.
    OPTIONAL {
        ?pl         dbo:influenced ?_influenced .
        ?_influenced rdfs:label    ?_influenced_label .
        FILTER (LANG(?_influenced_label) = 'en')
    }

    # English labels of the resources that influenced this language.
    OPTIONAL {
        ?pl           dbo:influencedBy ?_influencedBy .
        ?_influencedBy rdfs:label      ?_influencedBy_label .
        FILTER (LANG(?_influencedBy_label) = 'en')
    }

    OPTIONAL { ?pl owl:sameAs ?_sameAs . }

    # Paradigm labels are not restricted by language here.
    OPTIONAL {
        ?pl       dbp:paradigm ?_paradigm .
        ?_paradigm rdfs:label  ?_paradigm_label .
    }

    OPTIONAL { ?pl dbo:thumbnail ?_thumbnail . }
}
# ?abstract and ?_thumbnail are grouping keys, so each distinct value of
# either one produces its own result row for the same ?pl.
GROUP BY ?pl ?pl_label ?abstract ?_thumbnail
当前 http://live.dbpedia.org/sparql 中存在脏数据问题:某些谓词(包括 dbo:abstract
、 dbo:influenced
和 dbo:influencedBy
)存在多个值,而这些谓词本应只保留最近摄取的值(即最新一次编辑)。这意味着您的结果集会在一定程度上呈现笛卡尔积——其中一些行彼此重复,区别仅在于那些被错误地保留了多个版本的谓词值。
撇开那个数据问题不谈,下面这个查询(改动不只是多加了一个 DISTINCT!)应该能给出我认为你现在想要的结果:
# Programming languages with an English label, plus optional abstract,
# thumbnail, influence links, owl:sameAs links and paradigm labels. Every
# multi-valued property is collapsed into a de-duplicated, delimited string.
# NOTE(review): no PREFIX declarations are given; presumably the endpoint
# predefines rdf:, rdfs:, owl:, dbo: and dbp: -- confirm before running elsewhere.
SELECT DISTINCT
    ?pl
    ?pl_label
    ?abstract
    ?_thumbnail
    (GROUP_CONCAT(DISTINCT ?_influenced_label;   SEPARATOR="; ") AS ?influenced)
    (GROUP_CONCAT(DISTINCT ?_influencedBy_label; SEPARATOR="; ") AS ?influencedBy)
    (GROUP_CONCAT(DISTINCT ?_sameAs;             SEPARATOR=", ") AS ?sameAs)
    (GROUP_CONCAT(DISTINCT ?_paradigm_label;     SEPARATOR=", ") AS ?paradigm)
WHERE {
    # Required: typed as a programming language and carrying an English label.
    ?pl rdf:type   dbo:ProgrammingLanguage ;
        rdfs:label ?pl_label .
    FILTER (LANG(?pl_label) = 'en')

    # English abstract, when one exists.
    OPTIONAL {
        ?pl dbo:abstract ?abstract .
        FILTER (LANG(?abstract) = 'en')
    }

    # English labels of the resources this language influenced.
    OPTIONAL {
        ?pl         dbo:influenced ?_influenced .
        ?_influenced rdfs:label    ?_influenced_label .
        FILTER (LANG(?_influenced_label) = 'en')
    }

    # English labels of the resources that influenced this language.
    OPTIONAL {
        ?pl           dbo:influencedBy ?_influencedBy .
        ?_influencedBy rdfs:label      ?_influencedBy_label .
        FILTER (LANG(?_influencedBy_label) = 'en')
    }

    OPTIONAL { ?pl owl:sameAs ?_sameAs . }

    # English labels of the language's paradigms.
    OPTIONAL {
        ?pl       dbp:paradigm ?_paradigm .
        ?_paradigm rdfs:label  ?_paradigm_label .
        FILTER (LANG(?_paradigm_label) = 'en')
    }

    OPTIONAL { ?pl dbo:thumbnail ?_thumbnail . }
}
# ?abstract and ?_thumbnail are grouping keys, so each distinct value of
# either one produces its own result row for the same ?pl.
GROUP BY ?pl ?pl_label ?abstract ?_thumbnail
当我在 Java 中使用此查询时,会出现 HTTP 504 异常,因为我正在从 DBpedia Live 获取数据。
但是如果我删除 influenced
或 influencedBy
或 Paradigm
或其中任何一个,那么它就可以正常工作。由此我明白是时间或内存方面出了问题;另一个问题是它现在太慢了。但这些字段我全都想要。我现在该怎么办?
# Lists every resource typed dbo:ProgrammingLanguage that has an English
# rdfs:label, together with its optional abstract, thumbnail, influence links,
# owl:sameAs links and paradigm labels. Multi-valued properties are folded
# into one delimited string per group.
# NOTE(review): no PREFIX declarations are given; presumably the endpoint
# predefines rdf:, rdfs:, owl:, dbo: and dbp: -- confirm before running elsewhere.
SELECT
    ?pl
    ?pl_label
    ?abstract
    ?_thumbnail
    (GROUP_CONCAT(DISTINCT ?_influenced_label;   SEPARATOR="; ") AS ?influenced)
    (GROUP_CONCAT(DISTINCT ?_influencedBy_label; SEPARATOR="; ") AS ?influencedBy)
    # The next two aggregates have no DISTINCT, so repeated values are kept.
    (GROUP_CONCAT(?_sameAs;         SEPARATOR=", ") AS ?sameAs)
    (GROUP_CONCAT(?_paradigm_label; SEPARATOR=", ") AS ?paradigm)
WHERE {
    ?pl rdf:type dbo:ProgrammingLanguage .

    # English abstract, when one exists.
    OPTIONAL {
        ?pl dbo:abstract ?abstract .
        FILTER (LANG(?abstract) = 'en')
    }

    # Required English label of the language itself.
    ?pl rdfs:label ?pl_label .
    FILTER (LANG(?pl_label) = 'en')

    # English labels of the resources this language influenced.
    OPTIONAL {
        ?pl         dbo:influenced ?_influenced .
        ?_influenced rdfs:label    ?_influenced_label .
        FILTER (LANG(?_influenced_label) = 'en')
    }

    # English labels of the resources that influenced this language.
    OPTIONAL {
        ?pl           dbo:influencedBy ?_influencedBy .
        ?_influencedBy rdfs:label      ?_influencedBy_label .
        FILTER (LANG(?_influencedBy_label) = 'en')
    }

    OPTIONAL { ?pl owl:sameAs ?_sameAs . }

    # Paradigm labels are not restricted by language here.
    OPTIONAL {
        ?pl       dbp:paradigm ?_paradigm .
        ?_paradigm rdfs:label  ?_paradigm_label .
    }

    OPTIONAL { ?pl dbo:thumbnail ?_thumbnail . }
}
# ?abstract and ?_thumbnail are grouping keys, so each distinct value of
# either one produces its own result row for the same ?pl.
GROUP BY ?pl ?pl_label ?abstract ?_thumbnail
当前 http://live.dbpedia.org/sparql 中存在脏数据问题:某些谓词(包括 dbo:abstract
、 dbo:influenced
和 dbo:influencedBy
)存在多个值,而这些谓词本应只保留最近摄取的值(即最新一次编辑)。这意味着您的结果集会在一定程度上呈现笛卡尔积——其中一些行彼此重复,区别仅在于那些被错误地保留了多个版本的谓词值。
撇开那个数据问题不谈,下面这个查询(改动不只是多加了一个 DISTINCT!)应该能给出我认为你现在想要的结果:
# Programming languages with an English label, plus optional abstract,
# thumbnail, influence links, owl:sameAs links and paradigm labels. Every
# multi-valued property is collapsed into a de-duplicated, delimited string.
# NOTE(review): no PREFIX declarations are given; presumably the endpoint
# predefines rdf:, rdfs:, owl:, dbo: and dbp: -- confirm before running elsewhere.
SELECT DISTINCT
    ?pl
    ?pl_label
    ?abstract
    ?_thumbnail
    (GROUP_CONCAT(DISTINCT ?_influenced_label;   SEPARATOR="; ") AS ?influenced)
    (GROUP_CONCAT(DISTINCT ?_influencedBy_label; SEPARATOR="; ") AS ?influencedBy)
    (GROUP_CONCAT(DISTINCT ?_sameAs;             SEPARATOR=", ") AS ?sameAs)
    (GROUP_CONCAT(DISTINCT ?_paradigm_label;     SEPARATOR=", ") AS ?paradigm)
WHERE {
    # Required: typed as a programming language and carrying an English label.
    ?pl rdf:type   dbo:ProgrammingLanguage ;
        rdfs:label ?pl_label .
    FILTER (LANG(?pl_label) = 'en')

    # English abstract, when one exists.
    OPTIONAL {
        ?pl dbo:abstract ?abstract .
        FILTER (LANG(?abstract) = 'en')
    }

    # English labels of the resources this language influenced.
    OPTIONAL {
        ?pl         dbo:influenced ?_influenced .
        ?_influenced rdfs:label    ?_influenced_label .
        FILTER (LANG(?_influenced_label) = 'en')
    }

    # English labels of the resources that influenced this language.
    OPTIONAL {
        ?pl           dbo:influencedBy ?_influencedBy .
        ?_influencedBy rdfs:label      ?_influencedBy_label .
        FILTER (LANG(?_influencedBy_label) = 'en')
    }

    OPTIONAL { ?pl owl:sameAs ?_sameAs . }

    # English labels of the language's paradigms.
    OPTIONAL {
        ?pl       dbp:paradigm ?_paradigm .
        ?_paradigm rdfs:label  ?_paradigm_label .
        FILTER (LANG(?_paradigm_label) = 'en')
    }

    OPTIONAL { ?pl dbo:thumbnail ?_thumbnail . }
}
# ?abstract and ?_thumbnail are grouping keys, so each distinct value of
# either one produces its own result row for the same ?pl.
GROUP BY ?pl ?pl_label ?abstract ?_thumbnail