如何减少 stackexchange api 的查询时间?
How to reduce query time for stackexchange api?
我想收集一组用户各自提出的所有问题。
为此我写了下面这段代码:
library(stackr)

# Columns to keep from each stack_users() result, in output order.
question_cols <- c(
  "tags", "is_answered", "view_count", "accepted_answer_id", "answer_count",
  "score", "last_activity_date", "creation_date", "last_edit_date",
  "question_id", "link", "title", "body", "owner_reputation",
  "owner_user_id", "owner_user_type", "owner_accept_rate", "owner_link"
)

# Reduce one stack_users() result to exactly `question_cols`.
#
# questions: the value returned by stack_users(); expected to be a data frame.
# Returns a data frame with the columns in `question_cols` (columns absent
# from `questions` are filled with NA), or NULL when there are no rows.
select_question_cols <- function(questions) {
  # A user without questions must contribute no rows at all.
  if (!is.data.frame(questions) || nrow(questions) == 0) {
    return(NULL)
  }
  # Whole-column selection keeps column classes intact; a per-cell ifelse()
  # would strip attributes such as date or factor classes.
  kept <- questions[, intersect(question_cols, names(questions)), drop = FALSE]
  absent <- setdiff(question_cols, names(kept))
  if (length(absent) > 0) {
    kept[absent] <- NA
  }
  kept[question_cols]
}

# NOTE(review): the API documentation quoted in the answer below says {ids}
# accepts up to 100 semicolon-delimited ids, so sending the ids in batches of
# 100 would cut the number of requests. Confirm that stack_users() accepts a
# vector of ids before switching.
n_users <- nrow(df)
pieces <- vector("list", n_users)
for (j in seq_len(n_users)) {
  questions <- stack_users(
    df$userid[j], "questions",
    num_pages = 1000000, pagesize = 100, filter = "withbody"
  )
  # Assign via list() so that a NULL result keeps its slot instead of
  # shortening `pieces`.
  pieces[j] <- list(select_question_cols(questions))
}

# Bind once at the end: growing `dft` with rbind() inside the loop copies the
# accumulated rows on every iteration.
pieces <- Filter(Negate(is.null), pieces)
dft <- if (length(pieces) > 0) do.call(rbind, pieces) else data.frame()
但是为一长串不同的用户 ID 收集问题需要很长时间。有什么办法可以缩短执行时间,或者改进我的代码吗?
以下只是部分答案,因为我并不熟悉 R:
您是否要获取一组给定用户的问题列表?
如果是,
for (j in 1:nrow(df)) {
questions <- stack_users(df$userid[j]...
是一种糟糕的方法。
请参考 API 的 /users/{ids}/questions 文档:
(The) {ids} (parameter) can contain up to 100 semicolon delimited ids. To find ids programmatically look for user_id on user or shallow_user objects.
(强调为我所加)
因此,不要让每次调用只传入一个 id(例如 stack_users(1,...),
而应把 ID 按每 100 个一组分批传给该函数,类似于:
stack_users(c(1,2,3,4,5,...),...
(但请记住,我并不是 R 程序员。)
我想收集一组用户各自提出的所有问题。
为此我写了下面这段代码:
library(stackr)

# Columns to keep from each stack_users() result, in output order.
question_cols <- c(
  "tags", "is_answered", "view_count", "accepted_answer_id", "answer_count",
  "score", "last_activity_date", "creation_date", "last_edit_date",
  "question_id", "link", "title", "body", "owner_reputation",
  "owner_user_id", "owner_user_type", "owner_accept_rate", "owner_link"
)

# Reduce one stack_users() result to exactly `question_cols`.
#
# questions: the value returned by stack_users(); expected to be a data frame.
# Returns a data frame with the columns in `question_cols` (columns absent
# from `questions` are filled with NA), or NULL when there are no rows.
select_question_cols <- function(questions) {
  # A user without questions must contribute no rows at all.
  if (!is.data.frame(questions) || nrow(questions) == 0) {
    return(NULL)
  }
  # Whole-column selection keeps column classes intact; a per-cell ifelse()
  # would strip attributes such as date or factor classes.
  kept <- questions[, intersect(question_cols, names(questions)), drop = FALSE]
  absent <- setdiff(question_cols, names(kept))
  if (length(absent) > 0) {
    kept[absent] <- NA
  }
  kept[question_cols]
}

# NOTE(review): the API documentation quoted in the answer below says {ids}
# accepts up to 100 semicolon-delimited ids, so sending the ids in batches of
# 100 would cut the number of requests. Confirm that stack_users() accepts a
# vector of ids before switching.
n_users <- nrow(df)
pieces <- vector("list", n_users)
for (j in seq_len(n_users)) {
  questions <- stack_users(
    df$userid[j], "questions",
    num_pages = 1000000, pagesize = 100, filter = "withbody"
  )
  # Assign via list() so that a NULL result keeps its slot instead of
  # shortening `pieces`.
  pieces[j] <- list(select_question_cols(questions))
}

# Bind once at the end: growing `dft` with rbind() inside the loop copies the
# accumulated rows on every iteration.
pieces <- Filter(Negate(is.null), pieces)
dft <- if (length(pieces) > 0) do.call(rbind, pieces) else data.frame()
但是为一长串不同的用户 ID 收集问题需要很长时间。有什么办法可以缩短执行时间,或者改进我的代码吗?
以下只是部分答案,因为我并不熟悉 R:
您是否要获取一组给定用户的问题列表?
如果是,
for (j in 1:nrow(df)) {
questions <- stack_users(df$userid[j]...
是一种糟糕的方法。
请参考 API 的 /users/{ids}/questions 文档:
(The) {ids} (parameter) can contain up to 100 semicolon delimited ids. To find ids programmatically look for user_id on user or shallow_user objects.
(强调为我所加)
因此,不要让每次调用只传入一个 id(例如 stack_users(1,...),
而应把 ID 按每 100 个一组分批传给该函数,类似于:
stack_users(c(1,2,3,4,5,...),...
(但请记住,我并不是 R 程序员。)