如何使用 rtweet 库在 R 中通过分页获取用户的所有 Twitter 关注者 ID(> 75000)?
How to get all the Twitter follower ids (> 75000) of a user using pagination in R with the rtweet library?
我需要一个函数,使用 rtweet 库,把拥有超过 75000 个关注者(或好友,即超过 Twitter API 单次限制)的用户的所有关注者(或好友)ID 获取到一个 data.frame 中。
我首先尝试了 next_cursor 帮助文档中的示例:
# Retrieve user ids of accounts following POTUS
f1 <- get_followers("potus", n = 75000)
# Remember the cursor so the next request can resume after this batch
page <- next_cursor(f1)
# A single token can return at most 75,000 ids per 15-minute window, so
# pause before collecting the next batch of ids.
# (R is case-sensitive: the base function is Sys.sleep, not sys.Sleep.)
Sys.sleep(15 * 60) # Suspend execution of R expressions for 15 mins
# Pass the cursor returned by next_cursor() to continue where the
# previous request left off.
f2 <- get_followers("potus", n = 75000, page = page)
但是,考虑到不同用户的不同关注者或好友数量,我如何才能在同一个 data.frame
中获取所有关注者(或好友)ID?如何管理分页?
经过一些测试,我编写了这个包含进度条并从用户获取所有关注者 ID 的递归函数:
# Load library
library('rtweet')
# Shared state read and updated by the pagination function:
ids <- 75e3 # ids still available to the token in the current 15-minute window
f <- vector("list", 0L) # accumulator list; one element per fetched batch of ids
# Function to get all the followers from a user with pagination
# Fetch all follower ids of a user, paging through the API in batches
# that fit the per-token quota and sleeping between batches when the
# quota is used up.
#
# Args:
#   userId:    value forwarded to get_followers() as `user`.
#   followers: number of follower ids that still have to be fetched.
#   page:      cursor to start from; '-1' denotes the first page.
#
# Returns:
#   A list with one element per get_followers() call made for this user.
#
# Side effects:
#   Reads and updates the global variables `ids` (ids left in the current
#   quota window) and `f` (accumulator list) through `<<-`; `f` is reset
#   to an empty list once the final batch has been collected.
GetFollowersRecursivePagination <- function(userId, followers, page) {
  # Sleep through one 15-minute window while showing a progress bar, then
  # re-check the rate limit and sleep 15 more seconds if needed.
  wait_for_rate_limit_reset <- function() {
    message("Waiting 15 mins...")
    total <- 15 * 60 # Total time = 15 min ~ 900 sec
    pb <- txtProgressBar(min = 0, max = total, style = 3)
    for (i in seq_len(total)) {
      Sys.sleep(time = 1) # 1 second interval
      setTxtProgressBar(pb, i)
    }
    close(pb)
    # `!x > 14.9` parses as `!(x > 14.9)`: wait a little longer unless the
    # reset value is already above 14.9.
    # NOTE(review): row 38 is assumed to be the followers/ids entry of the
    # rate_limit() table; the hard-coded index is fragile -- confirm
    # against the installed rtweet version.
    if (!rate_limit(token = NULL)[38, ]$reset > 14.9) {
      message("Waiting 15 seconds more...")
      Sys.sleep(time = 15)
    }
    message("Go!")
  }

  if (ids == 0) {
    # Quota exhausted by an earlier call: wait, then refill it
    wait_for_rate_limit_reset()
    ids <<- 75000
  }

  if (followers <= ids) {
    # Everything that is left fits into the remaining quota: last request
    message(paste0("Followers < ids | Number of Followers: ",
                   followers, " | Number of resting ids: ", ids))
    ftemp <- get_followers(user = userId, n = followers, page = page)
    # Batches fetched from a later page are wrapped in a data.frame
    batch <- if (page == "-1") ftemp else data.frame("user_id" = ftemp)
    f <<- append(f, list(batch))
    ids <<- ids - followers
    message("Finished!")
    rtemp <- f
    f <<- list() # leave the accumulator empty for the next user
    return(rtemp)
  }

  # More followers than quota: take what the quota allows, wait, recurse
  message(paste0("Followers > ids | Number of Followers: ",
                 followers, " | Number of resting ids: ", ids))
  ftemp <- get_followers(user = userId, n = ids, page = page)
  batch <- if (page == "-1") ftemp else data.frame("user_id" = ftemp)
  f <<- append(f, list(batch))
  n <- ids # n = count of follower ids requested in this round
  pageTemp <- next_cursor(ftemp) # cursor for the following page
  wait_for_rate_limit_reset()
  ids <<- 75000
  GetFollowersRecursivePagination(userId = userId,
                                  followers = followers - n,
                                  page = pageTemp)
}
# Example run on an account with more than 75000 followers
# Look the account up to obtain its id and follower count
user1 <- lookup_users(users = "146620155")
FAOClimate <- GetFollowersRecursivePagination(
  userId = user1[["user_id"]],
  followers = user1[["followers_count"]],
  page = "-1"
)
# Output:
Followers > ids | Number of Followers: 87208 | Number of resting ids: 75000
Followers < ids | Number of Followers: 12208 | Number of resting ids: 62792
Finished!
str(FAOClimate)
List of 2
$ :'data.frame': 75000 obs. of 1 variable:
..$ user_id: chr [1:75000] "87189802" "884863013928546304" "886995444512964608" "852940633652301824" ...
..- attr(*, "next_cursor")= num 1.45e+18
$ :'data.frame': 12207 obs. of 1 variable:
..$ user_id: chr [1:12207] "2175314977" "2168898233" "1491745484" "2175065456" ...
注意:如果您想获取好友而不是关注者,请将函数 get_followers 更改为 get_friends。
默认情况下,GitHub 上 rtweet 开发版(dev version)中的 get_followers 函数现在可以获取超过 75k 个关注者 ID。前几天晚上我在测试中毫无问题地获取了特朗普的 300 万关注者。要获取超过 75k 个 ID,请确保将 retryonratelimit 设置为 TRUE。
我需要一个函数,使用 rtweet 库,把拥有超过 75000 个关注者(或好友,即超过 Twitter API 单次限制)的用户的所有关注者(或好友)ID 获取到一个 data.frame 中。
我首先尝试了 next_cursor 帮助文档中的示例:
# Retrieve user ids of accounts following POTUS
f1 <- get_followers("potus", n = 75000)
# Remember the cursor so the next request can resume after this batch
page <- next_cursor(f1)
# A single token can return at most 75,000 ids per 15-minute window, so
# pause before collecting the next batch of ids.
# (R is case-sensitive: the base function is Sys.sleep, not sys.Sleep.)
Sys.sleep(15 * 60) # Suspend execution of R expressions for 15 mins
# Pass the cursor returned by next_cursor() to continue where the
# previous request left off.
f2 <- get_followers("potus", n = 75000, page = page)
但是,考虑到不同用户的不同关注者或好友数量,我如何才能在同一个 data.frame
中获取所有关注者(或好友)ID?如何管理分页?
经过一些测试,我编写了这个包含进度条并从用户获取所有关注者 ID 的递归函数:
# Load library
library('rtweet')
# Shared state read and updated by the pagination function:
ids <- 75e3 # ids still available to the token in the current 15-minute window
f <- vector("list", 0L) # accumulator list; one element per fetched batch of ids
# Function to get all the followers from a user with pagination
# Fetch all follower ids of a user, paging through the API in batches
# that fit the per-token quota and sleeping between batches when the
# quota is used up.
#
# Args:
#   userId:    value forwarded to get_followers() as `user`.
#   followers: number of follower ids that still have to be fetched.
#   page:      cursor to start from; '-1' denotes the first page.
#
# Returns:
#   A list with one element per get_followers() call made for this user.
#
# Side effects:
#   Reads and updates the global variables `ids` (ids left in the current
#   quota window) and `f` (accumulator list) through `<<-`; `f` is reset
#   to an empty list once the final batch has been collected.
GetFollowersRecursivePagination <- function(userId, followers, page) {
  # Sleep through one 15-minute window while showing a progress bar, then
  # re-check the rate limit and sleep 15 more seconds if needed.
  wait_for_rate_limit_reset <- function() {
    message("Waiting 15 mins...")
    total <- 15 * 60 # Total time = 15 min ~ 900 sec
    pb <- txtProgressBar(min = 0, max = total, style = 3)
    for (i in seq_len(total)) {
      Sys.sleep(time = 1) # 1 second interval
      setTxtProgressBar(pb, i)
    }
    close(pb)
    # `!x > 14.9` parses as `!(x > 14.9)`: wait a little longer unless the
    # reset value is already above 14.9.
    # NOTE(review): row 38 is assumed to be the followers/ids entry of the
    # rate_limit() table; the hard-coded index is fragile -- confirm
    # against the installed rtweet version.
    if (!rate_limit(token = NULL)[38, ]$reset > 14.9) {
      message("Waiting 15 seconds more...")
      Sys.sleep(time = 15)
    }
    message("Go!")
  }

  if (ids == 0) {
    # Quota exhausted by an earlier call: wait, then refill it
    wait_for_rate_limit_reset()
    ids <<- 75000
  }

  if (followers <= ids) {
    # Everything that is left fits into the remaining quota: last request
    message(paste0("Followers < ids | Number of Followers: ",
                   followers, " | Number of resting ids: ", ids))
    ftemp <- get_followers(user = userId, n = followers, page = page)
    # Batches fetched from a later page are wrapped in a data.frame
    batch <- if (page == "-1") ftemp else data.frame("user_id" = ftemp)
    f <<- append(f, list(batch))
    ids <<- ids - followers
    message("Finished!")
    rtemp <- f
    f <<- list() # leave the accumulator empty for the next user
    return(rtemp)
  }

  # More followers than quota: take what the quota allows, wait, recurse
  message(paste0("Followers > ids | Number of Followers: ",
                 followers, " | Number of resting ids: ", ids))
  ftemp <- get_followers(user = userId, n = ids, page = page)
  batch <- if (page == "-1") ftemp else data.frame("user_id" = ftemp)
  f <<- append(f, list(batch))
  n <- ids # n = count of follower ids requested in this round
  pageTemp <- next_cursor(ftemp) # cursor for the following page
  wait_for_rate_limit_reset()
  ids <<- 75000
  GetFollowersRecursivePagination(userId = userId,
                                  followers = followers - n,
                                  page = pageTemp)
}
# Example run on an account with more than 75000 followers
# Look the account up to obtain its id and follower count
user1 <- lookup_users(users = "146620155")
FAOClimate <- GetFollowersRecursivePagination(
  userId = user1[["user_id"]],
  followers = user1[["followers_count"]],
  page = "-1"
)
# Output:
Followers > ids | Number of Followers: 87208 | Number of resting ids: 75000
Followers < ids | Number of Followers: 12208 | Number of resting ids: 62792
Finished!
str(FAOClimate)
List of 2
$ :'data.frame': 75000 obs. of 1 variable:
..$ user_id: chr [1:75000] "87189802" "884863013928546304" "886995444512964608" "852940633652301824" ...
..- attr(*, "next_cursor")= num 1.45e+18
$ :'data.frame': 12207 obs. of 1 variable:
..$ user_id: chr [1:12207] "2175314977" "2168898233" "1491745484" "2175065456" ...
注意:如果您想获取好友而不是关注者,请将函数 get_followers 更改为 get_friends。
默认情况下,GitHub 上 rtweet 开发版(dev version)中的 get_followers 函数现在可以获取超过 75k 个关注者 ID。前几天晚上我在测试中毫无问题地获取了特朗普的 300 万关注者。要获取超过 75k 个 ID,请确保将 retryonratelimit 设置为 TRUE。