按主题标签和时间范围查询 Instagram 帖子
Query Instagram posts by hashtag and time range
我正在尝试通过提供主题标签和时间范围(从日期到日期)来查询来自 Instagram 的帖子。
我使用 recent tags endpoint.
https://api.instagram.com/v1/tags/{tag-name}/media/recent?access_token=ACCESS-TOKEN
我的代码是使用 instagram-node
库在 Node.js 中编写的(请参阅内联注释):
// Require the config file
var config = require('../config.js');
// Require and intialize the instagram instance
var ig = require('instagram-node').instagram();
// Set the access token
ig.use({ access_token: config.instagram.access_token });
// We export this function for public use
// hashtag: the hashtag to search for
// minDate: the since date
// maxDate: the until date
// callback: the callback function (err, posts)
module.exports = function (hashtag, minDate, maxDate, callback) {
// Create the posts array (will be concated with new posts from pagination responses)
var posts = [];
// Convert the date objects into timestamps (seconds)
var sinceTime = Math.floor(minDate.getTime() / 1000);
var untilTime = Math.floor(maxDate.getTime() / 1000);
// Fetch the IG posts page by page
ig.tag_media_recent(hashtag, { count: 50 }, function fetchPosts(err, medias, pagination, remaining, limit) {
// Handle error
if (err) {
return callback(err);
}
// Manually filter by time
var filteredByTime = medias.filter(function (currentPost) {
// Convert the created_time string into number (seconds timestamp)
var createdTime = +currentPost.created_time;
// Check if it's after since date and before until date
return createdTime >= sinceTime && createdTime <= untilTime;
});
// Get the last post on this page
var lastPost = medias[medias.length - 1] || {};
// ...and its timestamp
var lastPostTimeStamp = +(lastPost.created_time || -1);
// ...and its timestamp date object
var lastPostDate = new Date(lastPostTimeStamp * 1000);
// Concat the new [filtered] posts to the big array
posts = posts.concat(filteredByTime);
// Show some output
console.log('found ' + filteredByTime.length + ' new items total: ' + posts.length, lastPostDate);
// Check if the last post is BEFORE until date and there are no new posts in the provided range
if (filteredByTime.length === 0 && lastPostTimeStamp <= untilTime) {
// ...if so, we can callback!
return callback(null, posts);
}
// Navigate to the next page
pagination.next(fetchPosts);
});
};
这将开始获取最新到最不最新的帖子,并手动过滤 created_time
。
这行得通,但效率非常低,因为如果我们想要,例如,获取一年前的帖子,我们必须迭代页面直到那个时间,这将使用大量请求(可能超过 5k / 小时这是速率限制)。
是否有更好的查询方式?如何通过提供主题标签和时间范围来获取 Instagram 帖子?
我认为这就是您正在寻找的基本想法。我对 Node.js 不是很熟悉,所以这一切都很简单 javascript。您必须修改它以满足您的需要,并可能从中创建一个功能。
我们的想法是将 instagram id(本例中为 1116307519311125603)转换为日期,反之亦然,使您能够快速获取特定时间点,而不是回溯所有结果,直到找到所需的时间戳。应删除下划线“_”之后的 ID 部分,因为它在某种程度上指的是用户 IIRC。示例中有4个函数希望对大家有所帮助
祝您黑客愉快!
//static
var epoch_hour = 3600,
epoch_day = 86400,
epoch_month = 2592000,
epoch_year = 31557600;
//you'll need to set this part up/integrate it with your code
var dataId = 1116307519311125603,
range = 2 * epoch_hour,
count = 1,
tagName = 'cars',
access = prompt('Enter access token:'),
baseUrl = 'https://api.instagram.com/v1/tags/' +
tagName + '/media/recent?access_token=' + access;
//date && id utilities
function idToEpoch(n){
return Math.round((n / 1000000000000 + 11024476.5839159095) / 0.008388608);
}
function epochToId(n){
return Math.round((n * 0.008388608 - 11024476.5839159095) * 1000000000000);
}
function newDateFromEpoch(n){
var d = new Date(0);
d.setUTCSeconds(n);
return d;
}
function dateToEpoch(d){
return (d.getTime()-d.getMilliseconds())/1000;
}
//start with your id and range; do the figuring
var epoch_time = idToEpoch(dataId),
minumumId = epochToId(epoch_time),
maximumId = epochToId(epoch_time + range),
minDate = newDateFromEpoch(epoch_time),
maxDate = newDateFromEpoch(epoch_time + range);
var newUrl = baseUrl +
'&count=' + count +
'&min_tag_id=' + minumumId +
'&max_tag_id=' + maximumId;
//used for testing
/*alert('Start: ' + minDate + ' (' + epoch_time +
')\nEnd: ' + maxDate + ' (' + (epoch_time +
range) + ')');
window.location = newUrl;*/
为了支持 优秀的答案,通过 plpgSQL 函数生成了一个 instagram ID:
CREATE OR REPLACE FUNCTION insta5.next_id(OUT result bigint) AS $$
DECLARE
our_epoch bigint := 1314220021721;
seq_id bigint;
now_millis bigint;
shard_id int := 5;
BEGIN
SELECT nextval('insta5.table_id_seq') %% 1024 INTO seq_id;
SELECT FLOOR(EXTRACT(EPOCH FROM clock_timestamp()) * 1000) INTO now_millis;
result := (now_millis - our_epoch) << 23;
result := result | (shard_id << 10);
result := result | (seq_id);
END;
$$ LANGUAGE PLPGSQL;
尽管获取帖子的过程相似,但我目前工作的 Data365.co Instagram API 似乎更合适和高效。它没有每小时 5,000 个帖子的限制,您可以在请求本身中指定您需要的帖子的时间段。此外,计费将仅考虑指定时间段内的帖子。您无需为不需要的数据付费。
您可以在下面看到一个任务示例,用于下载 2021 年 1 月 1 日至 2021 年 1 月 10 日期间标签比特币的帖子。
获取相应帖子列表的GET请求示例:
https://api.data365.co/v1.1/instagram/tag/bitcoins/posts?from_date=2021-01-01&to_date=2021-01-10&max_page_size=100&order_by=date_desc&access_token=TOKEN
在 https://api.data365.co/v1.1/instagram/docs#tag/Instagram-hashtag-search
的 API 文档中查看更多详细信息
我正在尝试通过提供主题标签和时间范围(从日期到日期)来查询来自 Instagram 的帖子。 我使用 recent tags endpoint.
https://api.instagram.com/v1/tags/{tag-name}/media/recent?access_token=ACCESS-TOKEN
我的代码是使用 instagram-node
库在 Node.js 中编写的(请参阅内联注释):
// Require the config file
var config = require('../config.js');
// Require and intialize the instagram instance
var ig = require('instagram-node').instagram();
// Set the access token
ig.use({ access_token: config.instagram.access_token });
// We export this function for public use
// hashtag: the hashtag to search for
// minDate: the since date
// maxDate: the until date
// callback: the callback function (err, posts)
module.exports = function (hashtag, minDate, maxDate, callback) {
// Create the posts array (will be concated with new posts from pagination responses)
var posts = [];
// Convert the date objects into timestamps (seconds)
var sinceTime = Math.floor(minDate.getTime() / 1000);
var untilTime = Math.floor(maxDate.getTime() / 1000);
// Fetch the IG posts page by page
ig.tag_media_recent(hashtag, { count: 50 }, function fetchPosts(err, medias, pagination, remaining, limit) {
// Handle error
if (err) {
return callback(err);
}
// Manually filter by time
var filteredByTime = medias.filter(function (currentPost) {
// Convert the created_time string into number (seconds timestamp)
var createdTime = +currentPost.created_time;
// Check if it's after since date and before until date
return createdTime >= sinceTime && createdTime <= untilTime;
});
// Get the last post on this page
var lastPost = medias[medias.length - 1] || {};
// ...and its timestamp
var lastPostTimeStamp = +(lastPost.created_time || -1);
// ...and its timestamp date object
var lastPostDate = new Date(lastPostTimeStamp * 1000);
// Concat the new [filtered] posts to the big array
posts = posts.concat(filteredByTime);
// Show some output
console.log('found ' + filteredByTime.length + ' new items total: ' + posts.length, lastPostDate);
// Check if the last post is BEFORE until date and there are no new posts in the provided range
if (filteredByTime.length === 0 && lastPostTimeStamp <= untilTime) {
// ...if so, we can callback!
return callback(null, posts);
}
// Navigate to the next page
pagination.next(fetchPosts);
});
};
这将开始获取最新到最不最新的帖子,并手动过滤 created_time
。
这行得通,但效率非常低,因为如果我们想要,例如,获取一年前的帖子,我们必须迭代页面直到那个时间,这将使用大量请求(可能超过 5k / 小时这是速率限制)。
是否有更好的查询方式?如何通过提供主题标签和时间范围来获取 Instagram 帖子?
我认为这就是您正在寻找的基本想法。我对 Node.js 不是很熟悉,所以这一切都很简单 javascript。您必须修改它以满足您的需要,并可能从中创建一个功能。
我们的想法是将 instagram id(本例中为 1116307519311125603)转换为日期,反之亦然,使您能够快速获取特定时间点,而不是回溯所有结果,直到找到所需的时间戳。应删除下划线“_”之后的 ID 部分,因为它在某种程度上指的是用户 IIRC。示例中有4个函数希望对大家有所帮助
祝您黑客愉快!
//static
var epoch_hour = 3600,
epoch_day = 86400,
epoch_month = 2592000,
epoch_year = 31557600;
//you'll need to set this part up/integrate it with your code
var dataId = 1116307519311125603,
range = 2 * epoch_hour,
count = 1,
tagName = 'cars',
access = prompt('Enter access token:'),
baseUrl = 'https://api.instagram.com/v1/tags/' +
tagName + '/media/recent?access_token=' + access;
//date && id utilities
function idToEpoch(n){
return Math.round((n / 1000000000000 + 11024476.5839159095) / 0.008388608);
}
function epochToId(n){
return Math.round((n * 0.008388608 - 11024476.5839159095) * 1000000000000);
}
function newDateFromEpoch(n){
var d = new Date(0);
d.setUTCSeconds(n);
return d;
}
function dateToEpoch(d){
return (d.getTime()-d.getMilliseconds())/1000;
}
//start with your id and range; do the figuring
var epoch_time = idToEpoch(dataId),
minumumId = epochToId(epoch_time),
maximumId = epochToId(epoch_time + range),
minDate = newDateFromEpoch(epoch_time),
maxDate = newDateFromEpoch(epoch_time + range);
var newUrl = baseUrl +
'&count=' + count +
'&min_tag_id=' + minumumId +
'&max_tag_id=' + maximumId;
//used for testing
/*alert('Start: ' + minDate + ' (' + epoch_time +
')\nEnd: ' + maxDate + ' (' + (epoch_time +
range) + ')');
window.location = newUrl;*/
为了支持
CREATE OR REPLACE FUNCTION insta5.next_id(OUT result bigint) AS $$
DECLARE
our_epoch bigint := 1314220021721;
seq_id bigint;
now_millis bigint;
shard_id int := 5;
BEGIN
SELECT nextval('insta5.table_id_seq') %% 1024 INTO seq_id;
SELECT FLOOR(EXTRACT(EPOCH FROM clock_timestamp()) * 1000) INTO now_millis;
result := (now_millis - our_epoch) << 23;
result := result | (shard_id << 10);
result := result | (seq_id);
END;
$$ LANGUAGE PLPGSQL;
尽管获取帖子的过程相似,但我目前工作的 Data365.co Instagram API 似乎更合适和高效。它没有每小时 5,000 个帖子的限制,您可以在请求本身中指定您需要的帖子的时间段。此外,计费将仅考虑指定时间段内的帖子。您无需为不需要的数据付费。
您可以在下面看到一个任务示例,用于下载 2021 年 1 月 1 日至 2021 年 1 月 10 日期间标签比特币的帖子。
获取相应帖子列表的GET请求示例: https://api.data365.co/v1.1/instagram/tag/bitcoins/posts?from_date=2021-01-01&to_date=2021-01-10&max_page_size=100&order_by=date_desc&access_token=TOKEN
在 https://api.data365.co/v1.1/instagram/docs#tag/Instagram-hashtag-search
的 API 文档中查看更多详细信息