使用 gRPC 从 Pinterest 中提取数据,Python
Extracting data from Pinterest using gRPC, Python
我想抓取我的 Pinterest 帐户的所有聊天记录
我有一个原型服务:
syntax = "proto3";
service Pinterest{
rpc GetConversations (request_chat_info) returns (chat_response);
}
message request_chat_info{
string conversation_id = 1;
string csrftoken = 2;
string _b = 3;
string _pinterest_sess = 4;
}
message chat_response{
string type = 1;
string id = 2;
string text = 3;
}
message chat_response_array{
repeated chat_response messages = 1;
}
这是我的 Pinterest 服务人员:
# GRPC Service
class PinterestService(pb2_grpc.PinterestServicer):
def GetConversations(self, request, context):
conversation_id = request.conversation_id
csrftoken = request.csrftoken
_b = request._b
_pinterest_sess = request._pinterest_sess
chats = _exec(
get_chat,
{"conversation_id": conversation_id, "csrftoken": csrftoken, "_b": _b, "_pinterest_sess": _pinterest_sess}
)
return pb2.chat_response_array(messages=chats)
和主程序是这样的:
# ENDPOINTS
CHAT_API = "https://www.pinterest.com/resource/ConversationMessagesResource/get/"
# Execute Fucntion
def _exec(func, params):
return func(**params)
# Make Requests here
def _get(url:str, cookies:Dict = None, headers:Dict = None) -> requests.Response:
response = requests.request("GET", url=url, cookies=cookies, headers=headers)
response.raise_for_status()
return response
# Chat Parser Function
def _chat_parser(chat_dict: Dict) -> Dict:
return {
"type": chat_dict.get("type", ""),
"id": chat_dict.get("id", ""),
"text": chat_dict.get("text", ""),
}
# Function to handle GRPC
def get_chat(conversation_id:str, csrftoken:str, _b:str, _pinterest_sess:str) -> Dict:
options = {"page_size":25,"conversation_id":conversation_id,"no_fetch_context_on_resource":False}
_cookies = {"csrftoken":csrftoken, "_b":_b, "_pinterest_sess":_pinterest_sess}
query = {"data": json.dumps({"options":options})}
encoded_query = urlencode(query).replace("+", "%20")
url = "{}?{}".format(CHAT_API, encoded_query)
msg_counter = 0
while True:
try:
return _chat_parser(_get(url, _cookies).json()["resource_response"]["data"][msg_counter])
except IndexError:
break
finally:
msg_counter += 1
我需要获取所有 CHAT,但我不知道该怎么做!
Pinterest 中的响应 JSON 完全是这样的:
["resource_response"]["data"][0]
["resource_response"]["data"][1]
["resource_response"]["data"][2]
["resource_response"]["data"][...]
根据消息计算最后一个数字从 0 到任意数字的变化
我不知道该如何处理!
我的 proto 有什么问题吗?
我应该在 proto 中使用流吗,如果是,双向流还是用于客户端、服务器...
谢谢你帮助我。
我找到了答案
它必须是服务器流并使用 for 循环和 yield
我想抓取我的 Pinterest 帐户的所有聊天记录
我有一个原型服务:
syntax = "proto3";
service Pinterest{
rpc GetConversations (request_chat_info) returns (chat_response);
}
message request_chat_info{
string conversation_id = 1;
string csrftoken = 2;
string _b = 3;
string _pinterest_sess = 4;
}
message chat_response{
string type = 1;
string id = 2;
string text = 3;
}
message chat_response_array{
repeated chat_response messages = 1;
}
这是我的 Pinterest 服务人员:
# GRPC Service
class PinterestService(pb2_grpc.PinterestServicer):
def GetConversations(self, request, context):
conversation_id = request.conversation_id
csrftoken = request.csrftoken
_b = request._b
_pinterest_sess = request._pinterest_sess
chats = _exec(
get_chat,
{"conversation_id": conversation_id, "csrftoken": csrftoken, "_b": _b, "_pinterest_sess": _pinterest_sess}
)
return pb2.chat_response_array(messages=chats)
和主程序是这样的:
# ENDPOINTS
CHAT_API = "https://www.pinterest.com/resource/ConversationMessagesResource/get/"
# Execute Fucntion
def _exec(func, params):
return func(**params)
# Make Requests here
def _get(url:str, cookies:Dict = None, headers:Dict = None) -> requests.Response:
response = requests.request("GET", url=url, cookies=cookies, headers=headers)
response.raise_for_status()
return response
# Chat Parser Function
def _chat_parser(chat_dict: Dict) -> Dict:
return {
"type": chat_dict.get("type", ""),
"id": chat_dict.get("id", ""),
"text": chat_dict.get("text", ""),
}
# Function to handle GRPC
def get_chat(conversation_id:str, csrftoken:str, _b:str, _pinterest_sess:str) -> Dict:
options = {"page_size":25,"conversation_id":conversation_id,"no_fetch_context_on_resource":False}
_cookies = {"csrftoken":csrftoken, "_b":_b, "_pinterest_sess":_pinterest_sess}
query = {"data": json.dumps({"options":options})}
encoded_query = urlencode(query).replace("+", "%20")
url = "{}?{}".format(CHAT_API, encoded_query)
msg_counter = 0
while True:
try:
return _chat_parser(_get(url, _cookies).json()["resource_response"]["data"][msg_counter])
except IndexError:
break
finally:
msg_counter += 1
我需要获取所有 CHAT,但我不知道该怎么做!
Pinterest 中的响应 JSON 完全是这样的:
["resource_response"]["data"][0]
["resource_response"]["data"][1]
["resource_response"]["data"][2]
["resource_response"]["data"][...]
根据消息计算最后一个数字从 0 到任意数字的变化
我不知道该如何处理!
我的 proto 有什么问题吗?
我应该在 proto 中使用流吗,如果是,双向流还是用于客户端、服务器...
谢谢你帮助我。
我找到了答案
它必须是服务器流并使用 for 循环和 yield