如何在 Python 中插入 cookie 以进行网络抓取?
How do I insert a cookie in Python for web scraping?
我正在制作一个网络抓取器,我将在我的 Raspberry Pi B 上托管它,但是我需要从中抓取的网站需要 cookie 才能访问它。特定的 cookie 是 .ROBLOSECURITY。我在仅使用该 cookie 之前登录(借助 Google Chrome 的 EditThisCookie 扩展)。如何让我的 Python 程序使用此 cookie 登录?
这是我的 .ROBLOSECURITY cookie(虽然有一些 letters/numbers 已更改):
26D59EEB62BB82BA679D88E391F5E43448FDC5EEE74BEBBFD9879204EABA2813E4C00248E65D7ADBFE0B91F1B140E4DD61CBA1F0EE5991E5099BE044AD9AF0C019EFAFDCF6A41355002355A602F9B8ADEF4CD14E70825687F9748B082089DE69C833E4F5AE9B358F1988B3D3BB04CA5D0BF96501E8B4AAACD68BBE3ACCAED5DA646BB4E7B3D8CC88D102DD53382C8FE8696C54445EB3716AF08DF9816E14EAC0DA451C04803BAB801BF61A20FD9BF6E3FE9BF06833D68C08BB1DF4FDD3ED969687F42BAA5D57C66246549F4323F3FAE71D7E38574690F6AB41D56C224C949018C5C24901EB7D8A4B6D262A173B60B16B413F347B21AC8901F86D818B039A88344A324670D726176F42485ADE295EE22ADEDA733452735B043B7A4FF8262D42DF60D63329C77E8AF9EF65AD25B01CEAD48FCBF59D8CB70AE32BDE1651FB372656C600DBCBF53F0D49FB89275830B0A5513EC201C808699428C0F09BF8FE64A227D9A94B43943E2F81E252B45297D38AF6D8E8FDA180DCB491AA33FA7EE87BB1D1E005050573294010E9169AB9AF716F69483128B93F87878C24380A57F64A8EF4BC9242A6125413548F88D15F6E6779A9B996BCADFEA7EABFEE3ED17EFEC148C33630CBCDCD9E1DDCB4B1C5DD42EF93C696C20D01A1E9D95AD40145ACE57C4664ACDF79EF78482DE6E40E7D3727C501A089993402F386A2D5997CDE530DBF93CDAD90E15F207D3B9DE168C3B669E1099B304192CD33D327150A57B9383BDBC99215448F21
这是一张屏幕截图,其中包含有关 cookie 的更多信息。
取自here
import requests
url = 'http://target_url'
cookies = dict(cookie='value')
r = requests.get(url, cookies=cookies)
这是使用 urllib 的方法。
import urllib2
handler = urllib2.HTTPHandler(debuglevel = 1)
req = urllib2.Request('http://www.example.com/')
req.add_header('Cookie', (".ROBLOSECURITY=26D59EEB62BB82BA679D88E391F5E43448FDC5EEE74BEBBFD9879204EABA2813E4C00248E65D7ADBFE0B91F1B"
"140E4DD61CBA1F0EE5991E5099BE044AD9AF0C019EFAFDCF6A41355002355A602F9B8ADEF4CD14E70825687F9748B082089DE69C833E4F5AE9B358F1988B3D3BB04CA5D0B"
"F96501E8B4AAACD68BBE3ACCAED5DA646BB4E7B3D8CC88D102DD53382C8FE8696C54445EB3716AF08DF9816E14EAC0DA451C04803BAB801BF61A20FD9BF6E3FE9BF06833D"
"68C08BB1DF4FDD3ED969687F42BAA5D57C66246549F4323F3FAE71D7E38574690F6AB41D56C224C949018C5C24901EB7D8A4B6D262A173B60B16B413F347B21AC8901F86D"
"818B039A88344A324670D726176F42485ADE295EE22ADEDA733452735B043B7A4FF8262D42DF60D63329C77E8AF9EF65AD25B01CEAD48FCBF59D8CB70AE32BDE1651FB372"
"656C600DBCBF53F0D49FB89275830B0A5513EC201C808699428C0F09BF8FE64A227D9A94B43943E2F81E252B45297D38AF6D8E8FDA180DCB491AA33FA7EE87BB1D1E00505"
"0573294010E9169AB9AF716F69483128B93F87878C24380A57F64A8EF4BC9242A6125413548F88D15F6E6779A9B996BCADFEA7EABFEE3ED17EFEC148C33630CBCDCD9E1DD"
"CB4B1C5DD42EF93C696C20D01A1E9D95AD40145ACE57C4664ACDF79EF78482DE6E40E7D3727C501A089993402F386A2D5997CDE530DBF93CDAD90E15F207D3B9DE168C3B6"
"69E1099B304192CD33D327150A57B9383BDBC99215448F21"))
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)
resp = urllib2.urlopen(req)
print resp.read()
我正在制作一个网络抓取器,我将在我的 Raspberry Pi B 上托管它,但是我需要从中抓取的网站需要 cookie 才能访问它。特定的 cookie 是 .ROBLOSECURITY。我在仅使用该 cookie 之前登录(借助 Google Chrome 的 EditThisCookie 扩展)。如何让我的 Python 程序使用此 cookie 登录?
这是我的 .ROBLOSECURITY cookie(虽然有一些 letters/numbers 已更改):
26D59EEB62BB82BA679D88E391F5E43448FDC5EEE74BEBBFD9879204EABA2813E4C00248E65D7ADBFE0B91F1B140E4DD61CBA1F0EE5991E5099BE044AD9AF0C019EFAFDCF6A41355002355A602F9B8ADEF4CD14E70825687F9748B082089DE69C833E4F5AE9B358F1988B3D3BB04CA5D0BF96501E8B4AAACD68BBE3ACCAED5DA646BB4E7B3D8CC88D102DD53382C8FE8696C54445EB3716AF08DF9816E14EAC0DA451C04803BAB801BF61A20FD9BF6E3FE9BF06833D68C08BB1DF4FDD3ED969687F42BAA5D57C66246549F4323F3FAE71D7E38574690F6AB41D56C224C949018C5C24901EB7D8A4B6D262A173B60B16B413F347B21AC8901F86D818B039A88344A324670D726176F42485ADE295EE22ADEDA733452735B043B7A4FF8262D42DF60D63329C77E8AF9EF65AD25B01CEAD48FCBF59D8CB70AE32BDE1651FB372656C600DBCBF53F0D49FB89275830B0A5513EC201C808699428C0F09BF8FE64A227D9A94B43943E2F81E252B45297D38AF6D8E8FDA180DCB491AA33FA7EE87BB1D1E005050573294010E9169AB9AF716F69483128B93F87878C24380A57F64A8EF4BC9242A6125413548F88D15F6E6779A9B996BCADFEA7EABFEE3ED17EFEC148C33630CBCDCD9E1DDCB4B1C5DD42EF93C696C20D01A1E9D95AD40145ACE57C4664ACDF79EF78482DE6E40E7D3727C501A089993402F386A2D5997CDE530DBF93CDAD90E15F207D3B9DE168C3B669E1099B304192CD33D327150A57B9383BDBC99215448F21
这是一张屏幕截图,其中包含有关 cookie 的更多信息。
取自here
import requests
url = 'http://target_url'
cookies = dict(cookie='value')
r = requests.get(url, cookies=cookies)
这是使用 urllib 的方法。
import urllib2
handler = urllib2.HTTPHandler(debuglevel = 1)
req = urllib2.Request('http://www.example.com/')
req.add_header('Cookie', (".ROBLOSECURITY=26D59EEB62BB82BA679D88E391F5E43448FDC5EEE74BEBBFD9879204EABA2813E4C00248E65D7ADBFE0B91F1B"
"140E4DD61CBA1F0EE5991E5099BE044AD9AF0C019EFAFDCF6A41355002355A602F9B8ADEF4CD14E70825687F9748B082089DE69C833E4F5AE9B358F1988B3D3BB04CA5D0B"
"F96501E8B4AAACD68BBE3ACCAED5DA646BB4E7B3D8CC88D102DD53382C8FE8696C54445EB3716AF08DF9816E14EAC0DA451C04803BAB801BF61A20FD9BF6E3FE9BF06833D"
"68C08BB1DF4FDD3ED969687F42BAA5D57C66246549F4323F3FAE71D7E38574690F6AB41D56C224C949018C5C24901EB7D8A4B6D262A173B60B16B413F347B21AC8901F86D"
"818B039A88344A324670D726176F42485ADE295EE22ADEDA733452735B043B7A4FF8262D42DF60D63329C77E8AF9EF65AD25B01CEAD48FCBF59D8CB70AE32BDE1651FB372"
"656C600DBCBF53F0D49FB89275830B0A5513EC201C808699428C0F09BF8FE64A227D9A94B43943E2F81E252B45297D38AF6D8E8FDA180DCB491AA33FA7EE87BB1D1E00505"
"0573294010E9169AB9AF716F69483128B93F87878C24380A57F64A8EF4BC9242A6125413548F88D15F6E6779A9B996BCADFEA7EABFEE3ED17EFEC148C33630CBCDCD9E1DD"
"CB4B1C5DD42EF93C696C20D01A1E9D95AD40145ACE57C4664ACDF79EF78482DE6E40E7D3727C501A089993402F386A2D5997CDE530DBF93CDAD90E15F207D3B9DE168C3B6"
"69E1099B304192CD33D327150A57B9383BDBC99215448F21"))
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)
resp = urllib2.urlopen(req)
print resp.read()