python 中的自定义用户代理格式错误
Custom Useragents wrong formatted in python
今天我尝试向一个网站发出简单的请求并实现我定义到一个名为 useragents.txt 的单独文件中的自定义 headers。我现在浪费了很多时间来让它工作。问题是,python 不会在 valueerror: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
期间请求站点
我不确定为什么要在其中添加一个 b' 和一个 \n。如果我打印变量,则输出没有这些符号。这里有一些代码可能会更好地表达我的意思:
def get_soup(url, header):
time.sleep(random.choice([1, 2, 3]))
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
# count
user_agent = random.choice(user_agents_lines)
print(f"USER-AGENT: {user_agent}")
# for user_agent in user_agents_lines:
# count += 1
# print(f"Line{count}: {user_agent.strip()}")
完整错误是:
Traceback (most recent call last):
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 118, in <module>
scraper() # run the function
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 69, in scraper
soup = get_soup(surveyingurl, testheader)
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 43, in get_soup
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1389, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1257, in request
self._send_request(method, url, body, headers, encode_chunked)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1298, in _send_request
self.putheader(hdr, value)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1235, in putheader
raise ValueError('Invalid header value %r' % (values[i],))
ValueError: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
Process finished with exit code 1
下面的屏幕截图显示了我的 useragents.txt 文件的样子:
with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
user_agent = random.choice(user_agents_lines)
user_agent = user_agent.replace(b'\n', b'')
print(f"USER-AGENT: {user_agent}")
今天我尝试向一个网站发出简单的请求并实现我定义到一个名为 useragents.txt 的单独文件中的自定义 headers。我现在浪费了很多时间来让它工作。问题是,python 不会在 valueerror: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
我不确定为什么要在其中添加一个 b' 和一个 \n。如果我打印变量,则输出没有这些符号。这里有一些代码可能会更好地表达我的意思:
def get_soup(url, header):
time.sleep(random.choice([1, 2, 3]))
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
# count
user_agent = random.choice(user_agents_lines)
print(f"USER-AGENT: {user_agent}")
# for user_agent in user_agents_lines:
# count += 1
# print(f"Line{count}: {user_agent.strip()}")
完整错误是:
Traceback (most recent call last):
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 118, in <module>
scraper() # run the function
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 69, in scraper
soup = get_soup(surveyingurl, testheader)
File "D:\my_python_projects\testingcodes\firstcode\main.py", line 43, in get_soup
return BeautifulSoup(urlopen(Request(url, headers=header)), "html.parser")
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1389, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "D:\Downloads\Python\python 3.9.6\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1257, in request
self._send_request(method, url, body, headers, encode_chunked)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1298, in _send_request
self.putheader(hdr, value)
File "D:\Downloads\Python\python 3.9.6\lib\http\client.py", line 1235, in putheader
raise ValueError('Invalid header value %r' % (values[i],))
ValueError: Invalid header value b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36\n'
Process finished with exit code 1
下面的屏幕截图显示了我的 useragents.txt 文件的样子:
with open("useragents.txt", "r") as user_agents_file:
user_agents_lines = user_agents_file.read().splitlines()
print(user_agents_lines)
user_agent = random.choice(user_agents_lines)
user_agent = user_agent.replace(b'\n', b'')
print(f"USER-AGENT: {user_agent}")