Python urllib2 请求错误
Python urllib2 request error
Python 2.7.3 (default, Mar 13 2014, 11:03:55)
[GCC 4.7.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import urllib2
>>> req = urllib2.Request("http:///wp-login.php")
>>> website='kseek.com.my'
>>> req = urllib2.Request("http://"+website+"/wp-login.php")
>>> req.add_header('User-agent', 'Mozilla 5.10')
>>> req.add_header('Referer', 'http://'+website)
>>> data = urllib2.urlopen(req, timeout=6).read()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 407, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 520, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 445, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 528, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 406: Not Acceptable
>>> req = urllib2.Request("http://"+website+"/")
>>> req.add_header('User-agent', 'Mozilla 5.10')
>>> req.add_header('Referer', 'http://'+website)
>>> data = urllib2.urlopen(req, timeout=6).read()
>>>
如您所见,
当我请求 /wp-login.php 时(这个页面我可以通过浏览器甚至 curl 手动访问),得到了 406 错误;
而使用相同的方法请求 /index.php 时,却没有任何问题。
有人能帮忙吗?
您收到 HTTP 406 错误,是因为请求中缺少 Accept header。
在打开 URL 之前添加以下内容:
req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
输出:
>>> req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
>>> data = urllib2.urlopen(req, timeout=6).read()
>>> print data
<!DOCTYPE html>
<!--[if IE 8]>
<html xmlns="http://www.w3.org/1999/xhtml" class="ie8" lang="en-US">
<![endif]-->
<!--[if !(IE 8) ]><!-->
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<!--<![endif]-->
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>K SEE K architect › Log In</title>
<link rel='stylesheet' id='buttons-css' href='http://kseek.com.my/wp-includes/css/buttons.min.css?ver=4.2.2' type='text/css' media='all' />
<link rel='stylesheet' id='open-sans-css' href='//fonts.googleapis.com/css?family=Open+Sans%3A300italic%2C400italic%2C600italic%2C300%2C400%2C600&subset=latin%2Clatin-ext&ver=4.2.2' type='text/css' media='all' />
<link rel='stylesheet' id='dashicons-css' href='http://kseek.com.my/wp-includes/css/dashicons.min.css?ver=4.2.2' type='text/css' media='all' />
<link rel='stylesheet' id='login-css' href='http://kseek.com.my/wp-admin/css/login.min.css?ver=4.2.2' type='text/css' media='all' />
<link rel="apple-touch-icon" sizes="57x57" href="/wp-content/uploads/fbrfg/apple-touch-icon-57x57.png">
<link rel="apple-touch-icon" sizes="114x114" href="/wp-content/uploads/fbrfg/apple-touch-icon-114x114.png">
<link rel="apple-touch-icon" sizes="72x72" href="/wp-content/uploads/fbrfg/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon" sizes="144x144" href="/wp-content/uploads/fbrfg/apple-touch-icon-144x144.png">
<link rel="apple-touch-icon" sizes="60x60" href="/wp-content/uploads/fbrfg/apple-touch-icon-60x60.png">
<link rel="apple-touch-icon" sizes="120x120" href="/wp-content/uploads/fbrfg/apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" sizes="76x76" href="/wp-content/uploads/fbrfg/apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" sizes="152x152" href="/wp-content/uploads/fbrfg/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/wp-content/uploads/fbrfg/apple-touch-icon-180x180.png">
<link rel="shortcut icon" href="/wp-content/uploads/fbrfg/favicon.ico">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-192x192.png" sizes="192x192">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-160x160.png" sizes="160x160">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-96x96.png" sizes="96x96">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-32x32.png" sizes="32x32">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="msapplication-TileImage" content="/wp-content/uploads/fbrfg/mstile-144x144.png">
<meta name="msapplication-config" content="/wp-content/uploads/fbrfg/browserconfig.xml"><meta name='robots' content='noindex,follow' />
</head>
<body class="login login-action-login wp-core-ui locale-en-us">
<div id="login">
<h1><a href="https://wordpress.org/" title="Powered by WordPress" tabindex="-1">K SEE K architect</a></h1>
<form name="loginform" id="loginform" action="http://kseek.com.my/wp-login.php" method="post">
<p>
<label for="user_login">Username<br />
<input type="text" name="log" id="user_login" class="input" value="" size="20" /></label>
</p>
<p>
<label for="user_pass">Password<br />
<input type="password" name="pwd" id="user_pass" class="input" value="" size="20" /></label>
</p>
<p class="forgetmenot"><label for="rememberme"><input name="rememberme" type="checkbox" id="rememberme" value="forever" /> Remember Me</label></p>
<p class="submit">
<input type="submit" name="wp-submit" id="wp-submit" class="button button-primary button-large" value="Log In" />
<input type="hidden" name="redirect_to" value="http://kseek.com.my/wp-admin/" />
<input type="hidden" name="testcookie" value="1" />
</p>
</form>
<p id="nav">
<a href="http://kseek.com.my/wp-login.php?action=lostpassword" title="Password Lost and Found">Lost your password?</a>
</p>
<script type="text/javascript">
function wp_attempt_focus(){
setTimeout( function(){ try{
d = document.getElementById('user_login');
d.focus();
d.select();
} catch(e){}
}, 200);
}
wp_attempt_focus();
if(typeof wpOnload=='function')wpOnload();
</script>
<p id="backtoblog"><a href="http://kseek.com.my/" title="Are you lost?">← Back to K SEE K architect</a></p>
</div>
<div class="clear"></div>
</body>
</html>
>>>
Python 2.7.3 (default, Mar 13 2014, 11:03:55)
[GCC 4.7.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import urllib2
>>> req = urllib2.Request("http:///wp-login.php")
>>> website='kseek.com.my'
>>> req = urllib2.Request("http://"+website+"/wp-login.php")
>>> req.add_header('User-agent', 'Mozilla 5.10')
>>> req.add_header('Referer', 'http://'+website)
>>> data = urllib2.urlopen(req, timeout=6).read()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 407, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 520, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 445, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 528, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 406: Not Acceptable
>>> req = urllib2.Request("http://"+website+"/")
>>> req.add_header('User-agent', 'Mozilla 5.10')
>>> req.add_header('Referer', 'http://'+website)
>>> data = urllib2.urlopen(req, timeout=6).read()
>>>
如您所见,当我请求 /wp-login.php 时(这个页面我可以通过浏览器甚至 curl 手动访问),得到了 406 错误;而使用相同的方法请求 /index.php 时,却没有任何问题。有人能帮忙吗?
您收到 HTTP 406 错误,是因为请求中缺少 Accept header。
在打开 URL 之前添加以下内容:
req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
输出:
>>> req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
>>> data = urllib2.urlopen(req, timeout=6).read()
>>> print data
<!DOCTYPE html>
<!--[if IE 8]>
<html xmlns="http://www.w3.org/1999/xhtml" class="ie8" lang="en-US">
<![endif]-->
<!--[if !(IE 8) ]><!-->
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<!--<![endif]-->
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>K SEE K architect › Log In</title>
<link rel='stylesheet' id='buttons-css' href='http://kseek.com.my/wp-includes/css/buttons.min.css?ver=4.2.2' type='text/css' media='all' />
<link rel='stylesheet' id='open-sans-css' href='//fonts.googleapis.com/css?family=Open+Sans%3A300italic%2C400italic%2C600italic%2C300%2C400%2C600&subset=latin%2Clatin-ext&ver=4.2.2' type='text/css' media='all' />
<link rel='stylesheet' id='dashicons-css' href='http://kseek.com.my/wp-includes/css/dashicons.min.css?ver=4.2.2' type='text/css' media='all' />
<link rel='stylesheet' id='login-css' href='http://kseek.com.my/wp-admin/css/login.min.css?ver=4.2.2' type='text/css' media='all' />
<link rel="apple-touch-icon" sizes="57x57" href="/wp-content/uploads/fbrfg/apple-touch-icon-57x57.png">
<link rel="apple-touch-icon" sizes="114x114" href="/wp-content/uploads/fbrfg/apple-touch-icon-114x114.png">
<link rel="apple-touch-icon" sizes="72x72" href="/wp-content/uploads/fbrfg/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon" sizes="144x144" href="/wp-content/uploads/fbrfg/apple-touch-icon-144x144.png">
<link rel="apple-touch-icon" sizes="60x60" href="/wp-content/uploads/fbrfg/apple-touch-icon-60x60.png">
<link rel="apple-touch-icon" sizes="120x120" href="/wp-content/uploads/fbrfg/apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" sizes="76x76" href="/wp-content/uploads/fbrfg/apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" sizes="152x152" href="/wp-content/uploads/fbrfg/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/wp-content/uploads/fbrfg/apple-touch-icon-180x180.png">
<link rel="shortcut icon" href="/wp-content/uploads/fbrfg/favicon.ico">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-192x192.png" sizes="192x192">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-160x160.png" sizes="160x160">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-96x96.png" sizes="96x96">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/png" href="/wp-content/uploads/fbrfg/favicon-32x32.png" sizes="32x32">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="msapplication-TileImage" content="/wp-content/uploads/fbrfg/mstile-144x144.png">
<meta name="msapplication-config" content="/wp-content/uploads/fbrfg/browserconfig.xml"><meta name='robots' content='noindex,follow' />
</head>
<body class="login login-action-login wp-core-ui locale-en-us">
<div id="login">
<h1><a href="https://wordpress.org/" title="Powered by WordPress" tabindex="-1">K SEE K architect</a></h1>
<form name="loginform" id="loginform" action="http://kseek.com.my/wp-login.php" method="post">
<p>
<label for="user_login">Username<br />
<input type="text" name="log" id="user_login" class="input" value="" size="20" /></label>
</p>
<p>
<label for="user_pass">Password<br />
<input type="password" name="pwd" id="user_pass" class="input" value="" size="20" /></label>
</p>
<p class="forgetmenot"><label for="rememberme"><input name="rememberme" type="checkbox" id="rememberme" value="forever" /> Remember Me</label></p>
<p class="submit">
<input type="submit" name="wp-submit" id="wp-submit" class="button button-primary button-large" value="Log In" />
<input type="hidden" name="redirect_to" value="http://kseek.com.my/wp-admin/" />
<input type="hidden" name="testcookie" value="1" />
</p>
</form>
<p id="nav">
<a href="http://kseek.com.my/wp-login.php?action=lostpassword" title="Password Lost and Found">Lost your password?</a>
</p>
<script type="text/javascript">
function wp_attempt_focus(){
setTimeout( function(){ try{
d = document.getElementById('user_login');
d.focus();
d.select();
} catch(e){}
}, 200);
}
wp_attempt_focus();
if(typeof wpOnload=='function')wpOnload();
</script>
<p id="backtoblog"><a href="http://kseek.com.my/" title="Are you lost?">← Back to K SEE K architect</a></p>
</div>
<div class="clear"></div>
</body>
</html>
>>>