使用 jsoup 获取验证码图像
Getting Captcha image using jsoup
我正在尝试通过从我的学术 URL 获取验证码来开发一个基于 java-GUI 的小型应用程序,询问用户他的用户名、密码和验证码,在登录后显示内容。但是我被困在登录页面本身,因为在提交表单后来自网络的响应是
alert('Please enter correct code.'); window.history.go(-1);
代码
public Map cookies;
public void downloadCaptcha()throws Exception {
Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
.timeout(300000)
.userAgent("Mozilla/5.0")
.method(Connection.Method.GET).execute();
cookies = response.cookies();
Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
.cookies(cookies)
.ignoreContentType(true)
.method(Connection.Method.GET).timeout(30000).execute();
FileOutputStream out = (new FileOutputStream(new java.io.File("F:\abc.jpg")));
out.write(resultImageResponse.bodyAsBytes());
out.close();
System.out.println("Captcha Fetched");
}
下载验证码后
/**
 * Posts the login form (credentials plus the captcha text typed by the user)
 * with the stored cookies, prints the cookies returned by the server and
 * saves the parsed response page to disk.
 *
 * @param captacha the captcha text read from the downloaded image
 * @throws Exception if the HTTP request, the parsing or the file write fails
 */
public static void getData(String captacha) throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
            .userAgent("Mozilla/5.0")
            .cookies(cookies)
            .data("username", username)
            .data("passwd", password)
            .data("txtCaptcha", captacha)
            .data("submit", "Login")
            .data("option", "login")
            .data("op2", "login")
            .data("lang", "english")
            .data("return", "https://academics.ddn.upes.ac.in/upes/index.php?option=com_content&task=view&id=53&Itemid=6420")
            .data("message", "0")
            // NOTE(review): this field name is hard-coded and looks like a
            // generated token; confirm that it is stable between page loads.
            .data("j1643f05a0c7fc7910424fb3fc4fbbb6f", "1")
            .timeout(0) // 0 disables the timeout
            .method(Connection.Method.POST)
            .execute();

    cookies = response.cookies();
    System.out.println(response.cookies());

    Document doc = response.parse();

    // The backslash has to be escaped; unescaped, the backslash + 'r' pair is
    // read as a carriage return and ends up inside the file name.
    // try-with-resources closes the writer chain even if println fails.
    try (PrintWriter pw = new PrintWriter(new FileWriter("F:\\response.html"))) {
        pw.println(doc.toString());
    }
}
response.cookies()
给出输出 {PHPSESSID=ai0r017bmb55gv0m4ikeu6jfc6, 61c78a27855d239ae8682ff6befaa989=5ae2e5baf548bc293c943d3416e7d400}
网站是https://academics.ddn.upes.ac.in/upes/index.php
请指出我的错误
您需要进行两处更改才能使代码正常工作:
1 - 您需要获取第二次调用(图片下载)返回的 cookie,并将其添加到之前的 cookie 中。
2 - 如果您看到字段 "j1643f05a0c7fc7910424fb3fc4fbbb6f" 非常可疑,实际上该字段是可变的,您需要选择表单中的隐藏输入并使用它。
3(额外)- 本例中并不需要,但如果您不发送某些请求头(例如 Accept、Accept-Encoding、Accept-Language 等),有些服务器会拒绝请求。
当我使用您的代码进行这些更改时,我得到:
<script>alert('Incorrect username or password. Please try again.'); window.history.go(-1);</script>
当然我没有user/pass,我想你会得到想要的页面。
具有必要更改的代码是:
/**
 * Console demo of a captcha-protected login done with jsoup.
 *
 * <p>Flow: load the login page (cookies + form inputs), download the captcha
 * image with the same cookies, read the captcha text from standard input and
 * post the completed form.
 */
public class SO_28619161 {

    /** Cookies collected from every response so far; sent with each request. */
    public Map<String, String> cookies;
    private String username = "u";
    private String password = "p";

    /**
     * Loads the login page, collects the values of its form inputs and saves
     * the captcha image to {@code abc.jpg} in the working directory.
     *
     * @return the named form inputs of the login page mapped to their values
     * @throws Exception if an HTTP request or the file write fails
     */
    public HashMap<String,String> downloadCaptcha() throws Exception {
        Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
                .timeout(300000)
                .userAgent("Mozilla/5.0")
                .method(Connection.Method.GET)
                .execute();
        cookies = response.cookies();

        // Collect the form's inputs, including the hidden ones, so that any
        // field whose name changes between page loads is posted back as-is.
        Document doc = response.parse();
        Elements fields = doc.select("form input");
        HashMap<String,String> formFields = new HashMap<String, String>();
        for (Element field : fields) {
            String name = field.attr("name");
            // attr() yields "" for a missing attribute; an unnamed input must
            // not be posted under an empty key.
            if (!name.isEmpty()) {
                formFields.put(name, field.attr("value"));
            }
        }

        Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
                .cookies(cookies)
                .ignoreContentType(true) // the body is an image, not HTML
                .method(Connection.Method.GET)
                .timeout(30000)
                .execute();
        // The image response may set cookies of its own; keep those as well.
        cookies.putAll(resultImageResponse.cookies());

        // try-with-resources closes the stream even when the write fails.
        try (FileOutputStream out = new FileOutputStream(new java.io.File("abc.jpg"))) {
            out.write(resultImageResponse.bodyAsBytes());
        }
        System.out.println("Captcha Fetched");
        return formFields;
    }

    /**
     * Posts the given form fields to the login URL, prints the cookies and the
     * page returned by the server, and saves the page to {@code response.html}.
     *
     * @param formFields every field to submit, including credentials and captcha
     * @throws Exception if the HTTP request, the parsing or the file write fails
     */
    public void getData(HashMap<String, String> formFields) throws Exception {
        Connection conn = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
                .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0")
                // Not necessary for this site, but these extra headers won't hurt.
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "es-ES,es;q=0.8,en-US;q=0.5,en;q=0.3")
                .header("Host", "academics.ddn.upes.ac.in")
                .header("Referer", "https://academics.ddn.upes.ac.in/upes/index.php")
                .cookies(cookies)
                .timeout(0) // 0 disables the timeout
                .method(Connection.Method.POST);
        conn.data(formFields);

        Connection.Response response = conn.execute();
        // Merge instead of replace, so cookies the server does not repeat in
        // this response are still available for later requests.
        cookies.putAll(response.cookies());
        System.out.println(response.cookies());

        Document doc = response.parse();
        String html = doc.toString();
        // try-with-resources closes the writer chain even if println fails.
        try (PrintWriter pw = new PrintWriter(new FileWriter("response.html"))) {
            pw.println(html);
        }
        System.out.println(html);
    }

    /**
     * Runs the whole flow: download the captcha, read its text from standard
     * input, add the credentials and submit the form.
     *
     * @throws Exception if any step fails or no captcha text can be read
     */
    private void run() throws Exception {
        HashMap<String, String> formFields = downloadCaptcha();

        // System.in is deliberately left open; closing the reader would close it.
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String captcha = br.readLine();
        if (captcha == null) {
            // readLine() returns null at end of input.
            throw new IOException("No captcha text was entered");
        }

        formFields.put("username", username);
        formFields.put("passwd", password);
        formFields.put("txtCaptcha", captcha.trim());
        getData(formFields);
    }

    public static void main(String[] args) throws Exception {
        SO_28619161 main = new SO_28619161();
        main.run();
    }
}
我正在尝试通过从我的学术 URL 获取验证码来开发一个基于 java-GUI 的小型应用程序,询问用户他的用户名、密码和验证码,在登录后显示内容。但是我被困在登录页面本身,因为在提交表单后来自网络的响应是
alert('Please enter correct code.'); window.history.go(-1);
代码
public Map cookies;
public void downloadCaptcha()throws Exception {
Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
.timeout(300000)
.userAgent("Mozilla/5.0")
.method(Connection.Method.GET).execute();
cookies = response.cookies();
Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
.cookies(cookies)
.ignoreContentType(true)
.method(Connection.Method.GET).timeout(30000).execute();
FileOutputStream out = (new FileOutputStream(new java.io.File("F:\abc.jpg")));
out.write(resultImageResponse.bodyAsBytes());
out.close();
System.out.println("Captcha Fetched");
}
下载验证码后
/**
 * Posts the login form (credentials plus the captcha text typed by the user)
 * with the stored cookies, prints the cookies returned by the server and
 * saves the parsed response page to disk.
 *
 * @param captacha the captcha text read from the downloaded image
 * @throws Exception if the HTTP request, the parsing or the file write fails
 */
public static void getData(String captacha) throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
            .userAgent("Mozilla/5.0")
            .cookies(cookies)
            .data("username", username)
            .data("passwd", password)
            .data("txtCaptcha", captacha)
            .data("submit", "Login")
            .data("option", "login")
            .data("op2", "login")
            .data("lang", "english")
            .data("return", "https://academics.ddn.upes.ac.in/upes/index.php?option=com_content&task=view&id=53&Itemid=6420")
            .data("message", "0")
            // NOTE(review): this field name is hard-coded and looks like a
            // generated token; confirm that it is stable between page loads.
            .data("j1643f05a0c7fc7910424fb3fc4fbbb6f", "1")
            .timeout(0) // 0 disables the timeout
            .method(Connection.Method.POST)
            .execute();

    cookies = response.cookies();
    System.out.println(response.cookies());

    Document doc = response.parse();

    // The backslash has to be escaped; unescaped, the backslash + 'r' pair is
    // read as a carriage return and ends up inside the file name.
    // try-with-resources closes the writer chain even if println fails.
    try (PrintWriter pw = new PrintWriter(new FileWriter("F:\\response.html"))) {
        pw.println(doc.toString());
    }
}
response.cookies()
给出输出 {PHPSESSID=ai0r017bmb55gv0m4ikeu6jfc6, 61c78a27855d239ae8682ff6befaa989=5ae2e5baf548bc293c943d3416e7d400}
网站是https://academics.ddn.upes.ac.in/upes/index.php
请指出我的错误
您需要进行两处更改才能使代码正常工作:
1 - 您需要获取第二次调用(图片下载)返回的 cookie,并将其添加到之前的 cookie 中。
2 - 如果您看到字段 "j1643f05a0c7fc7910424fb3fc4fbbb6f" 非常可疑,实际上该字段是可变的,您需要选择表单中的隐藏输入并使用它。
3(额外)- 本例中并不需要,但如果您不发送某些请求头(例如 Accept、Accept-Encoding、Accept-Language 等),有些服务器会拒绝请求。
当我使用您的代码进行这些更改时,我得到:
<script>alert('Incorrect username or password. Please try again.'); window.history.go(-1);</script>
当然我没有user/pass,我想你会得到想要的页面。
具有必要更改的代码是:
/**
 * Console demo of a captcha-protected login done with jsoup.
 *
 * <p>Flow: load the login page (cookies + form inputs), download the captcha
 * image with the same cookies, read the captcha text from standard input and
 * post the completed form.
 */
public class SO_28619161 {

    /** Cookies collected from every response so far; sent with each request. */
    public Map<String, String> cookies;
    private String username = "u";
    private String password = "p";

    /**
     * Loads the login page, collects the values of its form inputs and saves
     * the captcha image to {@code abc.jpg} in the working directory.
     *
     * @return the named form inputs of the login page mapped to their values
     * @throws Exception if an HTTP request or the file write fails
     */
    public HashMap<String,String> downloadCaptcha() throws Exception {
        Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
                .timeout(300000)
                .userAgent("Mozilla/5.0")
                .method(Connection.Method.GET)
                .execute();
        cookies = response.cookies();

        // Collect the form's inputs, including the hidden ones, so that any
        // field whose name changes between page loads is posted back as-is.
        Document doc = response.parse();
        Elements fields = doc.select("form input");
        HashMap<String,String> formFields = new HashMap<String, String>();
        for (Element field : fields) {
            String name = field.attr("name");
            // attr() yields "" for a missing attribute; an unnamed input must
            // not be posted under an empty key.
            if (!name.isEmpty()) {
                formFields.put(name, field.attr("value"));
            }
        }

        Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
                .cookies(cookies)
                .ignoreContentType(true) // the body is an image, not HTML
                .method(Connection.Method.GET)
                .timeout(30000)
                .execute();
        // The image response may set cookies of its own; keep those as well.
        cookies.putAll(resultImageResponse.cookies());

        // try-with-resources closes the stream even when the write fails.
        try (FileOutputStream out = new FileOutputStream(new java.io.File("abc.jpg"))) {
            out.write(resultImageResponse.bodyAsBytes());
        }
        System.out.println("Captcha Fetched");
        return formFields;
    }

    /**
     * Posts the given form fields to the login URL, prints the cookies and the
     * page returned by the server, and saves the page to {@code response.html}.
     *
     * @param formFields every field to submit, including credentials and captcha
     * @throws Exception if the HTTP request, the parsing or the file write fails
     */
    public void getData(HashMap<String, String> formFields) throws Exception {
        Connection conn = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
                .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0")
                // Not necessary for this site, but these extra headers won't hurt.
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "es-ES,es;q=0.8,en-US;q=0.5,en;q=0.3")
                .header("Host", "academics.ddn.upes.ac.in")
                .header("Referer", "https://academics.ddn.upes.ac.in/upes/index.php")
                .cookies(cookies)
                .timeout(0) // 0 disables the timeout
                .method(Connection.Method.POST);
        conn.data(formFields);

        Connection.Response response = conn.execute();
        // Merge instead of replace, so cookies the server does not repeat in
        // this response are still available for later requests.
        cookies.putAll(response.cookies());
        System.out.println(response.cookies());

        Document doc = response.parse();
        String html = doc.toString();
        // try-with-resources closes the writer chain even if println fails.
        try (PrintWriter pw = new PrintWriter(new FileWriter("response.html"))) {
            pw.println(html);
        }
        System.out.println(html);
    }

    /**
     * Runs the whole flow: download the captcha, read its text from standard
     * input, add the credentials and submit the form.
     *
     * @throws Exception if any step fails or no captcha text can be read
     */
    private void run() throws Exception {
        HashMap<String, String> formFields = downloadCaptcha();

        // System.in is deliberately left open; closing the reader would close it.
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String captcha = br.readLine();
        if (captcha == null) {
            // readLine() returns null at end of input.
            throw new IOException("No captcha text was entered");
        }

        formFields.put("username", username);
        formFields.put("passwd", password);
        formFields.put("txtCaptcha", captcha.trim());
        getData(formFields);
    }

    public static void main(String[] args) throws Exception {
        SO_28619161 main = new SO_28619161();
        main.run();
    }
}