使用 jsoup 获取验证码图像

Getting Captcha image using jsoup

我正在尝试开发一个基于 Java GUI 的小型应用程序:它从我学校的学术网站(academics)获取验证码,向用户询问用户名、密码和验证码,并在登录后显示内容。但是我卡在了登录这一步,因为提交表单后服务器返回的响应是

alert('Please enter correct code.'); window.history.go(-1);

代码

public Map cookies;
public void downloadCaptcha()throws Exception {
Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
.timeout(300000)
.userAgent("Mozilla/5.0")
.method(Connection.Method.GET).execute();
cookies = response.cookies();
Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
.cookies(cookies)
.ignoreContentType(true)
.method(Connection.Method.GET).timeout(30000).execute();
FileOutputStream out = (new FileOutputStream(new java.io.File("F:\abc.jpg")));
out.write(resultImageResponse.bodyAsBytes()); 
out.close();
System.out.println("Captcha Fetched");

}

下载验证码后

/**
 * Posts the login form together with the captcha text typed by the user,
 * prints the cookies of the response and saves the returned page to
 * {@code F:\response.html}.
 *
 * <p>{@code cookies}, {@code username} and {@code password} are not declared
 * in this method; they must be provided by the enclosing class.
 *
 * @param captacha the captcha text read from the downloaded image
 * @throws Exception if the request fails or the response cannot be written
 */
public static void getData(String captacha) throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
            .userAgent("Mozilla/5.0")
            .cookies(cookies)
            .data("username", username)
            .data("passwd", password)
            .data("txtCaptcha", captacha)
            .data("submit", "Login")
            .data("option", "login")
            .data("op2", "login")
            .data("lang", "english")
            .data("return", "https://academics.ddn.upes.ac.in/upes/index.php?option=com_content&task=view&id=53&Itemid=6420")
            .data("message", "0")
            // NOTE(review): hard-coded hidden form field; confirm that the page
            // always uses this exact name before relying on it.
            .data("j1643f05a0c7fc7910424fb3fc4fbbb6f", "1")
            .timeout(0)
            .method(Connection.Method.POST)
            .execute();
    cookies = response.cookies();
    System.out.println(response.cookies());

    Document doc = response.parse();
    // The backslash has to be escaped: an unescaped "\r" is a carriage return,
    // which silently produces a wrong file name.
    // try-with-resources closes both writers even if println() fails.
    try (FileWriter fr = new FileWriter("F:\\response.html");
         PrintWriter pw = new PrintWriter(fr)) {
        pw.println(doc.toString());
    }
}

response.cookies() 的输出为 {PHPSESSID=ai0r017bmb55gv0m4ikeu6jfc6, 61c78a27855d239ae8682ff6befaa989=5ae2e5baf548bc293c943d3416e7d400}

网站是https://academics.ddn.upes.ac.in/upes/index.php

请指出我的错误

您需要进行两处更改才能使代码正常工作:

1 - 您需要获取第二次调用(图片下载)返回的 cookie,并将其添加到之前的 cookie 中。

2 - 字段 "j1643f05a0c7fc7910424fb3fc4fbbb6f" 看起来非常可疑:实际上该字段是可变的,不能硬编码。您需要从表单中选取隐藏的 input 并使用它们的值。

3(额外) - 本例中并非如此,但如果您不发送某些请求头(headers),例如 Accept、Accept-Encoding、Accept-Language ...,有些服务器会拒绝请求。

当我使用您的代码进行这些更改时,我得到:

<script>alert('Incorrect username or password. Please try again.'); window.history.go(-1);</script> 

当然,我没有有效的用户名/密码(user/pass);使用您自己的凭据,我想您会得到想要的页面。

具有必要更改的代码是:

/**
 * Console demo that logs in to the UPES academics portal with jsoup.
 *
 * <p>Flow: load the landing page (session cookies + form inputs), download the
 * captcha image with the same session, ask the user for the captcha text on
 * standard input, then post the form and save the returned page.
 */
public class SO_28619161 {

    /** Cookies accumulated over every request made so far. */
    public Map<String, String> cookies;
    private String username = "u";
    private String password = "p";

    /**
     * Loads the landing page, collects the inputs of its form(s) and downloads
     * the captcha image to {@code abc.jpg} in the working directory.
     *
     * @return the name/value pairs of the form inputs found on the landing page
     * @throws Exception if a request fails or the image cannot be written
     */
    public HashMap<String,String> downloadCaptcha() throws Exception {
        Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
                .timeout(300000)
                .userAgent("Mozilla/5.0")
                .method(Connection.Method.GET).execute();

        // Copy into our own map so later putAll() calls never depend on the
        // mutability of the map handed out by the response.
        cookies = new HashMap<String, String>(response.cookies());

        // Load the form's inputs; this picks up the hidden fields as well, so
        // none of them has to be hard-coded.
        Document doc = response.parse();
        Elements fields = doc.select("form input");
        HashMap<String,String> formFields = new HashMap<String, String>();
        for (Element field : fields) {
            String name = field.attr("name");
            // Inputs without a name would otherwise be posted under an empty key.
            if (!name.isEmpty()) {
                formFields.put(name, field.attr("value"));
            }
        }

        Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
                .cookies(cookies)
                .ignoreContentType(true) // the response is an image, not HTML
                .method(Connection.Method.GET).timeout(30000).execute();

        // The cookies set by the image request are needed for the login too.
        cookies.putAll(resultImageResponse.cookies());

        // try-with-resources closes the stream even if write() throws.
        try (FileOutputStream out = new FileOutputStream(new java.io.File("abc.jpg"))) {
            out.write(resultImageResponse.bodyAsBytes());
        }

        System.out.println("Captcha Fetched");

        return formFields;
    }

    /**
     * Posts the given form fields to the login page, prints the cookies and the
     * HTML of the response, and saves the HTML to {@code response.html}.
     *
     * @param formFields every field to post, including user name, password and captcha
     * @throws Exception if the request fails or the response cannot be written
     */
    public void getData(HashMap<String, String> formFields) throws Exception {
        Connection conn = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
                .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0")
                // not necessary for this site, but these extra headers won't hurt
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "es-ES,es;q=0.8,en-US;q=0.5,en;q=0.3")
                .header("Host", "academics.ddn.upes.ac.in")
                .header("Referer", "https://academics.ddn.upes.ac.in/upes/index.php")
                .cookies(cookies)
                .timeout(0)
                .method(Connection.Method.POST);

        // Send the fields.
        conn.data(formFields);

        Connection.Response response = conn.execute();
        // Merge instead of replace: cookies the server did not send again
        // (e.g. the session id) must survive for later requests.
        cookies.putAll(response.cookies());
        System.out.println(response.cookies());

        Document doc = response.parse();
        String html = doc.toString();
        // try-with-resources closes both writers even if println() fails.
        try (FileWriter fr = new FileWriter("response.html");
             PrintWriter pw = new PrintWriter(fr)) {
            pw.println(html);
        }
        System.out.println(html);
    }

    /**
     * Runs the whole flow: fetch the captcha, read its text from standard
     * input, fill in the credentials and submit the form.
     *
     * @throws Exception if any request or file operation fails, or if standard
     *         input ends before a captcha line was read
     */
    private void run() throws Exception {
        HashMap<String, String> formFields = downloadCaptcha();

        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String captcha = br.readLine();
        if (captcha == null) {
            // readLine() returns null at end of input; do not put a null value into the form.
            throw new IOException("No captcha was entered (end of input)");
        }

        // Set user/pass and captcha on top of the fields read from the page.
        formFields.put("username", username);
        formFields.put("passwd", password);
        formFields.put("txtCaptcha", captcha);

        getData(formFields);
    }

    public static void main(String[] args) throws Exception {
        SO_28619161 main = new SO_28619161();
        main.run();
    }

}