Java 中如何使用 Jersey 客户端获取网页的页面源代码

How to get the page source of a webpage using jersey client in java

在我用 Java 编写的 Web 应用程序中,我想通过传入目标页面的 URL,使用 Jersey 客户端获取该网页的页面源代码。我在网上搜索了很久,想找一些有帮助的好例子,但一个也没找到。谁能帮我解决这个问题?

如果您的目的只是下载 html 代码(而不是渲染它),您可以使用任何普通的 http 客户端(甚至 java URLConnection 类)

下面是我在我的一个工具中使用的现成示例。它使用 apache http core 4.1.4 和 apache http client 4.1.4。

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * Helper for downloading the body of an HTTP resource as text using
 * Apache HttpClient 4.1.x.
 */
public class HttpUtil {

    /**
     * Issues an HTTP GET for {@code sUrl} and returns the response body as text.
     *
     * <p>The response status line is printed to standard output. Every line of
     * the body is terminated with {@code "\n"} in the returned string, whatever
     * line separator the server used.
     *
     * @param sUrl the URL to fetch
     * @return the response body, or an empty string when the response carries no entity
     * @throws ClientProtocolException if the request fails with an HTTP protocol error
     * @throws IOException if the connection fails or the body cannot be read
     */
    public static String getFile(String sUrl) throws ClientProtocolException, IOException {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            // Prepare and execute the request
            HttpGet httpget = new HttpGet(sUrl);
            HttpResponse response = httpclient.execute(httpget);

            // Examine the response status
            System.out.println(response.getStatusLine());

            // If the response does not enclose an entity there is nothing to read
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return "";
            }
            return readBody(entity, httpget);
        } finally {
            // The client is private to this call, so always shut its connection
            // manager down -- including when the request or the read fails and
            // when the response has no entity.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads the entity content line by line into a string, decoding it with the
     * charset announced in the Content-Type header.
     */
    private static String readBody(HttpEntity entity, HttpGet httpget) throws IOException {
        // Decode with the charset the server declared rather than the platform
        // default; fall back to ISO-8859-1, the default HttpClient 4.1 itself uses.
        String charset = EntityUtils.getContentCharSet(entity);
        if (charset == null) {
            charset = "ISO-8859-1";
        }

        StringBuilder body = new StringBuilder();
        InputStream instream = entity.getContent();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(instream, charset));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                body.append(line).append("\n");
            }
        } catch (RuntimeException ex) {
            // In case of an unexpected exception abort the HTTP request in order
            // to shut down the underlying connection.
            httpget.abort();
            throw ex;
        } finally {
            // Closing the input stream will trigger connection release
            instream.close();
        }
        return body.toString();
    }

}

Jersey 主要用于 Web 服务,但一般情况下也可以用它获取 HTML 源代码。下面这 4 种客户端(其中最后一种是 JAX-RS/Jersey 客户端)都会打印出页面的源代码:

  1. URLConnection 客户端

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.net.URLConnection;

    /** Prints the body of a URL to standard output using a plain {@link URLConnection}. */
    public class URLConnectionClient {

        public static void main(String[] args) throws IOException {
            URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
            URLConnection connection = (URLConnection) restURL.openConnection();
            // NOTE(review): doOutput is only needed when a request body is written;
            // confirm it is intended for this read-only request.
            connection.setDoOutput(true);
            connection.connect();

            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            }
        }
    }

  2. HttpConnection 客户端

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    /** Prints the body of a URL to standard output using {@link HttpURLConnection}. */
    public class HttpConnectionClient {

        public static void main(String[] args) throws IOException {
            URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
            HttpURLConnection connection = (HttpURLConnection) restURL.openConnection();
            connection.setRequestMethod("GET");
            connection.setReadTimeout(10000); // milliseconds
            connection.connect();

            // The reader was previously never closed; try-with-resources closes it
            // (and the underlying stream) on both normal and exceptional exit.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            }
        }
    }

  3. URL 流客户端

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.URL;

    /** Prints the body of a URL to standard output using {@link URL#openStream()}. */
    public class URLOpenClient {

        public static void main(String[] args) throws IOException {
            URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");

            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(restURL.openStream()))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            }
        }
    }

  4. Jersey 客户端

    进口javax.ws.rs.client.Client; 导入 javax.ws.rs.client.Client生成器; 导入 javax.ws.rs.client.Invocation.Builder; 导入 javax.ws.rs.client.WebTarget; 导入 javax.ws.rs.core.Response; public class URLJerseyClient { public static void main(String[] args) { 客户端 cl = ClientBuilder.newClient(); WebTarget 目标 = cl.target("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext"); target.path("resource"); 生成器 requestBuilder = target.request(); 响应response = requestBuilder.get(); System.out.println(response.getStatus()); System.out.println(response.readEntity(String.class)); } }

对于这个你需要一个依赖项:

<!-- Minimal Maven POM for the client examples: only the Jersey client is required. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of the example project itself -->
    <groupId>com.vvirlan</groupId>
    <artifactId>cert</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>Client</name>

    <dependencies>
        <!-- Jersey client dependency used by the Jersey client example -->
        <dependency>
            <groupId>org.glassfish.jersey.core</groupId>
            <artifactId>jersey-client</artifactId>
            <version>2.21</version>
        </dependency>
    </dependencies>
</project>

4 JAX-RS clients