如何在 Java 中使用 Jersey 客户端获取网页的页面源代码
How to get the page source of a webpage using jersey client in java
在我使用 java 的 Web 应用程序中,我试图通过传递所需页面的 URL 来获取使用 jersey 客户端的网页的页面源。我一直在网上搜索以找到一些对我有帮助的好例子,但找不到任何例子。
谁能帮我解决这个问题。
如果您的目的只是下载 html 代码(而不是渲染它),您可以使用任何普通的 http 客户端(甚至 java URLConnection 类)
下面是我在我的一个工具中使用的现成示例。它使用 apache http core 4.1.4 和 apache http client 4.1.4。
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
/**
 * Minimal helper that downloads the body of an HTTP resource as text using
 * Apache HttpClient 4.1.x.
 */
public class HttpUtil {

    /**
     * Issues an HTTP GET for the given URL and returns the response body.
     * The response status line is printed to standard output.
     *
     * @param sUrl the URL to fetch
     * @return the response body with every line terminated by {@code "\n"};
     *         an empty string when the response carries no entity
     * @throws ClientProtocolException if the request fails with an HTTP protocol error
     * @throws IOException if the connection fails or the body cannot be read
     */
    public static String getFile(String sUrl) throws ClientProtocolException, IOException {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            // Prepare and execute the request
            HttpGet httpget = new HttpGet(sUrl);
            HttpResponse response = httpclient.execute(httpget);

            // Examine the response status
            System.out.println(response.getStatusLine());

            StringBuilder b = new StringBuilder();
            // If the response does not enclose an entity there is nothing to read
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                InputStream instream = entity.getContent();
                try {
                    // NOTE(review): the reader uses the platform default charset, not
                    // the charset declared by the response - confirm this is acceptable.
                    BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
                    String s;
                    while ((s = reader.readLine()) != null) {
                        b.append(s).append("\n");
                    }
                } catch (RuntimeException ex) {
                    // On an unexpected exception abort the request so the underlying
                    // connection is shut down instead of being reused.
                    httpget.abort();
                    throw ex;
                } finally {
                    // Closing the input stream triggers connection release
                    instream.close();
                }
            }
            return b.toString();
        } finally {
            // Always shut down the connection manager, including when execute()
            // throws or the response has no entity, so no resources are leaked.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
Jersey 用于网络服务。但一般情况下,您可以获得 HTML 来源。
所有这 4 种 jax-rs 客户端都会向您打印代码:
URLConnection 客户端
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
/**
 * Fetches a resource through a plain {@link URLConnection} and prints the
 * response body to standard output line by line.
 */
public class URLConnectionClient {

    /**
     * Opens a connection to the hard-coded URL and echoes the response.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
        URLConnection connection = (URLConnection) restURL.openConnection();
        connection.setDoOutput(true);
        connection.connect();
        InputStreamReader ins = new InputStreamReader(connection.getInputStream());
        BufferedReader in = new BufferedReader(ins);
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                System.out.println(inputLine);
            }
        } finally {
            // Close the reader even if reading fails part-way through
            in.close();
        }
    }
}
HttpConnection 客户端
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Fetches a resource with {@link HttpURLConnection} using an explicit GET and
 * a read timeout, printing the response body to standard output.
 */
public class HttpConnectionClient {

    /**
     * Sends a GET request to the hard-coded URL and echoes the response.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
        HttpURLConnection connection = (HttpURLConnection) restURL.openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setReadTimeout(10000);
            connection.connect();
            InputStreamReader ins = new InputStreamReader(connection.getInputStream());
            BufferedReader in = new BufferedReader(ins);
            try {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            } finally {
                // The original never closed the reader
                in.close();
            }
        } finally {
            connection.disconnect();
        }
    }
}
URL 流客户端
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
/**
 * Fetches a resource via {@link URL#openStream()} and prints the response
 * body to standard output line by line.
 */
public class URLOpenClient {

    /**
     * Opens a stream on the hard-coded URL and echoes its content.
     *
     * @param args unused
     * @throws IOException if the stream cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
        InputStreamReader ins = new InputStreamReader(restURL.openStream());
        BufferedReader in = new BufferedReader(ins);
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                System.out.println(inputLine);
            }
        } finally {
            // Close the reader even if reading fails part-way through
            in.close();
        }
    }
}
Jersey 客户端
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Invocation.Builder;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Response;
/**
 * Fetches a resource with the JAX-RS 2.x client API (Jersey implementation)
 * and prints the HTTP status code and the response body to standard output.
 */
public class URLJerseyClient {

    /**
     * Sends a GET request to the hard-coded URL and prints status and body.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Client cl = ClientBuilder.newClient();
        try {
            WebTarget target = cl.target("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
            // The original called target.path("resource") and discarded the result.
            // WebTarget.path() returns a new target rather than modifying this one,
            // so that call had no effect on the request and has been removed.
            Builder requestBuilder = target.request();
            Response response = requestBuilder.get();
            System.out.println(response.getStatus());
            System.out.println(response.readEntity(String.class));
        } finally {
            // Release the resources held by the client
            cl.close();
        }
    }
}
对于这个你需要一个依赖项:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Minimal Maven POM for the client examples -->
<modelVersion>4.0.0</modelVersion>
<!-- Coordinates of this example project -->
<groupId>com.vvirlan</groupId>
<artifactId>cert</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>Client</name>
<dependencies>
<!-- Jersey client library, the only dependency declared here -->
<dependency>
<groupId>org.glassfish.jersey.core</groupId>
<artifactId>jersey-client</artifactId>
<version>2.21</version>
</dependency>
</dependencies>
</project>
在我使用 java 的 Web 应用程序中,我试图通过传递所需页面的 URL 来获取使用 jersey 客户端的网页的页面源。我一直在网上搜索以找到一些对我有帮助的好例子,但找不到任何例子。 谁能帮我解决这个问题。
如果您的目的只是下载 html 代码(而不是渲染它),您可以使用任何普通的 http 客户端(甚至 java URLConnection 类)
下面是我在我的一个工具中使用的现成示例。它使用 apache http core 4.1.4 和 apache http client 4.1.4。
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
/**
 * Minimal helper that downloads the body of an HTTP resource as text using
 * Apache HttpClient 4.1.x.
 */
public class HttpUtil {

    /**
     * Issues an HTTP GET for the given URL and returns the response body.
     * The response status line is printed to standard output.
     *
     * @param sUrl the URL to fetch
     * @return the response body with every line terminated by {@code "\n"};
     *         an empty string when the response carries no entity
     * @throws ClientProtocolException if the request fails with an HTTP protocol error
     * @throws IOException if the connection fails or the body cannot be read
     */
    public static String getFile(String sUrl) throws ClientProtocolException, IOException {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            // Prepare and execute the request
            HttpGet httpget = new HttpGet(sUrl);
            HttpResponse response = httpclient.execute(httpget);

            // Examine the response status
            System.out.println(response.getStatusLine());

            StringBuilder b = new StringBuilder();
            // If the response does not enclose an entity there is nothing to read
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                InputStream instream = entity.getContent();
                try {
                    // NOTE(review): the reader uses the platform default charset, not
                    // the charset declared by the response - confirm this is acceptable.
                    BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
                    String s;
                    while ((s = reader.readLine()) != null) {
                        b.append(s).append("\n");
                    }
                } catch (RuntimeException ex) {
                    // On an unexpected exception abort the request so the underlying
                    // connection is shut down instead of being reused.
                    httpget.abort();
                    throw ex;
                } finally {
                    // Closing the input stream triggers connection release
                    instream.close();
                }
            }
            return b.toString();
        } finally {
            // Always shut down the connection manager, including when execute()
            // throws or the response has no entity, so no resources are leaked.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
Jersey 用于网络服务。但一般情况下,您可以获得 HTML 来源。 所有这 4 种 jax-rs 客户端都会向您打印代码:
URLConnection 客户端
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

/**
 * Fetches a resource through a plain {@link URLConnection} and prints the
 * response body to standard output line by line.
 */
public class URLConnectionClient {

    /**
     * Opens a connection to the hard-coded URL and echoes the response.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
        URLConnection connection = (URLConnection) restURL.openConnection();
        connection.setDoOutput(true);
        connection.connect();
        InputStreamReader ins = new InputStreamReader(connection.getInputStream());
        BufferedReader in = new BufferedReader(ins);
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                System.out.println(inputLine);
            }
        } finally {
            // Close the reader even if reading fails part-way through
            in.close();
        }
    }
}
HttpConnection 客户端
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Fetches a resource with {@link HttpURLConnection} using an explicit GET and
 * a read timeout, printing the response body to standard output.
 */
public class HttpConnectionClient {

    /**
     * Sends a GET request to the hard-coded URL and echoes the response.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
        HttpURLConnection connection = (HttpURLConnection) restURL.openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setReadTimeout(10000);
            connection.connect();
            InputStreamReader ins = new InputStreamReader(connection.getInputStream());
            BufferedReader in = new BufferedReader(ins);
            try {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            } finally {
                // The original never closed the reader
                in.close();
            }
        } finally {
            connection.disconnect();
        }
    }
}
URL 流客户端
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;

/**
 * Fetches a resource via {@link URL#openStream()} and prints the response
 * body to standard output line by line.
 */
public class URLOpenClient {

    /**
     * Opens a stream on the hard-coded URL and echoes its content.
     *
     * @param args unused
     * @throws IOException if the stream cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL restURL = new URL("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext");
        InputStreamReader ins = new InputStreamReader(restURL.openStream());
        BufferedReader in = new BufferedReader(ins);
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                System.out.println(inputLine);
            }
        } finally {
            // Close the reader even if reading fails part-way through
            in.close();
        }
    }
}
Jersey 客户端
进口javax.ws.rs.client.Client; 导入 javax.ws.rs.client.Client生成器; 导入 javax.ws.rs.client.Invocation.Builder; 导入 javax.ws.rs.client.WebTarget; 导入 javax.ws.rs.core.Response; public class URLJerseyClient { public static void main(String[] args) { 客户端 cl = ClientBuilder.newClient(); WebTarget 目标 = cl.target("http://localhost:8080/simple-service-webapp/resources/myresource/usernamepwdcontext"); target.path("resource"); 生成器 requestBuilder = target.request(); 响应response = requestBuilder.get(); System.out.println(response.getStatus()); System.out.println(response.readEntity(String.class)); } }
对于这个你需要一个依赖项:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Minimal Maven POM for the client examples -->
<modelVersion>4.0.0</modelVersion>
<!-- Coordinates of this example project -->
<groupId>com.vvirlan</groupId>
<artifactId>cert</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>Client</name>
<dependencies>
<!-- Jersey client library, the only dependency declared here -->
<dependency>
<groupId>org.glassfish.jersey.core</groupId>
<artifactId>jersey-client</artifactId>
<version>2.21</version>
</dependency>
</dependencies>
</project>