HttpURLConnection 字符编码
HttpURLConnection Character Encoding
我正在制作一个简单的程序。
使用字符集 "utf-8" 有一个 url。
我想从此页面获取全部资源,
但是存在字符编码问题。
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
class WholeTest {
HttpURLConnection conn;
public void openUrl() throws Exception {
URL pageUrl = new URL("http://naver.com");
conn = (HttpURLConnection)pageUrl.openConnection();
conn.setRequestMethod("GET");
conn.setUseCaches(false);
conn.setRequestProperty("Host", "naver.com");
conn.setRequestProperty("Connection", "keep-alive");
conn.setRequestProperty("Cache-Control", "max-age=0");
conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 Safari/537.36");
conn.setRequestProperty("Accept-Encoding", "gzip, deflate, sdch");
conn.setRequestProperty("Accept-Language", "ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4");
BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(),"utf-8"));
String inputLine;
StringBuffer response = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
response.append(inputLine);
}
in.close();
System.out.println("---result---");
System.out.println(response.toString());
}
}
public class Whole {
public static void main(String args[]) throws Exception {
System.out.print("Test");
WholeTest w = new WholeTest();
w.openUrl();
}
}
结果是:
????????????????????????????????????????
我无法查看源代码。我在读取inputStream时使用了字符集"utf-8",我做错了什么?
我用的都是utf-8, UTF-8, euc-kr, EUC-KR
...
同样的结果。
正如我怀疑的那样,评论或删除下面的行。它会像一个魅力。
conn.setRequestProperty("Accept-Encoding", "gzip, deflate, sdch");
当 return 实际上是 text/html
时,您期待 gzip 二进制文件
我正在制作一个简单的程序。 使用字符集 "utf-8" 有一个 url。 我想从此页面获取全部资源, 但是存在字符编码问题。
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
class WholeTest {
HttpURLConnection conn;
public void openUrl() throws Exception {
URL pageUrl = new URL("http://naver.com");
conn = (HttpURLConnection)pageUrl.openConnection();
conn.setRequestMethod("GET");
conn.setUseCaches(false);
conn.setRequestProperty("Host", "naver.com");
conn.setRequestProperty("Connection", "keep-alive");
conn.setRequestProperty("Cache-Control", "max-age=0");
conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 Safari/537.36");
conn.setRequestProperty("Accept-Encoding", "gzip, deflate, sdch");
conn.setRequestProperty("Accept-Language", "ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4");
BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(),"utf-8"));
String inputLine;
StringBuffer response = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
response.append(inputLine);
}
in.close();
System.out.println("---result---");
System.out.println(response.toString());
}
}
public class Whole {
public static void main(String args[]) throws Exception {
System.out.print("Test");
WholeTest w = new WholeTest();
w.openUrl();
}
}
结果是: ???????????????????????????????????????? 我无法查看源代码。我在读取inputStream时使用了字符集"utf-8",我做错了什么?
我用的都是utf-8, UTF-8, euc-kr, EUC-KR ... 同样的结果。
正如我怀疑的那样,评论或删除下面的行。它会像一个魅力。
conn.setRequestProperty("Accept-Encoding", "gzip, deflate, sdch");
当 return 实际上是 text/html