使用 UTF-8 标识符
Using UTF-8 identifier
我从 HTTP 请求中得到一个字符串流。流看起来像:
<?xml version="1.0" encoding="utf-8"?>
前三个标记表示字符串编码为 UTF-8。
我正在用字符串制作文件。阅读它们时出现错误:
用这个方法我用那个字符串制作文件:
private void writeToFile(String data, String fileName) {
try {
String UTF8 = "UTF-8";
int BUFFER_SIZE = 8192;
String xmlCut = data.substring(3);
File sdCard = Environment.getExternalStorageDirectory();
File dir = new File (sdCard.getAbsolutePath()+"/example/Test");
dir.mkdirs();
File file = new File(dir,fileName);
FileOutputStream f = new FileOutputStream(file);
FileOutputStream fileOutputStream = openFileOutput(fileName, Context.MODE_PRIVATE);
BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(fileOutputStream,UTF8),BUFFER_SIZE);
bufferedWriter.write(String.valueOf(data.getBytes("UTF-8")));
f.write(data.getBytes("UTF-8"));
f.close();
bufferedWriter.close();
} catch (IOException e) {
Log.e("writeToFile: ", "Datei-Erstellung fehlgeschlagen: " + e.toString());
}
}
如您所见,我添加了 substring 方法来删除前三个标记,因为这会导致崩溃。问题是文件是用 ASCI 编码的。
读取文件的方法:
private String readFromFile(String fileName) {
String ret = "";
String UTF8 = "UTF-8";
int BUFFER_SIZE = 8192;
try {
InputStream inputStream = openFileInput(fileName);
if (inputStream != null) {
BufferedReader bufferedReader1 = new BufferedReader(new InputStreamReader(inputStream,UTF8),BUFFER_SIZE);
String receiveString = "";
StringBuilder stringBuilder = new StringBuilder();
while ((receiveString = bufferedReader1.readLine()) != null) {
stringBuilder.append(receiveString);
}
inputStream.close();
ret = stringBuilder.toString();
}
} catch (FileNotFoundException e) {
Log.e("readFromFile: ", "Datei nicht gefunden: " + e.toString());
} catch (IOException e) {
Log.e("readFromFile: ", "Kann Datei nicht lesen: " + e.toString());
}
return ret;
}
如果我不剪切 UTF-8 令牌,那么我会从堆栈跟踪中得到这个错误:
Caused by: java.lang.NullPointerException: Attempt to invoke interface method 'org.w3c.dom.NodeList org.w3c.dom.Document.getElementsByTagName(java.lang.String)' on a null object reference
at de.example.app.ListViewActivity.setListProjectData(ListViewActivity.java:226)
在这里:
public void setListProjectData(String filename) {
XMLParser parser = new XMLParser();
String xmlData = readFromFile(filename);
String xmlCut = xmlData.substring(3);
Document doc = parser.getDomElement(filename);
NodeList nodeListProject = doc.getElementsByTagName(KEY_PROJECT);
for (int i = 0; i < nodeListProject.getLength(); i++) {
HashMap<String, String> map = new HashMap<String, String>();
Element e = (Element) nodeListProject.item(i);
map.put(KEY_UUID, parser.getValue(e, KEY_UUID));
map.put(KEY_NAME, parser.getValue(e, KEY_NAME));
map.put(KEY_JOBTITLE, parser.getValue(e, KEY_JOBTITLE));
map.put(KEY_JOBINFO, parser.getValue(e, KEY_JOBINFO));
map.put(KEY_PROJECTIMAGE, parser.getValue(e, KEY_PROJECTIMAGE));
projectItems.add(map);
}
}
我通过这里从 HTTP 获取数据:
public String getXMLFromUrl(String url) {
String xml = null;
if (cd.isConnectingToInternet()) {
try {
//defaultHttpClient
DefaultHttpClient httpClient = new DefaultHttpClient();
HttpPost httpPost = new HttpPost(url);
HttpResponse httpResponse = httpClient.execute(httpPost);
HttpEntity httpEntity = httpResponse.getEntity();
/*
final InputStream in = httpEntity.getContent();
Reader reader = new InputStreamReader(in,"UTF-8");
InputSource is = new InputSource(reader);
is.setEncoding("UTF-8");
*/
xml = EntityUtils.toString(httpEntity);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
} else {
return null;
}
return xml;
那么,我如何将它们编码为 UTF-8?我做对了吗?
您的问题不在于您发布的代码,而在于从 HTTP 请求获取数据的代码。
您正在将 String data
传递给 writeToFile
方法。 Java 中的字符串是 UTF-16
编码的。如果您在该字符串中有 UTF-8
编码数据,那么再多的编码解码也无法修复已经损坏的数据。
您应该使用 xml = EntityUtils.toString(httpEntity, HTTP.UTF_8)
正确解码数据。
如果 returned 数据包含 UTF-8 BOM
,则会出现其他问题。上面的行将正确解码数据,但它会留下多余的(和错误的)BOM
。
为了解决服务器必须 return 没有 BOM
的数据,或者 BOM
必须被剥离的问题。为此,请使用以下代码(或类似代码)
public static String stripBOM(InputStream stream)
{
try
{
byte[] buffer = new byte[1024];
ByteArrayOutputStream os = new ByteArrayOutputStream(1024);
byte[] bom = new byte[3];
stream.read(bom);
int bytesRead;
while ((bytesRead = stream.read(buffer)) != -1)
{
os.write(buffer, 0, bytesRead);
}
os.close();
return os.toString("UTF-8");
}
catch (IOException e)
{
return "";
}
}
所以xml = EntityUtils.toString(httpEntity, HTTP.UTF_8)
可以换成
InputStream is = httpEntity.getContent();
xml = stripBOM(is);
我从 HTTP 请求中得到一个字符串流。流看起来像:
<?xml version="1.0" encoding="utf-8"?>
前三个标记表示字符串编码为 UTF-8。
我正在用字符串制作文件。阅读它们时出现错误:
用这个方法我用那个字符串制作文件:
private void writeToFile(String data, String fileName) {
try {
String UTF8 = "UTF-8";
int BUFFER_SIZE = 8192;
String xmlCut = data.substring(3);
File sdCard = Environment.getExternalStorageDirectory();
File dir = new File (sdCard.getAbsolutePath()+"/example/Test");
dir.mkdirs();
File file = new File(dir,fileName);
FileOutputStream f = new FileOutputStream(file);
FileOutputStream fileOutputStream = openFileOutput(fileName, Context.MODE_PRIVATE);
BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(fileOutputStream,UTF8),BUFFER_SIZE);
bufferedWriter.write(String.valueOf(data.getBytes("UTF-8")));
f.write(data.getBytes("UTF-8"));
f.close();
bufferedWriter.close();
} catch (IOException e) {
Log.e("writeToFile: ", "Datei-Erstellung fehlgeschlagen: " + e.toString());
}
}
如您所见,我添加了 substring 方法来删除前三个标记,因为这会导致崩溃。问题是文件是用 ASCI 编码的。
读取文件的方法:
private String readFromFile(String fileName) {
String ret = "";
String UTF8 = "UTF-8";
int BUFFER_SIZE = 8192;
try {
InputStream inputStream = openFileInput(fileName);
if (inputStream != null) {
BufferedReader bufferedReader1 = new BufferedReader(new InputStreamReader(inputStream,UTF8),BUFFER_SIZE);
String receiveString = "";
StringBuilder stringBuilder = new StringBuilder();
while ((receiveString = bufferedReader1.readLine()) != null) {
stringBuilder.append(receiveString);
}
inputStream.close();
ret = stringBuilder.toString();
}
} catch (FileNotFoundException e) {
Log.e("readFromFile: ", "Datei nicht gefunden: " + e.toString());
} catch (IOException e) {
Log.e("readFromFile: ", "Kann Datei nicht lesen: " + e.toString());
}
return ret;
}
如果我不剪切 UTF-8 令牌,那么我会从堆栈跟踪中得到这个错误:
Caused by: java.lang.NullPointerException: Attempt to invoke interface method 'org.w3c.dom.NodeList org.w3c.dom.Document.getElementsByTagName(java.lang.String)' on a null object reference
at de.example.app.ListViewActivity.setListProjectData(ListViewActivity.java:226)
在这里:
public void setListProjectData(String filename) {
XMLParser parser = new XMLParser();
String xmlData = readFromFile(filename);
String xmlCut = xmlData.substring(3);
Document doc = parser.getDomElement(filename);
NodeList nodeListProject = doc.getElementsByTagName(KEY_PROJECT);
for (int i = 0; i < nodeListProject.getLength(); i++) {
HashMap<String, String> map = new HashMap<String, String>();
Element e = (Element) nodeListProject.item(i);
map.put(KEY_UUID, parser.getValue(e, KEY_UUID));
map.put(KEY_NAME, parser.getValue(e, KEY_NAME));
map.put(KEY_JOBTITLE, parser.getValue(e, KEY_JOBTITLE));
map.put(KEY_JOBINFO, parser.getValue(e, KEY_JOBINFO));
map.put(KEY_PROJECTIMAGE, parser.getValue(e, KEY_PROJECTIMAGE));
projectItems.add(map);
}
}
我通过这里从 HTTP 获取数据:
public String getXMLFromUrl(String url) {
String xml = null;
if (cd.isConnectingToInternet()) {
try {
//defaultHttpClient
DefaultHttpClient httpClient = new DefaultHttpClient();
HttpPost httpPost = new HttpPost(url);
HttpResponse httpResponse = httpClient.execute(httpPost);
HttpEntity httpEntity = httpResponse.getEntity();
/*
final InputStream in = httpEntity.getContent();
Reader reader = new InputStreamReader(in,"UTF-8");
InputSource is = new InputSource(reader);
is.setEncoding("UTF-8");
*/ xml = EntityUtils.toString(httpEntity);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
} else {
return null;
}
return xml;
那么,我如何将它们编码为 UTF-8?我做对了吗?
您的问题不在于您发布的代码,而在于从 HTTP 请求获取数据的代码。
您正在将 String data
传递给 writeToFile
方法。 Java 中的字符串是 UTF-16
编码的。如果您在该字符串中有 UTF-8
编码数据,那么再多的编码解码也无法修复已经损坏的数据。
您应该使用 xml = EntityUtils.toString(httpEntity, HTTP.UTF_8)
正确解码数据。
如果 returned 数据包含 UTF-8 BOM
,则会出现其他问题。上面的行将正确解码数据,但它会留下多余的(和错误的)BOM
。
为了解决服务器必须 return 没有 BOM
的数据,或者 BOM
必须被剥离的问题。为此,请使用以下代码(或类似代码)
public static String stripBOM(InputStream stream)
{
try
{
byte[] buffer = new byte[1024];
ByteArrayOutputStream os = new ByteArrayOutputStream(1024);
byte[] bom = new byte[3];
stream.read(bom);
int bytesRead;
while ((bytesRead = stream.read(buffer)) != -1)
{
os.write(buffer, 0, bytesRead);
}
os.close();
return os.toString("UTF-8");
}
catch (IOException e)
{
return "";
}
}
所以xml = EntityUtils.toString(httpEntity, HTTP.UTF_8)
可以换成
InputStream is = httpEntity.getContent();
xml = stripBOM(is);