位置 0 处的意外字符 (B)
Unexpected character (B) at position 0
我想从这个 url 中抓取数据:http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1。
我想提取(Date + Price + Price HT + Taxe),然后将它们保存到 Excel 文件中。我使用了这段代码:
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.jsoup.Jsoup;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.javascript.host.dom.Document;
import jxl.CellView;
import jxl.Workbook;
import jxl.WorkbookSettings;
import jxl.format.UnderlineStyle;
import jxl.write.Formula;
import jxl.write.Label;
import jxl.write.Number;
import jxl.write.WritableCellFormat;
import jxl.write.WritableFont;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
public class MoisAirfrancee {
public static void main(String[] args)throws FailingHttpStatusCodeException, MalformedURLException, IOException, RowsExceededException, WriteException{
Map<String, Integer> prices = new TreeMap<String, Integer>();
File f=new File("C:\Users\tahab_000\Desktop\Test.xls");
WritableWorkbook myexcel=Workbook.createWorkbook(f);
WritableSheet mysheet=myexcel.createSheet("mySheet", 0);
try {
org.jsoup.nodes.Document doc = Jsoup.connect("http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1").get();
JSONObject obj = (JSONObject) new JSONParser().parse(doc.text());
obj = (JSONObject) obj.get("days");
for (Iterator<?> iterator = obj.keySet().iterator(); iterator.hasNext();) {
String key = (String) iterator.next();
JSONObject dateObject = (JSONObject) obj.get(key);
Double price = (Double) dateObject.get("price");
int roundedPrice = (int) Math.ceil(price);
prices.put(key, roundedPrice);
}
int j=1;
for (String key : prices.keySet()) {
addLabel(mysheet, 0, 0, "Date" );
addLabel(mysheet, 1, 0, "Prix" );
addLabel(mysheet, 1, j, prices.get(key).toString()+"€" );
addLabel(mysheet, 0, j, key );
j++;
System.out.println(key + ": " + prices.get(key) + " €");
}
}catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
}
myexcel.write();
myexcel.close();
}
private static void addLabel(WritableSheet sheet, int column, int row, String s)
throws WriteException, RowsExceededException {
Label label;
label = new Label(column, row, s);
sheet.addCell(label);
}
}
运行之后,我遇到了这个异常:
Unexpected character (B) at position 0.
at org.json.simple.parser.Yylex.yylex(Yylex.java:610)
at org.json.simple.parser.JSONParser.nextToken(JSONParser.java:269)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:118)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:81)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:75)
at MoisAirfrancee.main(MoisAirfrancee.java:47)
首先连接到默认的着陆页(landing page)(http://www.airfrance.fr/vols/paris+tunis)。
从响应中,我们可以使用 response.cookies() 获取所需的 cookie,
然后通过 .cookies(response.cookies()) 把这些 cookie 设置到新的请求上,
再去连接查询页面 (http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1)。
注意:这里也许并不需要设置 User-Agent(用户代理)和 Referer(引荐来源网址),但这样做没有坏处,而且可能让抓取更稳定。
// Values shared by both requests.
String userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36";
String landingUrl = "http://www.airfrance.fr/vols/paris+tunis";
String calendarUrl = "http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1";

// Step 1: request the landing page so the response carries the cookies.
Response response = Jsoup.connect(landingUrl)
        .userAgent(userAgent)
        .method(Method.GET)
        .timeout(2000)
        .execute();

// Step 2: request the calendar, sending the cookies from step 1 and the
// landing page as referrer.
Document doc = Jsoup.connect(calendarUrl)
        .cookies(response.cookies())
        .userAgent(userAgent)
        .referrer(landingUrl)
        .timeout(2000)
        .get();

// Print the text content of the parsed response.
String jsonResponse = doc.text();
System.out.println(jsonResponse);
输出:
{"idMonth":10,"month":"Novembre","bestPrice":270.0,"isLowest":false,"isAvailable":true, ...
我想从这个 url 中抓取数据:http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1。
我想提取(Date + Price + Price HT + Taxe),然后将它们保存到 Excel 文件中。我使用了这段代码:
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.jsoup.Jsoup;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.javascript.host.dom.Document;
import jxl.CellView;
import jxl.Workbook;
import jxl.WorkbookSettings;
import jxl.format.UnderlineStyle;
import jxl.write.Formula;
import jxl.write.Label;
import jxl.write.Number;
import jxl.write.WritableCellFormat;
import jxl.write.WritableFont;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
public class MoisAirfrancee {
public static void main(String[] args)throws FailingHttpStatusCodeException, MalformedURLException, IOException, RowsExceededException, WriteException{
Map<String, Integer> prices = new TreeMap<String, Integer>();
File f=new File("C:\Users\tahab_000\Desktop\Test.xls");
WritableWorkbook myexcel=Workbook.createWorkbook(f);
WritableSheet mysheet=myexcel.createSheet("mySheet", 0);
try {
org.jsoup.nodes.Document doc = Jsoup.connect("http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1").get();
JSONObject obj = (JSONObject) new JSONParser().parse(doc.text());
obj = (JSONObject) obj.get("days");
for (Iterator<?> iterator = obj.keySet().iterator(); iterator.hasNext();) {
String key = (String) iterator.next();
JSONObject dateObject = (JSONObject) obj.get(key);
Double price = (Double) dateObject.get("price");
int roundedPrice = (int) Math.ceil(price);
prices.put(key, roundedPrice);
}
int j=1;
for (String key : prices.keySet()) {
addLabel(mysheet, 0, 0, "Date" );
addLabel(mysheet, 1, 0, "Prix" );
addLabel(mysheet, 1, j, prices.get(key).toString()+"€" );
addLabel(mysheet, 0, j, key );
j++;
System.out.println(key + ": " + prices.get(key) + " €");
}
}catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
}
myexcel.write();
myexcel.close();
}
private static void addLabel(WritableSheet sheet, int column, int row, String s)
throws WriteException, RowsExceededException {
Label label;
label = new Label(column, row, s);
sheet.addCell(label);
}
}
运行之后,我遇到了这个异常:
Unexpected character (B) at position 0.
at org.json.simple.parser.Yylex.yylex(Yylex.java:610)
at org.json.simple.parser.JSONParser.nextToken(JSONParser.java:269)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:118)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:81)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:75)
at MoisAirfrancee.main(MoisAirfrancee.java:47)
首先连接到默认的着陆页(landing page)(http://www.airfrance.fr/vols/paris+tunis)。
从响应中,我们可以使用 response.cookies() 获取所需的 cookie,
然后通过 .cookies(response.cookies()) 把这些 cookie 设置到新的请求上,
再去连接查询页面 (http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1)。
注意:这里也许并不需要设置 User-Agent(用户代理)和 Referer(引荐来源网址),但这样做没有坏处,而且可能让抓取更稳定。
// Values shared by both requests.
String userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36";
String landingUrl = "http://www.airfrance.fr/vols/paris+tunis";
String calendarUrl = "http://www.airfrance.fr/FR/fr/local/vols/getInstantFlexNewCalendar.do?idMonth=10&itineraryNumber=1";

// Step 1: request the landing page so the response carries the cookies.
Response response = Jsoup.connect(landingUrl)
        .userAgent(userAgent)
        .method(Method.GET)
        .timeout(2000)
        .execute();

// Step 2: request the calendar, sending the cookies from step 1 and the
// landing page as referrer.
Document doc = Jsoup.connect(calendarUrl)
        .cookies(response.cookies())
        .userAgent(userAgent)
        .referrer(landingUrl)
        .timeout(2000)
        .get();

// Print the text content of the parsed response.
String jsonResponse = doc.text();
System.out.println(jsonResponse);
输出:
{"idMonth":10,"month":"Novembre","bestPrice":270.0,"isLowest":false,"isAvailable":true, ...