java.lang.IndexOutOfBoundsException: 索引 0 无效,大小为 0.. 解析文件不正确?
java.lang.IndexOutOfBoundsException: Invalid index 0, size is 0.. parse file incorrect?
我正在尝试制作一个 android 应用程序来解析 grid/list 中的在线图片,但是我遇到了一些运行时错误..它说我对我的家犬的解析错误品种。有谁知道我在哪里犯错误?我知道为什么数组会越界,但我不知道如何修复它!!
我正在尝试解析 http://www.dogbreedslist.info/family-dog-breeds/ 此网站数据.. 但在我的
的这些部分出现运行时错误
DogActivity.class
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
}
Parser.class
public class Parser {
Document doc;
Context context;
Elements dogRows;
public Parser(String url, Context context) {
this.context = context;
try {
doc = Jsoup.connect(url).get();
} catch (IOException e) {
Log.e("Page", "Wrong URL or network problems", e);
}
}
public ArrayList<Dog> parseDogsPage(Breed.Name breedName, Context context) {
ArrayList<Dog> dogs = new ArrayList<>();
try {
Element dogContainer;
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogContainer = doc.getElementsByClass("familybreed").get(0);
} else {
dogContainer = doc.getElementsByClass("toybreed").get(0);
}
Log.i("Page", "A page has been parsed successfully");
dogRows = dogContainer.getElementsByTag("a");
for (Element dogRow : dogRows) {
String dogName, dogURL;
Dog dog;
dogURL = dogRow.getElementsByTag("a").get(0).absUrl("href");
String dogThumbnailURL = dogRow.
getElementsByTag("img").get(0).absUrl("src");
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogName = dogRow.getElementsByTag("span").get(0).text();
dog = new Dog(dogName, dogURL, dogThumbnailURL, breedName);
} else {
dogName = dogRow.getElementsByTag("strong").get(0).text();
Element details = dogContainer.getElementsByClass("details").get(0);
Elements children = details.children();
if (breedName == Breed.Name.TOY_DOG_BREED || breedName == Breed.Name.HOUND_DOG_BREED) {
String origin = children.get(1).text();
String lifespan = children.get(3).text();
dog= new Dog(dogName, origin , lifespan, dogURL, dogThumbnailURL, breedName);
} else {
//for herding
String sizetype = children.get(1).text();
dog = new Dog(dogName, sizetype, dogThumbnailURL, dogURL, breedName);
}
}
dogs.add(dog);
}
} catch (Exception e) {
Log.e("Breed activity", "Wrong parsing for " + breedName, e);
}
return dogs;
}
public Dog parseProfile(Dog dog) {
if (!dog.isDetailDataReady()) {
//coaches already read the data in the coaches page
try {
Element dogContainer = doc.getElementById("dogscontainer");
Element bioContainer = dogContainer.getElementById("biocontainer");
Element bioDetails = bioContainer.getElementById("biodetails");
dog.setOtherNames(bioDetails.getElementsByTag("h1").text());
ArrayList<Dog.Detail> dogDetails = new ArrayList<>();
Elements rows = bioDetails.getElementsByTag("tr");
for (Element row : rows) {
Elements tds = row.getElementsByTag("td");
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//coaches, manager and legends use th and td
Elements ths = row.getElementsByTag("th");
dogDetails.add(new Dog.Detail(ths.get(0).text(), tds.get(0).text()));
} else {
//dogs use two tds
dogDetails.add(new Dog.Detail(tds.get(0).text(), tds.get(1).text()));
}
}
dog.setDetails(dogDetails);
Element articleText = dogContainer.getElementsByClass("dogarticletext").get(0);
Elements paragraphs = articleText.getElementsByTag("p");
String text = "";
for (Element p : paragraphs) {
text = text + "\n\n\n" + p.text();
}
dog.setArticleText(dog.getArticleText() + text);
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//get main image url
dog.setMainImageURL(bioContainer.getElementsByTag("img").get(0).absUrl("src"));
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setThumbnailURL(dog.getMainImageURL());
//only need first name
dog.setName(dog.getOtherNames().split(" ")[1]);
}
} else {
dog.setMainImageURL(bioContainer.getElementsByClass("mainImage").get(0).absUrl("src"));
}
} catch (Exception e) {
Log.e("Profile activity", "Wrong parsing for " + dog.getUrl(), e);
}
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setBasicDataReady(true);
}
dog.setDetailDataReady(true);
}
return dog;
}
}
检索狗任务:
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
Logcat:
Wrong parsing for FAMILY_DOG_BREED
java.lang.IndexOutOfBoundsException: Invalid index 0, size is 0
at java.util.ArrayList.throwIndexOutOfBoundsException(ArrayList.java:255)
at java.util.ArrayList.get(ArrayList.java:308)
at org.jsoup.select.Elements.get(Elements.java:544)
at com.example.shannon.popular.Parser.parseDogsPage(Parser.java:35)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:140)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:131)
at android.os.AsyncTask.call(AsyncTask.java:288)
at java.util.concurrent.FutureTask.run(FutureTask.java:237)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1112)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:587)
at java.lang.Thread.run(Thread.java:818)
Breed.class:
public class Breed implements Serializable {
private Name name;
private String url;
Breed(Name name, String url) {
this.name = name;
this.url = url;
}
public Name getName() {
return name;
}
public String getNameString(Context context) {
String nameString = "";
switch (name) {
case FAMILY_DOG_BREED:
nameString = context.getString(R.string.family_breed);
break;
case TOY_DOG_BREED:
nameString = context.getString(R.string.toy_breed);
break;
case HOUND_DOG_BREED:
nameString = context.getString(R.string.hound_breed);
break;
case TERRIER_DOG_BREED:
nameString = context.getString(R.string.terrier_breed);
break;
case WORKING_DOG_BREED:
nameString = context.getString(R.string.working_breed);
break;
case HERDING_DOG_BREED:
nameString = context.getString(R.string.herding_breed);
break;
}
return nameString;
}
public String getURL() {
return url;
}
public enum Name {FAMILY_DOG_BREED, TOY_DOG_BREED, HOUND_DOG_BREED, TERRIER_DOG_BREED, WORKING_DOG_BREED, HERDING_DOG_BREED}
}
您可能对格式错误的 HTML 文档使用了严格的 XML 解析器。我只是尝试 XML-验证您正在解析的 URL 但它失败了,因为 <link>
元素永远不会关闭(在严格的 XML 中,它应该以 </link>
标签,但该页面中缺少它)。
这对于 HTML 页面很常见,因为当今的浏览器倾向于自动更正此类错误。
由于您使用了严格的 XML 解析器,解析器很可能会失败。
我建议切换到不同的解析器。我会使用 PULL 解析器(例如 http://www.xmlpull.org )——这种技术允许使用较低级别的控制进行解析,这意味着你可以轻松地忽略 HTML 中不需要的内容——比如这些 link 元素,或任何其他元素。
所以你可以这样做:
XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setInput(new BufferedReader(
new InputStreamReader(
new URL("http://.....").openConnection().getInputStream()
)
)
);
while(XmlPullParser.END_DOCUMENT != parser.next()){
if(XmlPullParser.START_TAG == parser.getEventType()){
String tagName = parser.getName();
if(parser.getAttributeCount() > 0 {
// parse attributes, if needed
}
if(parser.nextToken() == XmlPullParser.TEXT){
String tagValue = parser.getText()
}
// etc.
}
}
我正在尝试制作一个 android 应用程序来解析 grid/list 中的在线图片,但是我遇到了一些运行时错误..它说我对我的家犬的解析错误品种。有谁知道我在哪里犯错误?我知道为什么数组会越界,但我不知道如何修复它!!
我正在尝试解析 http://www.dogbreedslist.info/family-dog-breeds/ 此网站数据.. 但在我的
的这些部分出现运行时错误DogActivity.class
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
}
Parser.class
public class Parser {
Document doc;
Context context;
Elements dogRows;
public Parser(String url, Context context) {
this.context = context;
try {
doc = Jsoup.connect(url).get();
} catch (IOException e) {
Log.e("Page", "Wrong URL or network problems", e);
}
}
public ArrayList<Dog> parseDogsPage(Breed.Name breedName, Context context) {
ArrayList<Dog> dogs = new ArrayList<>();
try {
Element dogContainer;
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogContainer = doc.getElementsByClass("familybreed").get(0);
} else {
dogContainer = doc.getElementsByClass("toybreed").get(0);
}
Log.i("Page", "A page has been parsed successfully");
dogRows = dogContainer.getElementsByTag("a");
for (Element dogRow : dogRows) {
String dogName, dogURL;
Dog dog;
dogURL = dogRow.getElementsByTag("a").get(0).absUrl("href");
String dogThumbnailURL = dogRow.
getElementsByTag("img").get(0).absUrl("src");
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogName = dogRow.getElementsByTag("span").get(0).text();
dog = new Dog(dogName, dogURL, dogThumbnailURL, breedName);
} else {
dogName = dogRow.getElementsByTag("strong").get(0).text();
Element details = dogContainer.getElementsByClass("details").get(0);
Elements children = details.children();
if (breedName == Breed.Name.TOY_DOG_BREED || breedName == Breed.Name.HOUND_DOG_BREED) {
String origin = children.get(1).text();
String lifespan = children.get(3).text();
dog= new Dog(dogName, origin , lifespan, dogURL, dogThumbnailURL, breedName);
} else {
//for herding
String sizetype = children.get(1).text();
dog = new Dog(dogName, sizetype, dogThumbnailURL, dogURL, breedName);
}
}
dogs.add(dog);
}
} catch (Exception e) {
Log.e("Breed activity", "Wrong parsing for " + breedName, e);
}
return dogs;
}
public Dog parseProfile(Dog dog) {
if (!dog.isDetailDataReady()) {
//coaches already read the data in the coaches page
try {
Element dogContainer = doc.getElementById("dogscontainer");
Element bioContainer = dogContainer.getElementById("biocontainer");
Element bioDetails = bioContainer.getElementById("biodetails");
dog.setOtherNames(bioDetails.getElementsByTag("h1").text());
ArrayList<Dog.Detail> dogDetails = new ArrayList<>();
Elements rows = bioDetails.getElementsByTag("tr");
for (Element row : rows) {
Elements tds = row.getElementsByTag("td");
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//coaches, manager and legends use th and td
Elements ths = row.getElementsByTag("th");
dogDetails.add(new Dog.Detail(ths.get(0).text(), tds.get(0).text()));
} else {
//dogs use two tds
dogDetails.add(new Dog.Detail(tds.get(0).text(), tds.get(1).text()));
}
}
dog.setDetails(dogDetails);
Element articleText = dogContainer.getElementsByClass("dogarticletext").get(0);
Elements paragraphs = articleText.getElementsByTag("p");
String text = "";
for (Element p : paragraphs) {
text = text + "\n\n\n" + p.text();
}
dog.setArticleText(dog.getArticleText() + text);
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//get main image url
dog.setMainImageURL(bioContainer.getElementsByTag("img").get(0).absUrl("src"));
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setThumbnailURL(dog.getMainImageURL());
//only need first name
dog.setName(dog.getOtherNames().split(" ")[1]);
}
} else {
dog.setMainImageURL(bioContainer.getElementsByClass("mainImage").get(0).absUrl("src"));
}
} catch (Exception e) {
Log.e("Profile activity", "Wrong parsing for " + dog.getUrl(), e);
}
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setBasicDataReady(true);
}
dog.setDetailDataReady(true);
}
return dog;
}
}
检索狗任务:
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
Logcat:
Wrong parsing for FAMILY_DOG_BREED
java.lang.IndexOutOfBoundsException: Invalid index 0, size is 0
at java.util.ArrayList.throwIndexOutOfBoundsException(ArrayList.java:255)
at java.util.ArrayList.get(ArrayList.java:308)
at org.jsoup.select.Elements.get(Elements.java:544)
at com.example.shannon.popular.Parser.parseDogsPage(Parser.java:35)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:140)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:131)
at android.os.AsyncTask.call(AsyncTask.java:288)
at java.util.concurrent.FutureTask.run(FutureTask.java:237)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1112)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:587)
at java.lang.Thread.run(Thread.java:818)
Breed.class:
public class Breed implements Serializable {
private Name name;
private String url;
Breed(Name name, String url) {
this.name = name;
this.url = url;
}
public Name getName() {
return name;
}
public String getNameString(Context context) {
String nameString = "";
switch (name) {
case FAMILY_DOG_BREED:
nameString = context.getString(R.string.family_breed);
break;
case TOY_DOG_BREED:
nameString = context.getString(R.string.toy_breed);
break;
case HOUND_DOG_BREED:
nameString = context.getString(R.string.hound_breed);
break;
case TERRIER_DOG_BREED:
nameString = context.getString(R.string.terrier_breed);
break;
case WORKING_DOG_BREED:
nameString = context.getString(R.string.working_breed);
break;
case HERDING_DOG_BREED:
nameString = context.getString(R.string.herding_breed);
break;
}
return nameString;
}
public String getURL() {
return url;
}
public enum Name {FAMILY_DOG_BREED, TOY_DOG_BREED, HOUND_DOG_BREED, TERRIER_DOG_BREED, WORKING_DOG_BREED, HERDING_DOG_BREED}
}
您可能对格式错误的 HTML 文档使用了严格的 XML 解析器。我只是尝试 XML-验证您正在解析的 URL 但它失败了,因为 <link>
元素永远不会关闭(在严格的 XML 中,它应该以 </link>
标签,但该页面中缺少它)。
这对于 HTML 页面很常见,因为当今的浏览器倾向于自动更正此类错误。
由于您使用了严格的 XML 解析器,解析器很可能会失败。
我建议切换到不同的解析器。我会使用 PULL 解析器(例如 http://www.xmlpull.org )——这种技术允许使用较低级别的控制进行解析,这意味着你可以轻松地忽略 HTML 中不需要的内容——比如这些 link 元素,或任何其他元素。
所以你可以这样做:
XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setInput(new BufferedReader(
new InputStreamReader(
new URL("http://.....").openConnection().getInputStream()
)
)
);
while(XmlPullParser.END_DOCUMENT != parser.next()){
if(XmlPullParser.START_TAG == parser.getEventType()){
String tagName = parser.getName();
if(parser.getAttributeCount() > 0 {
// parse attributes, if needed
}
if(parser.nextToken() == XmlPullParser.TEXT){
String tagValue = parser.getText()
}
// etc.
}
}