Java 如何将 URL 格式化为字符串以连接 JSoup Malformed URL 错误
Java How to format URL as a String to connect with JSoup Malformed URL error
我有一个程序可以从 TextField 连接到用户定义的 URL 并抓取该网页上的图像。用户定义的 URL 通过 .getText() 从文本字段中获取并分配给一个字符串。然后使用 String 连接到带有 JSoup 的网页,并将网页放入文档中。
String address = labelforAddress.getText();
try {
document = Jsoup.connect(address).get();
}catch(IOException ex){
ex.printStackTrace();
}
我尝试了不同格式的 URLS: "https://www.", "www.", "https://" 但我使用的所有内容都会抛出格式错误的 URL错误。
请有人告诉我如何以正确的方式从 TextField 获取文本。
下面是整个代码。
包装样品;
import javafx.application.Application;
import javafx.fxml.FXMLLoader;
import javafx.scene.Parent;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.TextField;
import javafx.scene.layout.GridPane;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.URL;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
public class Main extends Application {
Document document;
LinkedList<String> imageURLList = new LinkedList<String>();
ArrayList<File> fileList = new ArrayList<File>();
int fileCount = 1;
@Override
public void start(Stage primaryStage) throws Exception{
Parent root = FXMLLoader.load(getClass().getResource("sample.fxml"));
primaryStage.setTitle("Webpage Photo Scraper");
GridPane gp = new GridPane();
Label labelforAddress = new Label("URL");
GridPane.setConstraints(labelforAddress, 0,0);
TextField URLAddress = new TextField();
GridPane.setConstraints(URLAddress, 1,0);
Button scrape = new Button("Scrape for Photos");
GridPane.setConstraints(scrape, 0,1);
scrape.setOnAction(event->{
String address = labelforAddress.getText();
try {
document = Jsoup.connect(address).get();
}catch(IOException ex){
ex.printStackTrace();
}
Elements imgTags = document.getElementsByAttributeValueContaining("src", "/CharacterImages");
for(Element imgTag: imgTags){
imageURLList.add(imgTag.absUrl("src"));
}
for(String url: imageURLList){
File file = new File("C:\Users\Andrei\Documents\file" + fileCount + ".txt");
downloadFromURL(url, file);
fileList.add(file);
fileCount++;
}
});
Button exportToZipFile = new Button("Export to Zip File");
GridPane.setConstraints(exportToZipFile, 0,2);
exportToZipFile.setOnAction(event -> {
FileChooser fileChooser = new FileChooser();
FileChooser.ExtensionFilter exfilt = new FileChooser.ExtensionFilter("Zip Files", ".zip");
fileChooser.getExtensionFilters().add(exfilt);
try{
FileOutputStream fos = new FileOutputStream(fileChooser.showSaveDialog(primaryStage));
ZipOutputStream zipOut = new ZipOutputStream(fos);
for(int count = 0; count<=fileList.size()-1; count++){
File fileToZip = new File(String.valueOf(fileList.get(count)));
FileInputStream fis = new FileInputStream(fileToZip);
ZipEntry zipEntry = new ZipEntry(fileToZip.getName());
zipOut.putNextEntry(zipEntry);
byte[] bytes = new byte[1024];
int length;
while((length = fis.read(bytes)) >= 0) {
zipOut.write(bytes, 0, length);
}
fis.close();
}
zipOut.close();
fos.close();
}catch(IOException e1){
e1.printStackTrace();
}
});
primaryStage.setScene(new Scene(gp, 300, 275));
primaryStage.show();
gp.getChildren().addAll(exportToZipFile, labelforAddress, scrape, URLAddress);
}
public static void downloadFromURL(String url, File file){
try {
URL Url = new URL(url);
BufferedInputStream bis = new BufferedInputStream(Url.openStream());
FileOutputStream fis = new FileOutputStream(file);
byte[] buffer = new byte[1024];
int count = 0;
while((count = bis.read(buffer, 0,1024)) !=-1){
fis.write(buffer, 0, count);
}
fis.close();
bis.close();
}catch(IOException e){
e.printStackTrace();
}
}
public static void main(String[] args) {
launch(args);
}
}
包含用户输入值的文本字段存储在 URLAddress
对象中,但您总是尝试从 labelforAddress
对象中获取 url,这是一个始终包含 "URL"文本。
所以解决方案是使用:
String address = URLAddress.getText();
如果你仔细阅读错误信息,它会帮助你找到原因,因为它总是显示它认为错误的值。在这种情况下,我看到:
Caused by: java.net.MalformedURLException: no protocol: URL
显示无法识别的地址是:URL
。
如果下次遇到这种错误试试:
- 在运行时调试应用程序以查看每个变量的值
- 在控制台中记录变量值以查看变量是否包含您期望的值
我有一个程序可以从 TextField 连接到用户定义的 URL 并抓取该网页上的图像。用户定义的 URL 通过 .getText() 从文本字段中获取并分配给一个字符串。然后使用 String 连接到带有 JSoup 的网页,并将网页放入文档中。
String address = labelforAddress.getText();
try {
document = Jsoup.connect(address).get();
}catch(IOException ex){
ex.printStackTrace();
}
我尝试了不同格式的 URLS: "https://www.", "www.", "https://" 但我使用的所有内容都会抛出格式错误的 URL错误。 请有人告诉我如何以正确的方式从 TextField 获取文本。 下面是整个代码。 包装样品;
import javafx.application.Application;
import javafx.fxml.FXMLLoader;
import javafx.scene.Parent;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.TextField;
import javafx.scene.layout.GridPane;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.URL;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
public class Main extends Application {
Document document;
LinkedList<String> imageURLList = new LinkedList<String>();
ArrayList<File> fileList = new ArrayList<File>();
int fileCount = 1;
@Override
public void start(Stage primaryStage) throws Exception{
Parent root = FXMLLoader.load(getClass().getResource("sample.fxml"));
primaryStage.setTitle("Webpage Photo Scraper");
GridPane gp = new GridPane();
Label labelforAddress = new Label("URL");
GridPane.setConstraints(labelforAddress, 0,0);
TextField URLAddress = new TextField();
GridPane.setConstraints(URLAddress, 1,0);
Button scrape = new Button("Scrape for Photos");
GridPane.setConstraints(scrape, 0,1);
scrape.setOnAction(event->{
String address = labelforAddress.getText();
try {
document = Jsoup.connect(address).get();
}catch(IOException ex){
ex.printStackTrace();
}
Elements imgTags = document.getElementsByAttributeValueContaining("src", "/CharacterImages");
for(Element imgTag: imgTags){
imageURLList.add(imgTag.absUrl("src"));
}
for(String url: imageURLList){
File file = new File("C:\Users\Andrei\Documents\file" + fileCount + ".txt");
downloadFromURL(url, file);
fileList.add(file);
fileCount++;
}
});
Button exportToZipFile = new Button("Export to Zip File");
GridPane.setConstraints(exportToZipFile, 0,2);
exportToZipFile.setOnAction(event -> {
FileChooser fileChooser = new FileChooser();
FileChooser.ExtensionFilter exfilt = new FileChooser.ExtensionFilter("Zip Files", ".zip");
fileChooser.getExtensionFilters().add(exfilt);
try{
FileOutputStream fos = new FileOutputStream(fileChooser.showSaveDialog(primaryStage));
ZipOutputStream zipOut = new ZipOutputStream(fos);
for(int count = 0; count<=fileList.size()-1; count++){
File fileToZip = new File(String.valueOf(fileList.get(count)));
FileInputStream fis = new FileInputStream(fileToZip);
ZipEntry zipEntry = new ZipEntry(fileToZip.getName());
zipOut.putNextEntry(zipEntry);
byte[] bytes = new byte[1024];
int length;
while((length = fis.read(bytes)) >= 0) {
zipOut.write(bytes, 0, length);
}
fis.close();
}
zipOut.close();
fos.close();
}catch(IOException e1){
e1.printStackTrace();
}
});
primaryStage.setScene(new Scene(gp, 300, 275));
primaryStage.show();
gp.getChildren().addAll(exportToZipFile, labelforAddress, scrape, URLAddress);
}
public static void downloadFromURL(String url, File file){
try {
URL Url = new URL(url);
BufferedInputStream bis = new BufferedInputStream(Url.openStream());
FileOutputStream fis = new FileOutputStream(file);
byte[] buffer = new byte[1024];
int count = 0;
while((count = bis.read(buffer, 0,1024)) !=-1){
fis.write(buffer, 0, count);
}
fis.close();
bis.close();
}catch(IOException e){
e.printStackTrace();
}
}
public static void main(String[] args) {
launch(args);
}
}
包含用户输入值的文本字段存储在 URLAddress
对象中,但您总是尝试从 labelforAddress
对象中获取 url,这是一个始终包含 "URL"文本。
所以解决方案是使用:
String address = URLAddress.getText();
如果你仔细阅读错误信息,它会帮助你找到原因,因为它总是显示它认为错误的值。在这种情况下,我看到:
Caused by: java.net.MalformedURLException: no protocol: URL
显示无法识别的地址是:URL
。
如果下次遇到这种错误试试:
- 在运行时调试应用程序以查看每个变量的值
- 在控制台中记录变量值以查看变量是否包含您期望的值