从 Google Scholar 下载参考文献列表
Downloading a list of references from Google Scholar
我已经把一篇研究论文的参考文献存入一个列表(如下所示),并想从 Google Scholar 下载它们。通过下面给出的 URL,我已经成功下载了一篇论文。现在列表中保存了该论文的全部参考文献(共 15 条,其中至少有 5 条可以获得 PDF),我想在 Google Scholar 上逐条检索列表中的参考文献,并下载所有可用的 PDF;如果某条参考文献没有可用的 PDF,则必须显示 "Pdf is not available"。下面是我下载单个文件的代码,但我不知道该如何修改它,才能根据列表下载多篇论文。
public static void main(String[] args) throws IOException {
Scanner s = new Scanner(new File("D:\ref.txt"));
ArrayList<String> list = new ArrayList<String>();
while (s.hasNextLine()){
list.add(s.nextLine());
{
for (String Z : list)
{
System.out.println(Z);
}
}
}
//System.out.println("LISTZ:" +list);
s.close();//LIST completed
//code to download the paper from scholar
try {
//var a= doc.replace(" ","+");
Document doc = Jsoup
.connect("https://scholar.google.com.pk/scholar?q=%5B3%5D+W.+H.+Walters%2C+%E2%80%9CGoogle+scholar+coverage+of+a+multidisciplinary+field%2C%E2%80%9D+Information+Processing+%26+Management+%2C+vol.+43%2C+no.+4%2C+pp.+1121+%E2%80%93+1132%2C+July+2007.&btnG=&hl=en&as_sdt=0%2C5")
.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
.get();
String title = doc.title();
System.out.println("title : " + title);
Elements links = doc.select("div.gs_ggsd").select("a[href]");
//Element = doc.select("div.gs_ggs gs_fl").first();
for (Element link : links) {
//System.out.println("\nlink : " + link.attr("href"));
URL website = new URL(link.attr("href"));
ReadableByteChannel rbc = Channels.newChannel(website.openStream());
FileOutputStream fos = new FileOutputStream("D:\paper.pdf");
fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
}
// System.out.println("text : " + link.text());
}
/* ByteArrayOutputStream href = new ByteArrayOutputStream();
PrintStream PS = new PrintStream(href);
PrintStream old = System.out;
System.setOut(PS);
System.out.println("Here: " + href.toString());*/
catch (IOException e) {
e.printStackTrace();
}
}
}
您只需在循环中把列表里的每条查询依次拼接到 URL 上即可:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Scanner;
/**
* User: lihongxu
* Date: 17/6/13
* Time: 19:42
* Comments
*/
public class Test {
public static void main(String[] args) throws IOException {
Scanner s = new Scanner(new File("D:\ref.txt"));
ArrayList<String> list = new ArrayList<String>();
while (s.hasNextLine()) {
list.add(s.nextLine());
{
for (String Z : list) {
System.out.println(Z);
}
}
}
s.close();// LIST completed
// code to download the paper from scholar
for (String query : list) {
try {
Document doc = Jsoup
.connect("https://scholar.google.com.pk/scholar?q=" + query)
.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like " +
"Gecko)" +
" Chrome/33.0.1750.152 Safari/537.36")
.get();
String title = doc.title();
System.out.println("title : " + title);
Elements links = doc.select("div.gs_ggsd").select("a[href]");
//Element = doc.select("div.gs_ggs gs_fl").first();
for (Element link : links) {
//System.out.println("\nlink : " + link.attr("href"));
URL website = new URL(link.attr("href"));
ReadableByteChannel rbc = Channels.newChannel(website.openStream());
FileOutputStream fos = new FileOutputStream("D:\paper.pdf");
fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
}
// System.out.println("text : " + link.text());
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
我已经把一篇研究论文的参考文献存入一个列表(如下所示),并想从 Google Scholar 下载它们。通过下面给出的 URL,我已经成功下载了一篇论文。现在列表中保存了该论文的全部参考文献(共 15 条,其中至少有 5 条可以获得 PDF),我想在 Google Scholar 上逐条检索列表中的参考文献,并下载所有可用的 PDF;如果某条参考文献没有可用的 PDF,则必须显示 "Pdf is not available"。下面是我下载单个文件的代码,但我不知道该如何修改它,才能根据列表下载多篇论文。
public static void main(String[] args) throws IOException {
Scanner s = new Scanner(new File("D:\ref.txt"));
ArrayList<String> list = new ArrayList<String>();
while (s.hasNextLine()){
list.add(s.nextLine());
{
for (String Z : list)
{
System.out.println(Z);
}
}
}
//System.out.println("LISTZ:" +list);
s.close();//LIST completed
//code to download the paper from scholar
try {
//var a= doc.replace(" ","+");
Document doc = Jsoup
.connect("https://scholar.google.com.pk/scholar?q=%5B3%5D+W.+H.+Walters%2C+%E2%80%9CGoogle+scholar+coverage+of+a+multidisciplinary+field%2C%E2%80%9D+Information+Processing+%26+Management+%2C+vol.+43%2C+no.+4%2C+pp.+1121+%E2%80%93+1132%2C+July+2007.&btnG=&hl=en&as_sdt=0%2C5")
.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
.get();
String title = doc.title();
System.out.println("title : " + title);
Elements links = doc.select("div.gs_ggsd").select("a[href]");
//Element = doc.select("div.gs_ggs gs_fl").first();
for (Element link : links) {
//System.out.println("\nlink : " + link.attr("href"));
URL website = new URL(link.attr("href"));
ReadableByteChannel rbc = Channels.newChannel(website.openStream());
FileOutputStream fos = new FileOutputStream("D:\paper.pdf");
fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
}
// System.out.println("text : " + link.text());
}
/* ByteArrayOutputStream href = new ByteArrayOutputStream();
PrintStream PS = new PrintStream(href);
PrintStream old = System.out;
System.setOut(PS);
System.out.println("Here: " + href.toString());*/
catch (IOException e) {
e.printStackTrace();
}
}
}
您只需在循环中把列表里的每条查询依次拼接到 URL 上即可:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Scanner;
/**
* User: lihongxu
* Date: 17/6/13
* Time: 19:42
* Comments
*/
public class Test {
public static void main(String[] args) throws IOException {
Scanner s = new Scanner(new File("D:\ref.txt"));
ArrayList<String> list = new ArrayList<String>();
while (s.hasNextLine()) {
list.add(s.nextLine());
{
for (String Z : list) {
System.out.println(Z);
}
}
}
s.close();// LIST completed
// code to download the paper from scholar
for (String query : list) {
try {
Document doc = Jsoup
.connect("https://scholar.google.com.pk/scholar?q=" + query)
.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like " +
"Gecko)" +
" Chrome/33.0.1750.152 Safari/537.36")
.get();
String title = doc.title();
System.out.println("title : " + title);
Elements links = doc.select("div.gs_ggsd").select("a[href]");
//Element = doc.select("div.gs_ggs gs_fl").first();
for (Element link : links) {
//System.out.println("\nlink : " + link.attr("href"));
URL website = new URL(link.attr("href"));
ReadableByteChannel rbc = Channels.newChannel(website.openStream());
FileOutputStream fos = new FileOutputStream("D:\paper.pdf");
fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
}
// System.out.println("text : " + link.text());
} catch (IOException e) {
e.printStackTrace();
}
}
}
}