从网页中检索标题和 ID 数据并保存在 excel 文件中
Retrieving title and ID data from a webpage and saving in the excel file
我希望从网页中检索标题和 PMID (PubMed ID) 记录,并将其保存在 MS Excel 文件中。我尝试使用 R 中的 easyPubMed
库进行提取,但未能得到预期的结果。是否有其他库或包可以实现这一点?请协助我。
下面提供了输入数据和预期输出数据的示例:
代码:
library(easyPubMed)

# PubMed query restricted to the title field ([ti]): an OR-chain of ACSL1
# gene/protein synonyms followed by AND and a parenthesised group of generic
# title keywords. The keyword group is closed with ")" so that the
# parentheses in the query are balanced.
# NOTE(review): PubMed is documented to evaluate Boolean operators left to
# right, so the AND group should filter the whole preceding OR-chain --
# confirm this is the intended grouping.
my_query <- '"ACSL1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 2" [ti] OR "Long-Chain Fatty-Acid-Coenzyme A Ligase 1" [ti] OR "Long-Chain-Fatty-Acid–CoA Ligase 1" [ti] OR "Long-Chain Fatty Acid-CoA Ligase 2" [ti] OR "Long-Chain Acyl-CoA Synthetase" [ti] OR "Long-Chain Acyl-CoA Synthetase 1" [ti] OR "Long-Chain Acyl-CoA Synthetase 2" [ti] OR "Lignoceroyl-CoA Synthase" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] OR "Palmitoyl-CoA Ligase 2" [ti] OR "Acyl-CoA Synthetase 1" [ti] OR "LACS 1" [ti] OR "LACS 2" [ti] OR "LACS-1" [ti] OR "LACS-2" [ti] OR "FACL2" [ti] OR "FACL1" [ti] OR "LACS1" [ti] OR "LACS2" [ti] OR "ACS1" [ti] OR "LACS" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 1" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] AND (acyl [ti] OR CoA [ti] OR fatty [ti] OR synthetase [ti] OR Palmitoyl [ti] OR ligase [ti] OR ACSL1 [ti])'

#### To count the number of PubMed IDs ####
# Run the search and print the Count element of the result, i.e. the number
# of PubMed IDs matching the query.
my_entrez_id <- get_pubmed_ids(my_query)
my_entrez_id$Count
输入
Webpage: https://pubmed.ncbi.nlm.nih.gov/
Search String: "ACSL1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 2" [ti] OR "Long-Chain Fatty-Acid-Coenzyme A Ligase 1" [ti] OR "Long-Chain-Fatty-Acid–CoA Ligase 1" [ti] OR "Long-Chain Fatty Acid-CoA Ligase 2" [ti] OR "Long-Chain Acyl-CoA Synthetase" [ti] OR "Long-Chain Acyl-CoA Synthetase 1" [ti] OR "Long-Chain Acyl-CoA Synthetase 2" [ti] OR "Lignoceroyl-CoA Synthase" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] OR "Palmitoyl-CoA Ligase 2" [ti] OR "Acyl-CoA Synthetase 1" [ti] OR "LACS 1" [ti] OR "LACS 2" [ti] OR "LACS-1" [ti] OR "LACS-2" [ti] OR "FACL2" [ti] OR "FACL1" [ti] OR "LACS1" [ti] OR "LACS2" [ti] OR "ACS1" [ti] OR "LACS" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 1" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] AND (acyl [ti] OR CoA [ti] OR fatty [ti] OR synthetase [ti] OR Palmitoyl [ti] OR ligase [ti] OR ACSL1 [ti]
预期输出:
dput(Output)
structure(list(Title = c("The 3-ketoacyl-CoA thiolase: an engineered enzyme for carbon chain elongation of chemical compounds.",
"Potential influence of miR-192 on the efficacy of saxagliptin treatment in T2DM complicated with non-alcoholic fatty liver disease.",
"Myosteatosis in nonalcoholic fatty liver disease: An exploratory study."
), PMID = c(32830293L, 32829627L, 32828745L)), class = "data.frame", row.names = c(NA,
-3L))
您需要获取并解析查询的结果。我认为这样做应该可以:
# Run the search for my_query, then download the matching records.
my_entrez_id <- get_pubmed_ids(my_query)
my_entrez_data <- fetch_pubmed_data(my_entrez_id)

# Turn the XML into an R list that is easier to handle.
# Calls are namespace-qualified and written without %>% so the snippet does
# not depend on magrittr/tibble being attached.
my_entrez_list <- XML::xmlToList(XML::xmlParse(my_entrez_data))

# Build one row per record with purrr::map_df(), keeping only the attributes
# that we need: the article title and the PMID.
my_entrez_df <- purrr::map_df(my_entrez_list, function(x) {
  citation <- x$MedlineCitation

  # A title with inline markup is parsed into several pieces; join all of
  # them instead of keeping only the first one, which would truncate the
  # title at the first tag.
  title_node <- citation$Article$ArticleTitle
  if (is.list(title_node)) {
    title_node[[".attrs"]] <- NULL # drop XML attributes, keep the text pieces
  }
  title_parts <- unlist(title_node)

  tibble::tibble(
    Title = if (is.null(title_parts)) {
      NA_character_
    } else {
      paste(title_parts, collapse = "")
    },
    # The first element of the PMID node is its text; store it as an integer
    # to match the expected output (PMID = c(32830293L, ...)).
    PMID = as.integer(citation$PMID[[1]])
  )
})
我希望从网页中检索标题和 PMID (PubMed ID) 记录,并将其保存在 MS Excel 文件中。我尝试使用 R 中的 easyPubMed
库进行提取,但未能得到预期的结果。是否有其他库或包可以实现这一点?请协助我。
下面提供了输入数据和预期输出数据的示例:
代码:
library(easyPubMed)

# PubMed query restricted to the title field ([ti]): an OR-chain of ACSL1
# gene/protein synonyms followed by AND and a parenthesised group of generic
# title keywords. The keyword group is closed with ")" so that the
# parentheses in the query are balanced.
# NOTE(review): PubMed is documented to evaluate Boolean operators left to
# right, so the AND group should filter the whole preceding OR-chain --
# confirm this is the intended grouping.
my_query <- '"ACSL1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 2" [ti] OR "Long-Chain Fatty-Acid-Coenzyme A Ligase 1" [ti] OR "Long-Chain-Fatty-Acid–CoA Ligase 1" [ti] OR "Long-Chain Fatty Acid-CoA Ligase 2" [ti] OR "Long-Chain Acyl-CoA Synthetase" [ti] OR "Long-Chain Acyl-CoA Synthetase 1" [ti] OR "Long-Chain Acyl-CoA Synthetase 2" [ti] OR "Lignoceroyl-CoA Synthase" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] OR "Palmitoyl-CoA Ligase 2" [ti] OR "Acyl-CoA Synthetase 1" [ti] OR "LACS 1" [ti] OR "LACS 2" [ti] OR "LACS-1" [ti] OR "LACS-2" [ti] OR "FACL2" [ti] OR "FACL1" [ti] OR "LACS1" [ti] OR "LACS2" [ti] OR "ACS1" [ti] OR "LACS" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 1" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] AND (acyl [ti] OR CoA [ti] OR fatty [ti] OR synthetase [ti] OR Palmitoyl [ti] OR ligase [ti] OR ACSL1 [ti])'

#### To count the number of PubMed IDs ####
# Run the search and print the Count element of the result, i.e. the number
# of PubMed IDs matching the query.
my_entrez_id <- get_pubmed_ids(my_query)
my_entrez_id$Count
输入
Webpage: https://pubmed.ncbi.nlm.nih.gov/
Search String: "ACSL1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Acyl-CoA Synthetase Long Chain Family Member 1" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 2" [ti] OR "Long-Chain Fatty-Acid-Coenzyme A Ligase 1" [ti] OR "Long-Chain-Fatty-Acid–CoA Ligase 1" [ti] OR "Long-Chain Fatty Acid-CoA Ligase 2" [ti] OR "Long-Chain Acyl-CoA Synthetase" [ti] OR "Long-Chain Acyl-CoA Synthetase 1" [ti] OR "Long-Chain Acyl-CoA Synthetase 2" [ti] OR "Lignoceroyl-CoA Synthase" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] OR "Palmitoyl-CoA Ligase 2" [ti] OR "Acyl-CoA Synthetase 1" [ti] OR "LACS 1" [ti] OR "LACS 2" [ti] OR "LACS-1" [ti] OR "LACS-2" [ti] OR "FACL2" [ti] OR "FACL1" [ti] OR "LACS1" [ti] OR "LACS2" [ti] OR "ACS1" [ti] OR "LACS" [ti] OR "Fatty-Acid-Coenzyme A Ligase, Long-Chain 1" [ti] OR "Palmitoyl-CoA Ligase 1" [ti] AND (acyl [ti] OR CoA [ti] OR fatty [ti] OR synthetase [ti] OR Palmitoyl [ti] OR ligase [ti] OR ACSL1 [ti]
预期输出:
dput(Output)
structure(list(Title = c("The 3-ketoacyl-CoA thiolase: an engineered enzyme for carbon chain elongation of chemical compounds.",
"Potential influence of miR-192 on the efficacy of saxagliptin treatment in T2DM complicated with non-alcoholic fatty liver disease.",
"Myosteatosis in nonalcoholic fatty liver disease: An exploratory study."
), PMID = c(32830293L, 32829627L, 32828745L)), class = "data.frame", row.names = c(NA,
-3L))
您需要获取并解析查询的结果。我认为这样做应该可以:
# Run the search for my_query, then download the matching records.
my_entrez_id <- get_pubmed_ids(my_query)
my_entrez_data <- fetch_pubmed_data(my_entrez_id)

# Turn the XML into an R list that is easier to handle.
# Calls are namespace-qualified and written without %>% so the snippet does
# not depend on magrittr/tibble being attached.
my_entrez_list <- XML::xmlToList(XML::xmlParse(my_entrez_data))

# Build one row per record with purrr::map_df(), keeping only the attributes
# that we need: the article title and the PMID.
my_entrez_df <- purrr::map_df(my_entrez_list, function(x) {
  citation <- x$MedlineCitation

  # A title with inline markup is parsed into several pieces; join all of
  # them instead of keeping only the first one, which would truncate the
  # title at the first tag.
  title_node <- citation$Article$ArticleTitle
  if (is.list(title_node)) {
    title_node[[".attrs"]] <- NULL # drop XML attributes, keep the text pieces
  }
  title_parts <- unlist(title_node)

  tibble::tibble(
    Title = if (is.null(title_parts)) {
      NA_character_
    } else {
      paste(title_parts, collapse = "")
    },
    # The first element of the PMID node is its text; store it as an integer
    # to match the expected output (PMID = c(32830293L, ...)).
    PMID = as.integer(citation$PMID[[1]])
  )
})