如何将 PubMed 中的 MeSH 主题词写入数据框
How to write MeSH terms from PubMed into a data frame
我最近下载了 RISmed 软件包,并用它从 PubMed 下载了不到 500,000 篇文章摘要,现在正尝试将这些数据存入数据框。在尝试加入 MeSH(医学主题词)数据时遇到了问题,收到如下错误消息:
'Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, : arguments imply differing number of rows:'
有没有办法将这些信息嵌套到数据框中进行分析?
# Reproduction of the failing attempt: query PubMed through RISmed and try to
# put every extracted field, including Mesh, into a base data.frame.
# Search topic handed to EUtilsSummary().
RCT_topic <- 'randomized clinical trial'
# Build the query for 2016-2017 with retmax=100.
RCT_query <- EUtilsSummary(RCT_topic, mindate=2016, maxdate=2017, retmax=100)
summary(RCT_query)
# Fetch the records matching the query.
RCT_records <- EUtilsGet(RCT_query)
# NOTE: this data.frame() call is the one that raises
# "arguments imply differing number of rows". Presumably Mesh() yields one
# data frame per record with a varying number of rows, which data.frame()
# cannot combine with the other columns.
RCT_data <- data.frame('PMID'=PMID(RCT_records),
'Title'=ArticleTitle(RCT_records),
'Abstract'=AbstractText(RCT_records),
'YearPublished'=YearPubmed(RCT_records),
'Month.Published'=MonthPubmed(RCT_records),
'Country'=Country(RCT_records),
'Grant'=GrantID(RCT_records),
'Acronym'=Acronym(RCT_records),
'Agency'=Agency(RCT_records),
'Mesh'=Mesh(RCT_records))
考虑使用 tibble:
library(RISmed)
library(dplyr) # tibble and other functions
# Query PubMed through RISmed and collect the extracted fields in a tibble.
# Unlike data.frame(), a tibble can hold a list in a column, so the Mesh
# results can be stored next to the other fields.
RCT_topic <- 'randomized clinical trial'
RCT_query <- EUtilsSummary(
  RCT_topic,
  mindate = 2016, maxdate = 2017, retmax = 100
)
summary(RCT_query)
RCT_records <- EUtilsGet(RCT_query)

# tibble() replaces the deprecated data_frame(); dplyr re-exports it, so no
# additional library() call is needed.
RCT_data <- tibble(
  PMID = PMID(RCT_records),
  Title = ArticleTitle(RCT_records),
  Abstract = AbstractText(RCT_records),
  YearPublished = YearPubmed(RCT_records),
  Month.Published = MonthPubmed(RCT_records),
  Country = Country(RCT_records),
  Grant = GrantID(RCT_records),
  Acronym = Acronym(RCT_records),
  Agency = Agency(RCT_records),
  Mesh = Mesh(RCT_records) # stored as a list-column
)
`Mesh` 列现在是一个由数据框组成的列表(list-column):
# Show only the identifier and the Mesh column.
RCT_data %>% select(PMID, Mesh)
# # A tibble: 100 x 2
# PMID Mesh
# <chr> <list>
# 1 29288775 <data.frame [21 × 2]>
# 2 29288545 <data.frame [19 × 2]>
# 3 29288510 <data.frame [15 × 2]>
# 4 29288507 <data.frame [19 × 2]>
# 5 29288478 <data.frame [16 × 2]>
# 6 29288309 <data.frame [19 × 2]>
# 7 29288191 <data.frame [11 × 2]>
# 8 29288190 <data.frame [23 × 2]>
# 9 29288184 <data.frame [21 × 2]>
# 10 29288175 <data.frame [12 × 2]>
# # ... with 90 more rows
我最近下载了 RISmed 软件包,并用它从 PubMed 下载了不到 500,000 篇文章摘要,现在正尝试将这些数据存入数据框。在尝试加入 MeSH(医学主题词)数据时遇到了问题,收到如下错误消息:
'Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, : arguments imply differing number of rows:'
有没有办法将这些信息嵌套到数据框中进行分析?
# Reproduction of the failing attempt: query PubMed through RISmed and try to
# put every extracted field, including Mesh, into a base data.frame.
# Search topic handed to EUtilsSummary().
RCT_topic <- 'randomized clinical trial'
# Build the query for 2016-2017 with retmax=100.
RCT_query <- EUtilsSummary(RCT_topic, mindate=2016, maxdate=2017, retmax=100)
summary(RCT_query)
# Fetch the records matching the query.
RCT_records <- EUtilsGet(RCT_query)
# NOTE: this data.frame() call is the one that raises
# "arguments imply differing number of rows". Presumably Mesh() yields one
# data frame per record with a varying number of rows, which data.frame()
# cannot combine with the other columns.
RCT_data <- data.frame('PMID'=PMID(RCT_records),
'Title'=ArticleTitle(RCT_records),
'Abstract'=AbstractText(RCT_records),
'YearPublished'=YearPubmed(RCT_records),
'Month.Published'=MonthPubmed(RCT_records),
'Country'=Country(RCT_records),
'Grant'=GrantID(RCT_records),
'Acronym'=Acronym(RCT_records),
'Agency'=Agency(RCT_records),
'Mesh'=Mesh(RCT_records))
考虑使用 tibble:
library(RISmed)
library(dplyr) # tibble and other functions
# Query PubMed through RISmed and collect the extracted fields in a tibble.
# Unlike data.frame(), a tibble can hold a list in a column, so the Mesh
# results can be stored next to the other fields.
RCT_topic <- 'randomized clinical trial'
RCT_query <- EUtilsSummary(
  RCT_topic,
  mindate = 2016, maxdate = 2017, retmax = 100
)
summary(RCT_query)
RCT_records <- EUtilsGet(RCT_query)

# tibble() replaces the deprecated data_frame(); dplyr re-exports it, so no
# additional library() call is needed.
RCT_data <- tibble(
  PMID = PMID(RCT_records),
  Title = ArticleTitle(RCT_records),
  Abstract = AbstractText(RCT_records),
  YearPublished = YearPubmed(RCT_records),
  Month.Published = MonthPubmed(RCT_records),
  Country = Country(RCT_records),
  Grant = GrantID(RCT_records),
  Acronym = Acronym(RCT_records),
  Agency = Agency(RCT_records),
  Mesh = Mesh(RCT_records) # stored as a list-column
)
`Mesh` 列现在是一个由数据框组成的列表(list-column):
# Show only the identifier and the Mesh column.
RCT_data %>% select(PMID, Mesh)
# # A tibble: 100 x 2
# PMID Mesh
# <chr> <list>
# 1 29288775 <data.frame [21 × 2]>
# 2 29288545 <data.frame [19 × 2]>
# 3 29288510 <data.frame [15 × 2]>
# 4 29288507 <data.frame [19 × 2]>
# 5 29288478 <data.frame [16 × 2]>
# 6 29288309 <data.frame [19 × 2]>
# 7 29288191 <data.frame [11 × 2]>
# 8 29288190 <data.frame [23 × 2]>
# 9 29288184 <data.frame [21 × 2]>
# 10 29288175 <data.frame [12 × 2]>
# # ... with 90 more rows