在解释 R 中的警告时需要帮助
Need help in intrepreting a warning in R
我在 R 中编写了一个函数,它解析来自数据帧的参数,并输出旧数据帧 + 一个包含每行统计信息的新列。
我收到以下警告:
警告信息:
在 [[.data.frame
(xx, sxx[j]) 中:
不鼓励 'exact' 以外的命名参数
老实说,我不确定这是什么意思。我对结果进行了抽查,对我来说似乎没问题。
函数本身很长,如果需要更好地回答问题,我会 post 它。
编辑:
这是一个示例数据框:
my_df<- data.frame('ALT'= c('A,C', 'A,G'),
'Sample1'= c('1/1:35,3,0,35,3,35:1:1:0:0,1,0', './.:0,0,0,0,0,0:0:0:0:0,0,0'),
'Sample2'= c('2/2:188,188,188,33,33,0:11:11:0:0,0,11', '1/1:255,99,0,255,99,255:33:33:0:0,33,0'),
'Sample3'= c('1/1:219,69,0,219,69,219:23:23:0:0,23,0', '0/1:36,0,78,48,87,120:7:3:0:4,3,0'))
这是函数:
multi_allelic_filter_v2<- function(in_vcf, start_col, end_col, threshold=1){
#input: must have gone through biallelic_assessment first
table0<- in_vcf
#ALT_alleles is the number of alt alleles with coverage > threshold across samples
#The following function calculates coverage across samples for a single allele
single_allele_tot_cov_count<- function(list_of_unparsed_cov,
allele_pos){
single_allele_coverage_count<- 0
for (i in 1:length(list_of_unparsed_cov)) { # i is each group of coverages/sample
single_allele_coverage_count<- single_allele_coverage_count+
as.numeric(strsplit(as.character(list_of_unparsed_cov[i]),
split= ',')[[1]])[allele_pos]}
return(single_allele_coverage_count)}
#single row function
#Now we need to reiterate on each ALT allele in the row
single_row_assessment<- function(single_row){
# No. of alternative alleles over threshold
alt_alleles0 <- 0
if (single_row$is_biallelic==TRUE){
alt_alleles0<- 1
} else {
alt_coverages <- numeric() #coverages across sample of each ALT allele
altcovs_unparsed<- character() #Unparsed coverages from each sample
for (i in start_col:end_col) {
#Let's fill altcovs_unparsed
altcovs_unparsed<- c(altcovs_unparsed,
strsplit(x = as.character(single_row[1,i]), split = ':')[[1]][6])}
#Now let's calculate alt_coverages
for (i in 1:lengths(strsplit(as.character(
single_row$ALT),',',fixed = TRUE))) {
alt_coverages<- c(alt_coverages, single_allele_tot_cov_count(
list_of_unparsed_cov = altcovs_unparsed, allele_pos = i+1))}
#Now, let's see how many ALT alleles are over threshold
alt_alleles0<- sum(alt_coverages>threshold)}
return(alt_alleles0)}
#Now, let's reiterate across each row:
#ALT_alleles is no. of alt alleles with coverage >threshold across samples
table0$ALT_alleles<- -99 # Just as a marker, to make sure function works
for (i in 1:nrow(table0)){
table0[i,'ALT_alleles'] <- single_row_assessment(single_row = table0[i,])}
#Now we now how many ALT alleles >= threshold coverage are in each SNP
return(table0)}
基本上,在以下行中:
'1/1:219,69,0,219,69,219:23:23:0:0,23,0'
字段由“:”分隔,我对最后一个字段的最后两个数字(23 和 0)感兴趣;在每一行中,我想对这些位置的所有数字求和(两个单独的总和),并输出 "sums" 中有多少超过阈值。希望它有意义...
好的,
我在同一台计算机上重新运行具有相同数据集的脚本(同一项目,然后是新项目),然后在另一台计算机上再次运行它,无法收到警告无论如何再次。我不确定发生了什么,结果似乎是正确的。没关系。无论如何感谢您的评论和建议
我在 R 中编写了一个函数,它解析来自数据帧的参数,并输出旧数据帧 + 一个包含每行统计信息的新列。
我收到以下警告:
警告信息:
在 [[.data.frame
(xx, sxx[j]) 中:
不鼓励 'exact' 以外的命名参数
老实说,我不确定这是什么意思。我对结果进行了抽查,对我来说似乎没问题。 函数本身很长,如果需要更好地回答问题,我会 post 它。 编辑:
这是一个示例数据框:
my_df<- data.frame('ALT'= c('A,C', 'A,G'),
'Sample1'= c('1/1:35,3,0,35,3,35:1:1:0:0,1,0', './.:0,0,0,0,0,0:0:0:0:0,0,0'),
'Sample2'= c('2/2:188,188,188,33,33,0:11:11:0:0,0,11', '1/1:255,99,0,255,99,255:33:33:0:0,33,0'),
'Sample3'= c('1/1:219,69,0,219,69,219:23:23:0:0,23,0', '0/1:36,0,78,48,87,120:7:3:0:4,3,0'))
这是函数:
multi_allelic_filter_v2<- function(in_vcf, start_col, end_col, threshold=1){
#input: must have gone through biallelic_assessment first
table0<- in_vcf
#ALT_alleles is the number of alt alleles with coverage > threshold across samples
#The following function calculates coverage across samples for a single allele
single_allele_tot_cov_count<- function(list_of_unparsed_cov,
allele_pos){
single_allele_coverage_count<- 0
for (i in 1:length(list_of_unparsed_cov)) { # i is each group of coverages/sample
single_allele_coverage_count<- single_allele_coverage_count+
as.numeric(strsplit(as.character(list_of_unparsed_cov[i]),
split= ',')[[1]])[allele_pos]}
return(single_allele_coverage_count)}
#single row function
#Now we need to reiterate on each ALT allele in the row
single_row_assessment<- function(single_row){
# No. of alternative alleles over threshold
alt_alleles0 <- 0
if (single_row$is_biallelic==TRUE){
alt_alleles0<- 1
} else {
alt_coverages <- numeric() #coverages across sample of each ALT allele
altcovs_unparsed<- character() #Unparsed coverages from each sample
for (i in start_col:end_col) {
#Let's fill altcovs_unparsed
altcovs_unparsed<- c(altcovs_unparsed,
strsplit(x = as.character(single_row[1,i]), split = ':')[[1]][6])}
#Now let's calculate alt_coverages
for (i in 1:lengths(strsplit(as.character(
single_row$ALT),',',fixed = TRUE))) {
alt_coverages<- c(alt_coverages, single_allele_tot_cov_count(
list_of_unparsed_cov = altcovs_unparsed, allele_pos = i+1))}
#Now, let's see how many ALT alleles are over threshold
alt_alleles0<- sum(alt_coverages>threshold)}
return(alt_alleles0)}
#Now, let's reiterate across each row:
#ALT_alleles is no. of alt alleles with coverage >threshold across samples
table0$ALT_alleles<- -99 # Just as a marker, to make sure function works
for (i in 1:nrow(table0)){
table0[i,'ALT_alleles'] <- single_row_assessment(single_row = table0[i,])}
#Now we now how many ALT alleles >= threshold coverage are in each SNP
return(table0)}
基本上,在以下行中: '1/1:219,69,0,219,69,219:23:23:0:0,23,0' 字段由“:”分隔,我对最后一个字段的最后两个数字(23 和 0)感兴趣;在每一行中,我想对这些位置的所有数字求和(两个单独的总和),并输出 "sums" 中有多少超过阈值。希望它有意义...
好的,
我在同一台计算机上重新运行具有相同数据集的脚本(同一项目,然后是新项目),然后在另一台计算机上再次运行它,无法收到警告无论如何再次。我不确定发生了什么,结果似乎是正确的。没关系。无论如何感谢您的评论和建议