跳过空数据帧并产生输出
skip the empty dataframes and produce the output
sessionInfo()
R version 3.2.0 (2015-04-16)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252
[3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C
[5] LC_TIME=German_Germany.1252
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] WriteXLS_3.5.1 tidyr_0.2.0 scales_0.2.4 gridExtra_0.9.1
[5] ggplot2_1.0.1 RPostgreSQL_0.4 DBI_0.3.1
loaded via a namespace (and not attached):
[1] Rcpp_0.11.6 assertthat_0.1 dplyr_0.4.1 digest_0.6.8
[5] MASS_7.3-40 plyr_1.8.2 gtable_0.1.2 magrittr_1.5
[9] stringi_0.4-1 lazyeval_0.1.10 reshape2_1.4.1 proto_0.3-10
[13] tools_3.2.0 stringr_1.0.0 munsell_0.4.2 parallel_3.2.0
[17] colorspace_1.2-6
#
library(RPostgreSQL)
library(ggplot2)
library(gridExtra)
library(scales)
library(tidyr)
# Shared axis-text style used by the plot below: bold, black, 12 pt.
# NOTE(review): the variable name says blue/italic/16, which does not match
# the values actually set here.
blue.bold.italic.16.text <- element_text(
  face = "bold",
  color = "black",
  size = 12
)
#
假设有四台机器并行运转、生产产品。下面的每个数据框(l1、l2、l3、l4)表示对应机器每小时生产的件数(实际数据是我用 RPostgreSQL 从数据库中读取的,这里只是展示其结构的示例)。
# Sample data: hourly piece counts for the four machines (l1 .. l4).
# The four samples hold the same 24 readings, so the frame is built once
# and copied.
l1 <- structure(
  list(
    # 24 consecutive hour stamps, stored as seconds and spaced 3600 s apart
    hours = structure(
      1434081600 + 3600 * (0:23),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    # pieces counted in each of those hours
    count = c(
      25, 29, 28, 32, 33, 13, 33, 29, 32, 33, 27, 34,
      25, 30, 13, 24, 26, 33, 40, 34, 26, 30, 22, 30
    )
  ),
  row.names = c(NA, 24L),
  class = "data.frame"
)
l2 <- l1
l3 <- l1
l4 <- l1
#
这是我用来绘图(生成输出)的脚本:
# Build the faceted bar chart from whichever machines actually delivered data.
# A machine that is down arrives as NULL or as an empty data.frame; passing
# such an object to merge() stops with "'by' must specify a uniquely valid
# column", so those machines are dropped before merging.
machines <- list("L 1" = l1, "L 2" = l2, "L 3" = l3, "L 4" = l4)
has_rows <- vapply(
  machines,
  function(d) is.data.frame(d) && nrow(d) > 0L,
  logical(1)
)
if (!any(has_rows)) {
  stop("none of the machines delivered any data; nothing to plot",
       call. = FALSE)
}

# Name each count column after its machine up front, so the merged columns
# are unique and no positional renaming is needed afterwards.
labelled <- Map(
  function(d, label) setNames(d[c("hours", "count")], c("hours", label)),
  machines[has_rows],
  names(machines)[has_rows]
)

# Inner join on the hour stamp, one machine after the other.
df <- Reduce(function(x, y) merge(x, y, by = "hours"), labelled)

# Long format: every column except `hours` holds one machine's counts.
pd <- gather(df, "Ls", "count", -hours)

# One bar per hour, one facet per machine.
q <- ggplot(pd, aes(x = hours, y = count)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "none") +
  xlab("Time") +
  ylab("No.Of Pecies") +
  ggtitle("my sample") +
  scale_y_continuous(breaks = seq(0, 45, by = 5)) +
  theme(axis.text = blue.bold.italic.16.text) +
  scale_x_datetime(
    breaks = date_breaks("2 hour"),
    minor_breaks = date_breaks("2 hour"),
    labels = date_format("%H")
  ) +
  theme(axis.text.x = element_text(angle = 0)) +
  facet_grid(~ Ls)
# 当 4 台机器全部在工作时,一切正常:运行上面的脚本即可得到所需的输出。
但如果有某台机器没有工作,对应的数据框就是空的,这时运行我的脚本就会报错。
@ df <- merge(l1,l2, by="hours")
df <- merge(df,l3, by="hours")
df <- merge(df,l4, by="hours")
Error in fix.by(by.y, y) : 'by' must specify a uniquely valid column
下一个错误在
pd <- gather(df, 'Ls', 'count', 2:5)
如何跳过空的数据框,使脚本无论有几台机器在运转(2 台、3 台还是 4 台)都能成功运行并生成输出?
从报错信息来看,导致报错的 data.frame 既没有行也没有列,很可能是 NULL。因此,最简单的方法是检查这种情况:如果 data.frame 为 NULL,就创建一个可以被 merge() 和 gather() 处理的虚拟对象。
我会做的(不是说这是最好的方法)是
# Merge the per-machine data frames into one wide table, tolerating machines
# that delivered nothing (NULL or a data.frame without rows).

# for easier looping, put your data.frames in a list
l <- list(l1, l2, l3, l4)

# Zero-row placeholder with the same columns as a real machine frame.
# Because it has no rows, nothing has to be trimmed off after the merge.
dummy <- data.frame(
  hours = structure(numeric(0), class = c("POSIXct", "POSIXt"), tzone = ""),
  count = numeric(0)
)

# Replace every frame without rows by the placeholder; NROW() is 0 for NULL
# as well as for empty data.frames.
is_empty <- vapply(l, function(d) NROW(d) == 0L, logical(1))
l[is_empty] <- list(dummy)

# Name each count column after its machine before merging, so the merge does
# not have to disambiguate repeated "count" columns.
machine_labels <- paste("L", seq_along(l))
renamed <- Map(
  function(d, label) setNames(d[c("hours", "count")], c("hours", label)),
  l,
  machine_labels
)

# Full outer join on the hour stamp: a machine without data contributes a
# column that is NA throughout.
df <- Reduce(
  function(x, y) merge(x, y, by = "hours", all = TRUE),
  renamed
)
有改进的余地,但至少它应该显示结果,无论机器是否在运行:
# Example: machine 2 delivered no data.
# NOTE(review): presumably meant to be run before the merge code so that its
# effect can be observed - confirm.
l2 <- NULL
一种替代方法是跳过所有合并并直接堆叠数据集。您只需要先将 Ls
列添加到每个单独的数据集。
# Tag each machine's rows with its label before stacking.
# A machine without data (NULL or a data.frame without rows) is left
# untouched: `$<-` would turn NULL into a one-element list and raises an error
# on a zero-row data.frame.
add_machine_label <- function(d, label) {
  if (NROW(d) > 0L) {
    d$Ls <- label
  }
  d
}

l1 <- add_machine_label(l1, "L 1")
l2 <- add_machine_label(l2, "L 2")
l3 <- add_machine_label(l3, "L 3")
l4 <- add_machine_label(l4, "L 4")
然后您可以使用 dplyr 中的 bind_rows 来生成长格式数据集 pd。
# Stack the per-machine frames row-wise. dplyr is not attached by the
# library() calls of this script, so the function is namespace-qualified.
dplyr::bind_rows(l1, l2, l3, l4)
Source: local data frame [96 x 3]
hours count Ls
1 2015-06-11 21:00:00 25 L 1
2 2015-06-11 22:00:00 29 L 1
3 2015-06-11 23:00:00 28 L 1
4 2015-06-12 00:00:00 32 L 1
5 2015-06-12 01:00:00 33 L 1
6 2015-06-12 02:00:00 13 L 1
7 2015-06-12 03:00:00 33 L 1
8 2015-06-12 04:00:00 29 L 1
9 2015-06-12 05:00:00 32 L 1
10 2015-06-12 06:00:00 33 L 1
.. ... ... ...
这种方法的优点是您绑定的对象之一可以是空的 data.frame
或 NULL
,它仍然有效。
示例空 data.frame
:
# An empty data.frame stands in for a machine that delivered no rows.
l4.2 <- data.frame()
# dplyr is not attached by this script, hence the namespace qualification.
dplyr::bind_rows(l1, l2, l3, l4.2)
Source: local data frame [72 x 3]
hours count Ls
1 2015-06-11 21:00:00 25 L 1
2 2015-06-11 22:00:00 29 L 1
3 2015-06-11 23:00:00 28 L 1
4 2015-06-12 00:00:00 32 L 1
5 2015-06-12 01:00:00 33 L 1
6 2015-06-12 02:00:00 13 L 1
7 2015-06-12 03:00:00 33 L 1
8 2015-06-12 04:00:00 29 L 1
9 2015-06-12 05:00:00 32 L 1
10 2015-06-12 06:00:00 33 L 1
.. ... ... ...
示例NULL
:
# NULL stands in for a machine that delivered nothing at all.
l4.3 <- NULL
# dplyr is not attached by this script, hence the namespace qualification.
dplyr::bind_rows(l1, l2, l3, l4.3)
Source: local data frame [72 x 3]
hours count Ls
1 2015-06-11 21:00:00 25 L 1
2 2015-06-11 22:00:00 29 L 1
3 2015-06-11 23:00:00 28 L 1
4 2015-06-12 00:00:00 32 L 1
5 2015-06-12 01:00:00 33 L 1
6 2015-06-12 02:00:00 13 L 1
7 2015-06-12 03:00:00 33 L 1
8 2015-06-12 04:00:00 29 L 1
9 2015-06-12 05:00:00 32 L 1
10 2015-06-12 06:00:00 33 L 1
.. ... ... ...
sessionInfo()
R version 3.2.0 (2015-04-16)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252
[3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C
[5] LC_TIME=German_Germany.1252
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] WriteXLS_3.5.1 tidyr_0.2.0 scales_0.2.4 gridExtra_0.9.1
[5] ggplot2_1.0.1 RPostgreSQL_0.4 DBI_0.3.1
loaded via a namespace (and not attached):
[1] Rcpp_0.11.6 assertthat_0.1 dplyr_0.4.1 digest_0.6.8
[5] MASS_7.3-40 plyr_1.8.2 gtable_0.1.2 magrittr_1.5
[9] stringi_0.4-1 lazyeval_0.1.10 reshape2_1.4.1 proto_0.3-10
[13] tools_3.2.0 stringr_1.0.0 munsell_0.4.2 parallel_3.2.0
[17] colorspace_1.2-6
#
library(RPostgreSQL)
library(ggplot2)
library(gridExtra)
library(scales)
library(tidyr)
# Shared axis-text style used by the plot below: bold, black, 12 pt.
# NOTE(review): the variable name says blue/italic/16, which does not match
# the values actually set here.
blue.bold.italic.16.text <- element_text(
  face = "bold",
  color = "black",
  size = 12
)
#
假设有四台机器并行运转、生产产品。下面的每个数据框(l1、l2、l3、l4)表示对应机器每小时生产的件数(实际数据是我用 RPostgreSQL 从数据库中读取的,这里只是展示其结构的示例)。
# Sample data: hourly piece counts for the four machines (l1 .. l4).
# The four samples hold the same 24 readings, so the frame is built once
# and copied.
l1 <- structure(
  list(
    # 24 consecutive hour stamps, stored as seconds and spaced 3600 s apart
    hours = structure(
      1434081600 + 3600 * (0:23),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    # pieces counted in each of those hours
    count = c(
      25, 29, 28, 32, 33, 13, 33, 29, 32, 33, 27, 34,
      25, 30, 13, 24, 26, 33, 40, 34, 26, 30, 22, 30
    )
  ),
  row.names = c(NA, 24L),
  class = "data.frame"
)
l2 <- l1
l3 <- l1
l4 <- l1
# Build the faceted bar chart from whichever machines actually delivered data.
# A machine that is down arrives as NULL or as an empty data.frame; passing
# such an object to merge() stops with "'by' must specify a uniquely valid
# column", so those machines are dropped before merging.
machines <- list("L 1" = l1, "L 2" = l2, "L 3" = l3, "L 4" = l4)
has_rows <- vapply(
  machines,
  function(d) is.data.frame(d) && nrow(d) > 0L,
  logical(1)
)
if (!any(has_rows)) {
  stop("none of the machines delivered any data; nothing to plot",
       call. = FALSE)
}

# Name each count column after its machine up front, so the merged columns
# are unique and no positional renaming is needed afterwards.
labelled <- Map(
  function(d, label) setNames(d[c("hours", "count")], c("hours", label)),
  machines[has_rows],
  names(machines)[has_rows]
)

# Inner join on the hour stamp, one machine after the other.
df <- Reduce(function(x, y) merge(x, y, by = "hours"), labelled)

# Long format: every column except `hours` holds one machine's counts.
pd <- gather(df, "Ls", "count", -hours)

# One bar per hour, one facet per machine.
q <- ggplot(pd, aes(x = hours, y = count)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "none") +
  xlab("Time") +
  ylab("No.Of Pecies") +
  ggtitle("my sample") +
  scale_y_continuous(breaks = seq(0, 45, by = 5)) +
  theme(axis.text = blue.bold.italic.16.text) +
  scale_x_datetime(
    breaks = date_breaks("2 hour"),
    minor_breaks = date_breaks("2 hour"),
    labels = date_format("%H")
  ) +
  theme(axis.text.x = element_text(angle = 0)) +
  facet_grid(~ Ls)
# 当 4 台机器全部在工作时,一切正常:运行上面的脚本即可得到所需的输出。
但如果有某台机器没有工作,对应的数据框就是空的,这时运行我的脚本就会报错。
@ df <- merge(l1,l2, by="hours")
df <- merge(df,l3, by="hours")
df <- merge(df,l4, by="hours")
Error in fix.by(by.y, y) : 'by' must specify a uniquely valid column
下一个错误在
pd <- gather(df, 'Ls', 'count', 2:5)
如何跳过空的数据框,使脚本无论有几台机器在运转(2 台、3 台还是 4 台)都能成功运行并生成输出?
从报错信息来看,导致报错的 data.frame 既没有行也没有列,很可能是 NULL。因此,最简单的方法是检查这种情况:如果 data.frame 为 NULL,就创建一个可以被 merge() 和 gather() 处理的虚拟对象。
我会做的(不是说这是最好的方法)是
# Merge the per-machine data frames into one wide table, tolerating machines
# that delivered nothing (NULL or a data.frame without rows).

# for easier looping, put your data.frames in a list
l <- list(l1, l2, l3, l4)

# Zero-row placeholder with the same columns as a real machine frame.
# Because it has no rows, nothing has to be trimmed off after the merge.
dummy <- data.frame(
  hours = structure(numeric(0), class = c("POSIXct", "POSIXt"), tzone = ""),
  count = numeric(0)
)

# Replace every frame without rows by the placeholder; NROW() is 0 for NULL
# as well as for empty data.frames.
is_empty <- vapply(l, function(d) NROW(d) == 0L, logical(1))
l[is_empty] <- list(dummy)

# Name each count column after its machine before merging, so the merge does
# not have to disambiguate repeated "count" columns.
machine_labels <- paste("L", seq_along(l))
renamed <- Map(
  function(d, label) setNames(d[c("hours", "count")], c("hours", label)),
  l,
  machine_labels
)

# Full outer join on the hour stamp: a machine without data contributes a
# column that is NA throughout.
df <- Reduce(
  function(x, y) merge(x, y, by = "hours", all = TRUE),
  renamed
)
有改进的余地,但至少它应该显示结果,无论机器是否在运行:
# Example: machine 2 delivered no data.
# NOTE(review): presumably meant to be run before the merge code so that its
# effect can be observed - confirm.
l2 <- NULL
一种替代方法是跳过所有合并并直接堆叠数据集。您只需要先将 Ls
列添加到每个单独的数据集。
# Tag each machine's rows with its label before stacking.
# A machine without data (NULL or a data.frame without rows) is left
# untouched: `$<-` would turn NULL into a one-element list and raises an error
# on a zero-row data.frame.
add_machine_label <- function(d, label) {
  if (NROW(d) > 0L) {
    d$Ls <- label
  }
  d
}

l1 <- add_machine_label(l1, "L 1")
l2 <- add_machine_label(l2, "L 2")
l3 <- add_machine_label(l3, "L 3")
l4 <- add_machine_label(l4, "L 4")
然后您可以使用 dplyr 中的 bind_rows 来生成长格式数据集 pd。
# Stack the per-machine frames row-wise. dplyr is not attached by the
# library() calls of this script, so the function is namespace-qualified.
dplyr::bind_rows(l1, l2, l3, l4)
Source: local data frame [96 x 3]
hours count Ls
1 2015-06-11 21:00:00 25 L 1
2 2015-06-11 22:00:00 29 L 1
3 2015-06-11 23:00:00 28 L 1
4 2015-06-12 00:00:00 32 L 1
5 2015-06-12 01:00:00 33 L 1
6 2015-06-12 02:00:00 13 L 1
7 2015-06-12 03:00:00 33 L 1
8 2015-06-12 04:00:00 29 L 1
9 2015-06-12 05:00:00 32 L 1
10 2015-06-12 06:00:00 33 L 1
.. ... ... ...
这种方法的优点是您绑定的对象之一可以是空的 data.frame
或 NULL
,它仍然有效。
示例空 data.frame
:
# An empty data.frame stands in for a machine that delivered no rows.
l4.2 <- data.frame()
# dplyr is not attached by this script, hence the namespace qualification.
dplyr::bind_rows(l1, l2, l3, l4.2)
Source: local data frame [72 x 3]
hours count Ls
1 2015-06-11 21:00:00 25 L 1
2 2015-06-11 22:00:00 29 L 1
3 2015-06-11 23:00:00 28 L 1
4 2015-06-12 00:00:00 32 L 1
5 2015-06-12 01:00:00 33 L 1
6 2015-06-12 02:00:00 13 L 1
7 2015-06-12 03:00:00 33 L 1
8 2015-06-12 04:00:00 29 L 1
9 2015-06-12 05:00:00 32 L 1
10 2015-06-12 06:00:00 33 L 1
.. ... ... ...
示例NULL
:
# NULL stands in for a machine that delivered nothing at all.
l4.3 <- NULL
# dplyr is not attached by this script, hence the namespace qualification.
dplyr::bind_rows(l1, l2, l3, l4.3)
Source: local data frame [72 x 3]
hours count Ls
1 2015-06-11 21:00:00 25 L 1
2 2015-06-11 22:00:00 29 L 1
3 2015-06-11 23:00:00 28 L 1
4 2015-06-12 00:00:00 32 L 1
5 2015-06-12 01:00:00 33 L 1
6 2015-06-12 02:00:00 13 L 1
7 2015-06-12 03:00:00 33 L 1
8 2015-06-12 04:00:00 29 L 1
9 2015-06-12 05:00:00 32 L 1
10 2015-06-12 06:00:00 33 L 1
.. ... ... ...