diff --git a/vldb-protocols.R b/vldb-protocols.R index 932f1027110e9ff8a87b19317d2c0868f4ddba2b..a986aab496c948c40f84b8712de9b50c4c5e7a93 100644 --- a/vldb-protocols.R +++ b/vldb-protocols.R @@ -64,11 +64,11 @@ theme(axis.title.y=element_text(vjust=0.9), axis.ticks.x=element_blank()) -read.table("10.csv", header=T, sep=",", stringsAsFactors=F, na.strings="-1") -> dd3 +read.table("13.csv", header=T, sep=",", stringsAsFactors=F, na.strings="-1") -> dd3 str(dd3) -dd3 %>% filter(tuple > 1, network=="unlimited", protocol == "native", timeout != 1) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df4 +dd3 %>% filter(tuple > 1, network=="unlimited", protocol == "native", timeout != 1, is.na(bin_chunksize) | system %in% c("netcat-prot-col-chunk-100000-snappy"), !grepl("netcat-csv-", system, fixed=T)) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df4 tuplelabels <- c("100"="10^2","1000"="10^3","10000"="10^4", "100000"="10^5", "1000000"="10^6", "10000000"="10^7") tuplebreaks <- as.numeric(names(tuplelabels)) @@ -82,6 +82,15 @@ ggplot(df4, aes(color=system, y=time_mean, x=tuple, label=round(time_mean, 2))) dev.off() +pdf("protocols-time2.pdf", width=10, height=5) + +ggplot(df4 %>% filter(tuple==1000000), aes(fill=system, y=time_mean, x=tuple, label=round(time_mean, 2))) + geom_bar(stat="identity", position="dodge") + scale_x_log10(breaks=tuplebreaks, labels=tuplelabels) + theme + ggtitle("Wall clock time") + xlab("Tuples (#, log)") + ylab("Wall clock time (s)") + +dev.off() + + + + pdf("protocols-bytes1.pdf", width=10, height=5) # bytes per db @@ -120,16 +129,17 @@ ggplot(df4, aes(color=system, y=memory_max_kb_mean, x=tuple)) + geom_line(size=1 dev.off() -#pdf("protocols-bytes1.pdf", width=10, height=5) +# protocols -dd3 %>% filter(system != "mariadb-compress", network=="unlimited", db != "mongodb" & db != "hbase" & db != "netcat", timeout != 1) %>% group_by(db, protocol, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df5 +dd3 %>% filter(system != "mariadb-compress", network=="unlimited", db != "mongodb" & db != "hbase" & db != "netcat", timeout != 1, is.na(bin_chunksize)) %>% group_by(db, protocol, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df5 -# protocols pdf("protocols-wrapper-time.pdf", width=10, height=5) + + ggplot(df5, aes(color=protocol, y=time_mean, x=tuple)) + geom_line(size=1.5) + geom_point(size=2) + scale_x_log10() + scale_y_log10() + ylab("Wall clock time (s, log)") + theme + xlab("Tuples (#, log)") + ggtitle ("Wrapper overhead") + facet_grid( ~ db) + theme( axis.text.x=element_blank()) @@ -165,3 +175,11 @@ dev.off() +pdf("protocols-network2.pdf", width=10, height=5) + +ggplot(df6, aes(fill=system, y=time_mean, x=tuple)) + geom_bar(stat="identity", position="dodge") +scale_x_log10() + ylab("Wall clock time (s)") + theme + xlab("Tuples (#, log)") + ggtitle ("Network speed impact") + facet_grid( ~ network) + theme( axis.text.x=element_blank()) + +dev.off() + + +