# vldb-protocols.R
#
# Reads benchmark measurements from a CSV file, aggregates them per
# system / protocol / network and tuple count, and writes one PDF per figure
# into the current working directory.

# Earlier exploratory analyses (disabled, kept for reference) ----

# library(dplyr)
# library(ggplot2)

# read.table("~/Desktop/secondrun.csv", header=F, sep=",", stringsAsFactors=F) -> dd
# names(dd) <- c('system', 'network', 'tuple', 'run', 'time', "bytes")

# # dd %>% filter(network == "unlimited") %>% select(system,tuple,time) %>% group_by(system,tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# # limits <- aes(ymax = mean + se, ymin=mean - se)

# # p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# # print(p)

# dd %>% select(system,network,tuple,time) %>% group_by(system,network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# limits <- aes(ymax = mean + se, ymin=mean - se)

# p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25) + geom_line() + facet_grid( . ~ network)
# print(p)

# dd %>% filter(network=="unlimited") %>% select(system,tuple,bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df2

# p <- ggplot(df2, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# print(p)

# read.table("~/Desktop/test.csv", header=F, sep=",", stringsAsFactors=F) -> dd2
# names(dd2) <- c('system', 'db', 'protocol', 'network', 'throughput', 'latency', 'tuple', 'run', 'time', "bytes")

# dd2 %>% filter(network=="unlimited", tuple == 10000000) %>% select(db,protocol,time,bytes) %>% group_by(db,protocol) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df3

# ggplot(df3, aes(fill=db, y=time_mean, x=db)) + geom_bar(stat="identity") + facet_grid( . ~ protocol)

# ggplot(df3, aes(fill=db, y=time_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)

# ggplot(df3, aes(fill=db, y=bytes_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)

# Setup ----

library(dplyr)
library(ggplot2)
library(ggthemes)

# Helpers ----

# TRUE for columns that mean() and sd() can aggregate (numeric or logical).
# Character columns are skipped: summarising them only yields NA plus warnings.
is_summarisable <- function(x) {
  is.numeric(x) || is.logical(x)
}

# Summarise every aggregatable, non-grouping column of an already grouped data
# frame into `<col>_mean`, `<col>_sd` and `<col>_se` (standard error of the
# mean: sd / sqrt(group size)).
#
# NA values are NOT removed, so a group containing an NA produces NA for that
# column. The result keeps all grouping levels except the last one.
summarise_runs <- function(data) {
  data %>%
    summarise(
      across(
        where(is_summarisable),
        list(
          mean = mean,
          sd = sd,
          se = function(x) sd(x) / sqrt(n())
        )
      ),
      .groups = "drop_last"
    )
}

# Render `plot` into a PDF file at `path`.
#
# The plot is print()ed explicitly: a bare ggplot expression is only drawn by
# top-level auto-printing, which does not happen when this script is run via
# source() with default settings, leaving the PDF empty. on.exit() guarantees
# the device is closed even if rendering fails.
save_plot_pdf <- function(path, plot, width = 10, height = 5) {
  pdf(path, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# Shared look for all figures.
# NOTE: this object shares its name with ggplot2::theme(). Calls written as
# `theme(...)` still reach the function, because R skips non-function bindings
# when resolving a name in call position.
theme <- theme_few(base_size = 24) +
  theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    axis.ticks.x = element_blank()
  )

# Input data ----

input_csv <- "~/Desktop/10.csv"

# Values of "-1" in the file are read as NA.
dd3 <- read.table(
  input_csv,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  na.strings = "-1"
)

str(dd3)

# Native protocols on the unlimited network ----

df4 <- dd3 %>%
  filter(
    tuple > 1,
    network == "unlimited",
    protocol == "native",
    timeout != 1
  ) %>%
  group_by(system, tuple) %>%
  summarise_runs()

tuplelabels <- c(
  "100" = "10^2",
  "1000" = "10^3",
  "10000" = "10^4",
  "100000" = "10^5",
  "1000000" = "10^6",
  "10000000" = "10^7"
)
tuplebreaks <- as.numeric(names(tuplelabels))

# time per db as tuples increase
save_plot_pdf(
  "protocols-time.pdf",
  ggplot(
    df4,
    aes(color = system, y = time_mean, x = tuple, label = round(time_mean, 2))
  ) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    geom_errorbar(
      aes(ymax = time_mean + time_se, ymin = time_mean - time_se),
      width = 0.1,
      size = 1
    ) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s, log)")
)

# bytes per db
save_plot_pdf(
  "protocols-bytes1.pdf",
  ggplot(df4, aes(color = system, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    ggtitle("Bytes Transferred") +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#, log)")
)

# bytes per db as dodged bars on a linear y axis, larger result sets only
save_plot_pdf(
  "protocols-bytes2.pdf",
  ggplot(
    df4 %>% filter(tuple > 1000),
    aes(fill = system, y = bytes_mean, x = tuple)
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#)") +
    ggtitle("Bytes transferred")
)

# packets
save_plot_pdf(
  "protocols-packets.pdf",
  ggplot(df4, aes(color = system, y = packets_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Packets sent (#, log)") +
    ggtitle("Packets sent")
)

# client memory
save_plot_pdf(
  "protocols-memory.pdf",
  ggplot(df4, aes(color = system, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Memory Footprint")
)

#pdf("protocols-bytes1.pdf", width=10, height=5)

# protocols ----

df5 <- dd3 %>%
  filter(
    system != "mariadb-compress",
    network == "unlimited",
    db != "mongodb" & db != "hbase" & db != "netcat",
    timeout != 1
  ) %>%
  group_by(db, protocol, tuple) %>%
  summarise_runs()

save_plot_pdf(
  "protocols-wrapper-time.pdf",
  ggplot(df5, aes(color = protocol, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

save_plot_pdf(
  "protocols-wrapper-bytes.pdf",
  ggplot(df5, aes(color = protocol, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Bytes (#, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

save_plot_pdf(
  "protocols-wrapper-memory.pdf",
  ggplot(df5, aes(color = protocol, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

# networks ----

df6 <- dd3 %>%
  filter(protocol == "native", db != "netcat", timeout != 1) %>%
  filter(network %in% c("unlimited", "gigabitethld", "10mbitethhd")) %>%
  group_by(system, network, tuple) %>%
  summarise_runs()

save_plot_pdf(
  "protocols-network.pdf",
  ggplot(df6, aes(color = network, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ system) +
    theme(axis.text.x = element_blank())
)