# ---------------------------------------------------------------------------
# Disabled legacy analyses (kept verbatim for reference; not executed).
# ---------------------------------------------------------------------------
# library(dplyr)
# library(ggplot2)
# read.table("~/Desktop/secondrun.csv", header=F, sep=",", stringsAsFactors=F) -> dd
# names(dd) <- c('system', 'network', 'tuple', 'run', 'time', "bytes")
# # dd %>% filter(network == "unlimited") %>% select(system,tuple,time) %>% group_by(system,tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# # limits <- aes(ymax = mean + se, ymin=mean - se)
# # p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# # print(p)
# dd %>% select(system,network,tuple,time) %>% group_by(system,network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# limits <- aes(ymax = mean + se, ymin=mean - se)
# p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25) + geom_line() + facet_grid( . ~ network)
# print(p)
# dd %>% filter(network=="unlimited") %>% select(system,tuple,bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df2
# p <- ggplot(df2, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# print(p)
# read.table("~/Desktop/test.csv", header=F, sep=",", stringsAsFactors=F) -> dd2
# names(dd2) <- c('system', 'db', 'protocol', 'network', 'throughput', 'latency', 'tuple', 'run', 'time', "bytes")
# dd2 %>% filter(network=="unlimited", tuple == 10000000) %>% select(db,protocol,time,bytes) %>% group_by(db,protocol) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df3
# ggplot(df3, aes(fill=db, y=time_mean, x=db)) + geom_bar(stat="identity") + facet_grid( . ~ protocol)
# ggplot(df3, aes(fill=db, y=time_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)
# ggplot(df3, aes(fill=db, y=bytes_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)

# ---------------------------------------------------------------------------
# Active analysis: reads "13.csv", aggregates the measurements per group and
# writes one PDF per figure into the working directory.
# ---------------------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(ggthemes)

# Helpers ----

# Draw `plot` into a PDF file at `path` (size in inches).
#
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and top-level auto-printing does not happen when the script is run
# through source(). on.exit() closes the device even when drawing fails, so a
# broken plot cannot leave a PDF device open for the following figures.
# Returns `path` invisibly.
save_pdf <- function(path, plot, width = 10, height = 5) {
  force(plot) # build the plot object before a file is created
  pdf(path, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# Collapse every non-grouping column of a grouped data frame into
# <column>_mean, <column>_sd and <column>_se (sd divided by sqrt of the group
# size).
# NOTE(review): summarise_each()/funs() are deprecated in current dplyr; they
# are kept because the script is written against that older API. Consider
# summarise(across(...)) when the dplyr requirement is raised.
summarise_stats <- function(grouped) {
  summarise_each(grouped, funs(mean, sd, se = sd(.) / sqrt(n())))
}

# Shared look ----

# Base theme added to every figure. The variable shares its name with
# ggplot2::theme(); calls such as theme(...) still reach the function because
# R skips non-function bindings when resolving a call.
theme <- theme_few(base_size = 24) +
  theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    axis.ticks.x = element_blank()
  )

# Input ----

# Values of "-1" in the CSV are read as NA.
dd3 <- read.table(
  "13.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  na.strings = "-1"
)
str(dd3)

# Per-system figures ----

# Native-protocol rows on the "unlimited" network with more than one tuple.
# Rows with a bin_chunksize are dropped except for the one listed system, and
# every system whose name contains "netcat-csv-" is excluded.
df4 <- dd3 %>%
  filter(
    tuple > 1,
    network == "unlimited",
    protocol == "native",
    timeout != 1,
    is.na(bin_chunksize) |
      system %in% c("netcat-prot-col-chunk-100000-snappy"),
    !grepl("netcat-csv-", system, fixed = TRUE)
  ) %>%
  group_by(system, tuple) %>%
  summarise_stats()

# Axis breaks at powers of ten, labelled with their exponent form.
tuplelabels <- c(
  "100" = "10^2",
  "1000" = "10^3",
  "10000" = "10^4",
  "100000" = "10^5",
  "1000000" = "10^6",
  "10000000" = "10^7"
)
tuplebreaks <- as.numeric(names(tuplelabels))

# time per db as tuples increase
save_pdf(
  "protocols-time.pdf",
  ggplot(
    df4,
    aes(color = system, y = time_mean, x = tuple, label = round(time_mean, 2))
  ) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    geom_errorbar(
      aes(ymax = time_mean + time_se, ymin = time_mean - time_se),
      width = 0.1,
      size = 1
    ) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s, log)")
)

# Same measure as bars, restricted to the 10^6-tuple rows.
save_pdf(
  "protocols-time2.pdf",
  ggplot(
    df4 %>% filter(tuple == 1000000),
    aes(fill = system, y = time_mean, x = tuple, label = round(time_mean, 2))
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s)")
)

# bytes per db
save_pdf(
  "protocols-bytes1.pdf",
  ggplot(df4, aes(color = system, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    ggtitle("Bytes Transferred") +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#, log)")
)

# Bytes as bars on a linear y axis, only for rows with more than 1000 tuples.
save_pdf(
  "protocols-bytes2.pdf",
  ggplot(
    df4 %>% filter(tuple > 1000),
    aes(fill = system, y = bytes_mean, x = tuple)
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#)") +
    ggtitle("Bytes transferred")
)

# packets
save_pdf(
  "protocols-packets.pdf",
  ggplot(df4, aes(color = system, y = packets_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Packets sent (#, log)") +
    ggtitle("Packets sent")
)

# client memory
save_pdf(
  "protocols-memory.pdf",
  ggplot(df4, aes(color = system, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Memory Footprint")
)

# protocols ----

# Rows on the "unlimited" network without a bin_chunksize, leaving out the
# "mariadb-compress" system and the mongodb, hbase and netcat databases;
# aggregated per database, protocol and tuple count.
df5 <- dd3 %>%
  filter(
    system != "mariadb-compress",
    network == "unlimited",
    db != "mongodb" & db != "hbase" & db != "netcat",
    timeout != 1,
    is.na(bin_chunksize)
  ) %>%
  group_by(db, protocol, tuple) %>%
  summarise_stats()

# The three "Wrapper overhead" figures differ only in the y measure; each has
# one facet per db and hides the x tick labels.
save_pdf(
  "protocols-wrapper-time.pdf",
  ggplot(df5, aes(color = protocol, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

save_pdf(
  "protocols-wrapper-bytes.pdf",
  ggplot(df5, aes(color = protocol, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Bytes (#, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

save_pdf(
  "protocols-wrapper-memory.pdf",
  ggplot(df5, aes(color = protocol, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

# networks ----

# Native-protocol rows (netcat excluded) on the three listed network
# settings; aggregated per system, network and tuple count.
df6 <- dd3 %>%
  filter(
    protocol == "native",
    db != "netcat",
    timeout != 1,
    network %in% c("unlimited", "gigabitethld", "10mbitethhd")
  ) %>%
  group_by(system, network, tuple) %>%
  summarise_stats()

# One facet per system, one line per network setting.
save_pdf(
  "protocols-network.pdf",
  ggplot(df6, aes(color = network, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ system) +
    theme(axis.text.x = element_blank())
)

# One facet per network setting, dodged bars per system, linear y axis.
save_pdf(
  "protocols-network2.pdf",
  ggplot(df6, aes(fill = system, y = time_mean, x = tuple)) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10() +
    ylab("Wall clock time (s)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ network) +
    theme(axis.text.x = element_blank())
)