Files @ e23e3482a0b7
Branch filter:

Location: DA/protocols/vldb-protocols.R - annotation

Hannes Muehleisen
more stuff
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
3c85f21497f0
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
3c85f21497f0
# library(dplyr)
# library(ggplot2)


# read.table("~/Desktop/secondrun.csv", header=F,  sep=",", stringsAsFactors=F) -> dd
# names(dd) <- c('system', 'network', 'tuple', 'run', 'time', "bytes")



# # dd %>% filter(network == "unlimited") %>% select(system,tuple,time) %>%  group_by(system,tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# # limits <- aes(ymax = mean + se, ymin=mean - se)

# # p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# # print(p)





# dd %>% select(system,network,tuple,time) %>%  group_by(system,network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# limits <- aes(ymax = mean + se, ymin=mean - se)

# p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25) + geom_line() + facet_grid( . ~ network)
# print(p)




# dd %>% filter(network=="unlimited") %>% select(system,tuple,bytes) %>%  group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df2

# p <- ggplot(df2, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# print(p)




# read.table("~/Desktop/test.csv", header=F,  sep=",", stringsAsFactors=F) -> dd2
# names(dd2) <- c('system', 'db', 'protocol', 'network', 'throughput', 'latency', 'tuple', 'run', 'time', "bytes")



# dd2 %>% filter(network=="unlimited", tuple == 10000000) %>% select(db,protocol,time,bytes) %>%  group_by(db,protocol) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df3


# ggplot(df3, aes(fill=db, y=time_mean, x=db)) + geom_bar(stat="identity") + facet_grid( . ~ protocol)



# ggplot(df3, aes(fill=db, y=time_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)


# ggplot(df3, aes(fill=db, y=bytes_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)




library(dplyr)
library(ggplot2)
library(ggthemes)

# Shared look for every figure in this script: ggthemes' "few" theme at base
# font size 24, with adjusted axis-title positions and no x-axis tick marks.
# NOTE(review): this object shares its name with ggplot2's theme() function;
# calls written as theme(...) still resolve to the function.
theme <- theme_few(base_size = 24) +
  theme(
    axis.ticks.x = element_blank(),
    axis.title.x = element_text(vjust = -0.1),
    axis.title.y = element_text(vjust = 0.9)
  )


# Load the benchmark measurements from 13.csv (comma-separated, first row is
# the header). Fields equal to "-1" are read as NA and strings are kept as
# character vectors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned elsewhere in a session.
dd3 <- read.table(
  "13.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  na.strings = "-1"
)

# Show the imported column names and types.
str(dd3)

# Per-(system, tuple) mean, standard deviation and standard error of every
# non-grouping column, restricted to native-protocol runs on the "unlimited"
# network. Rows kept: tuple > 1, timeout != 1, and either bin_chunksize is NA
# or the system is netcat-prot-col-chunk-100000-snappy; systems whose name
# contains "netcat-csv-" are dropped.
# fixed = TRUE replaces fixed = T, since T is a reassignable variable.
# NOTE(review): summarise_each()/funs() are deprecated in current dplyr; they
# are kept because the dplyr version this script targets is not visible here.
df4 <- dd3 %>%
  filter(
    tuple > 1,
    network == "unlimited",
    protocol == "native",
    timeout != 1,
    is.na(bin_chunksize) | system %in% c("netcat-prot-col-chunk-100000-snappy"),
    !grepl("netcat-csv-", system, fixed = TRUE)
  ) %>%
  group_by(system, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))

# Power-of-ten tick labels for the tuple-count axis, keyed by the tuple count
# written out as a string; the numeric break positions are taken from the keys.
tuplelabels <- setNames(
  paste0("10^", 2:7),
  c("100", "1000", "10000", "100000", "1000000", "10000000")
)
tuplebreaks <- as.numeric(names(tuplelabels))


# Figure protocols-time.pdf: mean wall clock time per system as the tuple
# count increases, both axes logarithmic, with error bars of +/- one standard
# error.
pdf("protocols-time.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df4, aes(color = system, y = time_mean, x = tuple,
                  label = round(time_mean, 2))) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    geom_errorbar(aes(ymax = time_mean + time_se, ymin = time_mean - time_se),
                  width = 0.1, size = 1) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s, log)")
)

dev.off()


# Figure protocols-time2.pdf: dodged bars of mean wall clock time per system,
# using only the rows with tuple == 1000000.
pdf("protocols-time2.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df4 %>% filter(tuple == 1000000),
         aes(fill = system, y = time_mean, x = tuple,
             label = round(time_mean, 2))) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s)")
)

dev.off()




# Figure protocols-bytes1.pdf: mean bytes transferred per system as the tuple
# count increases, both axes logarithmic.
pdf("protocols-bytes1.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df4, aes(color = system, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    ggtitle("Bytes Transferred") +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#, log)")
)

dev.off()


# Figure protocols-bytes2.pdf: dodged bars of mean bytes transferred per
# system, using only the rows with tuple > 1000 (linear y axis).
pdf("protocols-bytes2.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df4 %>% filter(tuple > 1000),
         aes(fill = system, y = bytes_mean, x = tuple)) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#)") +
    ggtitle("Bytes transferred")
)

dev.off()


# Figure protocols-packets.pdf: mean packets sent per system as the tuple
# count increases, both axes logarithmic.
pdf("protocols-packets.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df4, aes(color = system, y = packets_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Packets sent (#, log)") +
    ggtitle("Packets sent")
)

dev.off()


# Figure protocols-memory.pdf: mean of the memory_max_kb column (client
# memory) per system as the tuple count increases, both axes logarithmic.
pdf("protocols-memory.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df4, aes(color = system, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Memory Footprint")
)

dev.off()


# Protocols ----
# Per-(db, protocol, tuple) mean, standard deviation and standard error of
# every non-grouping column on the "unlimited" network. The mariadb-compress
# system and the mongodb, hbase and netcat databases are excluded, as are rows
# with timeout == 1 or a non-NA bin_chunksize.
df5 <- dd3 %>%
  filter(
    system != "mariadb-compress",
    network == "unlimited",
    db != "mongodb",
    db != "hbase",
    db != "netcat",
    timeout != 1,
    is.na(bin_chunksize)
  ) %>%
  group_by(db, protocol, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))



# Figure protocols-wrapper-time.pdf: mean wall clock time per protocol as the
# tuple count increases (log-log), one facet per db, x tick labels hidden.
pdf("protocols-wrapper-time.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df5, aes(color = protocol, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()


# Figure protocols-wrapper-bytes.pdf: mean bytes transferred per protocol as
# the tuple count increases (log-log), one facet per db, x tick labels hidden.
pdf("protocols-wrapper-bytes.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df5, aes(color = protocol, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Bytes (#, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()


# Figure protocols-wrapper-memory.pdf: mean of the memory_max_kb column per
# protocol as the tuple count increases (log-log), one facet per db, x tick
# labels hidden.
pdf("protocols-wrapper-memory.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df5, aes(color = protocol, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()

# Networks ----
# Per-(system, network, tuple) mean, standard deviation and standard error of
# every non-grouping column, for native-protocol runs on three network
# settings. The netcat db and rows with timeout == 1 are excluded.
df6 <- dd3 %>%
  filter(
    protocol == "native",
    db != "netcat",
    timeout != 1,
    network %in% c("unlimited", "gigabitethld", "10mbitethhd")
  ) %>%
  group_by(system, network, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))

# Figure protocols-network.pdf: mean wall clock time per network setting as
# the tuple count increases (log-log), one facet per system, x tick labels
# hidden.
pdf("protocols-network.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df6, aes(color = network, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ system) +
    theme(axis.text.x = element_blank())
)

dev.off()



# Figure protocols-network2.pdf: dodged bars of mean wall clock time per
# system over the tuple count (log x, linear y), one facet per network
# setting, x tick labels hidden.
pdf("protocols-network2.pdf", width = 10, height = 5)

# print() is explicit so the plot is drawn into the PDF even when this file is
# run through source(), which does not auto-print top-level values by default.
print(
  ggplot(df6, aes(fill = system, y = time_mean, x = tuple)) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10() +
    ylab("Wall clock time (s)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ network) +
    theme(axis.text.x = element_blank())
)

dev.off()