Files @ b01cb2bb9139
Branch filter:

Location: DA/protocols/vldb-protocols.R - annotation

Mark Raasveldt
Add netcat of our own protocol message.
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
# library(dplyr)
# library(ggplot2)


# read.table("~/Desktop/secondrun.csv", header=F,  sep=",", stringsAsFactors=F) -> dd
# names(dd) <- c('system', 'network', 'tuple', 'run', 'time', "bytes")



# # dd %>% filter(network == "unlimited") %>% select(system,tuple,time) %>%  group_by(system,tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# # limits <- aes(ymax = mean + se, ymin=mean - se)

# # p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# # print(p)





# dd %>% select(system,network,tuple,time) %>%  group_by(system,network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# limits <- aes(ymax = mean + se, ymin=mean - se)

# p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25) + geom_line() + facet_grid( . ~ network)
# print(p)




# dd %>% filter(network=="unlimited") %>% select(system,tuple,bytes) %>%  group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df2

# p <- ggplot(df2, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# print(p)




# read.table("~/Desktop/test.csv", header=F,  sep=",", stringsAsFactors=F) -> dd2
# names(dd2) <- c('system', 'db', 'protocol', 'network', 'throughput', 'latency', 'tuple', 'run', 'time', "bytes")



# dd2 %>% filter(network=="unlimited", tuple == 10000000) %>% select(db,protocol,time,bytes) %>%  group_by(db,protocol) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df3


# ggplot(df3, aes(fill=db, y=time_mean, x=db)) + geom_bar(stat="identity") + facet_grid( . ~ protocol)



# ggplot(df3, aes(fill=db, y=time_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)


# ggplot(df3, aes(fill=db, y=bytes_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)




library(dplyr)
library(ggplot2)
library(ggthemes)

# Shared look for every figure: the ggthemes "few" theme at a large base font
# size, with nudged axis titles and no tick marks on the x axis.
# The object is stored under the name `theme`, which is also the name of
# ggplot2's theme() function; calls such as theme(...) still reach the
# function because R skips non-function bindings when resolving a call.
theme <- local({
  base_look <- theme_few(base_size = 24)
  tweaks <- theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    axis.ticks.x = element_blank()
  )
  base_look + tweaks
})


# Load the benchmark results. The header row of the file supplies the column
# names, strings stay character vectors, and the literal "-1" is read as NA.
# TRUE/FALSE are spelled out because the T/F shorthands are ordinary
# variables that can be reassigned.
dd3 <- read.table(
  "~/Desktop/10.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  na.strings = "-1"
)

# Show the imported column names and types as a quick sanity check.
str(dd3)

# Per-(system, tuple) summary of the native-protocol runs on the unlimited
# network. Every remaining column gets three statistics, named
# <column>_mean, <column>_sd and <column>_se (se = sd / sqrt(group size)).
# NOTE(review): summarise_each() and funs() are deprecated in current dplyr;
# across() is the documented replacement -- migrate once the installed dplyr
# version is confirmed.
# NOTE(review): the input is read with "-1" mapped to NA, and filter() drops
# rows whose condition evaluates to NA, so rows with a missing `timeout` are
# excluded by `timeout != 1` as well -- confirm that this is intended.
df4 <- dd3 %>%
  filter(
    tuple > 1,
    network == "unlimited",
    protocol == "native",
    timeout != 1
  ) %>%
  group_by(system, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))

# Axis labels for the tuple counts 10^2 .. 10^7. The vector names hold the
# tuple counts as strings and the values hold the text shown on the axis.
tuplelabels <- setNames(
  paste0("10^", 2:7),
  c("100", "1000", "10000", "100000", "1000000", "10000000")
)

# Numeric break positions, derived from the label names so the two stay in sync.
tuplebreaks <- as.numeric(names(tuplelabels))


# Wall-clock time per system as the number of tuples grows (log-log axes).
# Error bars span time_mean +/- time_se.
pdf("protocols-time.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(
    df4,
    aes(color = system, y = time_mean, x = tuple, label = round(time_mean, 2))
  ) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    geom_errorbar(
      aes(ymax = time_mean + time_se, ymin = time_mean - time_se),
      width = 0.1,
      size = 1
    ) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s, log)")
)

dev.off()


# Bytes transferred per system as the number of tuples grows (log-log axes).
pdf("protocols-bytes1.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df4, aes(color = system, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    ggtitle("Bytes Transferred") +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#, log)")
)

dev.off()


# Bytes transferred per system as dodged bars, restricted to result sets of
# more than 1000 tuples (log x axis, linear y axis).
pdf("protocols-bytes2.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(
    df4 %>% filter(tuple > 1000),
    aes(fill = system, y = bytes_mean, x = tuple)
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#)") +
    ggtitle("Bytes transferred")
)

dev.off()


# Packets sent per system as the number of tuples grows (log-log axes).
pdf("protocols-packets.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df4, aes(color = system, y = packets_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Packets sent (#, log)") +
    ggtitle("Packets sent")
)

dev.off()


# Mean of the memory_max_kb column per system as the number of tuples grows
# (log-log axes).
pdf("protocols-memory.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df4, aes(color = system, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Memory Footprint")
)

dev.off()


#pdf("protocols-bytes1.pdf", width=10, height=5)


# Per-(db, protocol, tuple) summary on the unlimited network, leaving out the
# "mariadb-compress" system and the mongodb, hbase and netcat databases.
# Every remaining column gets <column>_mean, <column>_sd and <column>_se
# (se = sd / sqrt(group size)).
# NOTE(review): summarise_each() and funs() are deprecated in current dplyr;
# across() is the documented replacement.
df5 <- dd3 %>%
  filter(
    system != "mariadb-compress",
    network == "unlimited",
    db != "mongodb" & db != "hbase" & db != "netcat",
    timeout != 1
  ) %>%
  group_by(db, protocol, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))


# Protocol comparison: wall-clock time per protocol, one facet per database
# (log-log axes, x tick labels hidden).
pdf("protocols-wrapper-time.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df5, aes(color = protocol, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()


# Bytes transferred per protocol, one facet per database (log-log axes,
# x tick labels hidden).
pdf("protocols-wrapper-bytes.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df5, aes(color = protocol, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Bytes (#, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()


# Mean of the memory_max_kb column per protocol, one facet per database
# (log-log axes, x tick labels hidden).
pdf("protocols-wrapper-memory.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df5, aes(color = protocol, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()

# Network comparison: per-(system, network, tuple) summary of the
# native-protocol runs, without netcat, limited to three network settings.
# Every remaining column gets <column>_mean, <column>_sd and <column>_se
# (se = sd / sqrt(group size)).
# NOTE(review): summarise_each() and funs() are deprecated in current dplyr;
# across() is the documented replacement.
df6 <- dd3 %>%
  filter(
    protocol == "native",
    db != "netcat",
    timeout != 1,
    network %in% c("unlimited", "gigabitethld", "10mbitethhd")
  ) %>%
  group_by(system, network, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))

# Wall-clock time per network setting, one facet per system (log-log axes,
# x tick labels hidden).
pdf("protocols-network.pdf", width = 10, height = 5)

# print() is explicit so the figure is rendered into the PDF even when the
# script is run through source(), which does not auto-print top-level values.
print(
  ggplot(df6, aes(color = network, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ system) +
    theme(axis.text.x = element_blank())
)

dev.off()