Files @ 3d604507d726
Branch filter:

Location: DA/protocols/vldb-protocols.R - annotation

Mark Raasveldt
Add pmodbc.c and add new protocol tests to vldb-protocols.py.
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
7fbf056e912a
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
e70ef62c68b1
# library(dplyr)
# library(ggplot2)


# read.table("~/Desktop/secondrun.csv", header=F,  sep=",", stringsAsFactors=F) -> dd
# names(dd) <- c('system', 'network', 'tuple', 'run', 'time', "bytes")



# # dd %>% filter(network == "unlimited") %>% select(system,tuple,time) %>%  group_by(system,tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# # limits <- aes(ymax = mean + se, ymin=mean - se)

# # p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# # print(p)





# dd %>% select(system,network,tuple,time) %>%  group_by(system,network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df
# limits <- aes(ymax = mean + se, ymin=mean - se)

# p <- ggplot(df, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25) + geom_line() + facet_grid( . ~ network)
# print(p)




# dd %>% filter(network=="unlimited") %>% select(system,tuple,bytes) %>%  group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df2

# p <- ggplot(df2, aes(color=system, y=mean, x=tuple)) + scale_x_log10() + scale_y_log10() + geom_point() + geom_errorbar(limits, width=0.25)
# print(p)




# read.table("~/Desktop/test.csv", header=F,  sep=",", stringsAsFactors=F) -> dd2
# names(dd2) <- c('system', 'db', 'protocol', 'network', 'throughput', 'latency', 'tuple', 'run', 'time', "bytes")



# dd2 %>% filter(network=="unlimited", tuple == 10000000) %>% select(db,protocol,time,bytes) %>%  group_by(db,protocol) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> df3


# ggplot(df3, aes(fill=db, y=time_mean, x=db)) + geom_bar(stat="identity") + facet_grid( . ~ protocol)



# ggplot(df3, aes(fill=db, y=time_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)


# ggplot(df3, aes(fill=db, y=bytes_mean, x=protocol)) + geom_bar(stat="identity") + facet_grid( . ~ db)




library(dplyr)
library(ggplot2)
library(ggthemes)

# Shared plot theme used by every figure below: ggthemes' "few" theme at a
# large base size, with both axis titles nudged and the x-axis tick marks
# removed.
# The variable is deliberately still called `theme` because the plots below
# add it with `+ theme`; the ggplot2 function of the same name is therefore
# called with an explicit namespace here to keep the two apart.
theme <- theme_few(base_size = 24)
theme <- theme + ggplot2::theme(
  axis.title.y = element_text(vjust = 0.9),
  axis.title.x = element_text(vjust = -0.1),
  axis.ticks.x = element_blank()
)


# Load the benchmark results. The file has a header row and is
# comma-separated; fields equal to "-1" are read as NA.
dd3 <- read.table(
  "10.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE,
  na.strings = "-1"
)

# Print the structure of the loaded data as a quick sanity check.
str(dd3)

# Native-protocol runs on the unlimited network with more than one tuple and
# timeout != 1, summarised per (system, tuple): every remaining column gets a
# <col>_mean, <col>_sd and <col>_se (standard error = sd / sqrt(n)).
df4 <- dd3 %>%
  filter(
    tuple > 1,
    network == "unlimited",
    protocol == "native",
    timeout != 1
  ) %>%
  group_by(system, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))

# x-axis breaks for the tuple counts, labelled as powers of ten. The names
# are the numeric break positions, the values are the tick labels.
tuplelabels <- c(
  "100"      = "10^2",
  "1000"     = "10^3",
  "10000"    = "10^4",
  "100000"   = "10^5",
  "1000000"  = "10^6",
  "10000000" = "10^7"
)
tuplebreaks <- as.numeric(names(tuplelabels))


# Figures built from df4 (per-system summaries). Each figure is written to
# its own 10x5 inch PDF.
#
# Every plot is wrapped in print(): a ggplot object is only drawn when it is
# printed, and top-level auto-printing does not happen when the script is run
# through source() (or from inside a function). Without the explicit print()
# the PDF device would be opened and closed with nothing drawn on it.

# time per db as tuples increase
pdf("protocols-time.pdf", width = 10, height = 5)

print(
  ggplot(
    df4,
    aes(color = system, y = time_mean, x = tuple, label = round(time_mean, 2))
  ) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    # error bars span one standard error either side of the mean
    geom_errorbar(
      aes(ymax = time_mean + time_se, ymin = time_mean - time_se),
      width = 0.1,
      size = 1
    ) +
    theme +
    ggtitle("Wall clock time") +
    xlab("Tuples (#, log)") +
    ylab("Wall clock time (s, log)")
)

dev.off()


# bytes per db (log-log line plot)
pdf("protocols-bytes1.pdf", width = 10, height = 5)

print(
  ggplot(df4, aes(color = system, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    ggtitle("Bytes Transferred") +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#, log)")
)

dev.off()


# bytes per db as dodged bars, linear y axis, only for tuple counts > 1000
pdf("protocols-bytes2.pdf", width = 10, height = 5)

print(
  ggplot(
    df4 %>% filter(tuple > 1000),
    aes(fill = system, y = bytes_mean, x = tuple)
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Bytes (#)") +
    ggtitle("Bytes transferred")
)

dev.off()


# packets
pdf("protocols-packets.pdf", width = 10, height = 5)

print(
  ggplot(df4, aes(color = system, y = packets_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    theme +
    xlab("Tuples (#, log)") +
    ylab("Packets sent (#, log)") +
    ggtitle("Packets sent")
)

dev.off()


# client memory
pdf("protocols-memory.pdf", width = 10, height = 5)

print(
  ggplot(df4, aes(color = system, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10(breaks = tuplebreaks, labels = tuplelabels) +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Memory Footprint")
)

dev.off()


#pdf("protocols-bytes1.pdf", width=10, height=5)


# Wrapper-overhead data: unlimited-network runs with timeout != 1, excluding
# the "mariadb-compress" system and the mongodb, hbase and netcat databases.
# Summarised per (db, protocol, tuple): every remaining column gets a
# <col>_mean, <col>_sd and <col>_se (standard error = sd / sqrt(n)).
# NOTE(review): summarise_each()/funs() are deprecated in current dplyr; they
# are kept so the script keeps running on the dplyr version it was written
# against.
df5 <- dd3 %>%
  filter(
    system != "mariadb-compress",
    network == "unlimited",
    db != "mongodb" & db != "hbase" & db != "netcat",
    timeout != 1
  ) %>%
  group_by(db, protocol, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))


# protocols
#
# One facet per database, one line per protocol. Each plot is wrapped in
# print(): a ggplot object is only drawn when printed, and top-level
# auto-printing does not happen under source(), which would otherwise leave
# the PDF without a plot. The x-axis tick labels are blanked in these
# faceted figures.

pdf("protocols-wrapper-time.pdf", width = 10, height = 5)

print(
  ggplot(df5, aes(color = protocol, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()


pdf("protocols-wrapper-bytes.pdf", width = 10, height = 5)

print(
  ggplot(df5, aes(color = protocol, y = bytes_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Bytes (#, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()


pdf("protocols-wrapper-memory.pdf", width = 10, height = 5)

print(
  ggplot(df5, aes(color = protocol, y = memory_max_kb_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Max memory (KB, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Wrapper overhead") +
    facet_grid(~ db) +
    theme(axis.text.x = element_blank())
)

dev.off()

# networks
#
# Native-protocol runs (netcat excluded, timeout != 1) on three selected
# network configurations, summarised per (system, network, tuple): every
# remaining column gets a <col>_mean, <col>_sd and <col>_se
# (standard error = sd / sqrt(n)).
# NOTE(review): summarise_each()/funs() are deprecated in current dplyr; they
# are kept so the script keeps running on the dplyr version it was written
# against.
df6 <- dd3 %>%
  filter(
    protocol == "native",
    db != "netcat",
    timeout != 1,
    network %in% c("unlimited", "gigabitethld", "10mbitethhd")
  ) %>%
  group_by(system, network, tuple) %>%
  summarise_each(funs(mean, sd, se = sd(.) / sqrt(n())))

pdf("protocols-network.pdf", width = 10, height = 5)

# One facet per system, one line per network configuration. The plot is
# wrapped in print() because a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen under source(); without it the PDF
# would be written without a plot.
print(
  ggplot(df6, aes(color = network, y = time_mean, x = tuple)) +
    geom_line(size = 1.5) +
    geom_point(size = 2) +
    scale_x_log10() +
    scale_y_log10() +
    ylab("Wall clock time (s, log)") +
    theme +
    xlab("Tuples (#, log)") +
    ggtitle("Network speed impact") +
    facet_grid(~ system) +
    theme(axis.text.x = element_blank())
)

dev.off()