Files @ e23e3482a0b7
Branch filter:

Location: DA/protocols/vldb-protocols2.R - annotation

Hannes Muehleisen
more stuff
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
3aa4d95db0a9
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
3aa4d95db0a9
78f3a9ffe84c
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
3aa4d95db0a9
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
3aa4d95db0a9
3aa4d95db0a9
3aa4d95db0a9
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
78f3a9ffe84c
3aa4d95db0a9
78f3a9ffe84c
3aa4d95db0a9
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
e23e3482a0b7
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(stringr)


theme <- theme_few(base_size = 24) + 
theme(axis.title.y=element_text(vjust=0.9), 
  axis.title.x=element_text(vjust=-0.1),
  axis.ticks.x=element_blank(),
  text=element_text(family="serif"))


read.table("11.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3


sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "oracle-default"="DBMS X", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-default"="MonetDB", "hbase-default" = "HBase", "hive-default" = "Hive", "mongodb-default" = "MongoDB")

dd3 %>% filter(network=="unlimited", protocol == "native", timeout != 1, system %in% c("hive-default", "hbase-default", "mariadb-default", "mariadb-compress", "db2-default", "oracle-default", "postgres-default", "monetdb-default", "mongodb-default"), tuple %in% c(1, 1000000)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
dd5 <- dd4 %>% filter(tuple==1000000) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by="system") %>% mutate(tuple_time_ms=((time_1m-time_1)/1000000)*1000, tuple_bytes=(bytes_1m-bytes_1)/1000000) 


pdf("total-time.pdf", width=10, height=5)

ggplot(dd4 %>% filter(tuple == 1000000), aes(y=time_mean, x=reorder(system, -time_mean), label=round(time_mean, 1))) + geom_bar(stat="identity", position="dodge", width=.5) + geom_errorbar(aes(ymax = time_mean + time_se, ymin=time_mean - time_se), width=0.2, size=1) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.8, family="serif") + scale_y_continuous(limits=c(0, 31))


dev.off()

pdf("total-bytes.pdf", width=10, height=5)

ggplot(dd4 %>% filter(tuple == 1000000) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_hline(yintercept=120, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")  + scale_y_continuous(limits=c(0, 400)) 


dev.off()



# pdf("time-per-tuple.pdf", width=10, height=5)

# ggplot(dd5, aes( y=tuple_time_ms, x=reorder(system, -tuple_time_ms), label=round(tuple_time_ms, 4))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Time per tuple (ms)") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, .03))


# dev.off()


# pdf("bytes-per-tuple.pdf", width=10, height=5)


# ggplot(dd5, aes( y=tuple_bytes, x=reorder(system, -tuple_bytes), label=round(tuple_bytes))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Bytes per tuple") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, 410))


# dev.off()



# ggplot(dd5, aes(y=tuple_time_ms, x=tuple_bytes, label=system)) + geom_point() + geom_text(size=7, hjust=-.1, family="serif") + theme



read.table("13.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3
ntuples <- 1000000

sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "oracle-default"="DBMS X", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-prot9"="MonetDB", "hbase-default" = "HBase", "hive-default" = "Hive", "mongodb-default" = "MongoDB")

dd3 %>% filter(protocol == "odbc-noprint", timeout != 1, system %in% c("hive-default", "hbase-default", "mariadb-default", "mariadb-compress", "db2-default", "oracle-default", "postgres-default", "monetdb-prot9", "mongodb-default"), tuple %in% c(1, ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, network, time, bytes) %>% group_by(system, tuple, network) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 


dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system", "network"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)



pdf("total-time-noprint.pdf", width=10, height=3.5)

ggplot(dd4 %>% filter(tuple == 1000000, network=="unlimited"), aes(y=time_mean, x=reorder(system, -time_mean), label=round(time_mean, 1))) + geom_hline(yintercept=0.237, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.8, family="serif") + scale_y_continuous(limits=c(0, 6))


dev.off()

# pdf("time-per-tuple5.pdf", width=10, height=5)

# ggplot(dd5 %>% filter(network == "unlimited"), aes( y=tuple_time_ms, x=reorder(system, -tuple_time_ms), label=round(tuple_time_ms, 4))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Time per tuple (ms)") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") 

# dev.off()


# pdf("time-per-tuple6.pdf", width=10, height=5)

# ggplot(dd5 %>% filter(network == "10mbitethhd"), aes( y=tuple_time_ms, x=reorder(system, -tuple_time_ms), label=round(tuple_time_ms, 4))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Time per tuple (ms)") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") 

# dev.off()



# pdf("bytes-per-tuple5.pdf", width=10, height=5)


# ggplot(dd5 %>% filter(network == "unlimited"), aes( y=tuple_bytes, x=reorder(system, -tuple_bytes), label=round(tuple_bytes))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Bytes per tuple") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif")

# dev.off()



# ggplot(dd5, aes( x=network, y=tuple_time_ms, fill=system)) +geom_bar(stat="identity", position="dodge", width=.5) 



read.table("43.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 1000000

sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "oracle-default"="DBMS X", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-prot9"="MonetDB", "monetdb-prot10-none-none"="MonetDB++", "monetdb-prot10-snappy-none"="MonetDB++S", "hbase-default" = "HBase", "hive-default" = "Hive", "mongodb-default" = "MongoDB")

dd3 %>% filter(protocol == "odbc-noprint", timeout != 1, system %in% c("mariadb-default", "db2-default", "oracle-default", "postgres-default", "monetdb-prot9", "mariadb-compress"), tuple %in% c(1, ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, throughput, latency, time, bytes) %>% group_by(system, tuple, throughput, latency) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 


dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system", "throughput", "latency"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("latency.pdf", width=10, height=5)

ggplot(dd5 %>% filter(is.na(throughput)), aes(color=system, y=adj_time, x=latency, label=round(adj_time, 1)))  + geom_line(size=1.5) + geom_point(size=3) + scale_y_log10() + scale_x_log10(breaks=c(0.1,1,10,100), limits=c(0.1, 500)) + theme  + xlab("Latency (ms, log)") + ylab("Wall clock time (s, log)") +  theme(legend.position = "none") +  geom_text_repel(data = dd5 %>% filter(is.na(throughput), latency==100 | (system=="DBMS X" & latency==10)), aes(label = system), size = 8,  nudge_x = .3, segment.color = NA, family="serif") 


dev.off()


pdf("throughput.pdf", width=10, height=5)

ggplot(dd5 %>% filter(is.na(latency)), aes(color=system, y=adj_time, x=throughput, label=round(adj_time, 1)))  + geom_line(size=1.5) + geom_point(size=3) + scale_y_log10() + scale_x_log10(breaks=c(10,100,1000), limits=c(10, 3500)) + theme  + xlab("Throughput (Mb/s, log)") + ylab("Wall clock time (s, log)") +  theme(legend.position = "none") +
  geom_text_repel(data = dd5 %>% filter(is.na(latency), throughput==1000), aes(label = system), size = 8,  nudge_x = .35, segment.color = NA, family="serif") 
dev.off()


read.table("44.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

compnames <- c("snappy"="Sy", "xz"="XZ", "lz4" = "LZ4", "gzip" = "GZ")


dd3 %>% filter(grepl("netcat-prot-", dd3$system, fixed=T), tuple == 1000000, bin_chunksize == 100000, is.na(latency)) %>% select(bin_compress, bin_orientation, throughput, bytes, time) %>% group_by(bin_compress, bin_orientation, throughput) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024), bin_compress_name=ifelse(bin_compress == "", "None", compnames[bin_compress]), throughput=ifelse(is.na(throughput), "Unlimited", throughput)) -> dd4





dd4$throughput <- factor(dd4$throughput, levels=c("Unlimited", "1000", "100", "10"))

pdf("compression.pdf", width=10, height=6)

ggplot(dd4 %>% filter(bin_orientation == "col"), aes(y=time_mean, x=bytes_mean_mb, label=bin_compress_name)) + geom_point(size=3) +  theme + scale_y_log10() + ylab("Wall clock time (s, log)") + xlab("Bytes transferred (MB)") + theme(legend.position = "none") + facet_wrap(~throughput) + geom_text_repel(family="serif", size=8, point.padding=unit(2,"mm"))


dev.off()


# for table

xtable::xtable(dd4 %>% filter(is.na(throughput)) %>% select(bin_compress, bin_orientation, bytes_mean) %>% mutate(bytes_mean_mb=round(bytes_mean/1024/1024,1), ratio=round(110271223/bytes_mean, 2)) %>% select(bin_compress, bin_orientation,bytes_mean_mb, ratio))





read.table("47.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3


ntuples <- 10000000

sysnames <- c("monetdb-prot10-strings-none-none"="Null-Terminated", "monetdb-prot10-strings-snappy-none"="Null-Terminated + Sy","monetdb-prot10-none-none"="Fixed Witdth", "monetdb-prot10-snappy-none"="Fixed Width + Sy")

dd3 %>% filter(network=="unlimitedms-unlimitedmb/s", protocol == "odbc-noprint", timeout != 1, system %in% c(names(sysnames)), tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("total-time-strings-noprint.pdf", width=10, height=3)


ggplot(dd5 %>% filter(tuple.x == ntuples), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 2))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.4, family="serif") + scale_y_continuous(limits=c(0, 4))

dev.off()

pdf("total-bytes-strings.pdf", width=10, height=3)

ggplot(dd4 %>% filter(tuple == ntuples) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")  + scale_y_continuous(limits=c(0, 840)) 


dev.off()


sysnames <- c("monetdb-prot10-intsonly-pfor"="PFOR-DELTA", "monetdb-prot10-intsonly-none"="None","monetdb-prot10-intsonly-pfor-snappy"="PFOR + Snappy", "monetdb-prot10-intsonly-none-snappy"="Snappy")

dd3 %>% filter(network=="unlimitedms-unlimitedmb/s", protocol == "odbc-noprint", timeout != 1, system %in% c(names(sysnames)), tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("total-time-ints-noprint.pdf", width=10, height=3)


ggplot(dd5 %>% filter(tuple.x == ntuples), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 3)))  + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.4, family="serif") + scale_y_continuous(limits=c(0, 1.2))

dev.off()

pdf("total-bytes-ints.pdf", width=10, height=3)

ggplot(dd4 %>% filter(tuple == ntuples) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")  + scale_y_continuous(limits=c(0, 220)) 


dev.off()




sysnames <- c("monetdb-prot10"="New Protocol", "netcat-csv"="netcat")

dd3 %>% filter(network=="unlimitedms-unlimitedmb/s", timeout != 1, system %in% c(names(sysnames)), tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("total-time-shootout-noprint.pdf", width=10, height=3)


ggplot(dd5 %>% filter(tuple.x == ntuples), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 2))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.4, family="serif") #+ scale_y_continuous(limits=c(0, 4))

dev.off()

pdf("total-bytes-shootout.pdf", width=10, height=3)

ggplot(dd4 %>% filter(tuple == ntuples) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif") # + scale_y_continuous(limits=c(0, 840)) 


dev.off()




read.table("48.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 1000000

dd3 %>% filter(grepl("monetdb-prot10-", dd3$system, fixed=T)) %>% mutate(system2 = gsub("monetdb-prot10-","", system, fixed=T), compression=str_split_fixed(system2, "-", 2)[,1], chunksize=as.integer(str_split_fixed(system2, "-", 2)[,2])) %>% filter(timeout != 1, tuple %in% c(1,ntuples)) %>% select(compression, chunksize, tuple, time, bytes) %>% group_by(compression, chunksize, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4


 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("compression", "chunksize"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)

dd5 %>% select(compression, chunksize, adj_time, adj_bytes)  -> dd6

print(xtable::xtable(dd6 %>% filter(compression=="snappy")%>% full_join(dd6 %>% filter(compression =="none"), by="chunksize") %>% select(chunksize, time_uncomp=adj_time.y, bytes_uncom=adj_bytes.y, time_snappy=adj_time.x, bytes_snappy=adj_bytes.x) %>% mutate(ratio=round(bytes_uncom/bytes_snappy, 2), time_uncomp=round(time_uncomp, 2), time_snappy=round(time_snappy, 2), bytes_uncom=round(bytes_uncom/1024/1024, 1)) %>% select(chunksize, time_uncomp, time_snappy, bytes_uncom, ratio)), include.rownames=FALSE)

# ggplot(dd5, aes(y=adj_time, x=chunksize/1024, color=compression)) +  theme + xlab("Chunk Size (KB)") + ylab("Wall clock time (s)") + geom_line() + geom_point() + scale_x_log10()


# ggplot(dd5, aes(y=adj_bytes/1024, x=chunksize/1024, color=compression)) +  theme + xlab("Chunk Size (KB)") + ylab("Data transferred (KB)") + geom_line() + geom_point() + scale_x_log10()







read.table("49.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 1000000

dd3 %>% filter(grepl("monetdb-prot10-", dd3$system, fixed=T)) %>% mutate(system2 = gsub("monetdb-prot10-","", system, fixed=T), compression=str_split_fixed(system2, "-", 2)[,1], stringmethod=str_split_fixed(system2, "-", 2)[,2]) %>% filter(timeout != 1, tuple %in% c(1,ntuples)) %>% select(compression, stringmethod, tuple, time, bytes) %>% group_by(compression, stringmethod, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4


 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("compression", "stringmethod"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


 # ggplot(dd5 %>% filter(compression == "snappy"), aes(y=adj_time, x=reorder(stringmethod, -adj_time), label=round(adj_time, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Wall clock time") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")


 # ggplot(dd5 %>% filter(compression == "snappy"), aes(y=adj_bytes, x=reorder(stringmethod, -adj_bytes), label=round(adj_bytes, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Wall clock time") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")



dd5 %>% select(compression, stringmethod, adj_time, adj_bytes)  -> dd6

print(xtable::xtable(dd6 %>% filter(compression=="snappy")%>% full_join(dd6 %>% filter(compression =="none"), by="stringmethod") %>% select(stringmethod, time_uncomp=adj_time.y, bytes_uncom=adj_bytes.y, time_snappy=adj_time.x, bytes_snappy=adj_bytes.x) %>% mutate(ratio=round(bytes_uncom/bytes_snappy, 2), time_uncomp=round(time_uncomp, 2), time_snappy=round(time_snappy, 2), bytes_uncom=round(bytes_uncom/1024/1024, 1)) %>% select(stringmethod, time_uncomp, time_snappy, bytes_uncom, ratio)), include.rownames=FALSE)




sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-prot10"="MonetDB++", "monetdb-prot10-snappy"="MonetDB++Sy", "netcat-csv"= "netcat")


read.table("51.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 10000000

dd3 %>% filter(system %in% c(names(sysnames)), timeout != 1, tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, network, tuple, time, bytes) %>% group_by(system, network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4


 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system", "network"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1) %>% select(system, network, adj_time, adj_bytes)



pdf("10m-final-time-unlimited.pdf", width=10, height=3)

ggplot(dd5 %>% filter(network=="unlimited", system!="netcat"), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 1))) + geom_hline(yintercept=1.634335, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, 90))

dev.off()

pdf("10m-final-time-slownet.pdf", width=10, height=3)

ggplot(dd5 %>% filter(network=="10mbitethhd", system!="netcat"), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 1))) + geom_hline(yintercept=1014.811853, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, 2000))

dev.off()