Files @ e23e3482a0b7
Branch filter:

Location: DA/protocols/vldb-protocols2.R

Hannes Muehleisen
more stuff
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(stringr)


theme <- theme_few(base_size = 24) + 
theme(axis.title.y=element_text(vjust=0.9), 
  axis.title.x=element_text(vjust=-0.1),
  axis.ticks.x=element_blank(),
  text=element_text(family="serif"))


read.table("11.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3


sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "oracle-default"="DBMS X", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-default"="MonetDB", "hbase-default" = "HBase", "hive-default" = "Hive", "mongodb-default" = "MongoDB")

dd3 %>% filter(network=="unlimited", protocol == "native", timeout != 1, system %in% c("hive-default", "hbase-default", "mariadb-default", "mariadb-compress", "db2-default", "oracle-default", "postgres-default", "monetdb-default", "mongodb-default"), tuple %in% c(1, 1000000)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
dd5 <- dd4 %>% filter(tuple==1000000) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by="system") %>% mutate(tuple_time_ms=((time_1m-time_1)/1000000)*1000, tuple_bytes=(bytes_1m-bytes_1)/1000000) 


pdf("total-time.pdf", width=10, height=5)

ggplot(dd4 %>% filter(tuple == 1000000), aes(y=time_mean, x=reorder(system, -time_mean), label=round(time_mean, 1))) + geom_bar(stat="identity", position="dodge", width=.5) + geom_errorbar(aes(ymax = time_mean + time_se, ymin=time_mean - time_se), width=0.2, size=1) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.8, family="serif") + scale_y_continuous(limits=c(0, 31))


dev.off()

pdf("total-bytes.pdf", width=10, height=5)

ggplot(dd4 %>% filter(tuple == 1000000) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_hline(yintercept=120, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")  + scale_y_continuous(limits=c(0, 400)) 


dev.off()



# pdf("time-per-tuple.pdf", width=10, height=5)

# ggplot(dd5, aes( y=tuple_time_ms, x=reorder(system, -tuple_time_ms), label=round(tuple_time_ms, 4))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Time per tuple (ms)") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, .03))


# dev.off()


# pdf("bytes-per-tuple.pdf", width=10, height=5)


# ggplot(dd5, aes( y=tuple_bytes, x=reorder(system, -tuple_bytes), label=round(tuple_bytes))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Bytes per tuple") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, 410))


# dev.off()



# ggplot(dd5, aes(y=tuple_time_ms, x=tuple_bytes, label=system)) + geom_point() + geom_text(size=7, hjust=-.1, family="serif") + theme



read.table("13.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3
ntuples <- 1000000

sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "oracle-default"="DBMS X", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-prot9"="MonetDB", "hbase-default" = "HBase", "hive-default" = "Hive", "mongodb-default" = "MongoDB")

dd3 %>% filter(protocol == "odbc-noprint", timeout != 1, system %in% c("hive-default", "hbase-default", "mariadb-default", "mariadb-compress", "db2-default", "oracle-default", "postgres-default", "monetdb-prot9", "mongodb-default"), tuple %in% c(1, ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, network, time, bytes) %>% group_by(system, tuple, network) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 


dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system", "network"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)



pdf("total-time-noprint.pdf", width=10, height=3.5)

ggplot(dd4 %>% filter(tuple == 1000000, network=="unlimited"), aes(y=time_mean, x=reorder(system, -time_mean), label=round(time_mean, 1))) + geom_hline(yintercept=0.237, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.8, family="serif") + scale_y_continuous(limits=c(0, 6))


dev.off()

# pdf("time-per-tuple5.pdf", width=10, height=5)

# ggplot(dd5 %>% filter(network == "unlimited"), aes( y=tuple_time_ms, x=reorder(system, -tuple_time_ms), label=round(tuple_time_ms, 4))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Time per tuple (ms)") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") 

# dev.off()


# pdf("time-per-tuple6.pdf", width=10, height=5)

# ggplot(dd5 %>% filter(network == "10mbitethhd"), aes( y=tuple_time_ms, x=reorder(system, -tuple_time_ms), label=round(tuple_time_ms, 4))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Time per tuple (ms)") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif") 

# dev.off()



# pdf("bytes-per-tuple5.pdf", width=10, height=5)


# ggplot(dd5 %>% filter(network == "unlimited"), aes( y=tuple_bytes, x=reorder(system, -tuple_bytes), label=round(tuple_bytes))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Bytes per tuple") + coord_flip() + guides(fill=FALSE) + geom_text(size=7, hjust=-.2, family="serif")

# dev.off()



# ggplot(dd5, aes( x=network, y=tuple_time_ms, fill=system)) +geom_bar(stat="identity", position="dodge", width=.5) 



read.table("43.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 1000000

sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "oracle-default"="DBMS X", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-prot9"="MonetDB", "monetdb-prot10-none-none"="MonetDB++", "monetdb-prot10-snappy-none"="MonetDB++S", "hbase-default" = "HBase", "hive-default" = "Hive", "mongodb-default" = "MongoDB")

dd3 %>% filter(protocol == "odbc-noprint", timeout != 1, system %in% c("mariadb-default", "db2-default", "oracle-default", "postgres-default", "monetdb-prot9", "mariadb-compress"), tuple %in% c(1, ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, throughput, latency, time, bytes) %>% group_by(system, tuple, throughput, latency) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 


dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system", "throughput", "latency"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("latency.pdf", width=10, height=5)

ggplot(dd5 %>% filter(is.na(throughput)), aes(color=system, y=adj_time, x=latency, label=round(adj_time, 1)))  + geom_line(size=1.5) + geom_point(size=3) + scale_y_log10() + scale_x_log10(breaks=c(0.1,1,10,100), limits=c(0.1, 500)) + theme  + xlab("Latency (ms, log)") + ylab("Wall clock time (s, log)") +  theme(legend.position = "none") +  geom_text_repel(data = dd5 %>% filter(is.na(throughput), latency==100 | (system=="DBMS X" & latency==10)), aes(label = system), size = 8,  nudge_x = .3, segment.color = NA, family="serif") 


dev.off()


pdf("throughput.pdf", width=10, height=5)

ggplot(dd5 %>% filter(is.na(latency)), aes(color=system, y=adj_time, x=throughput, label=round(adj_time, 1)))  + geom_line(size=1.5) + geom_point(size=3) + scale_y_log10() + scale_x_log10(breaks=c(10,100,1000), limits=c(10, 3500)) + theme  + xlab("Throughput (Mb/s, log)") + ylab("Wall clock time (s, log)") +  theme(legend.position = "none") +
  geom_text_repel(data = dd5 %>% filter(is.na(latency), throughput==1000), aes(label = system), size = 8,  nudge_x = .35, segment.color = NA, family="serif") 
dev.off()


read.table("44.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

compnames <- c("snappy"="Sy", "xz"="XZ", "lz4" = "LZ4", "gzip" = "GZ")


dd3 %>% filter(grepl("netcat-prot-", dd3$system, fixed=T), tuple == 1000000, bin_chunksize == 100000, is.na(latency)) %>% select(bin_compress, bin_orientation, throughput, bytes, time) %>% group_by(bin_compress, bin_orientation, throughput) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024), bin_compress_name=ifelse(bin_compress == "", "None", compnames[bin_compress]), throughput=ifelse(is.na(throughput), "Unlimited", throughput)) -> dd4





dd4$throughput <- factor(dd4$throughput, levels=c("Unlimited", "1000", "100", "10"))

pdf("compression.pdf", width=10, height=6)

ggplot(dd4 %>% filter(bin_orientation == "col"), aes(y=time_mean, x=bytes_mean_mb, label=bin_compress_name)) + geom_point(size=3) +  theme + scale_y_log10() + ylab("Wall clock time (s, log)") + xlab("Bytes transferred (MB)") + theme(legend.position = "none") + facet_wrap(~throughput) + geom_text_repel(family="serif", size=8, point.padding=unit(2,"mm"))


dev.off()


# for table

xtable::xtable(dd4 %>% filter(is.na(throughput)) %>% select(bin_compress, bin_orientation, bytes_mean) %>% mutate(bytes_mean_mb=round(bytes_mean/1024/1024,1), ratio=round(110271223/bytes_mean, 2)) %>% select(bin_compress, bin_orientation,bytes_mean_mb, ratio))





read.table("47.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3


ntuples <- 10000000

sysnames <- c("monetdb-prot10-strings-none-none"="Null-Terminated", "monetdb-prot10-strings-snappy-none"="Null-Terminated + Sy","monetdb-prot10-none-none"="Fixed Witdth", "monetdb-prot10-snappy-none"="Fixed Width + Sy")

dd3 %>% filter(network=="unlimitedms-unlimitedmb/s", protocol == "odbc-noprint", timeout != 1, system %in% c(names(sysnames)), tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("total-time-strings-noprint.pdf", width=10, height=3)


ggplot(dd5 %>% filter(tuple.x == ntuples), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 2))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.4, family="serif") + scale_y_continuous(limits=c(0, 4))

dev.off()

pdf("total-bytes-strings.pdf", width=10, height=3)

ggplot(dd4 %>% filter(tuple == ntuples) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")  + scale_y_continuous(limits=c(0, 840)) 


dev.off()


sysnames <- c("monetdb-prot10-intsonly-pfor"="PFOR-DELTA", "monetdb-prot10-intsonly-none"="None","monetdb-prot10-intsonly-pfor-snappy"="PFOR + Snappy", "monetdb-prot10-intsonly-none-snappy"="Snappy")

dd3 %>% filter(network=="unlimitedms-unlimitedmb/s", protocol == "odbc-noprint", timeout != 1, system %in% c(names(sysnames)), tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("total-time-ints-noprint.pdf", width=10, height=3)


ggplot(dd5 %>% filter(tuple.x == ntuples), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 3)))  + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.4, family="serif") + scale_y_continuous(limits=c(0, 1.2))

dev.off()

pdf("total-bytes-ints.pdf", width=10, height=3)

ggplot(dd4 %>% filter(tuple == ntuples) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")  + scale_y_continuous(limits=c(0, 220)) 


dev.off()




sysnames <- c("monetdb-prot10"="New Protocol", "netcat-csv"="netcat")

dd3 %>% filter(network=="unlimitedms-unlimitedmb/s", timeout != 1, system %in% c(names(sysnames)), tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, tuple, time, bytes) %>% group_by(system, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4
 
 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


pdf("total-time-shootout-noprint.pdf", width=10, height=3)


ggplot(dd5 %>% filter(tuple.x == ntuples), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 2))) + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.4, family="serif") #+ scale_y_continuous(limits=c(0, 4))

dev.off()

pdf("total-bytes-shootout.pdf", width=10, height=3)

ggplot(dd4 %>% filter(tuple == ntuples) %>% mutate(bytes_mean_mb=(bytes_mean/1024/1024)), aes(y=bytes_mean_mb, x=reorder(system, -bytes_mean_mb), label=round(bytes_mean_mb, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Bytes transferred (MB)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif") # + scale_y_continuous(limits=c(0, 840)) 


dev.off()




read.table("48.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 1000000

dd3 %>% filter(grepl("monetdb-prot10-", dd3$system, fixed=T)) %>% mutate(system2 = gsub("monetdb-prot10-","", system, fixed=T), compression=str_split_fixed(system2, "-", 2)[,1], chunksize=as.integer(str_split_fixed(system2, "-", 2)[,2])) %>% filter(timeout != 1, tuple %in% c(1,ntuples)) %>% select(compression, chunksize, tuple, time, bytes) %>% group_by(compression, chunksize, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4


 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("compression", "chunksize"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)

dd5 %>% select(compression, chunksize, adj_time, adj_bytes)  -> dd6

print(xtable::xtable(dd6 %>% filter(compression=="snappy")%>% full_join(dd6 %>% filter(compression =="none"), by="chunksize") %>% select(chunksize, time_uncomp=adj_time.y, bytes_uncom=adj_bytes.y, time_snappy=adj_time.x, bytes_snappy=adj_bytes.x) %>% mutate(ratio=round(bytes_uncom/bytes_snappy, 2), time_uncomp=round(time_uncomp, 2), time_snappy=round(time_snappy, 2), bytes_uncom=round(bytes_uncom/1024/1024, 1)) %>% select(chunksize, time_uncomp, time_snappy, bytes_uncom, ratio)), include.rownames=FALSE)

# ggplot(dd5, aes(y=adj_time, x=chunksize/1024, color=compression)) +  theme + xlab("Chunk Size (KB)") + ylab("Wall clock time (s)") + geom_line() + geom_point() + scale_x_log10()


# ggplot(dd5, aes(y=adj_bytes/1024, x=chunksize/1024, color=compression)) +  theme + xlab("Chunk Size (KB)") + ylab("Data transferred (KB)") + geom_line() + geom_point() + scale_x_log10()







read.table("49.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 1000000

dd3 %>% filter(grepl("monetdb-prot10-", dd3$system, fixed=T)) %>% mutate(system2 = gsub("monetdb-prot10-","", system, fixed=T), compression=str_split_fixed(system2, "-", 2)[,1], stringmethod=str_split_fixed(system2, "-", 2)[,2]) %>% filter(timeout != 1, tuple %in% c(1,ntuples)) %>% select(compression, stringmethod, tuple, time, bytes) %>% group_by(compression, stringmethod, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4


 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("compression", "stringmethod"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1)


 # ggplot(dd5 %>% filter(compression == "snappy"), aes(y=adj_time, x=reorder(stringmethod, -adj_time), label=round(adj_time, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Wall clock time") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")


 # ggplot(dd5 %>% filter(compression == "snappy"), aes(y=adj_bytes, x=reorder(stringmethod, -adj_bytes), label=round(adj_bytes, 2))) + geom_bar(stat="identity", position="dodge", width=.5) + theme + xlab("") + ylab("Wall clock time") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif")



dd5 %>% select(compression, stringmethod, adj_time, adj_bytes)  -> dd6

print(xtable::xtable(dd6 %>% filter(compression=="snappy")%>% full_join(dd6 %>% filter(compression =="none"), by="stringmethod") %>% select(stringmethod, time_uncomp=adj_time.y, bytes_uncom=adj_bytes.y, time_snappy=adj_time.x, bytes_snappy=adj_bytes.x) %>% mutate(ratio=round(bytes_uncom/bytes_snappy, 2), time_uncomp=round(time_uncomp, 2), time_snappy=round(time_snappy, 2), bytes_uncom=round(bytes_uncom/1024/1024, 1)) %>% select(stringmethod, time_uncomp, time_snappy, bytes_uncom, ratio)), include.rownames=FALSE)




sysnames <- c("mariadb-default"="MySQL", "mariadb-compress"="MySQL+C", "postgres-default" = "PostgreSQL", "db2-default" = "DB2", "monetdb-prot10"="MonetDB++", "monetdb-prot10-snappy"="MonetDB++Sy", "netcat-csv"= "netcat")


read.table("51.csv", header=T,  sep=",", stringsAsFactors=F, na.strings="-1") -> dd3

ntuples <- 10000000

dd3 %>% filter(system %in% c(names(sysnames)), timeout != 1, tuple %in% c(1,ntuples)) %>% mutate(system=sysnames[system]) %>% select(system, network, tuple, time, bytes) %>% group_by(system, network, tuple) %>% summarise_each(funs(mean,sd,se=sd(.)/sqrt(n()))) -> dd4


 dd5 <- as.data.frame(dd4 %>% filter(tuple==ntuples) %>% rename(time_1m=time_mean, bytes_1m=bytes_mean) %>% left_join(dd4 %>% filter(tuple == 1) %>% rename(time_1=time_mean, bytes_1=bytes_mean), by=c("system", "network"))) %>% mutate(tuple_time_ms=((time_1m-time_1)/ntuples)*1000, tuple_bytes=(bytes_1m-bytes_1)/ntuples, adj_time= time_1m-time_1, adj_bytes=bytes_1m-bytes_1) %>% select(system, network, adj_time, adj_bytes)



pdf("10m-final-time-unlimited.pdf", width=10, height=3)

ggplot(dd5 %>% filter(network=="unlimited", system!="netcat"), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 1))) + geom_hline(yintercept=1.634335, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, 90))

dev.off()

pdf("10m-final-time-slownet.pdf", width=10, height=3)

ggplot(dd5 %>% filter(network=="10mbitethhd", system!="netcat"), aes(y=adj_time, x=reorder(system, -adj_time), label=round(adj_time, 1))) + geom_hline(yintercept=1014.811853, linetype="dashed") + geom_bar(stat="identity", position="dodge", width=.5) +  theme + xlab("") + ylab("Wall clock time (s)") + coord_flip() + geom_text(size=7, hjust=-.2, family="serif") + scale_y_continuous(limits=c(0, 2000))

dev.off()