#!R -f
# Renders one PDF figure per benchmark experiment from the timings stored in
# results.tsv (recycling, survey, identity, pushdown, parallel, speedup,
# operators).
library(ggplot2)
library(ggthemes)
library(scales)
library(xtable)
library(grid)
library(dplyr)

# results.tsv is read as tab-separated text without a header row; the column
# names are assigned here.
results <- read.csv("results.tsv", sep = "\t", header = FALSE,
                    stringsAsFactors = FALSE)
names(results) <- c("exp", "sys", "conf", "s", "r", "timesec")

# Breaks/labels for a logarithmic time axis. Only needed when the
# scale_y_log10(breaks = ybreaks, labels = ylabels) layer is enabled; every
# plot below currently uses a linear y axis.
ybreaks <- c(.01, .1, 1, 10, 60, 600)
ylabels <- c("10ms", "100ms", "1s", "10s", "1min", "10min")

xbreaks <- 10^(5:9)
xlabels <- c(expression(10^5), expression(10^6), expression(10^7),
             expression(10^8), expression(10^9))

# Shared look of all figures. Not called `theme`, so that ggplot2::theme()
# is not shadowed by a variable of the same name.
plot_theme <- theme_few(base_size = 24) +
  theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    text = element_text(family = "serif"),
    legend.position = "none"
  )

# Standard error of the mean of x.
se <- function(x) sqrt(var(x) / length(x))

# Error-bar mapping (mean +/- one standard error) shared by every timing plot.
# geom_pointrange() has no `width` aesthetic, so none is mapped.
err_bars <- aes(ymax = meant + se, ymin = meant - se)

# Text annotation in the serif face and size used for all in-plot labels.
label_at <- function(x, y, label) {
  annotate("text", x = x, y = y, label = label, family = "serif", size = 10)
}

# Draws `plot` into the PDF file `path`. The plot is printed explicitly so the
# figure is also produced when the script is run through source(), and the
# device is closed even if rendering fails.
save_pdf <- function(plot, path, width = 10, height = 7) {
  pdf(path, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(plot)
}

# Note on `s`: the survey rows store dataset names in this column, so it is
# read as text. Numeric conditions on it are therefore applied via
# as.integer(), and only after the rows of the wanted experiment have been
# selected, instead of comparing strings with numbers.


# recycling ----
d <- results %>%
  filter(exp == "recycling") %>%
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  ungroup() %>%
  mutate(datasize = 10^as.integer(s), tool = paste(sys, conf))

print(d)

p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(err_bars) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  plot_theme +
  label_at(10^7.7, 65, "Renjin") +
  label_at(10^7.7, 2, "Renjin + R.") +
  label_at(10^7.8, 21, "GNU R")

save_pdf(p, "recycling.pdf")


# survey ----
# Number of elements of each survey dataset, keyed by the name stored in `s`.
survey_sizes <- c(alabama = 47512, california = 1060060, acs3yr = 9093077)

d <- results %>%
  filter(exp == "survey") %>%
  group_by(sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  ungroup() %>%
  mutate(tool = paste(sys, conf), datasize = unname(survey_sizes[s])) %>%
  # For Renjin only the "jitopt" configuration is plotted.
  filter(tool == "Renjin jitopt" | sys != "Renjin")

print(d)

p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(err_bars) +
  scale_x_log10(breaks = unname(survey_sizes)) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  plot_theme +
  label_at(10^6.6, 45, "GNU R") +
  label_at(10^6.4, 100, "sqlsurvey") +
  label_at(10^6.8, 15, "Renjin")

save_pdf(p, "survey.pdf")


# identity ----
d <- results %>%
  filter(exp == "identity") %>%
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  ungroup() %>%
  mutate(tool = ifelse(conf == "none", sys, paste(sys, conf)),
         datasize = 10^as.integer(s))

print(d)

p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(err_bars) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  plot_theme +
  label_at(60000000, 4, "GNU R") +
  label_at(40000000, 8.1, "Renjin ") +
  label_at(40000000, .6, "Renjin + Identity")

save_pdf(p, "identity.pdf")


# pushdown ----
d <- results %>%
  filter(exp == "pushdown") %>%
  filter(as.integer(s) > 4) %>%
  group_by(exp, sys, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  ungroup() %>%
  mutate(tool = sys, datasize = 10^as.integer(s))

print(d)

p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(err_bars) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  plot_theme +
  label_at(10^7.5, 5, "GNU R") +
  label_at(10^7.6, .4, "Renjin ")

save_pdf(p, "pushdown.pdf")


# parallel ----
d <- results %>%
  filter(exp == "parallel") %>%
  filter(as.integer(s) == 8L) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  ungroup() %>%
  mutate(
    datasize = 10^as.integer(s),
    # conf holds the thread count, with "none" standing for a single thread.
    # Substitute before converting so "none" is never coerced to NA.
    threads = as.integer(ifelse(conf == "none", "1", conf))
  ) %>%
  mutate(tool = paste(sys, threads))

print(d)

p <- ggplot(d, aes(threads, meant, group = s)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = s, linetype = sys)) +
  geom_pointrange(err_bars) +
  scale_y_continuous(limits = c(0, NA)) +
  scale_x_continuous(breaks = unique(d$threads)) +
  geom_vline(xintercept = 10) +
  xlab("Number of Threads") +
  ylab("Execution Time (s)") +
  plot_theme +
  label_at(17, 1.5, "Problem parallelism")

save_pdf(p, "parallel.pdf")


# speedup ----
# Speedup of each thread count relative to the slowest mean time measured for
# the same experiment, system and size.
f <- d %>%
  group_by(exp, sys, s) %>%
  summarize(maxmt = max(meant)) %>%
  ungroup()

d <- d %>%
  left_join(f, by = c("exp", "sys", "s")) %>%
  mutate(speedup = maxmt / meant) %>%
  # `sys` must stay in the table: the plot below maps it to the line type.
  select(exp, sys, conf, s, threads, speedup)

print(d)

p <- ggplot(d, aes(threads, speedup, group = s)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = s, linetype = sys)) +
  scale_y_continuous(limits = c(0, NA)) +
  scale_x_continuous(breaks = unique(d$threads)) +
  xlab("Number of Threads") +
  ylab("Speedup") +
  plot_theme

save_pdf(p, "speedup.pdf")


# operators ----
d <- results %>%
  filter(exp == "operators") %>%
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  ungroup() %>%
  mutate(conf = ifelse(conf == "opt", " + Vectorization", ""),
         tool = paste0(sys, conf),
         datasize = 10^as.integer(s))

print(d)

p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(err_bars) +
  scale_x_log10(breaks = xbreaks, labels = xlabels, limits = c(NA, 10^8.1)) +
  xlab("Dataset Size (elements, log scale)") +
  # The y axis is linear (no scale_y_log10 layer), so it is labelled in
  # seconds like the other figures rather than as a log axis.
  ylab("Execution Time (s)") +
  plot_theme +
  label_at(10^7.8, 4, "GNU R") +
  label_at(40000000, 19, "Renjin ") +
  label_at(10^7.95, 14, "Renjin + V.")

save_pdf(p, "operators.pdf")


# # print some latex for the paper
# selection$timesec <- selection$timesec/1000
# selproj$timesec <- selproj$timesec/1000
# grouping$timesec <- grouping$timesec/1000
# joins$timesec <- joins$timesec/1000

# selection <- cast(selection,datasetn+oparg ~ tool)
# selproj <- cast(selproj,datasetn+oparg ~ tool)
# joins <- cast(joins,datasetn+oparg ~ tool)
# grouping <- cast(grouping,datasetn+opargn ~ tool)

# selection[1] <- ""
# print(xtable(selection),include.rownames=FALSE)

# selproj[1] <- ""
# print(xtable(selproj),include.rownames=FALSE)

# grouping[1] <- ""
# print(xtable(grouping),include.rownames=FALSE)

# joins[1] <- ""
# print(xtable(joins),include.rownames=FALSE)