Files @ 2aece7dd2719
Branch filter:

Location: DA/raaql-paper-experiments/makeplots.R - annotation

Hannes Muehleisen
import
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
#!R -f
library(ggplot2)
library(ggthemes)
library(scales)
library(xtable)
library(grid)
library(dplyr)

# Load the raw benchmark measurements. The file is tab-separated and has no
# header row, so the column names are assigned explicitly afterwards.
# TRUE/FALSE are spelled out because the T/F aliases are ordinary variables
# that can be reassigned.
all <- read.csv(
  "results.tsv",
  sep = "\t",
  header = FALSE,
  stringsAsFactors = FALSE
)
# exp     - experiment name (e.g. "recycling", "survey", "parallel")
# sys     - system under test
# conf    - configuration of that system ("none", "opt", a thread count, ...)
# s       - size key: a power-of-ten exponent, or a dataset name for "survey"
# r       - not used by this script; presumably the repetition number (confirm)
# timesec - measured execution time, plotted as seconds
names(all) <- c("exp", "sys", "conf", "s", "r", "timesec")

# Tick positions (seconds) and matching labels for a log-scaled time axis.
# Only referenced by the scale_y_log10() calls that are currently commented
# out in the plot sections.
ybreaks <- c(0.01, 0.1, 1, 10, 60, 600)
ylabels <- c("10ms", "100ms", "1s", "10s", "1min", "10min")

# Tick positions and plotmath labels (rendered as powers of ten) for the
# log-scaled dataset-size axis.
xbreaks <- 10^seq(5L, 9L)
xlabels <- expression(10^5, 10^6, 10^7, 10^8, 10^9)

# Shared look for every figure: the ggthemes "few" theme at a large base
# size, serif text, slightly nudged axis titles, and no legend (the plots
# label their lines with annotate() instead).
# The variable is named like ggplot2's theme() function; the call below still
# reaches the function because R skips non-function bindings when resolving a
# name in call position.
theme <- theme_few(base_size = 24) +
  theme(
    legend.position = "none",
    text = element_text(family = "serif"),
    axis.title.x = element_text(vjust = -0.1),
    axis.title.y = element_text(vjust = 0.9)
  )


# Standard error of the mean: square root of the sample variance divided by
# the number of observations. Yields NA for a single observation, because
# var() is NA there.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}



# recycling
# Mean execution time and standard error per (system, size, configuration)
# for the "recycling" experiment, keeping only size exponents above 5.
# `s` is a text column (the survey rows store dataset names in it), so it is
# converted before the numeric test; comparing the raw strings against 5 is
# lexicographic and would, for example, drop a size key of "10". The
# experiment filter runs first so the conversion never sees the non-numeric
# survey keys.
d <- all %>%
  filter(exp == "recycling") %>%
  filter(as.numeric(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(datasize = 10^as.integer(s), tool = paste(sys, conf))

print(d)

# Range aesthetics: mean +/- one standard error.
# NOTE(review): `width` is not among geom_pointrange's documented aesthetics
# and is presumably ignored - confirm before removing.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("recycling.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # The legend is disabled in the shared theme; lines are labelled by hand.
  annotate("text", x = 10^7.7, y = 65, label = "Renjin",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.7, y = 2, label = "Renjin + R.",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.8, y = 21, label = "GNU R",
           family = "serif", size = 10)

print(p)
dev.off()

# survey
# Number of elements in each survey dataset, keyed by the name stored in `s`.
# A named-vector lookup is used instead of sapply(s, switch, ...): it always
# returns a numeric vector (NA for an unknown key), whereas sapply() falls
# back to a list when switch() finds no match.
survey_sizes <- c(alabama = 47512, california = 1060060, acs3yr = 9093077)

# Mean execution time and standard error per (system, dataset, configuration)
# for the "survey" experiment. Of the Renjin configurations only "jitopt" is
# kept; all other systems are kept as they are.
d <- all %>%
  filter(exp == "survey") %>%
  group_by(sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    tool = paste(sys, conf),
    datasize = unname(survey_sizes[s])
  ) %>%
  filter(tool == "Renjin jitopt" | sys != "Renjin")

print(d)

# Range aesthetics: mean +/- one standard error.
# NOTE(review): `width` is not among geom_pointrange's documented aesthetics
# and is presumably ignored - confirm before removing.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("survey.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  # One tick per dataset, placed at its element count.
  scale_x_log10(breaks = unname(survey_sizes)) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^6.6, y = 45, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.4, y = 100, label = "sqlsurvey",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.8, y = 15, label = "Renjin",
           family = "serif", size = 10)

# scale_color_brewer(palette = cBrwPl) +
# guides(colour = guide_legend(keywidth = 3.5))

# Print explicitly, like the other figures: a bare ggplot expression is only
# drawn through top-level auto-printing, which source() does not perform, so
# the PDF would otherwise come out empty there.
print(p)
dev.off()


# identity
# Mean execution time and standard error per (system, configuration, size)
# for the "identity" experiment, keeping only size exponents above 5.
# `s` is a text column (the survey rows store dataset names in it), so it is
# converted before the numeric test; comparing the raw strings against 5 is
# lexicographic and would, for example, drop a size key of "10". The
# experiment filter runs first so the conversion never sees the non-numeric
# survey keys.
d <- all %>%
  filter(exp == "identity") %>%
  filter(as.numeric(s) > 5) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    # Systems run without a special configuration are labelled by name only.
    tool = ifelse(conf == "none", sys, paste(sys, conf)),
    datasize = 10^as.integer(s)
  )

print(d)

# Range aesthetics: mean +/- one standard error.
# NOTE(review): `width` is not among geom_pointrange's documented aesthetics
# and is presumably ignored - confirm before removing.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("identity.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 60000000, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 8.1, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = .6, label = "Renjin + Identity",
           family = "serif", size = 10)

# scale_color_brewer(palette = cBrwPl) +
# guides(colour = guide_legend(keywidth = 3.5))

# Print explicitly, like the other figures: a bare ggplot expression is only
# drawn through top-level auto-printing, which source() does not perform, so
# the PDF would otherwise come out empty there.
print(p)
dev.off()


# pushdown

# Mean execution time and standard error per (system, size) for the
# "pushdown" experiment, keeping only size exponents above 4.
# `s` is a text column (the survey rows store dataset names in it), so it is
# converted before the numeric test; comparing the raw strings against 4 is
# lexicographic and would, for example, drop a size key of "10". The
# experiment filter runs first so the conversion never sees the non-numeric
# survey keys.
d <- all %>%
  filter(exp == "pushdown") %>%
  filter(as.numeric(s) > 4) %>%
  group_by(exp, sys, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(tool = sys, datasize = 10^as.integer(s))

print(d)

# Range aesthetics: mean +/- one standard error.
# NOTE(review): `width` is not among geom_pointrange's documented aesthetics
# and is presumably ignored - confirm before removing.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("pushdown.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.5, y = 5, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.6, y = .4, label = "Renjin ",
           family = "serif", size = 10)

print(p)
dev.off()



# parallel

# Mean execution time and standard error per (system, configuration) for the
# "parallel" experiment at size key 8 only. `s` is a text column, so the key
# is compared as the string "8" (what the numeric literal 8 was implicitly
# coerced to before).
# `conf` holds the thread count as text, with "none" standing for a single
# thread. "none" is mapped to "1" *before* the integer conversion, so
# as.integer() is never applied to a non-numeric string and no coercion
# warning is raised.
d <- all %>%
  filter(exp == "parallel", s == "8") %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    datasize = 10^as.integer(s),
    threads = as.integer(ifelse(conf == "none", "1", conf))
  ) %>%
  mutate(tool = paste(sys, threads))

print(d)

# Range aesthetics: mean +/- one standard error.
# NOTE(review): `width` is not among geom_pointrange's documented aesthetics
# and is presumably ignored - confirm before removing.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("parallel.pdf", width = 10, height = 7)
p <- ggplot(d, aes(threads, meant, group = s)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = s, linetype = sys)) +
  geom_pointrange(limits) +
  # Anchor the time axis at zero; the upper limit is left to the data.
  scale_y_continuous(limits = c(0, NA)) +
  # One tick per measured thread count.
  scale_x_continuous(breaks = unique(d$threads)) +
  # Vertical marker at 10 threads, labelled "Problem parallelism" below.
  geom_vline(xintercept = 10) +
  xlab("Number of Threads") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 17, y = 1.5, label = "Problem parallelism",
           family = "serif", size = 10)

# annotate("text", x=1.3, y=6, label="GNU R", family="serif", size=10)+
# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)

print(p)
dev.off()


# speedup
# Continues from the "parallel" section: `d` holds one row per
# (exp, sys, conf, s) with the mean time `meant` and the thread count.

# Slowest mean time per (experiment, system, size); this is the baseline the
# speedup is measured against.
f <- d %>%
  group_by(exp, sys, s) %>%
  summarize(maxmt = max(meant))

# Speedup = baseline time / mean time of the configuration.
# The join keys are spelled out rather than inferred from the shared column
# names, and the grouping columns are selected explicitly: the plot below
# needs `sys`, which was previously kept only because select() silently
# re-adds grouping variables.
d <- d %>%
  left_join(f, by = c("exp", "sys", "s")) %>%
  mutate(speedup = maxmt / meant) %>%
  select(exp, sys, conf, s, threads, speedup)

print(d)

pdf("speedup.pdf", width = 10, height = 7)
p <- ggplot(d, aes(threads, speedup, group = s)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = s, linetype = sys)) +
  # Anchor the speedup axis at zero; the upper limit is left to the data.
  scale_y_continuous(limits = c(0, NA)) +
  # One tick per measured thread count.
  scale_x_continuous(breaks = unique(d$threads)) +
  xlab("Number of Threads") +
  ylab("Speedup") +
  theme
# annotate("text", x=10, y=6, label="Problem parallelism", family="serif", size=10) #+
# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)

print(p)
dev.off()



# operators
# Mean execution time and standard error per (system, size, configuration)
# for the "operators" experiment, keeping only size exponents above 5.
# `s` is a text column (the survey rows store dataset names in it), so it is
# converted before the numeric test; comparing the raw strings against 5 is
# lexicographic and would, for example, drop a size key of "10". The
# experiment filter runs first so the conversion never sees the non-numeric
# survey keys.
d <- all %>%
  filter(exp == "operators") %>%
  filter(as.numeric(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    # The "opt" configuration is shown as a " + Vectorization" suffix on the
    # system name; every other configuration adds nothing.
    conf = ifelse(conf == "opt", " + Vectorization", ""),
    tool = paste0(sys, conf),
    datasize = 10^as.integer(s)
  )

print(d)

# Range aesthetics: mean +/- one standard error.
# NOTE(review): `width` is not among geom_pointrange's documented aesthetics
# and is presumably ignored - confirm before removing.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("operators.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  # The upper x limit leaves room for the text labels on the right.
  scale_x_log10(breaks = xbreaks, labels = xlabels, limits = c(NA, 10^8.1)) +
  xlab("Dataset Size (elements, log scale)") +
  # The y axis is linear (the log scale above is disabled), so it is labelled
  # in seconds like the other figures rather than as "(log)".
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.8, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 19, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.95, y = 14, label = "Renjin + V.",
           family = "serif", size = 10)

# annotate("text", x=700000, y=6, label="Renjin", family="serif", size=10)+
# annotate("text", x=20000000, y=2, label="Renjin + Recycling", family="serif", size=10)

print(p)
dev.off()


# # print some latex for the paper
# selection$timesec <- selection$timesec/1000
# selproj$timesec <- selproj$timesec/1000
# grouping$timesec <- grouping$timesec/1000
# joins$timesec <- joins$timesec/1000

# selection <- cast(selection,datasetn+oparg ~ tool)
# selproj <- cast(selproj,datasetn+oparg ~ tool)
# joins <- cast(joins,datasetn+oparg ~ tool)
# grouping <- cast(grouping,datasetn+opargn ~ tool)

# selection[1] <- ""
# print(xtable(selection),include.rownames=FALSE)

# selproj[1] <- ""
# print(xtable(selproj),include.rownames=FALSE)

# grouping[1] <- ""
# print(xtable(grouping),include.rownames=FALSE)

# joins[1] <- ""
# print(xtable(joins),include.rownames=FALSE)