Files @ 1cdd4dffb04a
Branch filter:

Location: DA/raaql-paper-experiments/makeplots.R - annotation

Hannes Muehleisen
removed big files
2aece7dd2719
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
d4cb694d4b30
6da277f993c5
d4cb694d4b30
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
d4cb694d4b30
d4cb694d4b30
d4cb694d4b30
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
76ef4811d43e
76ef4811d43e
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
6da277f993c5
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
6da277f993c5
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
#!R -f

# Make sure every package the script needs is installed, then attach it.
# lp: character vector of package names.
(function(lp) {
  # system.file() returns "" for a package that cannot be found; this avoids
  # scanning the whole library with installed.packages() just to test a few
  # names.
  is_installed <- vapply(
    lp,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
  np <- lp[!is_installed]
  if (length(np) > 0) {
    install.packages(np, repos = c("http://cran.rstudio.com/"))
  }
  for (pkg in lp) {
    library(pkg, character.only = TRUE)
  }
  # Nothing useful to return; keep the top-level call silent.
  invisible(NULL)
})(c("ggplot2", "ggthemes", "scales", "xtable", "grid", "dplyr"))



# Load the raw measurements: a tab-separated file without a header row.
# Strings are kept as character vectors (not factors) because the sections
# below compare and paste them.
all <- read.csv(
  "results.tsv",
  sep = "\t",
  header = FALSE,
  stringsAsFactors = FALSE
)
# Column names used by the sections below.
names(all) <- c("exp", "sys", "conf", "s", "r", "timesec")

# Tick labels and matching positions for a log-scaled time axis. In the
# visible script they are only referenced from commented-out scale_y_log10()
# calls.
ylabels <- c("10ms", "100ms", "1s", "10s", "1min", "10min")
ybreaks <- c(0.01, 0.1, 1, 10, 60, 600)

# Tick positions (powers of ten from 10^5 to 10^9) and plotmath labels for the
# log-scaled dataset-size axis.
xbreaks <- 10^seq(5, 9)
xlabels <- expression(10^5, 10^6, 10^7, 10^8, 10^9)

# Shared look for every figure: the ggthemes "few" theme at base size 24,
# serif text, slightly shifted axis titles and no legend (the plots below
# label their curves with annotate() instead).
# Note: this variable shares its name with ggplot2's theme() function; the
# call on the right-hand side still resolves to the function.
theme <- theme_few(base_size = 24) +
  theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    text = element_text(family = "serif"),
    legend.position = "none"
  )


# Standard error of the mean: square root of the sample variance divided by
# the number of observations.
# x: numeric vector of measurements.
# Returns a single numeric value.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}



# survey
# Mean execution time and standard error per system / dataset / configuration
# for the "survey" experiment, restricted to the tools shown in the figure.
d <- all %>%
  filter(exp == "survey") %>%
  group_by(sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    tool = paste(sys, conf),
    # Map the dataset name to its size with a named-vector lookup: the result
    # is always a plain numeric column, and a name that is not listed yields
    # NA instead of a list column.
    datasize = unname(
      c(alabama = 47512, california = 1060060, acs3yr = 9093077)[s]
    )
  ) %>%
  filter(
    tool %in% c("Renjin jitopt", "Renjin none", "Renjin 1t") |
      sys == "GNU R"
  )

print(d)

# Error bars span one standard error either side of the mean.
# (geom_pointrange has no `width` aesthetic, so none is mapped.)
limits <- aes(ymax = meant + se, ymin = meant - se)

pdf("survey.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = c(47512, 1060060, 9093077)) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # Curve labels are placed by hand because the theme hides the legend.
  annotate("text", x = 10^6.6, y = 45, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.4, y = 100, label = "Renjin -opt",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.8, y = -1, label = "Renjin",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.8, y = 20, label = "Renjin 1t",
           family = "serif", size = 10)

# scale_color_brewer(palette=cBrwPl) +
# guides(colour=guide_legend(keywidth=3.5))

# Print explicitly so the figure is also drawn when the script is source()d,
# where top-level values are not auto-printed.
print(p)
dev.off()

# parallel


# Mean execution time and standard error per system and thread count for the
# "parallel" experiment at s == 8, keeping only runs with fewer than 32
# threads.
d <- all %>%
  filter(exp == "parallel", s == 8) %>%
  group_by(exp, sys, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    # conf "none" counts as one thread. Substituting before the conversion
    # keeps as.integer() from being applied to "none", which would only
    # produce an NA-coercion warning.
    threads = as.integer(ifelse(conf == "none", "1", conf))
  ) %>%
  mutate(tool = paste(sys, threads)) %>%
  filter(threads < 32)

print(d)

# Error bars span one standard error either side of the mean.
# (geom_pointrange has no `width` aesthetic, so none is mapped.)
limits <- aes(ymax = meant + se, ymin = meant - se)

pdf("parallel.pdf", width = 10, height = 7)
p <- ggplot(d, aes(threads, meant, group = sys)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = sys, linetype = sys)) +
  geom_pointrange(limits) +
  # Start the time axis at zero; the upper limit is taken from the data.
  scale_y_continuous(limits = c(0, NA)) +
  scale_x_continuous(breaks = unique(d$threads)) +
  # Vertical marker at 10 threads, labelled "Problem parallelism" below.
  geom_vline(xintercept = 10) +
  xlab("Number of Threads") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 13.1, y = 1.5, label = "Problem parallelism",
           family = "serif", size = 10) +
  annotate("text", x = 2, y = 0.4, label = "Server",
           family = "serif", size = 10) +
  annotate("text", x = 3, y = 1, label = "Laptop",
           family = "serif", size = 10)

print(p)
dev.off()


# f <- d %>% group_by(exp, sys, s) %>% summarize(maxmt=max(meant))

# d <- d %>% left_join(f) %>% mutate(speedup = maxmt/meant) %>% select(s,threads,speedup)


# print(d)

# pdf("speedup.pdf",width=10,height=7)
# p <- ggplot(d,aes(threads,speedup,group=s)) + 
#   geom_point(size=4) + geom_line(size=1.5, aes(group=s, linetype=sys)) +
#   scale_y_continuous(limits=c(0,NA)) +
#    scale_x_continuous(breaks=unique(d$threads)) +
# 	xlab("Number of Threads") + ylab("Speedup") + theme #+
# #	annotate("text", x=10, y=6, label="Problem parallelism", family="serif", size=10) #+
# 	# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)

# print(p)
# dev.off()


# recycling
# Mean execution time and standard error per system / size / configuration
# for the "recycling" experiment, for size exponents above 5.
d <- all %>%
  filter(exp == "recycling") %>%
  # `s` is read as text because other experiments store dataset names in it.
  # Convert it first so that `s > 5` is a numeric comparison rather than a
  # lexicographic one (as text, "10" does not compare greater than 5).
  mutate(s = as.integer(s)) %>%
  filter(s > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # `s` is the base-10 exponent of the dataset size.
  mutate(datasize = 10^s, tool = paste(sys, conf))

print(d)

# Error bars span one standard error either side of the mean.
# (geom_pointrange has no `width` aesthetic, so none is mapped.)
limits <- aes(ymax = meant + se, ymin = meant - se)

pdf("recycling.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # Curve labels are placed by hand because the theme hides the legend.
  annotate("text", x = 10^7.7, y = 65, label = "Renjin",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.7, y = 2, label = "Renjin + R.",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.8, y = 21, label = "GNU R",
           family = "serif", size = 10)

print(p)
dev.off()



# identity
# Mean execution time and standard error per system / configuration / size
# for the "identity" experiment, for size exponents above 5.
d <- all %>%
  filter(exp == "identity") %>%
  # `s` is read as text because other experiments store dataset names in it.
  # Convert it first so that `s > 5` is a numeric comparison rather than a
  # lexicographic one (as text, "10" does not compare greater than 5).
  mutate(s = as.integer(s)) %>%
  filter(s > 5) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    # Runs with conf "none" are labelled with the system name alone.
    tool = ifelse(conf == "none", sys, paste(sys, conf)),
    # `s` is the base-10 exponent of the dataset size.
    datasize = 10^s
  )

print(d)

# Error bars span one standard error either side of the mean.
# (geom_pointrange has no `width` aesthetic, so none is mapped.)
limits <- aes(ymax = meant + se, ymin = meant - se)

pdf("identity.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # Curve labels are placed by hand because the theme hides the legend.
  annotate("text", x = 60000000, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 8.1, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = .6, label = "Renjin + Identity",
           family = "serif", size = 10)

# scale_color_brewer(palette=cBrwPl) +
# guides(colour=guide_legend(keywidth=3.5))

# Print explicitly so the figure is also drawn when the script is source()d,
# where top-level values are not auto-printed.
print(p)
dev.off()


# pushdown

# Mean execution time and standard error per system and size for the
# "pushdown" experiment, for size exponents above 5.
d <- all %>%
  filter(exp == "pushdown") %>%
  # `s` is read as text because other experiments store dataset names in it.
  # Convert it first so that `s > 5` is a numeric comparison rather than a
  # lexicographic one (as text, "10" does not compare greater than 5).
  mutate(s = as.integer(s)) %>%
  filter(s > 5) %>%
  group_by(exp, sys, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # `s` is the base-10 exponent of the dataset size.
  mutate(tool = sys, datasize = 10^s)

print(d)

# Error bars span one standard error either side of the mean.
# (geom_pointrange has no `width` aesthetic, so none is mapped.)
limits <- aes(ymax = meant + se, ymin = meant - se)

pdf("pushdown.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # Curve labels are placed by hand because the theme hides the legend.
  annotate("text", x = 10^7.6, y = 5, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.6, y = .4, label = "Renjin ",
           family = "serif", size = 10)

print(p)
dev.off()




# operators
# Mean execution time and standard error per system / size / configuration
# for the "operators" experiment, for size exponents above 5.
d <- all %>%
  filter(exp == "operators") %>%
  # `s` is read as text because other experiments store dataset names in it.
  # Convert it first so that `s > 5` is a numeric comparison rather than a
  # lexicographic one (as text, "10" does not compare greater than 5).
  mutate(s = as.integer(s)) %>%
  filter(s > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    # conf "opt" becomes a " + Vectorization" suffix; any other value adds
    # nothing to the tool name.
    conf = ifelse(conf == "opt", " + Vectorization", ""),
    tool = paste0(sys, conf),
    # `s` is the base-10 exponent of the dataset size.
    datasize = 10^s
  )

print(d)

# Error bars span one standard error either side of the mean.
# (geom_pointrange has no `width` aesthetic, so none is mapped.)
limits <- aes(ymax = meant + se, ymin = meant - se)

pdf("operators.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  # The x axis is cut off just above 10^8.
  scale_x_log10(breaks = xbreaks, labels = xlabels, limits = c(NA, 10^8.1)) +
  xlab("Dataset Size (elements, log scale)") +
  # The y axis is linear (the log scale above is disabled), so it is labelled
  # in seconds like the other figures.
  ylab("Execution Time (s)") +
  theme +
  # Curve labels are placed by hand because the theme hides the legend.
  annotate("text", x = 10^7.8, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 19, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.95, y = 14, label = "Renjin + V.",
           family = "serif", size = 10)

# annotate("text", x=700000, y=6, label="Renjin", family="serif", size=10)+
# annotate("text", x=20000000, y=2, label="Renjin + Recycling", family="serif", size=10)

print(p)
dev.off()


# # print some latex for the paper
# selection$timesec <- selection$timesec/1000
# selproj$timesec <- selproj$timesec/1000
# grouping$timesec <- grouping$timesec/1000
# joins$timesec <- joins$timesec/1000

# selection <- cast(selection,datasetn+oparg ~ tool)
# selproj <- cast(selproj,datasetn+oparg ~ tool)
# joins <- cast(joins,datasetn+oparg ~ tool)
# grouping <- cast(grouping,datasetn+opargn ~ tool)

# selection[1] <- ""
# print(xtable(selection),include.rownames=FALSE)

# selproj[1] <- ""
# print(xtable(selproj),include.rownames=FALSE)

# grouping[1] <- ""
# print(xtable(grouping),include.rownames=FALSE)

# joins[1] <- ""
# print(xtable(joins),include.rownames=FALSE)