Files @ 8bf3b2c41088
Branch filter:

Location: DA/raaql-paper-experiments/makeplots.R - annotation

Hannes Muehleisen
updates with fixed renjin version
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
#!R -f
library(ggplot2)
library(ggthemes)
library(scales)
library(xtable)
library(grid)
library(dplyr)

# Load the benchmark measurements from a tab-separated file without a header
# row; the column names are therefore assigned explicitly. The sections below
# filter on `exp`, group by `sys`/`conf`/`s`, and average `timesec`.
all <- read.csv("results.tsv", sep = "\t", header = FALSE,
                stringsAsFactors = FALSE)
names(all) <- c("exp", "sys", "conf", "s", "r", "timesec")

# Y-axis tick positions in seconds, with matching human-readable labels.
# In this file they are referenced only by the commented-out scale_y_log10()
# calls in the plot sections.
ylabels <- c("10ms", "100ms", "1s", "10s", "1min", "10min")
ybreaks <- c(0.01, 0.1, 1, 10, 60, 600)

# X-axis ticks at the powers of ten from 10^5 to 10^9; the labels are
# expressions so the exponents are typeset as superscripts.
xlabels <- expression(10^5, 10^6, 10^7, 10^8, 10^9)
xbreaks <- 10^(5:9)

# Plot theme shared by every figure: ggthemes' "few" theme at base size 24,
# serif text, slightly shifted axis titles, and no legend (the plots label
# their curves with annotate() text instead).
# The object deliberately carries the name `theme`; calls written as
# theme(...) still reach the ggplot2 function because R ignores non-function
# objects when resolving a name in call position.
theme <- theme_few(base_size = 24) +
  theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    text = element_text(family = "serif"),
    legend.position = "none"
  )


# Standard error of the mean of `x`: square root of the sample variance
# divided by the number of observations.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}






# parallel


# Mean runtime and standard error per system and configuration for the
# "parallel" experiment at s == 8. A conf of "none" counts as a single
# thread; rows with 32 or more threads are dropped.
d <- all %>%
  filter(exp == "parallel", s == 8) %>%
  group_by(exp, sys, conf) %>%
  summarise(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    threads = ifelse(conf == "none", 1L, as.integer(conf)),
    tool = paste(sys, threads)
  ) %>%
  filter(threads < 32)

print(d)

# Range drawn around each point: one standard error either side of the mean.
limits <- aes(ymin = meant - se, ymax = meant + se, width = 2)

pdf("parallel.pdf", width = 10, height = 7)
p <- ggplot(d, aes(threads, meant, group = sys)) +
  geom_point(size = 4) +
  geom_line(aes(group = sys, linetype = sys), size = 1.5) +
  geom_pointrange(limits) +
  scale_y_continuous(limits = c(0, NA)) +
  scale_x_continuous(breaks = unique(d$threads)) +
  # Vertical marker at 10 threads, labelled "Problem parallelism" below.
  geom_vline(xintercept = 10) +
  xlab("Number of Threads") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 13.1, y = 1.5, label = "Problem parallelism",
           family = "serif", size = 10) +
  annotate("text", x = 2, y = 0.4, label = "Laptop",
           family = "serif", size = 10) +
  annotate("text", x = 3, y = 1, label = "Server",
           family = "serif", size = 10)

print(p)
dev.off()


# f <- d %>% group_by(exp, sys, s) %>% summarize(maxmt=max(meant))

# d <- d %>% left_join(f) %>% mutate(speedup = maxmt/meant) %>% select(s,threads,speedup)


# print(d)

# pdf("speedup.pdf",width=10,height=7)
# p <- ggplot(d,aes(threads,speedup,group=s)) + 
#   geom_point(size=4) + geom_line(size=1.5, aes(group=s, linetype=sys)) +
#   scale_y_continuous(limits=c(0,NA)) +
#    scale_x_continuous(breaks=unique(d$threads)) +
# 	xlab("Number of Threads") + ylab("Speedup") + theme #+
# #	annotate("text", x=10, y=6, label="Problem parallelism", family="serif", size=10) #+
# 	# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)

# print(p)
# dev.off()


# Halt the script here: stop() raises an error, so none of the sections below
# (recycling, survey, identity, pushdown, operators) are executed and only
# parallel.pdf is produced.
# NOTE(review): looks like a leftover from regenerating just the parallel
# plot — confirm before removing.
stop()
# recycling
# Mean runtime and standard error per system/configuration and dataset size
# (10^s elements) for the "recycling" experiment, sizes above 10^5 only.
#
# The size threshold is applied numerically. `s` is read as a character
# column whenever results.tsv contains a non-numeric size (the survey section
# matches `s` against dataset names), and `s > 5` on character data compares
# strings, so e.g. "10" would sort below "5". The experiment is selected in a
# separate filter() first so the conversion only touches this experiment's
# rows and does not warn about other experiments' labels.
d <- all %>%
  filter(exp == "recycling") %>%
  filter(as.numeric(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(datasize = 10^as.integer(s), tool = paste(sys, conf))

print(d)

# Range drawn around each point: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("recycling.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # The legend is disabled in the theme; curves are labelled in place.
  annotate("text", x = 10^7.7, y = 65, label = "Renjin",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.7, y = 2, label = "Renjin + R.",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.8, y = 21, label = "GNU R",
           family = "serif", size = 10)

print(p)
dev.off()

# survey
# Dataset size plotted on the x-axis for each survey dataset name found in
# `s`. Kept in one place so the data mapping and the axis breaks cannot
# drift apart.
survey_sizes <- c(alabama = 47512, california = 1060060, acs3yr = 9093077)

# Mean runtime and standard error per system, dataset and configuration for
# the "survey" experiment. For Renjin only the "jitopt" configuration is
# kept; all other systems are kept as they are.
d <- all %>%
  filter(exp == "survey") %>%
  group_by(sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    tool = paste(sys, conf),
    # Vectorised name lookup; a dataset name missing from survey_sizes
    # yields NA rather than silently turning the column into a list.
    datasize = unname(survey_sizes[as.character(s)])
  ) %>%
  filter(tool == "Renjin jitopt" | sys != "Renjin")

print(d)

# Range drawn around each point: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("survey.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = unname(survey_sizes)) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^6.6, y = 45, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.4, y = 100, label = "sqlsurvey",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.8, y = 15, label = "Renjin",
           family = "serif", size = 10)

# scale_color_brewer(palette=cBrwPl) +
# guides(colour=guide_legend(keywidth=3.5))

# Print explicitly: a ggplot object is only drawn when printed, and relying
# on top-level auto-printing leaves the PDF empty when this file is source()d.
print(p)
dev.off()


# identity
# Mean runtime and standard error per system/configuration and dataset size
# (10^s elements) for the "identity" experiment, sizes above 10^5 only.
# A conf of "none" is shown under the bare system name.
#
# The size threshold is applied numerically. `s` is read as a character
# column whenever results.tsv contains a non-numeric size (the survey section
# matches `s` against dataset names), and `s > 5` on character data compares
# strings. The experiment is selected in a separate filter() first so the
# conversion only touches this experiment's rows.
d <- all %>%
  filter(exp == "identity") %>%
  filter(as.numeric(s) > 5) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    tool = ifelse(conf == "none", sys, paste(sys, conf)),
    datasize = 10^as.integer(s)
  )

print(d)

# Range drawn around each point: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("identity.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 60000000, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 8.1, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = .6, label = "Renjin + Identity",
           family = "serif", size = 10)

# scale_color_brewer(palette=cBrwPl) +
# guides(colour=guide_legend(keywidth=3.5))

# Print explicitly: a ggplot object is only drawn when printed, and relying
# on top-level auto-printing leaves the PDF empty when this file is source()d.
print(p)
dev.off()


# pushdown

# Mean runtime and standard error per system and dataset size (10^s
# elements) for the "pushdown" experiment, sizes above 10^4 only.
#
# The size threshold is applied numerically. `s` is read as a character
# column whenever results.tsv contains a non-numeric size (the survey section
# matches `s` against dataset names), and `s > 4` on character data compares
# strings. The experiment is selected in a separate filter() first so the
# conversion only touches this experiment's rows.
d <- all %>%
  filter(exp == "pushdown") %>%
  filter(as.numeric(s) > 4) %>%
  group_by(exp, sys, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(tool = sys, datasize = 10^as.integer(s))

print(d)

# Range drawn around each point: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("pushdown.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.5, y = 5, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.6, y = .4, label = "Renjin ",
           family = "serif", size = 10)

print(p)
dev.off()




# operators
# Mean runtime and standard error per system/configuration and dataset size
# (10^s elements) for the "operators" experiment, sizes above 10^5 only.
# The "opt" configuration is shown as "<sys> + Vectorization"; any other
# configuration is shown under the bare system name.
#
# The size threshold is applied numerically. `s` is read as a character
# column whenever results.tsv contains a non-numeric size (the survey section
# matches `s` against dataset names), and `s > 5` on character data compares
# strings. The experiment is selected in a separate filter() first so the
# conversion only touches this experiment's rows.
d <- all %>%
  filter(exp == "operators") %>%
  filter(as.numeric(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(
    conf = ifelse(conf == "opt", " + Vectorization", ""),
    tool = paste0(sys, conf),
    datasize = 10^as.integer(s)
  )

print(d)

# Range drawn around each point: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("operators.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  # The upper x limit leaves room to the right of 10^8 for the curve labels.
  scale_x_log10(breaks = xbreaks, labels = xlabels, limits = c(NA, 10^8.1)) +
  xlab("Dataset Size (elements, log scale)") +
  # The y axis is linear (the log scale above is disabled), so the label
  # states the unit rather than "(log)".
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.8, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 19, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.95, y = 14, label = "Renjin + V.",
           family = "serif", size = 10)

# annotate("text", x=700000, y=6, label="Renjin", family="serif", size=10)+
# annotate("text", x=20000000, y=2, label="Renjin + Recycling", family="serif", size=10)

print(p)
dev.off()


# # print some latex for the paper
# selection$timesec <- selection$timesec/1000
# selproj$timesec <- selproj$timesec/1000
# grouping$timesec <- grouping$timesec/1000
# joins$timesec <- joins$timesec/1000

# selection <- cast(selection,datasetn+oparg ~ tool)
# selproj <- cast(selproj,datasetn+oparg ~ tool)
# joins <- cast(joins,datasetn+oparg ~ tool)
# grouping <- cast(grouping,datasetn+opargn ~ tool)

# selection[1] <- ""
# print(xtable(selection),include.rownames=FALSE)

# selproj[1] <- ""
# print(xtable(selproj),include.rownames=FALSE)

# grouping[1] <- ""
# print(xtable(grouping),include.rownames=FALSE)

# joins[1] <- ""
# print(xtable(joins),include.rownames=FALSE)