Files @ 32990bba4df7
Branch filter:

Location: DA/raaql-paper-experiments/makeplots.R - annotation

Hannes Muehleisen
mc2
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
76ef4811d43e
76ef4811d43e
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
#!R -f
library(ggplot2)
library(ggthemes)
library(scales)
library(xtable)
library(grid)
library(dplyr)

# Load the raw benchmark measurements from a headerless, tab-separated file
# and attach column names, since the file carries none of its own.
# TRUE/FALSE are spelled out because the T/F aliases are ordinary variables
# that can be reassigned.
# NOTE: the object shares its name with base::all(); the name is kept because
# the plotting sections below refer to it.
all <- read.csv("results.tsv", sep = "\t", header = FALSE,
                stringsAsFactors = FALSE)
names(all) <- c("exp", "sys", "conf", "s", "r", "timesec")

# Tick positions (in seconds) and their human-readable labels for a
# log-scaled time axis.
ybreaks <- c(0.01, 0.1, 1, 10, 60, 600)
ylabels <- c("10ms", "100ms", "1s", "10s", "1min", "10min")

# Tick positions for the dataset-size axis (10^5 .. 10^9) and the matching
# plotmath expressions used as tick labels.
xbreaks <- 10^(5:9)
xlabels <- expression(10^5, 10^6, 10^7, 10^8, 10^9)

# Shared appearance for every figure in this script: the ggthemes "few" theme
# at base size 24, serif text, slightly shifted axis titles, and no legend
# (the plots label their curves with annotate() instead).
# NOTE: this object shares its name with ggplot2::theme(); function calls such
# as theme(...) still resolve to the function.
theme <- theme_few(base_size = 24) +
  theme(
    legend.position = "none",
    text = element_text(family = "serif"),
    axis.title.x = element_text(vjust = -0.1),
    axis.title.y = element_text(vjust = 0.9)
  )


#' Standard error of the mean.
#'
#' @param x Numeric vector of observations.
#' @return `sqrt(var(x) / length(x))`; `NA` for a single observation, because
#'   `var()` is `NA` in that case.
se <- function(x) {
  n <- length(x)
  sqrt(var(x) / n)
}






# parallel ----
# Mean execution time (+/- standard error) against thread count for the
# "parallel" experiment at s == 8, one line per system. Written to
# parallel.pdf.
d <- all %>%
  filter(exp == "parallel", s == 8) %>%
  group_by(exp, sys, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # conf holds the thread count, with "none" standing for a single thread.
  # Substitute "1" before converting so as.integer() never sees "none" and
  # therefore emits no "NAs introduced by coercion" warning.
  mutate(threads = as.integer(ifelse(conf == "none", "1", conf))) %>%
  mutate(tool = paste(sys, threads)) %>%
  filter(threads < 32)

print(d)

# Error-bar extent: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("parallel.pdf", width = 10, height = 7)
p <- ggplot(d, aes(threads, meant, group = sys)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = sys, linetype = sys)) +
  geom_pointrange(limits) +
  scale_y_continuous(limits = c(0, NA)) +
  scale_x_continuous(breaks = unique(d$threads)) +
  # Vertical marker at 10 threads, labelled "Problem parallelism" below.
  geom_vline(xintercept = 10) +
  xlab("Number of Threads") + ylab("Execution Time (s)") + theme +
  annotate("text", x = 13.1, y = 1.5, label = "Problem parallelism",
           family = "serif", size = 10) +
  annotate("text", x = 2, y = 0.4, label = "Server",
           family = "serif", size = 10) +
  annotate("text", x = 3, y = 1, label = "Laptop",
           family = "serif", size = 10)

print(p)
dev.off()


# f <- d %>% group_by(exp, sys, s) %>% summarize(maxmt=max(meant))

# d <- d %>% left_join(f) %>% mutate(speedup = maxmt/meant) %>% select(s,threads,speedup)


# print(d)

# pdf("speedup.pdf",width=10,height=7)
# p <- ggplot(d,aes(threads,speedup,group=s)) + 
#   geom_point(size=4) + geom_line(size=1.5, aes(group=s, linetype=sys)) +
#   scale_y_continuous(limits=c(0,NA)) +
#    scale_x_continuous(breaks=unique(d$threads)) +
# 	xlab("Number of Threads") + ylab("Speedup") + theme #+
# #	annotate("text", x=10, y=6, label="Problem parallelism", family="serif", size=10) #+
# 	# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)

# print(p)
# dev.off()


# NOTE(review): this unconditional stop() raises an error at this point, so in
# a non-interactive run none of the sections below are executed. It looks like
# a temporary switch for regenerating only parallel.pdf -- confirm, and remove
# it to rebuild the remaining figures.
stop()
# recycling ----
# Mean execution time (+/- standard error) against dataset size for the
# "recycling" experiment, one line per system/configuration pair. Written to
# recycling.pdf.
d <- all %>%
  filter(exp == "recycling") %>%
  # `s` is read as character whenever results.tsv also contains the survey
  # rows (whose `s` is a dataset name), so a bare `s > 5` would compare
  # strings and drop e.g. "10". Compare the exponent numerically, and only
  # after restricting to this experiment so that no non-numeric labels reach
  # as.integer().
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(datasize = 10^as.integer(s), tool = paste(sys, conf))

print(d)

# Error-bar extent: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("recycling.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.7, y = 65, label = "Renjin",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.7, y = 2, label = "Renjin + R.",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.8, y = 21, label = "GNU R",
           family = "serif", size = 10)

print(p)
dev.off()

# survey ----
# Mean execution time (+/- standard error) against dataset size for the
# "survey" experiment. Written to survey.pdf.

# Number of elements in each survey dataset, keyed by the label stored in `s`.
# Shared by the datasize lookup and the x-axis breaks so the two cannot drift.
survey_sizes <- c(alabama = 47512, california = 1060060, acs3yr = 9093077)

d <- all %>%
  filter(exp == "survey") %>%
  group_by(sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # A named-vector lookup always yields a numeric column (NA for an unknown
  # label), whereas sapply(s, switch, ...) would silently return a list if any
  # label were missing from the mapping.
  mutate(tool = paste(sys, conf),
         datasize = unname(survey_sizes[s])) %>%
  # Keep every non-Renjin system, and of Renjin only the "jitopt" setup.
  filter(tool == "Renjin jitopt" | sys != "Renjin")

print(d)

# Error-bar extent: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("survey.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = unname(survey_sizes)) +
  xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^6.6, y = 45, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.4, y = 100, label = "sqlsurvey",
           family = "serif", size = 10) +
  annotate("text", x = 10^6.8, y = 15, label = "Renjin",
           family = "serif", size = 10)

	#scale_color_brewer(palette=cBrwPl) +
 #guides(colour=guide_legend(keywidth=3.5))

# Print explicitly, like the other sections, so the figure is also drawn when
# the script is run via source(), where top-level values are not auto-printed.
print(p)
dev.off()


# identity ----
# Mean execution time (+/- standard error) against dataset size for the
# "identity" experiment, one line per system (plus configuration when it is
# not "none"). Written to identity.pdf.
d <- all %>%
  filter(exp == "identity") %>%
  # `s` is read as character whenever results.tsv also contains the survey
  # rows (whose `s` is a dataset name), so a bare `s > 5` would compare
  # strings and drop e.g. "10". Compare the exponent numerically, and only
  # after restricting to this experiment so that no non-numeric labels reach
  # as.integer().
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(tool = ifelse(conf == "none", sys, paste(sys, conf)),
         datasize = 10^as.integer(s))

print(d)

# Error-bar extent: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("identity.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 60000000, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 8.1, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = .6, label = "Renjin + Identity",
           family = "serif", size = 10)

	#scale_color_brewer(palette=cBrwPl) +
 # guides(colour=guide_legend(keywidth=3.5))

# Print explicitly, like the other sections, so the figure is also drawn when
# the script is run via source(), where top-level values are not auto-printed.
print(p)
dev.off()


# pushdown ----
# Mean execution time (+/- standard error) against dataset size for the
# "pushdown" experiment, one line per system. Written to pushdown.pdf.
d <- all %>%
  filter(exp == "pushdown") %>%
  # `s` is read as character whenever results.tsv also contains the survey
  # rows (whose `s` is a dataset name), so a bare `s > 4` would compare
  # strings and drop e.g. "10". Compare the exponent numerically, and only
  # after restricting to this experiment so that no non-numeric labels reach
  # as.integer().
  filter(as.integer(s) > 4) %>%
  group_by(exp, sys, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(tool = sys, datasize = 10^as.integer(s))

print(d)

# Error-bar extent: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("pushdown.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.5, y = 5, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.6, y = .4, label = "Renjin ",
           family = "serif", size = 10)

print(p)
dev.off()




# operators ----
# Mean execution time (+/- standard error) against dataset size for the
# "operators" experiment, one line per system with and without the "opt"
# configuration. Written to operators.pdf.
d <- all %>%
  filter(exp == "operators") %>%
  # `s` is read as character whenever results.tsv also contains the survey
  # rows (whose `s` is a dataset name), so a bare `s > 5` would compare
  # strings and drop e.g. "10". Compare the exponent numerically, and only
  # after restricting to this experiment so that no non-numeric labels reach
  # as.integer().
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # conf is first rewritten into a display suffix, then appended to sys;
  # mutate() evaluates its arguments in order, so `tool` sees the new conf.
  mutate(conf = ifelse(conf == "opt", " + Vectorization", ""),
         tool = paste0(sys, conf),
         datasize = 10^as.integer(s))

print(d)

# Error-bar extent: one standard error either side of the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("operators.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks = ybreaks, labels = ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels, limits = c(NA, 10^8.1)) +
  # The y axis is linear (the log scale above is commented out), so the label
  # states the unit, as in the other figures, rather than "(log)".
  xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 10^7.8, y = 4, label = "GNU R",
           family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 19, label = "Renjin ",
           family = "serif", size = 10) +
  annotate("text", x = 10^7.95, y = 14, label = "Renjin + V.",
           family = "serif", size = 10)

	#annotate("text", x=700000, y=6, label="Renjin", family="serif", size=10)+
	#annotate("text", x=20000000, y=2, label="Renjin + Recycling", family="serif", size=10)


print(p)
dev.off()


# # print some latex for the paper
# selection$timesec <- selection$timesec/1000
# selproj$timesec <- selproj$timesec/1000
# grouping$timesec <- grouping$timesec/1000
# joins$timesec <- joins$timesec/1000

# selection <- cast(selection,datasetn+oparg ~ tool)
# selproj <- cast(selproj,datasetn+oparg ~ tool)
# joins <- cast(joins,datasetn+oparg ~ tool)
# grouping <- cast(grouping,datasetn+opargn ~ tool)

# selection[1] <- ""
# print(xtable(selection),include.rownames=FALSE)

# selproj[1] <- ""
# print(xtable(selproj),include.rownames=FALSE)

# grouping[1] <- ""
# print(xtable(grouping),include.rownames=FALSE)

# joins[1] <- ""
# print(xtable(joins),include.rownames=FALSE)