Files @ 6da277f993c5
Branch filter:

Location: DA/raaql-paper-experiments/makeplots.R - annotation

Hannes Muehleisen
mc3
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
6da277f993c5
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
76ef4811d43e
76ef4811d43e
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
8bf3b2c41088
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
6da277f993c5
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
6da277f993c5
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
#!R -f
library(ggplot2)
library(ggthemes)
library(scales)
library(xtable)
library(grid)
library(dplyr)

# Load the raw benchmark measurements from a headerless, tab-separated file.
# TRUE/FALSE are spelled out because the shorthands T/F are ordinary variables
# that can be reassigned.
all <- read.csv("results.tsv", sep = "\t", header = FALSE,
                stringsAsFactors = FALSE)

# Column roles, as used further down in this script:
#   exp     - experiment name ("survey", "parallel", "recycling", ...)
#   sys     - system under test, used to build the per-line "tool" label
#   conf    - configuration of that system ("none", "opt", a thread count, ...)
#   s       - size key: a dataset name for "survey", otherwise a number
#             (used as a power-of-ten exponent in most experiments)
#   r       - not referenced in this script (presumably a run/repetition
#             number - TODO confirm)
#   timesec - measured execution time, plotted as seconds
names(all) <- c("exp", "sys", "conf", "s", "r", "timesec")

# Tick positions (in seconds) and matching human-readable labels for a
# logarithmic time axis.
ybreaks <- c(0.01, 0.1, 1, 10, 60, 600)
ylabels <- c("10ms", "100ms", "1s", "10s", "1min", "10min")

# Tick positions for the logarithmic dataset-size axis, one per power of ten,
# with plotmath labels so the exponents are typeset as superscripts.
xbreaks <- 10^(5:9)
xlabels <- expression(10^5, 10^6, 10^7, 10^8, 10^9)

# Shared look for every figure: the ggthemes "few" theme at a large base size,
# serif text, slightly offset axis titles and no legend (the lines are
# labelled with annotate() in each plot instead).
# Note: this object is deliberately named `theme`, shadowing ggplot2's theme()
# function as a variable; calls such as theme(...) still resolve to the
# function because R skips non-function bindings when looking up a call.
theme <- theme_few(base_size = 24) +
  theme(
    axis.title.y = element_text(vjust = 0.9),
    axis.title.x = element_text(vjust = -0.1),
    text = element_text(family = "serif"),
    legend.position = "none"
  )


# Standard error of the mean, computed as sqrt(var(x) / n).
#
# x:     numeric vector of observations.
# na.rm: if TRUE, missing values are removed before the computation. The
#        default (FALSE) keeps the historical behaviour, where any NA in `x`
#        makes the result NA.
#
# Returns a single numeric value. A single observation yields NA because the
# sample variance is not defined for it.
se <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sqrt(var(x) / length(x))
}



# survey
# Mean runtime and standard error of the "survey" experiment per system,
# dataset and configuration, plotted against the dataset size.

# Number of elements in each survey dataset, keyed by the dataset name stored
# in column `s`. A lookup vector gives NA for an unknown name, whereas the
# previous sapply(s, switch, ...) silently produced a list column.
survey_sizes <- c(alabama = 47512, california = 1060060, acs3yr = 9093077)

d <- all %>%
  filter(exp == "survey") %>%
  group_by(sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  mutate(tool = paste(sys, conf),
         datasize = unname(survey_sizes[s])) %>%
  # For Renjin only the "jitopt" configuration is shown; other systems keep
  # all of their configurations.
  filter(tool == "Renjin jitopt" | sys != "Renjin")

print(d %>% select(sys, s, meant) %>% arrange(s))

# Error bars: one standard error above and below the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("survey.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  #scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = unname(survey_sizes)) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # The legend is hidden by the shared theme, so each line is labelled by hand.
  annotate("text", x = 10^6.6, y = 45, label = "GNU R", family = "serif", size = 10) +
  annotate("text", x = 10^6.4, y = 100, label = "sqlsurvey", family = "serif", size = 10) +
  annotate("text", x = 10^6.8, y = 15, label = "Renjin", family = "serif", size = 10)

  #scale_color_brewer(palette=cBrwPl) +
  #guides(colour=guide_legend(keywidth=3.5))

# Print explicitly: a bare ggplot expression is only drawn when it is
# auto-printed at top level, so the PDF would stay empty under source().
print(p)
dev.off()



# parallel
# Mean runtime and standard error of the "parallel" experiment at size key 8,
# per system and thread count.

d <- all %>%
  filter(exp == "parallel", s == 8) %>%
  group_by(exp, sys, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # `conf` holds the thread count, with "none" standing for a single thread.
  # "none" is mapped to "1" before the conversion so that as.integer() does
  # not emit a spurious "NAs introduced by coercion" warning for it.
  mutate(threads = as.integer(ifelse(conf == "none", "1", conf)),
         tool = paste(sys, threads)) %>%
  filter(threads < 32)

print(d)


# Error bars: one standard error above and below the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("parallel.pdf", width = 10, height = 7)
p <- ggplot(d, aes(threads, meant, group = sys)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = sys, linetype = sys)) +
  geom_pointrange(limits) +
  # Start the time axis at zero; the upper limit is chosen from the data.
  scale_y_continuous(limits = c(0, NA)) +
  scale_x_continuous(breaks = unique(d$threads)) +
  # Vertical marker at 10 threads, labelled "Problem parallelism" below.
  geom_vline(xintercept = 10) +
  xlab("Number of Threads") +
  ylab("Execution Time (s)") +
  theme +
  annotate("text", x = 13.1, y = 1.5, label = "Problem parallelism", family = "serif", size = 10) +
  # The legend is hidden by the shared theme, so each line is labelled by hand.
  annotate("text", x = 2, y = 0.4, label = "Server", family = "serif", size = 10) +
  annotate("text", x = 3, y = 1, label = "Laptop", family = "serif", size = 10)

print(p)
dev.off()


# f <- d %>% group_by(exp, sys, s) %>% summarize(maxmt=max(meant))

# d <- d %>% left_join(f) %>% mutate(speedup = maxmt/meant) %>% select(s,threads,speedup)


# print(d)

# pdf("speedup.pdf",width=10,height=7)
# p <- ggplot(d,aes(threads,speedup,group=s)) + 
#   geom_point(size=4) + geom_line(size=1.5, aes(group=s, linetype=sys)) +
#   scale_y_continuous(limits=c(0,NA)) +
#    scale_x_continuous(breaks=unique(d$threads)) +
# 	xlab("Number of Threads") + ylab("Speedup") + theme #+
# #	annotate("text", x=10, y=6, label="Problem parallelism", family="serif", size=10) #+
# 	# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)

# print(p)
# dev.off()


# recycling
# Mean runtime and standard error of the "recycling" experiment per system,
# size and configuration, for sizes above 10^5 elements.
d <- all %>%
  filter(exp == "recycling") %>%
  # Column `s` also stores dataset names for other experiments, so it is
  # presumably read as character; comparing it to 5 directly would then be a
  # string comparison (e.g. "10" > "5" is FALSE). Compare numerically instead.
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # `s` is the power-of-ten exponent of the dataset size.
  mutate(datasize = 10^as.integer(s), tool = paste(sys, conf))

print(d)

# Error bars: one standard error above and below the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("recycling.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  #  scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # The legend is hidden by the shared theme, so each line is labelled by hand.
  annotate("text", x = 10^7.7, y = 65, label = "Renjin", family = "serif", size = 10) +
  annotate("text", x = 10^7.7, y = 2, label = "Renjin + R.", family = "serif", size = 10) +
  annotate("text", x = 10^7.8, y = 21, label = "GNU R", family = "serif", size = 10)


print(p)
dev.off()



# identity
# Mean runtime and standard error of the "identity" experiment per system,
# configuration and size, for sizes above 10^5 elements.
d <- all %>%
  filter(exp == "identity") %>%
  # Column `s` also stores dataset names for other experiments, so it is
  # presumably read as character; comparing it to 5 directly would then be a
  # string comparison (e.g. "10" > "5" is FALSE). Compare numerically instead.
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, conf, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # Systems run without a special configuration are labelled by name only;
  # `s` is the power-of-ten exponent of the dataset size.
  mutate(tool = ifelse(conf == "none", sys, paste(sys, conf)),
         datasize = 10^as.integer(s))

print(d)

# Error bars: one standard error above and below the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("identity.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  #scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # The legend is hidden by the shared theme, so each line is labelled by hand.
  annotate("text", x = 60000000, y = 4, label = "GNU R", family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 8.1, label = "Renjin ", family = "serif", size = 10) +
  annotate("text", x = 40000000, y = .6, label = "Renjin + Identity", family = "serif", size = 10)

  #scale_color_brewer(palette=cBrwPl) +
  # guides(colour=guide_legend(keywidth=3.5))

# Print explicitly: a bare ggplot expression is only drawn when it is
# auto-printed at top level, so the PDF would stay empty under source().
print(p)
dev.off()


# pushdown
# Mean runtime and standard error of the "pushdown" experiment per system and
# size, for sizes above 10^5 elements.

d <- all %>%
  filter(exp == "pushdown") %>%
  # Column `s` also stores dataset names for other experiments, so it is
  # presumably read as character; comparing it to 5 directly would then be a
  # string comparison (e.g. "10" > "5" is FALSE). Compare numerically instead.
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # One line per system; `s` is the power-of-ten exponent of the dataset size.
  mutate(tool = sys, datasize = 10^as.integer(s))

print(d)

# Error bars: one standard error above and below the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("pushdown.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  scale_x_log10(breaks = xbreaks, labels = xlabels) +
  xlab("Dataset Size (elements, log scale)") +
  ylab("Execution Time (s)") +
  theme +
  # The legend is hidden by the shared theme, so each line is labelled by hand.
  annotate("text", x = 10^7.6, y = 5, label = "GNU R", family = "serif", size = 10) +
  annotate("text", x = 10^7.6, y = .4, label = "Renjin ", family = "serif", size = 10)

print(p)
dev.off()




# operators
# Mean runtime and standard error of the "operators" experiment per system,
# size and configuration, for sizes above 10^5 elements.
d <- all %>%
  filter(exp == "operators") %>%
  # Column `s` also stores dataset names for other experiments, so it is
  # presumably read as character; comparing it to 5 directly would then be a
  # string comparison (e.g. "10" > "5" is FALSE). Compare numerically instead.
  filter(as.integer(s) > 5) %>%
  group_by(exp, sys, s, conf) %>%
  summarize(meant = mean(timesec), se = se(timesec)) %>%
  # The "opt" configuration is shown as "<system> + Vectorization"; any other
  # configuration is shown under the plain system name.
  mutate(conf = ifelse(conf == "opt", " + Vectorization", ""),
         tool = paste0(sys, conf),
         datasize = 10^as.integer(s))

print(d)

# Error bars: one standard error above and below the mean.
limits <- aes(ymax = meant + se, ymin = meant - se, width = 2)

pdf("operators.pdf", width = 10, height = 7)
p <- ggplot(d, aes(datasize, meant, group = tool)) +
  geom_point(size = 4) +
  geom_line(size = 1.5, aes(group = tool, linetype = tool)) +
  geom_pointrange(limits) +
  # scale_y_log10(breaks=ybreaks, labels=ylabels) +
  # The upper x limit leaves room for the text labels right of the last point.
  scale_x_log10(breaks = xbreaks, labels = xlabels, limits = c(NA, 10^8.1)) +
  xlab("Dataset Size (elements, log scale)") +
  # The y axis is linear here (the log scale above is disabled), so it is
  # labelled in seconds like the other figures rather than as "(log)".
  ylab("Execution Time (s)") +
  theme +
  # The legend is hidden by the shared theme, so each line is labelled by hand.
  annotate("text", x = 10^7.8, y = 4, label = "GNU R", family = "serif", size = 10) +
  annotate("text", x = 40000000, y = 19, label = "Renjin ", family = "serif", size = 10) +
  annotate("text", x = 10^7.95, y = 14, label = "Renjin + V.", family = "serif", size = 10)

  #annotate("text", x=700000, y=6, label="Renjin", family="serif", size=10)+
  #annotate("text", x=20000000, y=2, label="Renjin + Recycling", family="serif", size=10)


print(p)
dev.off()


# # print some latex for the paper
# selection$timesec <- selection$timesec/1000
# selproj$timesec <- selproj$timesec/1000
# grouping$timesec <- grouping$timesec/1000
# joins$timesec <- joins$timesec/1000

# selection <- cast(selection,datasetn+oparg ~ tool)
# selproj <- cast(selproj,datasetn+oparg ~ tool)
# joins <- cast(joins,datasetn+oparg ~ tool)
# grouping <- cast(grouping,datasetn+opargn ~ tool)

# selection[1] <- ""
# print(xtable(selection),include.rownames=FALSE)

# selproj[1] <- ""
# print(xtable(selproj),include.rownames=FALSE)

# grouping[1] <- ""
# print(xtable(grouping),include.rownames=FALSE)

# joins[1] <- ""
# print(xtable(joins),include.rownames=FALSE)