Files
@ 0818c337d503
Branch filter:
Location: DA/lsst_blog/plot_baselines.py
0818c337d503
4.6 KiB
text/x-python
Add blog post url
#!/usr/bin/python
import matplotlib as mpl
mpl.use('Agg')
import os, errno, time, sys, pylab
from scipy import *
from scipy import optimize
from scipy import special
from scipy import stats
#import numpy as np
import matplotlib.cm as cm
import matplotlib.ticker as ticker
from datetime import datetime
import logging, csv
from os.path import basename, exists
# Path of the 'results' directory below the current working directory.
plotdir = os.path.abspath('.') + '/results'
try:
    # Create first and inspect the error afterwards: this leaves no window
    # between an existence check and the creation of the directory.
    os.makedirs(plotdir)
except OSError as exc:
    # A path that already exists is accepted silently; anything else
    # (permissions, read-only file system, ...) is a real failure.
    if exc.errno != errno.EEXIST:
        raise
else:
    print("Created plotdir: %s" % (plotdir,))
# Path of the 'log' directory below the current working directory
# (only computed here, not created).
logdir = os.path.abspath('.') + '/log'
def stdev(v):
    """
    Return the Bessel corrected standard deviation of a series of measurements

    The squared deviations from the mean are summed directly (two-pass form)
    instead of evaluating mean(v**2) - mean(v)**2, because that difference
    loses its precision when the spread of the values is small compared to
    their magnitude.

    v: sequence of numeric measurements.
    Returns the sample standard deviation (divisor n - 1); NaN when fewer
    than two measurements are given, since the divisor is then not positive.
    """
    samples = asarray(v, dtype=float)
    n = samples.size
    if n < 2:
        return float('nan')
    deviations = samples - mean(samples)
    return sqrt(square(deviations).sum() / (n - 1))
def mysql_querytimes():
    """
    Return the S15 MySQL query times, in seconds, as one flat array

    The s15 mysql query timing results are taken from
    https://confluence.lsstcorp.org/display/DM/S15+Large+Scale+Tests
    where the values are reported in seconds.  They are concatenated in the
    order counts, short running, full scans, joins, neighbor.
    """
    timings_per_group = (
        ('counts', [47.75, 40.99, 48.33]),
        ('shortrunning', [0.09, 0.33]),
        ('fullscans', [247.61, 244.24, 1088.09, 1077.38, 898.61]),
        ('joins', [1381.44, 1291.38]),
        ('neighbor', [676.02]),
    )
    seconds = []
    for _group, values in timings_per_group:
        seconds.extend(values)
    return asarray(seconds)
def monetdb_querytimes(qlog):
    """
    Return the mean and standard deviation of the query times in a timing log

    qlog: path of a ';'-separated log file.  The first column of a row is
          parsed as the integer run number, the second column as the query
          time (unit is ms).
    Returns the tuple (mean, stdev) of the query times, in the unit of the log.
    Raises ValueError or IndexError for a non-blank row that cannot be parsed.
    """
    # The csv module wants a binary file on Python 2 and a text file on
    # Python 3.
    mode = 'rb' if sys.version_info[0] < 3 else 'r'
    qtime = []
    with open(qlog, mode) as csvfile:
        for row in csv.reader(csvfile, delimiter=';'):
            if not row:
                # csv yields an empty list for a blank line (e.g. a trailing
                # newline); there is no measurement on such a line.
                continue
            int(row[0])  # run number: not used, parsed only to validate the row
            qtime.append(float(row[1]))  # unit is ms
    return mean(qtime), stdev(qtime)
def plot_results():
    """
    Plot the S15 query times of MySQL and MonetDB (cold and hot) as grouped bars

    For every query q01..q13 and both run types the timing log
    <logdir>/<query>.<runtype>.<node>.<dbversion>.log is read with
    monetdb_querytimes(); the averages and standard deviations are converted
    from ms to s and drawn next to the MySQL times of mysql_querytimes() on a
    logarithmic time axis.

    Returns the path of the EPS file that was written.  Terminates through
    sys.exit() when one of the timing logs does not exist.
    """
    # NOTE: these are local settings; the module-level plotdir and logdir
    # are not used by this function.
    logdir = 'sql/s15/log'
    plotdir = '.'

    node = "stones10"
    dbversion = "dec2016sp1"

    # We have hot and cold runs
    runtype = ['hot', 'cold']

    # We divide the queries in five groups
    counts = ['q01', 'q02', 'q03']
    shortrunning = ['q04', 'q05']
    fullscans = ['q06', 'q07', 'q08', 'q09', 'q10']
    joins = ['q11', 'q12']
    neighbor = ['q13']
    queries = counts + shortrunning + fullscans + joins + neighbor

    mysqls15 = mysql_querytimes()

    # Collect the per-query average and standard deviation for each run type.
    monetdbs15 = {}
    for temp in runtype:
        qt_avgs = []
        qt_stds = []
        for qnr in queries:
            qlog = logdir + "/%s.%s.%s.%s.log" % (qnr, temp, node, dbversion)
            if not exists(qlog):
                sys.exit("Run timings. Log file %s does not exist (yet)." % (qlog))
            qt_avg, qt_std = monetdb_querytimes(qlog)
            print("%s qnr: %s: qt_avg = %s; qt_std = %s" % (temp, qnr, qt_avg, qt_std))
            qt_avgs.append(qt_avg)
            qt_stds.append(qt_std)
        monetdbs15[temp] = {'qt_avg': qt_avgs, 'qt_std': qt_stds}

    # The logs are in ms, the plot (and the MySQL reference) is in seconds.
    mhot_t = [t * 0.001 for t in monetdbs15['hot']['qt_avg']]
    mhot_s = [s * 0.001 for s in monetdbs15['hot']['qt_std']]
    mcold_t = [t * 0.001 for t in monetdbs15['cold']['qt_avg']]
    mcold_s = [s * 0.001 for s in monetdbs15['cold']['qt_std']]

    fig = pylab.figure(figsize=(28, 8))
    ax = fig.add_subplot(111)

    width = 0.2
    ekw = dict(ecolor='k', lw=2, capsize=5, capthick=2)
    # Three bars per query, each shifted by one bar width.
    ax.bar(arange(len(mysqls15)), mysqls15, width, color='gray',
           error_kw=ekw, label='MySQL')
    ax.bar(arange(len(mcold_t)) + width, mcold_t, width, yerr=mcold_s,
           color='dodgerblue', error_kw=ekw, label='MonetDB cold')
    ax.bar(arange(len(mhot_t)) + 2 * width, mhot_t, width, yerr=mhot_s,
           color='red', error_kw=ekw, label='MonetDB hot')

    ax.set_ylabel('Time [s]', fontsize='25')
    ax.set_xticks(arange(len(mysqls15)) + width)
    ax.set_xticklabels([q.upper() for q in queries])
    # The limits are passed positionally: the ymin/ymax keywords are not
    # accepted by newer matplotlib releases.
    ax.set_ylim(0.001, 10000)
    ax.set_yscale("log")
    ax.legend(loc='upper left', fontsize='25')
    for label in ax.get_xticklabels():
        label.set_size('25')
    for label in ax.get_yticklabels():
        label.set_size('25')
    pylab.grid(True)

    fname = 'baseline_lsst_queries.eps'
    plotfile = plotdir + '/' + fname
    pylab.savefig(plotfile, dpi=400, bbox_inches='tight')
    print(plotfile)

    return plotfile
def main():
    """
    Script entry point: create the baseline query time plot
    """
    # No command line arguments are evaluated.
    plot_results()


if __name__ == "__main__":
    main()
|