diff --git a/plot_baselines.py b/plot_baselines.py new file mode 100644 index 0000000000000000000000000000000000000000..c895be85719c6794ccf2d9166f9b23a17a078d5e --- /dev/null +++ b/plot_baselines.py @@ -0,0 +1,145 @@ +#!/usr/bin/python + +import matplotlib as mpl +mpl.use('Agg') +import os, errno, time, sys, pylab +from scipy import * +from scipy import optimize +from scipy import special +from scipy import stats +#import numpy as np +import matplotlib.cm as cm +import matplotlib.ticker as ticker +from datetime import datetime +import logging, csv +from os.path import basename, exists + +plotdir = os.path.abspath('.') + '/results' +if not os.path.exists(plotdir): + os.makedirs(plotdir) + print "Created plotdir: %s" % (plotdir) +logdir = os.path.abspath('.') + '/log' + +def stdev(v): + """ + Return the Bessel corrected standard deviation of a series of measurements + """ + return sqrt(len(v) * (mean(square(v)) - mean(v)**2) / (len(v) - 1)) + +def mysql_querytimes(): + """ + """ + # The s15 mysql query timing results taken from + # https://confluence.lsstcorp.org/display/DM/S15+Large+Scale+Tests + # Values on website are reported in seconds + counts = [47.75, 40.99, 48.33] + shortrunning = [0.09, 0.33] + fullscans = [247.61, 244.24, 1088.09, 1077.38, 898.61] + joins = [1381.44, 1291.38] + neighbor = [676.02] + mysqls15 = asarray(counts + shortrunning + fullscans + joins + neighbor) + #tms = [ts * 1000 for ts in mysqls15] + return mysqls15 + +def monetdb_querytimes(qlog): + """ + """ + # From the qlog name, we can deduce the query number + # and run type + qtime = [] + with open(qlog, 'rb') as csvfile: + qreader = csv.reader(csvfile, delimiter=';') + for row in qreader: + run = int(row[0]) + qtime.append(float(row[1])) # unit is ms + #for i in range(len(qtime)): + # print "%s: qtime = %s" % (i, qtime[i]) + return mean(qtime), stdev(qtime) + +def plot_results(): + """ + """ + + logdir = 'sql/s15/log' + plotdir = '.' + plotfiles = [] + + node = "stones10" + dbversion = "dec2016sp1" + + # We have hot and cold runs + runtype = ['hot', 'cold'] + #runtype = ['hot'] + # We divide the queries in five groups + counts = ['q01', 'q02', 'q03'] + shortrunning = ['q04', 'q05'] + fullscans = ['q06', 'q07', 'q08', 'q09', 'q10'] + joins = ['q11', 'q12'] + neighbor = ['q13'] + queries = asarray(counts + shortrunning + fullscans + joins + neighbor) + mysqls15 = mysql_querytimes() + + monetdbs15 = {} + for temp in runtype: + monetdbs15[temp] = {} + monetdbs15[temp]['qt_avg'] = [] + monetdbs15[temp]['qt_std'] = [] + for qnr in queries: + qlog = logdir + "/%s.%s.%s.%s.log" % (qnr, temp, node, dbversion) + #print "qlog = %s" % (qlog) + if not exists(qlog): + sys.exit("Run timings. Log file %s does not exist (yet)." % (qlog)) + qt_avg, qt_std = monetdb_querytimes(qlog) + print "%s qnr: %s: qt_avg = %s; qt_std = %s" % (temp, qnr, qt_avg, qt_std) + monetdbs15[temp]['qt_avg'].append(qt_avg) + monetdbs15[temp]['qt_std'].append(qt_std) + mhot_t = [t * 0.001 for t in monetdbs15['hot']['qt_avg']] + mhot_s = [s * 0.001 for s in monetdbs15['hot']['qt_std']] + mcold_t = [t * 0.001 for t in monetdbs15['cold']['qt_avg']] + mcold_s = [s * 0.001 for s in monetdbs15['cold']['qt_std']] + + #print "monetdbs15 = %s" % (monetdbs15) + fig = pylab.figure(figsize=(28,8)) + ax = fig.add_subplot(111) + + width = 0.2 + ekw = dict(ecolor='k', lw=2, capsize=5, capthick=2) + r1 = ax.bar(arange(len(mysqls15)), mysqls15, width, color='gray' \ + ,error_kw=ekw,label='MySQL') + r2 = ax.bar(arange(len(mcold_t)) + width, mcold_t, width, yerr=mcold_s, color='dodgerblue' \ + ,error_kw=ekw,label='MonetDB cold') + r3 = ax.bar(arange(len(mhot_t)) + 2 * width, mhot_t, width, yerr=mhot_s, color='red' \ + ,error_kw=ekw,label='MonetDB hot') + ax.set_ylabel('Time [s]', fontsize='25') + ax.set_xticks(arange(len(mysqls15)) + width) + ax.set_xticklabels([q.upper() for q in queries]) + #ax.set_ylim(ymin=10, ymax=35000) + ax.set_ylim(ymin=0.001, ymax=10000) + ax.set_yscale("log") + #ax.legend(loc='upper right') + ax.legend(loc='upper left', fontsize='25') + + for i in range(len(ax.get_xticklabels())): + #ax.get_xticklabels()[i].set_size('xx-large') + ax.get_xticklabels()[i].set_size('25') + for i in range(len(ax.get_yticklabels())): + #ax.get_yticklabels()[i].set_size('xx-large') + ax.get_yticklabels()[i].set_size('25') + pylab.grid(True) + + fname = 'baseline_lsst_queries.eps' + + plotfile = plotdir + '/' + fname + plotfiles.append(plotfile) + pylab.savefig(plotfile, dpi=400, bbox_inches='tight') + print plotfile + + return plotfile + +def main(): + #query = sys.argv[1] + plot_results() + +if __name__ == '__main__': + main() +