#!/usr/bin/python import matplotlib as mpl mpl.use('Agg') import os, errno, time, sys, pylab from scipy import * from scipy import optimize from scipy import special from scipy import stats #import numpy as np import matplotlib.cm as cm import matplotlib.ticker as ticker from datetime import datetime import logging, csv from os.path import basename, exists plotdir = os.path.abspath('.') + '/results' if not os.path.exists(plotdir): os.makedirs(plotdir) print "Created plotdir: %s" % (plotdir) logdir = os.path.abspath('.') + '/log' def stdev(v): """ Return the Bessel corrected standard deviation of a series of measurements """ return sqrt(len(v) * (mean(square(v)) - mean(v)**2) / (len(v) - 1)) def mysql_querytimes(): """ """ # The s15 mysql query timing results taken from # https://confluence.lsstcorp.org/display/DM/S15+Large+Scale+Tests # Values on website are reported in seconds counts = [47.75, 40.99, 48.33] shortrunning = [0.09, 0.33] fullscans = [247.61, 244.24, 1088.09, 1077.38, 898.61] joins = [1381.44, 1291.38] neighbor = [676.02] mysqls15 = asarray(counts + shortrunning + fullscans + joins + neighbor) #tms = [ts * 1000 for ts in mysqls15] return mysqls15 def monetdb_querytimes(qlog): """ """ # From the qlog name, we can deduce the query number # and run type qtime = [] with open(qlog, 'rb') as csvfile: qreader = csv.reader(csvfile, delimiter=';') for row in qreader: run = int(row[0]) qtime.append(float(row[1])) # unit is ms #for i in range(len(qtime)): # print "%s: qtime = %s" % (i, qtime[i]) return mean(qtime), stdev(qtime) def plot_results(): """ """ logdir = 'sql/s15/log' plotdir = '.' plotfiles = [] node = "stones10" dbversion = "dec2016sp1" # We have hot and cold runs runtype = ['hot', 'cold'] #runtype = ['hot'] # We divide the queries in five groups counts = ['q01', 'q02', 'q03'] shortrunning = ['q04', 'q05'] fullscans = ['q06', 'q07', 'q08', 'q09', 'q10'] joins = ['q11', 'q12'] neighbor = ['q13'] queries = asarray(counts + shortrunning + fullscans + joins + neighbor) mysqls15 = mysql_querytimes() monetdbs15 = {} for temp in runtype: monetdbs15[temp] = {} monetdbs15[temp]['qt_avg'] = [] monetdbs15[temp]['qt_std'] = [] for qnr in queries: qlog = logdir + "/%s.%s.%s.%s.log" % (qnr, temp, node, dbversion) #print "qlog = %s" % (qlog) if not exists(qlog): sys.exit("Run timings. Log file %s does not exist (yet)." % (qlog)) qt_avg, qt_std = monetdb_querytimes(qlog) print "%s qnr: %s: qt_avg = %s; qt_std = %s" % (temp, qnr, qt_avg, qt_std) monetdbs15[temp]['qt_avg'].append(qt_avg) monetdbs15[temp]['qt_std'].append(qt_std) mhot_t = [t * 0.001 for t in monetdbs15['hot']['qt_avg']] mhot_s = [s * 0.001 for s in monetdbs15['hot']['qt_std']] mcold_t = [t * 0.001 for t in monetdbs15['cold']['qt_avg']] mcold_s = [s * 0.001 for s in monetdbs15['cold']['qt_std']] #print "monetdbs15 = %s" % (monetdbs15) fig = pylab.figure(figsize=(28,8)) ax = fig.add_subplot(111) width = 0.2 ekw = dict(ecolor='k', lw=2, capsize=5, capthick=2) r1 = ax.bar(arange(len(mysqls15)), mysqls15, width, color='gray' \ ,error_kw=ekw,label='MySQL') r2 = ax.bar(arange(len(mcold_t)) + width, mcold_t, width, yerr=mcold_s, color='dodgerblue' \ ,error_kw=ekw,label='MonetDB cold') r3 = ax.bar(arange(len(mhot_t)) + 2 * width, mhot_t, width, yerr=mhot_s, color='red' \ ,error_kw=ekw,label='MonetDB hot') ax.set_ylabel('Time [s]', fontsize='25') ax.set_xticks(arange(len(mysqls15)) + width) ax.set_xticklabels([q.upper() for q in queries]) #ax.set_ylim(ymin=10, ymax=35000) ax.set_ylim(ymin=0.001, ymax=10000) ax.set_yscale("log") #ax.legend(loc='upper right') ax.legend(loc='upper left', fontsize='25') for i in range(len(ax.get_xticklabels())): #ax.get_xticklabels()[i].set_size('xx-large') ax.get_xticklabels()[i].set_size('25') for i in range(len(ax.get_yticklabels())): #ax.get_yticklabels()[i].set_size('xx-large') ax.get_yticklabels()[i].set_size('25') pylab.grid(True) fname = 'baseline_lsst_queries.eps' plotfile = plotdir + '/' + fname plotfiles.append(plotfile) pylab.savefig(plotfile, dpi=400, bbox_inches='tight') print plotfile return plotfile def main(): #query = sys.argv[1] plot_results() if __name__ == '__main__': main()