File diff 000000000000 → 72890999e24d
plot_baselines.py
Show inline comments
 
new file 100644
 
#!/usr/bin/python
 

	
 
import matplotlib as mpl
 
mpl.use('Agg')
 
import os, errno, time, sys, pylab
 
from scipy import *
 
from scipy import optimize
 
from scipy import special
 
from scipy import stats
 
#import numpy as np
 
import matplotlib.cm as cm
 
import matplotlib.ticker as ticker
 
from datetime import datetime
 
import logging, csv
 
from os.path import basename, exists
 

	
 
plotdir = os.path.abspath('.') + '/results'
 
if not os.path.exists(plotdir):
 
    os.makedirs(plotdir)
 
    print "Created plotdir: %s" % (plotdir)
 
logdir = os.path.abspath('.') + '/log'
 

	
 
def stdev(v):
 
    """
 
    Return the Bessel corrected standard deviation of a series of measurements
 
    """
 
    return sqrt(len(v) * (mean(square(v)) - mean(v)**2) / (len(v) - 1))
 

	
 
def mysql_querytimes():
 
    """
 
    """
 
    # The s15 mysql query timing results taken from
 
    # https://confluence.lsstcorp.org/display/DM/S15+Large+Scale+Tests
 
    # Values on website are reported in seconds
 
    counts = [47.75, 40.99, 48.33]
 
    shortrunning = [0.09, 0.33]
 
    fullscans = [247.61, 244.24, 1088.09, 1077.38, 898.61]
 
    joins = [1381.44, 1291.38]
 
    neighbor = [676.02]
 
    mysqls15 = asarray(counts + shortrunning + fullscans + joins + neighbor)
 
    #tms = [ts * 1000 for ts in mysqls15]
 
    return mysqls15
 

	
 
def monetdb_querytimes(qlog):
 
    """
 
    """
 
    # From the qlog name, we can deduce the query number
 
    # and run type
 
    qtime = []
 
    with open(qlog, 'rb') as csvfile:
 
        qreader = csv.reader(csvfile, delimiter=';')
 
        for row in qreader:
 
            run = int(row[0])
 
            qtime.append(float(row[1])) # unit is ms
 
    #for i in range(len(qtime)):
 
    #    print "%s: qtime = %s" % (i, qtime[i])
 
    return mean(qtime), stdev(qtime)
 

	
 
def plot_results():
 
    """
 
    """
 

	
 
    logdir = 'sql/s15/log'
 
    plotdir = '.'
 
    plotfiles = []
 

	
 
    node = "stones10"
 
    dbversion = "dec2016sp1"
 

	
 
    # We have hot and cold runs
 
    runtype = ['hot', 'cold']
 
    #runtype = ['hot']
 
    # We divide the queries in five groups
 
    counts = ['q01', 'q02', 'q03']
 
    shortrunning = ['q04', 'q05']
 
    fullscans = ['q06', 'q07', 'q08', 'q09', 'q10']
 
    joins = ['q11', 'q12']
 
    neighbor = ['q13']
 
    queries = asarray(counts + shortrunning + fullscans + joins + neighbor)
 
    mysqls15 = mysql_querytimes()
 

	
 
    monetdbs15 = {}
 
    for temp in runtype:
 
        monetdbs15[temp] = {}
 
        monetdbs15[temp]['qt_avg'] = []
 
        monetdbs15[temp]['qt_std'] = []
 
        for qnr in queries:
 
            qlog = logdir + "/%s.%s.%s.%s.log" % (qnr, temp, node, dbversion)
 
            #print "qlog = %s" % (qlog)
 
            if not exists(qlog):
 
                sys.exit("Run timings. Log file %s does not exist (yet)." % (qlog))
 
            qt_avg, qt_std = monetdb_querytimes(qlog)
 
            print "%s qnr: %s: qt_avg = %s; qt_std = %s" % (temp, qnr, qt_avg, qt_std)
 
            monetdbs15[temp]['qt_avg'].append(qt_avg)
 
            monetdbs15[temp]['qt_std'].append(qt_std)
 
    mhot_t = [t * 0.001 for t in monetdbs15['hot']['qt_avg']]
 
    mhot_s = [s * 0.001 for s in monetdbs15['hot']['qt_std']]
 
    mcold_t = [t * 0.001 for t in monetdbs15['cold']['qt_avg']]
 
    mcold_s = [s * 0.001 for s in monetdbs15['cold']['qt_std']]
 

	
 
    #print "monetdbs15 = %s" % (monetdbs15)
 
    fig = pylab.figure(figsize=(28,8))
 
    ax = fig.add_subplot(111)
 

	
 
    width = 0.2
 
    ekw = dict(ecolor='k', lw=2, capsize=5, capthick=2)
 
    r1 = ax.bar(arange(len(mysqls15)), mysqls15, width, color='gray' \
 
               ,error_kw=ekw,label='MySQL')
 
    r2 = ax.bar(arange(len(mcold_t)) + width, mcold_t, width, yerr=mcold_s, color='dodgerblue' \
 
               ,error_kw=ekw,label='MonetDB cold')
 
    r3 = ax.bar(arange(len(mhot_t)) + 2 * width, mhot_t, width, yerr=mhot_s, color='red' \
 
               ,error_kw=ekw,label='MonetDB hot')
 
    ax.set_ylabel('Time [s]', fontsize='25')
 
    ax.set_xticks(arange(len(mysqls15)) + width)
 
    ax.set_xticklabels([q.upper() for q in queries])
 
    #ax.set_ylim(ymin=10, ymax=35000)
 
    ax.set_ylim(ymin=0.001, ymax=10000)
 
    ax.set_yscale("log")
 
    #ax.legend(loc='upper right')
 
    ax.legend(loc='upper left', fontsize='25')
 

	
 
    for i in range(len(ax.get_xticklabels())):
 
        #ax.get_xticklabels()[i].set_size('xx-large')
 
        ax.get_xticklabels()[i].set_size('25')
 
    for i in range(len(ax.get_yticklabels())):
 
        #ax.get_yticklabels()[i].set_size('xx-large')
 
        ax.get_yticklabels()[i].set_size('25')
 
    pylab.grid(True)
 

	
 
    fname = 'baseline_lsst_queries.eps'
 

	
 
    plotfile = plotdir + '/' + fname
 
    plotfiles.append(plotfile)
 
    pylab.savefig(plotfile, dpi=400, bbox_inches='tight')
 
    print plotfile
 

	
 
    return plotfile
 

	
 
def main():
 
    #query = sys.argv[1]
 
    plot_results()
 

	
 
if __name__ == '__main__':
 
    main()