Files @ 0818c337d503
Branch filter:

Location: DA/lsst_blog/plot_baselines.py

Bart Scheers
Add blog post url
#!/usr/bin/python

import matplotlib as mpl
mpl.use('Agg')
import os, errno, time, sys, pylab
from scipy import *
from scipy import optimize
from scipy import special
from scipy import stats
#import numpy as np
import matplotlib.cm as cm
import matplotlib.ticker as ticker
from datetime import datetime
import logging, csv
from os.path import basename, exists

# Module-level output directory below the current working directory.
# plot_results() does not read this value; it works with its own plotdir.
plotdir = os.path.abspath('.') + '/results'
try:
    # Create the directory straight away instead of testing for it first:
    # a separate existence check can race with another process creating
    # the same path in between.
    os.makedirs(plotdir)
except OSError as exc:
    # A path that is already there is fine; anything else is a real error.
    if exc.errno != errno.EEXIST:
        raise
else:
    print("Created plotdir: %s" % (plotdir))
# Module-level log directory path; it is only computed here, not created.
logdir = os.path.abspath('.') + '/log'

def stdev(v):
    """
    Return the Bessel corrected standard deviation of a series of measurements

    The squared deviations from the mean are summed directly (two-pass form)
    instead of evaluating mean(v**2) - mean(v)**2: the latter subtracts two
    nearly equal numbers when the spread is small compared to the values
    themselves and then loses most of its precision.

    v -- sequence of numbers

    Returns the standard deviation computed with divisor (n - 1). With fewer
    than two measurements the quantity is undefined and NaN is returned.
    """
    values = asarray(v, dtype=float)
    n = len(values)
    if n < 2:
        # Bessel's correction divides by (n - 1); nothing to estimate here.
        return float('nan')
    deviations = values - values.mean()
    return sqrt(square(deviations).sum() / (n - 1))

def mysql_querytimes():
    """
    Return the published S15 MySQL query timings as one array

    The numbers are copied from
    https://confluence.lsstcorp.org/display/DM/S15+Large+Scale+Tests
    where they are reported in seconds. The 13 values are ordered by
    query group: counts, short running, full scans, joins, neighbor.
    """
    timing_groups = (
        [47.75, 40.99, 48.33],                         # counts
        [0.09, 0.33],                                  # short running
        [247.61, 244.24, 1088.09, 1077.38, 898.61],    # full scans
        [1381.44, 1291.38],                            # joins
        [676.02],                                      # neighbor
    )
    seconds = []
    for group in timing_groups:
        seconds.extend(group)
    return asarray(seconds)

def monetdb_querytimes(qlog):
    """
    Return the mean and standard deviation of the times in a query log

    qlog -- path of a ';' separated log file. Every non-empty row must
            start with an integer (the run number) followed by the query
            time as a number (noted as ms by the original author).

    Returns the tuple (mean, stdev) of the query time column, in the unit
    used by the log file. Empty rows, e.g. a blank line at the end of the
    file, are skipped. A row with a missing or non-numeric field raises
    IndexError or ValueError.
    """
    qtime = []
    # 'rb' is the open mode the Python 2 csv module asks for.
    with open(qlog, 'rb') as csvfile:
        qreader = csv.reader(csvfile, delimiter=';')
        for row in qreader:
            if not row:
                # csv.reader yields an empty list for a blank line
                continue
            # The run number is not used further; it is still parsed so
            # that a malformed row is reported instead of silently averaged.
            int(row[0])
            qtime.append(float(row[1])) # unit is ms
    return mean(qtime), stdev(qtime)

def _monetdb_seconds(temp, queries, logdir, node, dbversion):
    """
    Return (averages, deviations) in seconds for one MonetDB run type

    For every query the log file <logdir>/<query>.<temp>.<node>.<dbversion>.log
    is read with monetdb_querytimes(); the script exits when a log file is
    missing. The per-query result is printed in the unit of the log.
    """
    averages = []
    deviations = []
    for qnr in queries:
        qlog = logdir + "/%s.%s.%s.%s.log" % (qnr, temp, node, dbversion)
        if not exists(qlog):
            sys.exit("Run timings. Log file %s does not exist (yet)." % (qlog))
        qt_avg, qt_std = monetdb_querytimes(qlog)
        print("%s qnr: %s: qt_avg = %s; qt_std = %s" % (temp, qnr, qt_avg, qt_std))
        # Log values are treated as ms; the plot axis is in seconds.
        averages.append(qt_avg * 0.001)
        deviations.append(qt_std * 0.001)
    return averages, deviations


def plot_results(logdir='sql/s15/log', plotdir='.', node="stones10",
                 dbversion="dec2016sp1"):
    """
    Plot the MySQL timings next to the MonetDB cold and hot timings

    One group of three bars is drawn per query (q01 .. q13) on a
    logarithmic time axis: the MySQL reference value from
    mysql_querytimes() and the MonetDB cold and hot averages, the latter
    two with their standard deviation as error bar.

    logdir    -- directory holding the MonetDB query log files
    plotdir   -- directory the plot file is written to
    node      -- node name as used in the log file names
    dbversion -- database version as used in the log file names

    The defaults are the values this script was written for. The function
    exits through sys.exit() when a log file does not exist, prints the
    per-query timings and the plot file name, and returns the path of the
    written file (baseline_lsst_queries.eps).
    """
    # We divide the queries in five groups
    counts = ['q01', 'q02', 'q03']
    shortrunning = ['q04', 'q05']
    fullscans = ['q06', 'q07', 'q08', 'q09', 'q10']
    joins = ['q11', 'q12']
    neighbor = ['q13']
    queries = counts + shortrunning + fullscans + joins + neighbor
    mysqls15 = mysql_querytimes()

    # We have hot and cold runs; hot is loaded (and reported) first
    mhot_t, mhot_s = _monetdb_seconds('hot', queries, logdir, node, dbversion)
    mcold_t, mcold_s = _monetdb_seconds('cold', queries, logdir, node, dbversion)

    fig = pylab.figure(figsize=(28,8))
    ax = fig.add_subplot(111)

    # Three bars per query, each shifted by one bar width
    width = 0.2
    ekw = dict(ecolor='k', lw=2, capsize=5, capthick=2)
    ax.bar(arange(len(mysqls15)), mysqls15, width, color='gray',
           error_kw=ekw, label='MySQL')
    ax.bar(arange(len(mcold_t)) + width, mcold_t, width, yerr=mcold_s,
           color='dodgerblue', error_kw=ekw, label='MonetDB cold')
    ax.bar(arange(len(mhot_t)) + 2 * width, mhot_t, width, yerr=mhot_s,
           color='red', error_kw=ekw, label='MonetDB hot')
    ax.set_ylabel('Time [s]', fontsize='25')
    ax.set_xticks(arange(len(mysqls15)) + width)
    ax.set_xticklabels([q.upper() for q in queries])
    ax.set_ylim(ymin=0.001, ymax=10000)
    ax.set_yscale("log")
    ax.legend(loc='upper left', fontsize='25')

    for label in ax.get_xticklabels():
        label.set_size('25')
    for label in ax.get_yticklabels():
        label.set_size('25')
    pylab.grid(True)

    fname = 'baseline_lsst_queries.eps'

    plotfile = plotdir + '/' + fname
    pylab.savefig(plotfile, dpi=400, bbox_inches='tight')
    print(plotfile)

    return plotfile

def main():
    """
    Script entry point: create the baseline plot by calling plot_results()

    No command line arguments are read.
    """
    #query = sys.argv[1]
    plot_results()

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()