Files @ 0818c337d503
Branch filter:

Location: DA/lsst_blog/plot_baselines.py - annotation

Bart Scheers
Add blog post url
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
72890999e24d
#!/usr/bin/python

import matplotlib as mpl
mpl.use('Agg')
import os, errno, time, sys, pylab
from scipy import *
from scipy import optimize
from scipy import special
from scipy import stats
#import numpy as np
import matplotlib.cm as cm
import matplotlib.ticker as ticker
from datetime import datetime
import logging, csv
from os.path import basename, exists

plotdir = os.path.abspath('.') + '/results'
if not os.path.exists(plotdir):
    os.makedirs(plotdir)
    print "Created plotdir: %s" % (plotdir)
logdir = os.path.abspath('.') + '/log'

def stdev(v):
    """
    Return the Bessel corrected standard deviation of a series of measurements
    """
    return sqrt(len(v) * (mean(square(v)) - mean(v)**2) / (len(v) - 1))

def mysql_querytimes():
    """
    """
    # The s15 mysql query timing results taken from
    # https://confluence.lsstcorp.org/display/DM/S15+Large+Scale+Tests
    # Values on website are reported in seconds
    counts = [47.75, 40.99, 48.33]
    shortrunning = [0.09, 0.33]
    fullscans = [247.61, 244.24, 1088.09, 1077.38, 898.61]
    joins = [1381.44, 1291.38]
    neighbor = [676.02]
    mysqls15 = asarray(counts + shortrunning + fullscans + joins + neighbor)
    #tms = [ts * 1000 for ts in mysqls15]
    return mysqls15

def monetdb_querytimes(qlog):
    """
    """
    # From the qlog name, we can deduce the query number
    # and run type
    qtime = []
    with open(qlog, 'rb') as csvfile:
        qreader = csv.reader(csvfile, delimiter=';')
        for row in qreader:
            run = int(row[0])
            qtime.append(float(row[1])) # unit is ms
    #for i in range(len(qtime)):
    #    print "%s: qtime = %s" % (i, qtime[i])
    return mean(qtime), stdev(qtime)

def plot_results():
    """
    """

    logdir = 'sql/s15/log'
    plotdir = '.'
    plotfiles = []

    node = "stones10"
    dbversion = "dec2016sp1"

    # We have hot and cold runs
    runtype = ['hot', 'cold']
    #runtype = ['hot']
    # We divide the queries in five groups
    counts = ['q01', 'q02', 'q03']
    shortrunning = ['q04', 'q05']
    fullscans = ['q06', 'q07', 'q08', 'q09', 'q10']
    joins = ['q11', 'q12']
    neighbor = ['q13']
    queries = asarray(counts + shortrunning + fullscans + joins + neighbor)
    mysqls15 = mysql_querytimes()

    monetdbs15 = {}
    for temp in runtype:
        monetdbs15[temp] = {}
        monetdbs15[temp]['qt_avg'] = []
        monetdbs15[temp]['qt_std'] = []
        for qnr in queries:
            qlog = logdir + "/%s.%s.%s.%s.log" % (qnr, temp, node, dbversion)
            #print "qlog = %s" % (qlog)
            if not exists(qlog):
                sys.exit("Run timings. Log file %s does not exist (yet)." % (qlog))
            qt_avg, qt_std = monetdb_querytimes(qlog)
            print "%s qnr: %s: qt_avg = %s; qt_std = %s" % (temp, qnr, qt_avg, qt_std)
            monetdbs15[temp]['qt_avg'].append(qt_avg)
            monetdbs15[temp]['qt_std'].append(qt_std)
    mhot_t = [t * 0.001 for t in monetdbs15['hot']['qt_avg']]
    mhot_s = [s * 0.001 for s in monetdbs15['hot']['qt_std']]
    mcold_t = [t * 0.001 for t in monetdbs15['cold']['qt_avg']]
    mcold_s = [s * 0.001 for s in monetdbs15['cold']['qt_std']]

    #print "monetdbs15 = %s" % (monetdbs15)
    fig = pylab.figure(figsize=(28,8))
    ax = fig.add_subplot(111)

    width = 0.2
    ekw = dict(ecolor='k', lw=2, capsize=5, capthick=2)
    r1 = ax.bar(arange(len(mysqls15)), mysqls15, width, color='gray' \
               ,error_kw=ekw,label='MySQL')
    r2 = ax.bar(arange(len(mcold_t)) + width, mcold_t, width, yerr=mcold_s, color='dodgerblue' \
               ,error_kw=ekw,label='MonetDB cold')
    r3 = ax.bar(arange(len(mhot_t)) + 2 * width, mhot_t, width, yerr=mhot_s, color='red' \
               ,error_kw=ekw,label='MonetDB hot')
    ax.set_ylabel('Time [s]', fontsize='25')
    ax.set_xticks(arange(len(mysqls15)) + width)
    ax.set_xticklabels([q.upper() for q in queries])
    #ax.set_ylim(ymin=10, ymax=35000)
    ax.set_ylim(ymin=0.001, ymax=10000)
    ax.set_yscale("log")
    #ax.legend(loc='upper right')
    ax.legend(loc='upper left', fontsize='25')

    for i in range(len(ax.get_xticklabels())):
        #ax.get_xticklabels()[i].set_size('xx-large')
        ax.get_xticklabels()[i].set_size('25')
    for i in range(len(ax.get_yticklabels())):
        #ax.get_yticklabels()[i].set_size('xx-large')
        ax.get_yticklabels()[i].set_size('25')
    pylab.grid(True)

    fname = 'baseline_lsst_queries.eps'

    plotfile = plotdir + '/' + fname
    plotfiles.append(plotfile)
    pylab.savefig(plotfile, dpi=400, bbox_inches='tight')
    print plotfile

    return plotfile

def main():
    #query = sys.argv[1]
    plot_results()

if __name__ == '__main__':
    main()