Files @ 0818c337d503
Branch filter:

Location: DA/lsst_blog/load_lsst_data.py

Bart Scheers
Add blog post url
import os, sys, time, logging
import glob, socket, subprocess
from os.path import basename

from pymonetdb.sql.connections import Connection as connect
from pymonetdb.exceptions import Error as DBError

import lsst_schema_queries as sch
import lsst_queries as q

#######################################

def main():
    dbport = 53000
    dbrelease = "Dec2016"
    dbversion = "SP5"
    dbname = "lsst_napels"

    node = socket.gethostname().split('.')[0]

    logfile = 'log/' + node + "_lsst_" + dbrelease + '_' + dbversion + "_" + time.strftime("%Y%m%d-%H%M%S") + ".log"
    logging.basicConfig(filename=logfile, filemode='w', format='%(message)s', level=logging.INFO)
    logging.info('Started')
    print "DB (%s) logging in file %s" % (dbname, logfile)

    csvdir = "/export/scratch2/bscheers/lsst/csvs"
    csvfiles = sorted(glob.glob(csvdir + '/S*_*.csv'))
    print "len(csvfiles) = %s" % (len(csvfiles))

    p = subprocess.Popen(["./init_db.sh", dbname], stdout=subprocess.PIPE)
    output, err = p.communicate()
    print "O:",output
    if err is not None:
        print "E:",err

    hname = 'localhost'
    uname = 'monetdb'
    pword = 'monetdb'

    tstart = time.time()

    conn = connect(hostname=hname, port=dbport, username=uname, password=pword, database=dbname)

    fcnt = 0
    scnt, fscnt, ocnt, ofocnt = 0, 0, 0, 0
    for f in csvfiles:
        fcnt += 1
        t0 = time.time()
        tabname = basename(f)[:-4].lower()
        print "====>\n#### Loading CSV file nr %s: %s ####" % (fcnt, basename(f))
        if tabname.startswith("source_"):
            scnt += 1
            sch.create_table(conn, basename(f), q=1)
            q.load_csv(conn, f, q=2) # executes queries 2 & 3
            if scnt == 1:
                sch.create_merge_table(conn, basename(f), q=4)
                sch.alter_merge_table_add_pkey(conn, f, q=5)
            sch.alter_merge_table_add_table(conn, basename(f), q=6)
        elif tabname.startswith("forcedsource_"):
            fscnt += 1
            sch.create_table(conn, basename(f), q=7)
            q.load_csv(conn, f, q=8)
            if fscnt == 1:
                sch.create_merge_table(conn, basename(f), q=10)
                sch.alter_merge_table_add_pkey(conn, f, q=11)
            sch.alter_merge_table_add_table(conn, basename(f), q=12)
        elif tabname.startswith("object_"):
            ocnt += 1
            sch.create_table(conn, basename(f), q=13)
            q.load_csv(conn, f, q=14)
            if ocnt == 1:
                sch.create_merge_table(conn, basename(f), q=16)
                sch.alter_merge_table_add_pkey(conn, f, q=17)
            sch.alter_merge_table_add_table(conn, basename(f), q=18)
        elif tabname.startswith("objectfulloverlap_"):
            ofocnt += 1
            sch.create_table(conn, basename(f), q=19)
            q.load_csv(conn, f, q=20)
            if ofocnt == 1:
                sch.create_merge_table(conn, basename(f), q=22)
                sch.alter_merge_table_add_pkey(conn, f, q=23)
            sch.alter_merge_table_add_table(conn, basename(f), q=24)
        else:
            sys.exit("No valid tabname: %s" % (tabname))

        print "# L : Total copy-into time for this file: %s seconds" % (str(round(time.time() - t0, 3)))
        print "# L : Total processing time: %s seconds" % (str(round(time.time() - tstart, 3)))

    conn.close()
    print "# DB: Connection closed"
    print "\n#### Total elapsed time: %s seconds ####\n" % (str(round(time.time() - tstart, 3)))
    logging.info('Ended')

    print "Check log file: %s" % (logfile)

if __name__ == '__main__':
    main()