File diff 000000000000 → 72890999e24d
load_lsst_data.py
Show inline comments
 
new file 100644
 
import os, sys, time, logging
 
import glob, socket, subprocess
 
from os.path import basename
 

	
 
from pymonetdb.sql.connections import Connection as connect
 
from pymonetdb.exceptions import Error as DBError
 

	
 
import lsst_schema_queries as sch
 
import lsst_queries as q
 

	
 
#######################################
 

	
 
def main():
 
    dbport = 53000
 
    dbrelease = "Dec2016"
 
    dbversion = "SP5"
 
    dbname = "lsst_napels"
 

	
 
    node = socket.gethostname().split('.')[0]
 

	
 
    logfile = 'log/' + node + "_lsst_" + dbrelease + '_' + dbversion + "_" + time.strftime("%Y%m%d-%H%M%S") + ".log"
 
    logging.basicConfig(filename=logfile, filemode='w', format='%(message)s', level=logging.INFO)
 
    logging.info('Started')
 
    print "DB (%s) logging in file %s" % (dbname, logfile)
 

	
 
    csvdir = "/export/scratch2/bscheers/lsst/csvs"
 
    csvfiles = sorted(glob.glob(csvdir + '/S*_*.csv'))
 
    print "len(csvfiles) = %s" % (len(csvfiles))
 

	
 
    p = subprocess.Popen(["./init_db.sh", dbname], stdout=subprocess.PIPE)
 
    output, err = p.communicate()
 
    print "O:",output
 
    if err is not None:
 
        print "E:",err
 

	
 
    hname = 'localhost'
 
    uname = 'monetdb'
 
    pword = 'monetdb'
 

	
 
    tstart = time.time()
 

	
 
    conn = connect(hostname=hname, port=dbport, username=uname, password=pword, database=dbname)
 

	
 
    fcnt = 0
 
    scnt, fscnt, ocnt, ofocnt = 0, 0, 0, 0
 
    for f in csvfiles:
 
        fcnt += 1
 
        t0 = time.time()
 
        tabname = basename(f)[:-4].lower()
 
        print "====>\n#### Loading CSV file nr %s: %s ####" % (fcnt, basename(f))
 
        if tabname.startswith("source_"):
 
            scnt += 1
 
            sch.create_table(conn, basename(f), q=1)
 
            q.load_csv(conn, f, q=2) # executes queries 2 & 3
 
            if scnt == 1:
 
                sch.create_merge_table(conn, basename(f), q=4)
 
                sch.alter_merge_table_add_pkey(conn, f, q=5)
 
            sch.alter_merge_table_add_table(conn, basename(f), q=6)
 
        elif tabname.startswith("forcedsource_"):
 
            fscnt += 1
 
            sch.create_table(conn, basename(f), q=7)
 
            q.load_csv(conn, f, q=8)
 
            if fscnt == 1:
 
                sch.create_merge_table(conn, basename(f), q=10)
 
                sch.alter_merge_table_add_pkey(conn, f, q=11)
 
            sch.alter_merge_table_add_table(conn, basename(f), q=12)
 
        elif tabname.startswith("object_"):
 
            ocnt += 1
 
            sch.create_table(conn, basename(f), q=13)
 
            q.load_csv(conn, f, q=14)
 
            if ocnt == 1:
 
                sch.create_merge_table(conn, basename(f), q=16)
 
                sch.alter_merge_table_add_pkey(conn, f, q=17)
 
            sch.alter_merge_table_add_table(conn, basename(f), q=18)
 
        elif tabname.startswith("objectfulloverlap_"):
 
            ofocnt += 1
 
            sch.create_table(conn, basename(f), q=19)
 
            q.load_csv(conn, f, q=20)
 
            if ofocnt == 1:
 
                sch.create_merge_table(conn, basename(f), q=22)
 
                sch.alter_merge_table_add_pkey(conn, f, q=23)
 
            sch.alter_merge_table_add_table(conn, basename(f), q=24)
 
        else:
 
            sys.exit("No valid tabname: %s" % (tabname))
 

	
 
        print "# L : Total copy-into time for this file: %s seconds" % (str(round(time.time() - t0, 3)))
 
        print "# L : Total processing time: %s seconds" % (str(round(time.time() - tstart, 3)))
 

	
 
    conn.close()
 
    print "# DB: Connection closed"
 
    print "\n#### Total elapsed time: %s seconds ####\n" % (str(round(time.time() - tstart, 3)))
 
    logging.info('Ended')
 

	
 
    print "Check log file: %s" % (logfile)
 

	
 
if __name__ == '__main__':
 
    main()