diff --git a/vldb-protocols.labnotes b/vldb-protocols.labnotes index eedc6cd4c2d7e802818feb1383e7f4b3a74a15ed..1ef411d94e015dc29d23f08b8511eb88a7f2b34e 100644 --- a/vldb-protocols.labnotes +++ b/vldb-protocols.labnotes @@ -560,7 +560,7 @@ netcat baseline comments -- skip the protocol, dump to csv/parqet/.../feather?, copy files over socket, read again, parse, time everything (only for Monet?) +- skip the protocol, dump to csv/parquet, copy files over socket, read again, parse, time everything (only for Monet?) - measure baseline select * from lineitem limit 1; subtract - develop cost models base on line, types etc - motivation: machine learning that needs lots of rows as input (decision tree?, classifier?, cross-validation, visualization) @@ -594,5 +594,36 @@ https://hemantoracledba.blogspot.nl/2008/06/tuning-very-large-selects-in-sqlplus applied +hive? +https://cwiki.apache.org/confluence/display/Hive/Parquet + +https://cwiki.apache.org/confluence/display/Hive/GettingStarted#GettingStarted-RunningHiveServer2andBeeline + + +SET mapreduce.framework.name=local; + + +CREATE TABLE Staff (id int, name string, salary double) row format delimited fields terminated by ','; + +http://stackoverflow.com/questions/19320611/hadoop-hive-loading-data-from-csv-on-a-local-machine + + +LOAD DATA LOCAL INPATH '/home/yourcsvfile.csv' OVERWRITE INTO TABLE Staff; + + + + + +CREATE TABLE lineitem_ext ( L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE DATE, L_COMMITDATE DATE, L_RECEIPTDATE DATE, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) row format delimited fields terminated by '|'; + + +LOAD DATA LOCAL INPATH '/home/user/lineitem.tbl' OVERWRITE INTO TABLE lineitem_ext; + + + +CREATE TABLE lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX 
DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE DATE, L_COMMITDATE DATE, L_RECEIPTDATE DATE, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) STORED AS PARQUET; + + +insert into lineitem select * from lineitem_ext;