Advertisement
josephxsxn

SmallFileMerging

Aug 8th, 2017
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 1.26 KB | None | 0 0
  -- SmallFileMerging (HiveQL)
  --
  -- Compacts the small files of ONE partition by reading that partition and
  -- overwriting it in place through a single reducer, sorted for later reads.
  --
  -- Table layout this script was written against (reference only, not executed):
  --   CREATE TABLE testtable (
  --       hash    STRING,
  --       timstim TIMESTAMP,
  --       payload STRING)
  --   PARTITIONED BY (year STRING, month STRING, day STRING);
  --
  -- Background reading:
  --   https://issues.apache.org/jira/browse/HIVE-3509
  --   https://issues.apache.org/jira/browse/HIVE-15774  (hive.txn.strict.locking.mode)
  --
  -- Comments are kept on their own lines: a trailing "--" comment after a SET
  -- can be folded into the property value or the next command by some clients.

  SET hive.execution.engine=mr;

  -- 'none' lets a backquoted name in the SELECT list act as a column regex.
  SET hive.support.quoted.identifiers=none;

  -- Split size (bytes), optimized for mapper buffers.
  -- Property names are case-sensitive and must stay lowercase.
  SET mapreduce.input.fileinputformat.split.maxsize=500000000;
  SET mapreduce.input.fileinputformat.split.minsize=500000000;

  -- Compress map output to improve shuffle transfer.
  SET mapreduce.map.output.compress=true;
  SET mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;

  -- Increase these if you get OOM or heap errors.
  -- NOTE(review): a 14g heap is far larger than the 1600 MB container requested
  -- on the previous setting; the heap is normally sized below the container.
  -- Values are kept as published -- confirm the intended pair before running.
  SET mapreduce.reduce.memory.mb=1600;
  SET mapreduce.reduce.java.opts=-Xmx14g;

  -- Controls the maximum number of output files written into the partition or table.
  SET hive.exec.reducers.max=1;

  -- Naming one specific partition in both the PARTITION spec and the WHERE
  -- clause prevents locking the other partitions.
  -- The backquoted regex selects every column except year/month/day, which are
  -- already supplied by the static PARTITION spec.
  -- SORT BY: optimize your ORC files for your query predicate patterns.
  INSERT OVERWRITE TABLE testtable PARTITION (year = '2017', month = '05', day = '22')
  SELECT `(year|month|day)?+.+`
  FROM testtable
  WHERE year = '2017'
    AND month = '05'
    AND day = '22'
  SORT BY hash, timstim;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement