Advertisement
Guest User

hiveQueries

a guest
Aug 16th, 2015
314
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.72 KB | None | 0 0
  1.  
  2. Hive Command:
  3.  
  4. 1. Create Table:
  5. ========================
  6. CREATE TABLE player_runs(player_id INT, year_of_play STRING, runs_scored INT)
  7. COMMENT 'This is the player_run table'
  8. STORED AS TEXTFILE;
  9.  
  10.  
  11. 2. See all tables in a database:
  12. =========================
  13. Show tables;
  14.  
  15.  
  16. 3. See Schema of a table
  17. =========================
  18. describe player_runs
  19.  
  20.  
  21. 4. Alter table commands :
  22. =========================
  23. ALTER TABLE player_runs RENAME TO runs_of_player;
  24. ALTER TABLE runs_of_player ADD COLUMNS (balls_played INT COMMENT 'a new int column');
  25.  
  26.  
  27. 5. Drop Table
  28. =========================
  29. drop table runs_of_player;
  30.  
  31. CREATE TABLE player_runs(player_id INT, year_of_play STRING, runs_scored INT, balls_played INT, country STRING)
  32. COMMENT 'This is the player_run table'
  33. ROW FORMAT DELIMITED
  34. FIELDS TERMINATED BY ','
  35. STORED AS TEXTFILE;
  36.  
  37. LOAD DATA LOCAL INPATH '/home/hduser/runs.csv' INTO TABLE player_runs;
  38.  
  39. select * from player_runs limit 10;
  40.  
  41.  
  42. 6. Create external table
  43. =========================
  44.  
  45. CREATE EXTERNAL TABLE player_runs_p(player_id INT, year_of_play STRING, runs_scored INT, balls_played INT)
  46. COMMENT 'This is the staging player_runs table' PARTITIONED BY(country STRING)
  47. ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
  48. STORED AS TEXTFILE
  49. LOCATION '/input/runs';
  50.  
  51. hadoop fs -mkdir /input/runs/country=India
  52. hadoop fs -put /home/hduser/runs_India.csv /input/runs/country=India/
  53.  
  54. ALTER TABLE player_runs_p ADD PARTITION(country='India');
  55.  
  56.  
  57. 7. Loading data into Table
  58. =============================
  59.  
  60. LOAD DATA LOCAL INPATH '/home/hduser/runs_US.csv' INTO TABLE player_runs_p PARTITION(country='US');
  61. LOAD DATA LOCAL INPATH '/home/hduser/runs_India.csv' INTO TABLE player_runs_p PARTITION(country='INDIA');
  62.  
  63.  
  64. FROM player_runs_p
  65. INSERT OVERWRITE TABLE player_runs
  66. SELECT player_id, year_of_play, runs_scored,balls_played,0
  67. where player_id=10;
  68.  
  69.  
  70. 8. Simple Select Query:
  71. ==================
  72. Select player_id, runs_scored from player_runs;
  73. select * from player_runs where player_id=10;
  74.  
  75. 9. Group BY Query:
  76. ====================
  77. select player_id, sum(runs_scored) from player_runs group by player_id;
  78.  
  79. 10 Join :
  80. ==================
  81.  
  82. CREATE EXTERNAL TABLE players(player_id INT, name STRING)
  83. COMMENT 'This is the staging player table'
  84. ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
  85. STORED AS TEXTFILE
  86. LOCATION '/input/player';
  87.  
  88. LOAD DATA LOCAL INPATH '/home/hduser/player.csv' INTO TABLE players;
  89.  
  90. select * from players join player_runs on players.player_id=player_runs.player_id;
  91.  
  92. select * from players full outer join player_runs on players.player_id=player_runs.player_id;
  93.  
  94. select * from players left outer join player_runs on players.player_id=player_runs.player_id;
  95.  
  96.  
  97.  
  98.  
  99. 11 Multi Insert:
  100. ===========================
  101.  
  102. CREATE TABLE player_total_runs(player_id INT, runs_scored INT)
  103. COMMENT 'This is the player_total_run table'
  104. STORED AS TEXTFILE;
  105.  
  106. CREATE TABLE yearly_runs(year_of_play STRING, runs_scored INT)
  107. COMMENT 'This is the yearly_run table'
  108. STORED AS TEXTFILE;
  109.  
  110.  
  111. //Insert into two tables
  112. FROM player_runs
  113. INSERT OVERWRITE TABLE player_total_runs
  114. select player_id, sum(runs_scored)
  115. group by player_id
  116. INSERT OVERWRITE TABLE yearly_runs
  117. select year_of_play, sum(runs_scored)
  118. group by year_of_play;
  119.  
  120.  
  121. //Insert into one table and one HDFS file
  122.  
  123. hadoop fs -mkdir /output/
  124. hadoop fs -mkdir /output/yearly_runs
  125.  
  126. FROM player_runs
  127. INSERT OVERWRITE TABLE player_total_runs
  128. select player_id, sum(runs_scored)
  129. group by player_id
  130. INSERT OVERWRITE DIRECTORY '/output/yearly_runs'
  131. select year_of_play, sum(runs_scored)
  132. group by year_of_play;
  133.  
  134.  
  135.  
  136. 12 Partioning + Distribute: (HIVE CONFIG ERRORS)
  137. ============================
  138.  
  139.  
  140. CREATE EXTERNAL TABLE player_runs_distribute(player_id INT, year_of_play STRING, runs_scored INT, balls_played INT)
  141. COMMENT 'This is the staging player_runs table' PARTITIONED BY(country STRING)
  142. ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
  143. STORED AS TEXTFILE
  144. LOCATION '/input/runs_distribute/';
  145.  
  146. set hive.exec.dynamic.partition=true;
  147. set hive.exec.dynamic.partition.mode=nonstrict;
  148.  
  149. FROM player_runs
  150. INSERT OVERWRITE TABLE player_runs_distribute PARTITION(country)
  151. SELECT player_id, year_of_play , runs_scored , balls_played, country
  152. DISTRIBUTE BY country;
  153.  
  154. GETTING ERROR MESSAGE:
  155. Dynamic partition strict mode requires at least one static partition column. To turn this off set hive.exec.dynamic.partition.mode=nonstrict
  156.  
  157.  
  158. select * from player_runs_distribute where country='India' limit 10;
  159.  
  160. 13 Partioning + Bucketing: (HIVE CONFIG ERRORS)
  161. ============================
  162.  
  163.  
  164. CREATE EXTERNAL TABLE player_runs_clustered(player_id INT, year_of_play STRING, runs_scored INT, balls_played INT)
  165. COMMENT 'This is the player_runs table' PARTITIONED BY(country STRING)
  166. clustered by (player_id) INTO 10 buckets
  167. ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
  168. LINES TERMINATED BY '\n'
  169. STORED AS TEXTFILE
  170. LOCATION '/input/runs_clustered/';
  171.  
  172. LOAD DATA LOCAL INPATH "data/runs_extra.csv" INTO TABLE player_runs;
  173.  
  174. set hive.exec.dynamic.partition=true;
  175. set hive.exec.dynamic.partition.mode=nonstrict;
  176. set hive.enforce.bucketing=true;
  177.  
  178. FROM player_runs
  179. INSERT OVERWRITE TABLE player_runs_clustered PARTITION(country)
  180. SELECT player_id, year_of_play , runs_scored , balls_played, country
  181. DISTRIBUTE BY country;
  182.  
  183.  
  184. select avg(runs_scored) from player_runs_clustered TABLESAMPLE(BUCKET 1 OUT OF 10);
  185.  
  186.  
  187. 14. MapSide Join
  188. =======================
  189. select /*+ MAPJOIN(players)*/ * from players join player_runs on players.player_id=player_runs.player_id;
  190.  
  191. select * from players join player_runs on players.player_id=player_runs.player_id;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement