Advertisement
Guest User

Untitled

a guest
Apr 20th, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  1. SET hive.exec.dynamic.partition.mode=nonstrict;
  2. SET hive.exec.max.dynamic.partitions=2000;
  3. SET hive.exec.max.dynamic.partitions.pernode=500;
  4. USE testdb;
  5.  
  6. DROP TABLE IF EXISTS temp_Airline PURGE;
  7.  
  8. CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_Airline(
  9. Year SMALLINT,
  10. Month TINYINT,
  11. DayOfMonth TINYINT,
  12. DayOfWeek TINYINT,
  13. DepTime SMALLINT,
  14. CRSDepTime SMALLINT,
  15. ArrTime SMALLINT,
  16. CRSArrTime SMALLINT,
  17. UniqueCarrier VARCHAR(7),
  18. FlightNum SMALLINT,
  19. TailNum STRING,
  20. ActualElapsedTime SMALLINT,
  21. CRSElapsedAirtime SMALLINT,
  22. AirTime TINYINT,
  23. ArrDelay SMALLINT,
  24. DepDelay SMALLINT,
  25. Origin CHAR(3),
  26. Dest CHAR(3),
  27. Distance INT,
  28. TaxiIn SMALLINT,
  29. TaxiOut SMALLINT,
  30. Cancelled TINYINT,
  31. CancellationCode STRING,
  32. Diverted TINYINT,
  33. CarrierDelay SMALLINT,
  34. WeatherDelay SMALLINT,
  35. NASDelay SMALLINT,
  36. SecurityDelay SMALLINT,
  37. LateAircraftDelay SMALLINT)
  38. ROW FORMAT DELIMITED
  39. FIELDS TERMINATED BY ','
  40. STORED AS TEXTFILE
  41. LOCATION '/input/airline'
  42. TBLPROPERTIES ('skip.header.line.count'='1');
  43.  
  44. DROP TABLE IF EXISTS Airline PURGE;
  45.  
  46. CREATE TABLE Airline (
  47. FlightNum SMALLINT,
  48. TailNum STRING,
  49. DepDate TIMESTAMP,
  50. Delay SMALLINT,
  51. Carrier VARCHAR(7))
  52. COMMENT '2008 airlines'
  53. PARTITIONED BY(Airport CHAR(3))
  54. CLUSTERED BY(Carrier) INTO 8 BUCKETS
  55. STORED AS ORC;
  56.  
  57. INSERT OVERWRITE TABLE Airline PARTITION(Airport)
  58. SELECT s.FlightNum, s.TailNum, CAST(PRINTF('%d-%02d-%02d %02.0f:%02d:00', s.Year, s.Month, s.DayOfMonth, (s.DepTime - 50) / 100, s.DepTime % 100) AS TIMESTAMP), IF(s.ArrDelay>0, s.ArrDelay, 0) as ArrDelay, s.UniqueCarrier, s.Dest
  59. FROM temp_Airline s WHERE Cancelled=0 and Diverted=0;
  60.  
  61.  
  62. ------------------------------------------------------------------------------------------------------------------------------------
  63. USE testdb;
  64.  
  65. SELECT t.Carrier, t.Airport, t.AvgDelay FROM (
  66. SELECT d.Carrier, d.Airport, d.AvgDelay, DENSE_RANK() OVER (PARTITION BY d.Airport ORDER BY d.AvgDelay DESC) AS Rank FROM
  67. (SELECT Airport, Carrier, AVG(Delay) AS AvgDelay FROM Airline GROUP BY Airport, Carrier) as d) as t
  68. WHERE t.Rank < 3 ORDER BY AvgDelay DESC;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement