Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- SET hive.exec.dynamic.partition.mode=nonstrict;
- SET hive.exec.max.dynamic.partitions=2000;
- SET hive.exec.max.dynamic.partitions.pernode=500;
- USE testdb;
- DROP TABLE IF EXISTS temp_Airline PURGE;
- CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_Airline(
- Year SMALLINT,
- Month TINYINT,
- DayOfMonth TINYINT,
- DayOfWeek TINYINT,
- DepTime SMALLINT,
- CRSDepTime SMALLINT,
- ArrTime SMALLINT,
- CRSArrTime SMALLINT,
- UniqueCarrier VARCHAR(7),
- FlightNum SMALLINT,
- TailNum STRING,
- ActualElapsedTime SMALLINT,
- CRSElapsedAirtime SMALLINT,
- AirTime TINYINT,
- ArrDelay SMALLINT,
- DepDelay SMALLINT,
- Origin CHAR(3),
- Dest CHAR(3),
- Distance INT,
- TaxiIn SMALLINT,
- TaxiOut SMALLINT,
- Cancelled TINYINT,
- CancellationCode STRING,
- Diverted TINYINT,
- CarrierDelay SMALLINT,
- WeatherDelay SMALLINT,
- NASDelay SMALLINT,
- SecurityDelay SMALLINT,
- LateAircraftDelay SMALLINT)
- ROW FORMAT DELIMITED
- FIELDS TERMINATED BY ','
- STORED AS TEXTFILE
- LOCATION '/input/airline'
- TBLPROPERTIES ('skip.header.line.count'='1');
- DROP TABLE IF EXISTS Airline PURGE;
- CREATE TABLE Airline (
- FlightNum SMALLINT,
- TailNum STRING,
- DepDate TIMESTAMP,
- Delay SMALLINT,
- Carrier VARCHAR(7))
- COMMENT '2008 airlines'
- PARTITIONED BY(Airport CHAR(3))
- CLUSTERED BY(Carrier) INTO 8 BUCKETS
- STORED AS ORC;
- INSERT OVERWRITE TABLE Airline PARTITION(Airport)
- SELECT s.FlightNum, s.TailNum, CAST(PRINTF('%d-%02d-%02d %02.0f:%02d:00', s.Year, s.Month, s.DayOfMonth, (s.DepTime - 50) / 100, s.DepTime % 100) AS TIMESTAMP), IF(s.ArrDelay>0, s.ArrDelay, 0) as ArrDelay, s.UniqueCarrier, s.Dest
- FROM temp_Airline s WHERE Cancelled=0 and Diverted=0;
- ------------------------------------------------------------------------------------------------------------------------------------
- USE testdb;
- SELECT t.Carrier, t.Airport, t.AvgDelay FROM (
- SELECT d.Carrier, d.Airport, d.AvgDelay, DENSE_RANK() OVER (PARTITION BY d.Airport ORDER BY d.AvgDelay DESC) AS Rank FROM
- (SELECT Airport, Carrier, AVG(Delay) AS AvgDelay FROM Airline GROUP BY Airport, Carrier) as d) as t
- WHERE t.Rank < 3 ORDER BY AvgDelay DESC;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement