pmkhlv

Untitled

Mar 24th, 2022
5,608
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 1.42 KB | None | 0 0
  1. CREATE DATABASE yellow_taxi location 'hdfs://user/root/2020';
  2.  
  3. CREATE external TABLE dim_vendor
  4. (
  5.     vendor_id INT,
  6.     name string
  7. )
  8. stored AS parquet;
  9.  
  10. CREATE external TABLE taxi_data
  11. (
  12. vendor_id INT,
  13. tpep_pickup_datetime TIMESTAMP,
  14. tpep_dropoff_datetime TIMESTAMP,
  15. passenger_count INT,
  16. trip_distance DOUBLE,
  17. pulocation_id INT,
  18. dolocation_id INT,
  19. ratecode_id INT,
  20. store_and_fwd_flag string,
  21. payment_type INT,
  22. fare_amount DOUBLE,
  23. extra DOUBLE,
  24. mta_tax DOUBLE,
  25. improvement_surcharge DOUBLE,
  26. tip_amount DOUBLE,
  27. tolls_amount DOUBLE,
  28. total_amount DOUBLE)
  29.  
  30. ROW format delimited
  31. FIELDS TERMINATED BY ','
  32. LINES TERMINATED BY '\n'
  33. location 'hdfs:///user/root/2020'
  34. TBLPROPERTIES ("skip.header.line.count"="1");
  35.  
  36.  
  37. CREATE external TABLE part_taxi_data
  38. (
  39. vendor_id INT,
  40. tpep_dropoff_datetime TIMESTAMP,
  41. passenger_count INT,
  42. trip_distance DOUBLE,
  43. pulocation_id INT,
  44. dolocation_id INT,
  45. ratecode_id INT,
  46. store_and_fwd_flag string,
  47. payment_type INT,
  48. fare_amount DOUBLE,
  49. extra DOUBLE,
  50. mta_tax DOUBLE,
  51. improvement_surcharge DOUBLE,
  52. tip_amount DOUBLE,
  53. tolls_amount DOUBLE,
  54. total_amount DOUBLE)
  55. partitioned BY (tpep_pickup_datetime DATE)
  56. ROW format delimited
  57. FIELDS TERMINATED BY ','
  58. LINES TERMINATED BY '\n'
  59. stored AS parquet
  60. location 'hdfs:///user/root/2020/part-yellow-taxi'
  61. TBLPROPERTIES ("skip.header.line.count"="1");
  62.  
  63. INSERT INTO part_taxi_data partition(tpep_pickup_datetime) SELECT * FROM taxi_data;
  64.  
Advertisement
Add Comment
Please, Sign In to add comment