# 1) Import the orders table from MySQL into HDFS as a tab-delimited text file
sqoop-import --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username retail_dba \
--password cloudera \
--table orders \
--fields-terminated-by "\t" \
--lines-terminated-by "\n" \
--as-textfile \
--target-dir /user/cloudera/teja_arun/file-formats/orders-text \
-m 1

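To sanity-check the text import, one option (a sketch, assuming the pyspark shell and its default SparkContext sc) is to read the target directory back:

textData = sc.textFile("/user/cloudera/teja_arun/file-formats/orders-text")
textData.take(5)
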
# 2) Import the same table as an Avro data file
sqoop-import --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username retail_dba \
--password cloudera \
--table orders \
--target-dir /user/cloudera/teja_arun/file-formats/orders-avros \
--as-avrodatafile \
-m 1
# 3) Not working: --as-parquetfile fails here because of a Kite SDK issue in
# this environment; the Spark steps below convert the Avro import to Parquet instead
sqoop-import --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username retail_dba \
--password cloudera \
--table orders \
--target-dir /user/cloudera/teja_arun/file-formats/orders-parquet \
--as-parquetfile \
-m 1

# Read the Avro import back as a DataFrame (needs the com.databricks.spark.avro package)
avroData=sqlContext.read.load("/user/cloudera/teja_arun/file-formats/orders-avros","com.databricks.spark.avro")

# Write it out as Snappy-compressed Parquet
sqlContext.setConf("spark.sql.parquet.compression.codec","snappy")
avroData.write.parquet("/user/cloudera/teja_arun/file-formats/orders-parquet-snappy")

# Save as gzip-compressed, tab-delimited text
avroData.map(lambda rec: (str(rec[0])+"\t"+str(rec[1])+"\t"+str(rec[2])+"\t"+str(rec[3]))).saveAsTextFile("/user/cloudera/teja_arun/file-formats/order-text-gzip","org.apache.hadoop.io.compress.GzipCodec")

# Sequence files must be (key, value) pairs, so key each record by its first column
avroData.map(lambda rec: (rec[0],(str(rec[0])+"\t"+str(rec[1])+"\t"+str(rec[2])+"\t"+str(rec[3])))).saveAsSequenceFile("/user/cloudera/teja_arun/file-formats/order-sequence")

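A minimal read-back sketch (same shell assumed): sequence files written this way come back as (key, value) pairs via sc.sequenceFile.

seqData = sc.sequenceFile("/user/cloudera/teja_arun/file-formats/order-sequence")
seqData.take(5)
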
# Save as Snappy-compressed text (SnappyCodec relies on the native Snappy libraries)
avroData.map(lambda rec: (str(rec[0])+"\t"+str(rec[1])+"\t"+str(rec[2])+"\t"+str(rec[3]))).saveAsTextFile("/user/cloudera/teja_arun/file-formats/order-text-snappy","org.apache.hadoop.io.compress.SnappyCodec")

# Re-read the Snappy Parquet output and rewrite it uncompressed
parquetData=sqlContext.read.parquet("/user/cloudera/teja_arun/file-formats/orders-parquet-snappy")
sqlContext.setConf("spark.sql.parquet.compression.codec","uncompressed")
parquetData.write.parquet("/user/cloudera/teja_arun/file-formats/orders-parquet-uncompress")

# Write the same data as Snappy-compressed Avro
sqlContext.setConf("spark.sql.avro.compression.codec","snappy")
parquetData.write.save("/user/cloudera/teja_arun/file-formats/orders-avro-snappy","com.databricks.spark.avro")

# Read the Snappy Avro back and save it as JSON, both uncompressed and gzip-compressed
avroSnappyData=sqlContext.read.load("/user/cloudera/teja_arun/file-formats/orders-avro-snappy","com.databricks.spark.avro")
avroSnappyData.toJSON().saveAsTextFile("/user/cloudera/teja_arun/file-formats/orders-json-nocompress")
avroSnappyData.toJSON().saveAsTextFile("/user/cloudera/teja_arun/file-formats/orders-json-gzip","org.apache.hadoop.io.compress.GzipCodec")

# Read the JSON output and save it as gzip-compressed CSV
# (note: JSON schema inference orders columns alphabetically, so rec[0..3] may not match the original column order)
jsonData=sqlContext.read.json("/user/cloudera/teja_arun/file-formats/orders-json-nocompress")
jsonData.map(lambda rec:(str(rec[0])+","+str(rec[1])+","+str(rec[2])+","+str(rec[3]))).saveAsTextFile("/user/cloudera/teja_arun/file-formats/orders-csv-gzip","org.apache.hadoop.io.compress.GzipCodec")
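
As a final check (again a sketch from the same shell), the gzipped CSV can be read straight back, since sc.textFile decompresses gzip transparently:

csvCheck = sc.textFile("/user/cloudera/teja_arun/file-formats/orders-csv-gzip")
csvCheck.take(5)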