Advertisement
Guest User

Untitled

a guest
Apr 7th, 2020
208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.12 KB | None | 0 0
  # Fetch, unpack, convert and upload the files that match one event date.
  #
  # Usage: bash <this script> EVENT_DATE
  #   EVENT_DATE  grep pattern matched against the `ls -a` listing of every
  #               city directory on the SFTP host; matching entries are fetched.
  #
  # Environment:
  #   FWLOG_SFTP_PASS  optional; overrides the built-in SFTP password.
  #
  # Steps:
  #   1. list the city directories under $adres_cityes on the SFTP host;
  #   2. download every matching file of every city into ./$FBZ2;
  #   3. decompress the downloads in place with pbzip2;
  #   4. run ./log_to_pandas on every file in ./$FBZ2, writing
  #      <file>-%d.parquet next to its input;
  #   5. upload the resulting *.parquet files to $hdfs_target.
  #
  # Requires: bash 4+ (mapfile), lftp, GNU parallel, pbzip2, hdfs and an
  # executable ./log_to_pandas in the directory the script is started from.
  set -euo pipefail

  # --- Configuration ---------------------------------------------------------
  user='dmp'
  host='192.168.225.118'
  port='22'
  # NOTE(review): this credential is stored in clear text. It is kept only so
  # existing invocations keep working; prefer exporting FWLOG_SFTP_PASS and
  # rotating/removing the default.
  default_pass='oJN$*8H8vM'
  pass=${FWLOG_SFTP_PASS:-$default_pass}
  adres_cityes="/storage/fwlog/"
  FBZ2="folder_with_files_txtbz2"
  # NOTE(review): $FPARQUET is created below but nothing is written into it;
  # the parquet files end up in $FBZ2. Confirm whether that is intended.
  FPARQUET="folder_with_parquet_files"
  hdfs_target='/user/r.uraev/parquet_test'

  # --- Helpers ---------------------------------------------------------------

  # Print an error message to stderr and abort the script.
  die() {
    printf 'error: %s\n' "$*" >&2
    exit 1
  }

  # Print a warning to stderr; the script keeps running.
  warn() {
    printf 'warning: %s\n' "$*" >&2
  }

  # Run one lftp command string ($1) on the SFTP host, then leave lftp.
  # The credentials go through -u instead of the URL so that the special
  # characters of the password are never re-parsed by the shell or as a URL.
  sftp_run() {
    lftp -u "${user},${pass}" -p "${port}" -e "$1; exit" "sftp://${host}"
  }

  # --- Arguments -------------------------------------------------------------
  event_date=${1:?usage: ${0##*/} EVENT_DATE}
  echo "event_date = $event_date"

  # --- Start copy files ------------------------------------------------------

  # Field 9 of each listing line is taken as the entry name (as the original
  # pipeline did); entries whose name contains a dot are skipped, which also
  # drops "." and "..".
  listing=$(sftp_run "cd ${adres_cityes}; ls") \
    || die "cannot list ${adres_cityes} on ${host}"
  mapfile -t list_cityes < <(
    awk 'NF >= 9 && $9 !~ /\./ { print $9 }' <<<"$listing"
  )
  (( ${#list_cityes[@]} )) || die "no city directories found in ${adres_cityes}"

  echo "list_cityes"
  echo "${list_cityes[*]}"

  # -p: do not fail when the directories are left over from an earlier run.
  mkdir -p -- "$FPARQUET" "$FBZ2"
  cd -- "./$FBZ2" || die "cannot enter ./$FBZ2"
  ROOT=$(pwd)
  echo "$ROOT"

  for city in "${list_cityes[@]}"; do
    echo "Downloading $city"
    remote_dir="${adres_cityes}${city}/"

    if ! listing=$(sftp_run "cd ${remote_dir}; ls -a"); then
      warn "cannot list ${remote_dir}; skipping"
      continue
    fi

    # The date filter runs locally, so EVENT_DATE is never spliced into the
    # lftp command string. A grep without matches simply yields an empty list.
    mapfile -t list_files_with_dates < <(
      grep -- "$event_date" <<<"$listing" | awk 'NF >= 9 { print $9 }'
    )
    (( ${#list_files_with_dates[@]} )) || continue

    for file_txtbz2 in "${list_files_with_dates[@]}"; do
      # One failed transfer must not abort the remaining downloads.
      sftp_run "cd ${remote_dir}; get ${file_txtbz2}" \
        || warn "download failed: ${remote_dir}${file_txtbz2}"
      echo "$file_txtbz2"
    done
  done

  # --- Start files converting ------------------------------------------------

  # Expand globs to nothing (instead of the literal pattern) when unmatched.
  shopt -s nullglob

  downloads=(./*)
  (( ${#downloads[@]} )) || die "nothing to process in $ROOT for '$event_date'"

  # NUL-delimited input keeps unusual file names intact. Failures are only
  # reported: files left over from an earlier run are not bzip2 archives.
  printf '%s\0' "${downloads[@]}" \
    | parallel -0 -j 5 pbzip2 -d -p4 {} \
    || warn "pbzip2 reported errors for some files"

  echo "Files in folder"
  ls

  # ./log_to_pandas lives in the start directory, one level above $FBZ2.
  cd .. || die "cannot return to the start directory"
  inputs=("$FBZ2"/*)
  # The %d in the output name is passed through literally to log_to_pandas.
  printf '%s\0' "${inputs[@]}" \
    | parallel -0 -j 3 ./log_to_pandas ./{} ./{}-%d.parquet \
    || warn "log_to_pandas reported errors for some files"

  cd -- "./$FBZ2" || die "cannot enter ./$FBZ2"
  # The ./ prefix protects names that start with a dash.
  parquet_files=(./*.parquet)
  (( ${#parquet_files[@]} )) || die "no parquet files were produced in $ROOT"
  printf '%s\n' "${parquet_files[@]}"

  hdfs dfs -put "${parquet_files[@]}" "$hdfs_target" \
    || die "upload to ${hdfs_target} failed"

  echo "Done"
  # Profit
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement