Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fetch, decompress, convert and upload the remote log files for one event date.
#
# Usage: bash <this-script> EVENT_DATE
#   EVENT_DATE  pattern (grep basic regex, normally a date string) matched
#               against every line of the remote `ls -a` listing of each city
#               directory; the 9th column of each matching line is downloaded.
#
# Steps:
#   1. List the sub-directories ("cities") of /storage/fwlog/ on the SFTP host
#      (entries whose name contains a dot are ignored).
#   2. Download the matching files of every city into folder_with_files_txtbz2.
#   3. Decompress them in place with pbzip2 (5 parallel jobs, 4 threads each).
#   4. Run ./log_to_pandas on every file (3 parallel jobs); outputs are written
#      next to the inputs as <input>-%d.parquet.
#   5. Upload all *.parquet files to HDFS under /user/r.uraev/parquet_test.
#
# Optional environment overrides (defaults keep the original hard-coded values):
#   FWLOG_SFTP_USER, FWLOG_SFTP_HOST, FWLOG_SFTP_PORT, FWLOG_SFTP_PASS
#
# Requires: bash, lftp, awk, grep, GNU parallel, pbzip2, hdfs, and an
# executable ./log_to_pandas in the directory the script is started from.

# Arrays, mapfile and [[ ]] are used below; bail out early under plain sh.
if [ -z "${BASH_VERSION:-}" ]; then
  echo "this script requires bash" >&2
  exit 1
fi

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s EVENT_DATE\n' "${0##*/}" >&2
    exit 2
  fi
  local -r event_date=$1

  # Start copy files
  local -r user=${FWLOG_SFTP_USER:-dmp}
  local -r host=${FWLOG_SFTP_HOST:-192.168.225.118}
  local -r port=${FWLOG_SFTP_PORT:-22}
  # SECURITY(review): plaintext credential kept only as a backward-compatible
  # default. Prefer supplying FWLOG_SFTP_PASS from the environment, then remove
  # this literal and rotate the password.
  local pass='oJN$*8H8vM'
  pass=${FWLOG_SFTP_PASS:-$pass}

  local -r adres_cityes="/storage/fwlog/"
  local -r FBZ2="folder_with_files_txtbz2"
  local -r FPARQUET="folder_with_parquet_files"
  local -r hdfs_target="/user/r.uraev/parquet_test"

  # Always used quoted so the `$` and `*` in the password are never expanded.
  local -r remote="sftp://${user}:${pass}@${host}:${port}"
  local -r start_dir=$PWD

  # Unmatched globs must expand to nothing instead of the literal pattern.
  shopt -s nullglob

  printf 'event_date = %s\n' "$event_date"

  # Column 9 of the long listing is the entry name; names containing a dot
  # (regular files, "." and "..") are not city directories.
  local -a cities=()
  mapfile -t cities < <(
    lftp -e "cd ${adres_cityes};ls; exit" "$remote" \
      | awk 'NF >= 9 {print $9}' \
      | grep -v '\.'
  )
  echo "list_cityes"
  (( ${#cities[@]} > 0 )) || die "no city directories found under ${adres_cityes}"
  printf '%s\n' "${cities[*]}"

  # NOTE(review): FPARQUET is created but nothing below writes into it; the
  # parquet files end up inside FBZ2. Kept as in the original - confirm intent.
  mkdir -p -- "$FPARQUET" "$FBZ2" || die "cannot create working directories"
  cd -- "$FBZ2" || die "cannot cd to ${FBZ2}"
  printf '%s\n' "$PWD"

  local city file_txtbz2
  local -a remote_files
  for city in "${cities[@]}"; do
    echo "Downloading $city"
    remote_files=()
    # The date filter runs locally so EVENT_DATE is never interpolated into
    # the lftp command string.
    mapfile -t remote_files < <(
      lftp -e "cd ${adres_cityes}${city}/; ls -a; exit" "$remote" \
        | grep -- "$event_date" \
        | awk 'NF >= 9 {print $9}'
    )
    for file_txtbz2 in "${remote_files[@]}"; do
      lftp -e "cd ${adres_cityes}${city}/;get ${file_txtbz2}; exit" "$remote" \
        || printf 'warning: failed to download %s/%s\n' "$city" "$file_txtbz2" >&2
      printf '%s\n' "$file_txtbz2"
    done
  done

  # Start files converting
  local -a downloaded
  downloaded=(*)
  (( ${#downloaded[@]} > 0 )) || die "no files were downloaded for ${event_date}"
  # NUL-delimited input keeps unusual file names intact.
  printf '%s\0' "${downloaded[@]}" | parallel -0 -j 5 pbzip2 -d -p4 {} \
    || echo "warning: some files could not be decompressed" >&2

  echo "Files in folder"
  ls

  # log_to_pandas is resolved relative to the directory the script started in.
  cd -- "$start_dir" || die "cannot cd back to ${start_dir}"
  local -a inputs
  inputs=("${FBZ2}"/*)
  (( ${#inputs[@]} > 0 )) || die "nothing to convert in ${FBZ2}"
  # The "%d" in the output name is passed through verbatim; presumably
  # log_to_pandas replaces it with a part number - confirm against that tool.
  printf '%s\0' "${inputs[@]}" \
    | parallel -0 -j 3 ./log_to_pandas ./{} ./{}-%d.parquet \
    || echo "warning: some files could not be converted" >&2

  cd -- "$FBZ2" || die "cannot cd to ${FBZ2}"
  local -a parquet_files
  parquet_files=(*.parquet)
  (( ${#parquet_files[@]} > 0 )) || die "conversion produced no .parquet files"
  ls -- "${parquet_files[@]}"
  hdfs dfs -put "${parquet_files[@]}" "$hdfs_target" \
    || die "upload to ${hdfs_target} failed"

  echo "Done"
}

main "$@"
# Profit
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement