Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Download the .gz files listed under an HDFS directory through the Hue
# file browser.
#
# Steps:
#   1. GET the Hue login page so the server sets a CSRF cookie.
#   2. POST the credentials together with that CSRF token.
#   3. Request the directory listing as JSON and extract the "path"
#      entries that mention "gz".
#   4. Download each file into $target, mirroring its HDFS directory.
#
# Optional environment overrides (defaults match the values below):
#   HUE_USER  login name
#   HUE_PASS  password (prefer this over editing the script)
#
# Exit status: 0 when every download succeeded, 1 otherwise.

set -euo pipefail

site_url="hue:8888"
hdfspath="/master/datalake/2016/11/18"
user="${HUE_USER:-timvw}"
pass="${HUE_PASS:-xxx}"
cookie_path=cookie
target=./data

# hue api variables
login_url="$site_url/accounts/login/"
browse_url_prefix="$site_url/filebrowser/view="
browse_url_suffix="?format=json"
download_url_prefix="$site_url/filebrowser/download="

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# go to the login page and fetch the csrf token
curl -s -b "$cookie_path" -c "$cookie_path" --request GET "$login_url?next=/" -o /dev/null \
  || die "cannot reach login page: $login_url"

# The cookie jar is tab-separated; the cookie value is the 7th field.
# If several lines match, keep the last one so the token is a single value.
csrftoken=$(awk -F'\t' '/csrftoken/ { token = $7 } END { print token }' "$cookie_path") \
  || die "cannot read cookie jar: $cookie_path"
[[ -n "$csrftoken" ]] || die "no csrftoken cookie found in $cookie_path"

# now login
# --data-urlencode keeps credentials containing &, =, + or % intact.
curl -s -b "$cookie_path" -c "$cookie_path" \
  --data-urlencode "csrfmiddlewaretoken=$csrftoken" \
  --data-urlencode "username=$user" \
  --data-urlencode "password=$pass" \
  --data-urlencode "next=/" \
  "$login_url" -o /dev/null \
  || die "login request failed: $login_url"

# list files
listing=$(curl -sf -b "$cookie_path" -c "$cookie_path" \
  "${browse_url_prefix}${hdfspath}${browse_url_suffix}") \
  || die "cannot list $hdfspath"

status=0

# One path per line: strip the JSON key and trailing quote/comma, trim
# surrounding whitespace (including carriage returns), then de-duplicate.
# Reading line by line keeps each path as a single word.
while IFS= read -r file; do
  [[ -n "$file" ]] || continue

  filepath=$(dirname -- "$file")
  filename=$(basename -- "$file")
  mkdir -p -- "${target}${filepath}"

  # Request the file itself, not its parent directory. -f makes curl fail
  # on HTTP errors instead of saving the error page as the .gz file.
  if ! curl -sf -b "$cookie_path" -c "$cookie_path" \
    "${download_url_prefix}${file}" -o "${target}${filepath}/${filename}"; then
    printf 'failed to download %s\n' "$file" >&2
    status=1
  fi
done < <(
  printf '%s\n' "$listing" \
    | grep path \
    | grep gz \
    | sed -e 's/"path": "//g' -e 's/",//g' \
      -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
    | sort -u
)

exit "$status"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement