Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- """
- Simple Python wrapper around lftp to mirror a remote ftp onto a local folder.
- New files are downloaded, deleted files are marked for removal in a TO_DELETE.sh
- script.
- """
- import os
- import pprint
- import re
- import subprocess
- import sys
- import urllib.parse
def get_diff(local_dir, remote_dir, host, user, password):
    """
    Get a diff between the local copy and the remote server.

    An lftp script performing a ``mirror --dry-run --delete`` is fed to lftp
    on its standard input; the commands lftp prints on stdout are returned
    instead of being executed.

    Params:
        - local_dir is the path to the local copy.
        - remote_dir is the path to the folder to replicate on the distant ftp.
        - host is the ftp host.
        - user is the user to authenticate with.
        - password is the associated password.

    Returns a diff ie a list of lftp commands (an empty list when lftp
    printed nothing).
    """
    parts_per_file = 10  # Number of parts per file
    parallel = 2  # Number of parallel transfers

    # The --dry-run generates lftp commands that we parse into a diff list.
    # NOTE(review): the values are interpolated verbatim; a password holding a
    # double quote, or a path holding whitespace, is not escaped here.
    script_lines = [
        "open {host}",
        "user {user} \"{password}\"",
        "mirror --dry-run --delete -c --use-pget-n={parts_per_file} "
        "--parallel={parallel} {remote_dir} {local_dir}",
        "bye",
    ]
    lftp_input = "\n".join(script_lines).format(
        host=host, user=user, password=password,
        parts_per_file=parts_per_file, parallel=parallel,
        remote_dir=remote_dir, local_dir=local_dir)

    # "-f" and its argument are passed as two separate argv entries (the
    # original relied on an accidental implicit string concatenation).
    process = subprocess.run(["lftp", "-f", "/dev/stdin"],
                             input=lftp_input.encode("utf-8"),
                             stdout=subprocess.PIPE)
    stdout = process.stdout.decode("utf-8").strip()

    # "".split("\n") would yield [""], so the empty case is handled apart.
    return stdout.split("\n") if stdout else []
def apply_diff(diff):
    """
    Apply a diff (list of lftp commands).

    The commands are joined into one lftp script, prefixed with
    ``set xfer:clobber on``, and fed to lftp on its standard input. lftp's own
    output is not captured and its exit status is not inspected.

    Params:
        - diff is the list of lftp commands to apply.
    """
    lftp_input = "set xfer:clobber on\n" + "\n".join(diff)
    # "-f" and its argument are passed as two separate argv entries (the
    # original relied on an accidental implicit string concatenation).
    subprocess.run(["lftp", "-f", "/dev/stdin"],
                   input=lftp_input.encode("utf-8"))
def _sh_double_quote(path):
    """
    Wrap path in double quotes for use in a POSIX sh script.

    The four characters that stay special inside double quotes (backslash,
    double quote, dollar sign and backtick) are backslash-escaped, so the
    path cannot terminate the quoting or trigger an expansion.
    """
    return "\"%s\"" % (re.sub(r"([\\\"$`])", r"\\\1", path),)


def extract_rm_commands(diff):
    """
    Extract removal commands (rm) from the diff.

    Params:
        - diff is the list of lftp commands to apply.

    Returns a tuple (rm_commands, diff) where rm_commands is a list of rm
    commands to apply, in sh format, and diff is a list of lftp commands to
    apply without any removal command. Relative order is preserved in both
    lists.
    """
    rm_commands = []
    other_commands = []
    # Single pass partition (the original rescanned rm_commands per entry).
    for command in diff:
        if command.startswith("rm"):
            # Replace file: URLs by quoted non-urlencoded paths. The decoded
            # path is escaped because it ends up in a shell script.
            rm_commands.append(re.sub(
                r"file:(.*)",
                lambda match: _sh_double_quote(
                    urllib.parse.unquote(match.group(1))),
                command))
        else:
            other_commands.append(command)
    return rm_commands, other_commands
def uniq_append(file, content):
    """
    Append to a file, ensuring there are not any duplicate lines.

    Lines already in the file come first, followed by the new lines from
    content, each kept once in first-seen order. Blank lines are dropped. The
    file always ends with a single ``rm <file>`` line, so that the script
    removes itself last.

    Params:
        - file is the path to the script to update (created if missing,
          together with its parent directories).
        - content is the list of lines to add, without trailing newline.
    """
    # Ensure directory exists. A bare file name has an empty dirname, which
    # designates the current directory and must not be created.
    directory = os.path.dirname(file)
    if directory:
        os.makedirs(directory, exist_ok=True)

    self_rm = "rm %s" % (file,)

    # Previous content first, stripped of line terminators so that a last
    # line lacking its "\n" is not glued to the following one on rewrite.
    lines = []
    if os.path.isfile(file):
        with open(file, "r") as fh:
            lines.extend(line.rstrip("\n") for line in fh)
    lines.extend(line.rstrip("\n") for line in content)

    # Drop empty lines and the rm command associated to the file itself (it
    # is re-added at the bottom), then uniqify while keeping a stable order.
    unique_lines = dict.fromkeys(
        line for line in lines if line.strip() != "" and line != self_rm)

    # Write new content
    with open(file, "w") as fh:
        fh.writelines(line + "\n" for line in unique_lines)
        # And we ensure to write the rm command associated with the
        # TO_DELETE file at the end.
        fh.write(self_rm + "\n")
def main():
    """
    Command-line entry point.

    Expects LOCAL_DIR REMOTE_DIR HOST USER PASS as the first five arguments.
    Fetches the diff, stores the removal commands in LOCAL_DIR/TO_DELETE.sh
    and applies the remaining commands, if any.
    """
    if len(sys.argv) < 6:
        sys.exit("Usage: %s LOCAL_DIR REMOTE_DIR HOST USER PASS" %
                 (sys.argv[0],))
    # Extra arguments beyond the fifth are ignored.
    local_dir, remote_dir, host, user, password = sys.argv[1:6]

    print("Fetching diff…")
    diff = get_diff(local_dir, remote_dir, host, user, password)
    print("Diff is:")
    pprint.pprint(diff)
    print()

    # Removals are never applied directly: they are saved in a script.
    rm_commands, diff = extract_rm_commands(diff)
    rm_script = os.path.normpath(local_dir + "/TO_DELETE.sh")
    uniq_append(rm_script, rm_commands)

    if not diff:
        print("No new files to get.")
    else:
        print("Applying get commands in diff…")
        apply_diff(diff)

    if rm_commands:
        print("Rm commands in diff were saved to %s." % (rm_script,))
    print("Done!")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement