Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Dry-run rsync of the latest GenBank bacterial assemblies from NCBI.

Lists every entry under ``genomes/genbank/bacteria`` on the NCBI FTP
server. For each entry found in an organism's ``latest_assembly_versions``
directory, it invokes ``rsync`` (with ``--dry-run``) against the matching
rsync module path, using ``scratch/<organism>`` as the destination.
"""
import os
import posixpath
import subprocess
import sys
from ftplib import FTP, error_perm

ftp_site = 'ftp.ncbi.nlm.nih.gov'
remote_root = 'genomes/genbank/bacteria'
log_file = 'scratch/test_dir/log.txt'

# rsync does not create missing parent directories for --log-file, so make
# sure the directory exists before the first invocation.
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# The context manager closes the FTP control connection even if a listing
# or an rsync call raises part-way through the run.
with FTP(ftp_site) as ftp:
    ftp.login()
    ftp.cwd(remote_root)
    dirs = ftp.nlst()
    for organism in dirs:
        # Remote paths always use '/', regardless of the local OS, so build
        # them with posixpath rather than os.path.
        latest = posixpath.join(organism, "latest_assembly_versions")
        try:
            paths = ftp.nlst(latest)
        except error_perm as err:
            # The server answers with a permanent error when the directory
            # is missing or cannot be listed; skip this organism instead of
            # aborting the whole run.
            print("skipping " + organism + ": " + str(err), file=sys.stderr)
            continue
        for path in paths:
            accession = posixpath.basename(path)
            fasta = accession + "_genomic.fna.gz"
            status = subprocess.call([
                'rsync',
                '--recursive',
                '-vv',
                '--copy-links',
                '--dry-run',
                '-f=+ ' + fasta,
                # NOTE(review): the matching exclude rule ('-f=- *') is left
                # disabled, as in the original. Without an exclude rule the
                # include rule above presumably does not narrow what rsync
                # selects -- confirm before dropping --dry-run.
                ftp_site + '::' + remote_root + '/' + latest,
                '--log-file=' + log_file,
                'scratch/' + organism,
            ])
            if status != 0:
                # Surface failures instead of silently discarding the exit
                # status; keep going so one bad transfer does not stop the
                # remaining organisms.
                print(
                    "rsync exited with status " + str(status)
                    + " for " + latest + " (" + fasta + ")",
                    file=sys.stderr,
                )
Add Comment
Please, Sign In to add comment