Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import copy
import io
import re
import tarfile
from xml.etree import ElementTree as ET

import numpy as np
course = "course.dgYucf.tar.gz"

# XML attributes removed from every video file in the archive.
STRIPPED_ATTRIBUTES = ("sub", "edx_video_id")

# Matches one complete name="value" (or name='value') pair for any of the
# attributes above, together with the whitespace in front of it.  Requiring
# that leading whitespace keeps longer names that merely end in "sub" (for
# example xsub="...") from matching.  The pattern is compiled as bytes so the
# payload never has to be decoded and non-ASCII content passes through
# untouched.
_ATTRIBUTE_RE = re.compile(
    (
        r"""\s+(?:%s)\s*=\s*(?:"[^"]*"|'[^']*')"""
        % "|".join(re.escape(attr) for attr in STRIPPED_ATTRIBUTES)
    ).encode("ascii")
)


def strip_video_attributes(data: bytes) -> bytes:
    """Return *data* with every ``sub`` and ``edx_video_id`` attribute removed.

    The attribute, its quoted value and the whitespace preceding it are
    dropped; every other byte is returned unchanged.  Input that contains
    none of the attributes is returned as-is.
    """
    return _ATTRIBUTE_RE.sub(b"", data)


def strip_course_archive(source_path: str, target_path: str,
                         verbose: bool = False) -> None:
    """Copy a gzip-compressed tar archive, cleaning its video files.

    Every member of *source_path* is written to *target_path* in the original
    order.  Regular files whose archive path contains ``"video"`` have their
    content passed through :func:`strip_video_attributes`; all other members
    (including directories and links) are copied unchanged.

    Args:
        source_path: Path of the ``.tar.gz`` archive to read.
        target_path: Path of the ``.tar.gz`` archive to create or overwrite.
        verbose: When true, print the name and rewritten content of each
            video file followed by a separator line.

    Raises:
        OSError: If either archive cannot be opened.
        tarfile.TarError: If the source is not a readable tar archive.
    """
    with tarfile.open(source_path, "r:gz") as source, \
            tarfile.open(target_path, "w:gz") as target:
        for member in source:
            if member.isreg() and "video" in member.name:
                payload = strip_video_attributes(
                    source.extractfile(member).read()
                )
                if verbose:
                    print(member.name)
                    print(payload.decode("utf-8", "replace"))
                    print("--------------------------")
                # Work on a copy so the original mode, owner and mtime are
                # kept; only the size has to follow the shortened payload.
                rewritten = copy.copy(member)
                rewritten.size = len(payload)
                target.addfile(rewritten, io.BytesIO(payload))
            else:
                # Only regular files carry data; directories and links are
                # written as header-only entries.
                payload_file = (
                    source.extractfile(member) if member.isreg() else None
                )
                target.addfile(member, payload_file)


def main() -> None:
    """Clean the configured course archive under ``tar_courses/``."""
    strip_course_archive(
        "tar_courses/" + course,
        "tar_courses/new" + course,
        verbose=True,
    )


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement