Advertisement
Gfy

nzb_utils.py

Gfy
Dec 24th, 2011
219
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.99 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: latin-1 -*-
  3.  
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation, either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program.  If not, see <http://www.gnu.org/licenses/>
  16.  
  17. # http://docs.newzbin.com/index.php/Newzbin:NZB_Specs
  18.  
  19. import pynzb # http://pypi.python.org/pypi/pynzb/
  20. import os
  21. import io
  22. import re
  23. import time
  24. import datetime
  25. from xml.dom import minidom
  26.  
  27. def read_nzb(nzb_file):
  28.     print("Reading %s." % os.path.basename(nzb_file))
  29.     def parse(nzb_file):
  30.         try: # file on disk
  31.             return pynzb.nzb_parser.parse(open(nzb_file).read())
  32.         except: # an open file object
  33.             return pynzb.nzb_parser.parse(nzb_file.read())
  34.  
  35.     try:
  36.         return parse(nzb_file)
  37.     except:
  38.         print("Parsing the nzb file failed. Trying to fix invalid XML.")
  39.         # Problem with the ampersand.
  40.         # newsmangler doesn't properly escape the & in the NZB
  41.         # http://www.powergrep.com/manual/xmpxmlfixentities.html
  42.         XML_AMP_FIX = "&(?!(?:[a-z]+|#[0-9]+|#x[0-9a-f]+);)"
  43.         fixed_nzb = io.BytesIO()
  44.         for line in open(nzb_file, "r").readlines():
  45.             line = re.sub(XML_AMP_FIX, "&amp;", line)
  46.             line = re.sub("&ouml;", "ö", line)
  47.             fixed_nzb.write(line)
  48.         fixed_nzb.seek(0)
  49.         return parse(fixed_nzb)
  50.    
  51. def parse_name(subject):
  52.     """ Grabs the file name from the subject of the Usenet posting.
  53.     Return the whole subject if the file name isn't parseable.
  54.     &quot; must be replaced by " for this to work. """
  55.     match = re.search('''"(.*)"''', subject)
  56.     if match:
  57.         return match.group(1).strip('"')
  58.     else:
  59.         # "Because the poster used a non-standard subject line, the system was
  60.         # unable to determine the filename with certainty."
  61.         match = re.search(".*(\]-| )(?P<filename>.*) [\d/\(\)]+", subject)
  62.         if match:
  63.             return match.group("filename")
  64.         else:
  65.             return subject
  66.  
  67. """
  68. NZBFile Objects
  69. ===============
  70.  
  71. All of the parsers return ``NZBFile`` objects, which are objects with the
  72. following properties:
  73.  
  74. ``poster``:
  75.    The name of the user who posted the file to the newsgroup.
  76.  
  77. ``date``:
  78.    A ``datetime.datetime`` (local time; see ``_parse_date`` below) of when the server first saw the file.
  79.  
  80. ``subject``:
  81.    The subject used when the user posted the file to the newsgroup.
  82.  
  83. ``groups``:
  84.    A list of strings representing the newsgroups in which this file may be
  85.    found.
  86.  
  87. ``segments``:
  88.    A list of ``NZBSegment`` objects describing where to get the contents
  89.    of this file.
  90.  
  91.  
  92. NZBSegment Objects
  93. ==================
  94.  
  95. Each ``NZBFile`` has a list of ``NZBSegment`` objects, which include
  96. information
  97. on how to retrieve a part of a file.  Here's what you can find on an
  98. ``NZBSegment`` object:
  99.  
  100. ``number``:
  101.    The number of the segment in the list of files.
  102.  
  103. ``bytes``:
  104.    The size of the segment, in bytes.
  105.  
  106. ``message_id``:
  107.    The Message-ID of the segment (useful for retrieving the full contents)
  108. """
  109.  
  110. # fix pynzb library
  111. def _parse_date(date):
  112.     if isinstance(date, basestring):
  113.         date = int(date)
  114.     return datetime.datetime.fromtimestamp(date)
  115. pynzb.base.parse_date = _parse_date
  116.  
  117. # add compare functionality
  118. def _equality_test(self, other):
  119.     try:
  120.         return (self.bytes == other.bytes and
  121.             self.number == other.number and
  122.             self.message_id == other.message_id)
  123.     except AttributeError:
  124.         return (self.bytes == other.bytes and
  125.             self.number == other.number)
  126. pynzb.base.NZBSegment.__eq__ = _equality_test
  127.  
  128. # pynzb library only supports parsing
  129. #'add_group', 'add_segment', 'date', 'groups', 'poster', 'segments', 'subject'
  130.  
  131. def empty_nzb_document():
  132.     """ Creates xmldoc XML document for a NZB file. """
  133.     # http://stackoverflow.com/questions/1980380/how-to-render-a-doctype-with-pythons-xml-dom-minidom
  134.     imp = minidom.getDOMImplementation()
  135.     dt = imp.createDocumentType("nzb", "-//newzBin//DTD NZB 1.1//EN",
  136.                                 "http://www.newzbin.com/DTD/nzb/nzb-1.1.dtd")
  137.     doc = imp.createDocument("http://www.newzbin.com/DTD/2003/nzb", "nzb", dt)
  138.     # http://stackoverflow.com/questions/2306149/how-to-write-xml-elements-with-namespaces-in-python
  139.     doc.documentElement.setAttribute('xmlns', 'http://www.newzbin.com/DTD/2003/nzb')
  140.     return doc
  141.  
  142. def get_pretty_xml(document):
  143.     """ NZB will have bugs if used with the wrong Python version:
  144.     http://bugs.python.org/issue1777134 """
  145.     return document.toprettyxml(encoding="UTF-8")
  146.  
  147. def get_xml(document):
  148.     return document.toxml("UTF-8")
  149.  
  150. def _date_to_posix(date):
  151.     """ date: datetime.datetime object """
  152.     posix = time.mktime(date.timetuple())
  153.     return str(int(posix))
  154.  
  155. def add_file(document, nzb_file):
  156.     """ document: xml.dom.minidom.Document object """
  157.     top_element = document.documentElement
  158.     file = document.createElement("file")
  159.    
  160.     # add file attributes
  161.     file.setAttribute("poster", nzb_file.poster)
  162.     file.setAttribute("date", _date_to_posix(nzb_file.date))
  163.     file.setAttribute("subject", nzb_file.subject)
  164.    
  165.     # groups
  166.     groups = document.createElement("groups")
  167.     for group in nzb_file.groups:
  168.         g = document.createElement("group")
  169.         g.appendChild(document.createTextNode(group))
  170.         groups.appendChild(g)
  171.     file.appendChild(groups)   
  172.    
  173.     # segments
  174.     segments = document.createElement("segments")
  175.     for segment in nzb_file.segments:
  176.         s = document.createElement("segment")
  177.         s.setAttribute("bytes", str(segment.bytes))
  178.         s.setAttribute("number", str(segment.number))
  179.         s.appendChild(document.createTextNode(segment.message_id))
  180.         segments.appendChild(s)
  181.     file.appendChild(segments) 
  182.    
  183.     top_element.appendChild(file)  
  184.     return document
  185.  
  186. def list_filenames(nzb_file):
  187.     return [(parse_name(f.subject), f.date) for f in read_nzb(nzb_file)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement