Advertisement
Guest User

PalmOS PIM PDB parser (AddressBook, DateBook, ToDo, Memo)

a guest
Dec 28th, 2012
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 16.10 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. # This code is an enhanced version of my ToDo parser (http://pastebin.com/RY0RDV86) and is based on Bob Kline's PalmAddress (http://pastebin.com/f75a93f48) & the existing Perl library Palm::PDB (see CPAN).
  4. # It expects a single argument - name of one PDB file and prints text representation of data stored in given PDB file.
  5. # External dependency: bitstring module (http://python-bitstring.googlecode.com) - for accessing the bitfield segments and converting them to numbers/strings.
  6. # The code can work only in read-only mode and supports only the four built-in PalmOS PIM apps (AddressBook, DateBook, ToDo and Memo).
  7. # TODO: Creating/writing into PDB files & support for parsing PDBs created by other apps.
  8. # FIXME: Parsing of repeating calendar events needs improvement (see comments in the code).
  9. # Author: Jiri Bajer (sarimak at seznam cz)
  10.  
  11. from bitstring import ConstBitArray
  12. import datetime
  13.  
  14. class ToDoRecord:
  15.  
  16.     def __init__(self, raw_record):
  17.         record = ConstBitArray(bytes=raw_record[0:3])
  18.  
  19.         self.due_year = record[0:7].uint + 1904 # Mac date
  20.         self.due_month = record[8:11].uint
  21.         self.due_day = record[12:16].uint
  22.         if record[0:8] == '0b11111111':
  23.             self.due_date = "" # due date not set
  24.         else:
  25.             self.due_date = "%s.%s. %s " % (self.due_day, self.due_month, self.due_year)
  26.         self.done = record[16]
  27.         self.priority = record[21:24].uint
  28.         self.description, self.note = raw_record[3:-1].split('\0') # both may contain \n characters
  29.  
  30.     def __str__(self):
  31.         return "%s[%s] P%s: %s (%s)" % (self.due_date, self.done, self.priority, self.description, self.note)
  32.  
  33. class MemoRecord:
  34.  
  35.     def __init__(self, raw_record):
  36.         self.text, ignored = raw_record.split('\0')
  37.  
  38.     def __str__(self):
  39.         return self.text
  40.  
  41. class AddressBookRecord:
  42.  
  43.     def __init__(self, raw_record, field_names=None):
  44.         custom_fields = ConstBitArray(bytes=raw_record[0:4]) # only 6 least significant 4bit nibbles are used
  45.         fields_used = ConstBitArray(bytes=raw_record[4:8]) # bitfield - indicates if field_name is present in field_values
  46.         field_values = raw_record[9:-1].split('\0') # null-terminated strings for each used filed (+get rid of leading and trailing \0)
  47.  
  48.         if not field_names: # use default names for record fields
  49.             field_names = { 0:"Last Name", 1:"First Name", 2:"Company", 3:"Phone1",
  50.                             4:"Phone2", 5:"Phone3", 6:"Phone4", 7:"Phone5",
  51.                             8:"Address", 9:"City", 10:"State", 11:"Zip Code",
  52.                             12:"Country", 13:"Title", 14:"Custom1", 15:"Custom2",
  53.                             16:"Custom3", 17:"Custom4", 18:"Note", 19:"Phone6",
  54.                             20:"Phone7", 21:"Phone8" }
  55.  
  56.         fields = field_names # make a copy for per-record field renaming
  57.         renames = ( "Work", "Home","Fax", "Other", "E-mail", "Main", "Pager", "Mobile" ) # phones 1-5 may be renamed to these fields
  58.         fields[3] = renames[ custom_fields[28:32].uint ] # renamed Phone1
  59.         fields[4] = renames[ custom_fields[24:28].uint ] # renamed Phone2
  60.         fields[5] = renames[ custom_fields[20:24].uint ] # renamed Phone3
  61.         fields[6] = renames[ custom_fields[16:20].uint ] # renamed Phone4
  62.         fields[7] = renames[ custom_fields[12:16].uint ] # renamed Phone5
  63.  
  64.         self.fields = {}
  65.         field_num = 0
  66.         for bit in range(31, 10, -1): # start from LSB and go through all 22 fields
  67.             if fields_used[bit]: # skip unused fields
  68.                 self.fields[ fields[31 - bit] ] = field_values[field_num] # respect field renaming
  69.                 field_num += 1
  70.  
  71.         self.default_field = fields[ 3 + custom_fields[8:12].uint ] # is displayed in list view, always contains one of renames
  72.         if self.default_field not in self.fields: # fix for records with no phone and default field set to 0 (Work)
  73.             self.default_field = ""
  74.  
  75.     def __str__(self):
  76.         return self.fields.__str__() + " Default: " + self.default_field
  77.  
  78. class DateBookRecord:
  79.     def __init__(self, raw_record):
  80.  
  81.         # event starts occuring since date
  82.         raw_date = ConstBitArray(bytes=raw_record[4:6])
  83.         self.day = raw_date[11:16].uint
  84.         self.month = raw_date[7:11].uint
  85.         self.year = raw_date[0:7].uint + 1904 # Mac date
  86.         self.occurs = "%s.%s %s " % (self.day, self.month, self.year)
  87.  
  88.         # event occurs between start and end time
  89.         self.time = {}
  90.         start_hour = ConstBitArray(bytes=raw_record[0]).uintbe
  91.         if start_hour != 0xFF: # event occurs on particular time
  92.             self.time["start_hour"] = start_hour
  93.             self.time["start_minute"] = ConstBitArray(bytes=raw_record[1]).uintbe
  94.             self.time["end_hour"] = ConstBitArray(bytes=raw_record[2]).uintbe
  95.             self.time["end_minute"] = ConstBitArray(bytes=raw_record[3]).uintbe
  96.             self.occurs += "%02d:%02d-%02d:%02d" % (self.time["start_hour"], self.time["start_minute"], self.time["end_hour"], self.time["end_minute"])
  97.         else:
  98.             self.occurs += "allday"
  99.  
  100.         # event flags
  101.         raw_flags = ConstBitArray(bytes=raw_record[6:8]) # bits [0] and [7:] are ignored, has_location is stored in [6] but currently ignored (location follows after note + has timezone info after itself)
  102.  
  103.         offset = 8 # alarm, repeat and exceptions may further shift it
  104.  
  105.         # event with alarm
  106.         self.alarm = {}
  107.         if raw_flags[1]: # has alarm
  108.             self.alarm["advance"] = raw_record[offset] # how many units in advance the alarm rings
  109.             unit_type = ConstBitArray(bytes=raw_record[offset + 1]).uintbe
  110.             unit_types = { 0: "minutes", 1: "hours", 2: "days" }
  111.             self.alarm["unit"] = unit_types[unit_type]
  112.             offset += 2
  113.  
  114.         # repeating event
  115.         self.repeat = {}
  116.         self.repeat_until = ""
  117.         self.repeat_type = ""
  118.         if raw_flags[2]: # is repeating
  119.             repeat_types = { 1: "daily", 2: "weekly", 3: "monthly by day", 4: "monthly by date", 5: "yearly" }
  120.             repeat_type = ConstBitArray(bytes=raw_record[offset]).uintbe
  121.             self.repeat["type"] = repeat_types[repeat_type]
  122.             self.repeat_type = " repeat " + repeat_types[repeat_type]
  123.  
  124.             # end of repeating
  125.             raw_end_date = ConstBitArray(bytes=raw_record[offset + 2:offset + 4]) # [offset + 1] is always \0
  126.             if raw_end_date != "0xFFFF": # repeat has end date
  127.                 self.repeat["end"] = {}
  128.                 self.repeat["end"]["day"] = raw_end_date[11:16].uint
  129.                 self.repeat["end"]["month"] = raw_end_date[7:11].uint
  130.                 self.repeat["end"]["year"] = raw_end_date[0:7].uint + 1904 # Mac date
  131.                 self.repeat_until = " until %s.%s %s" % (self.repeat["end"]["day"], self.repeat["end"]["month"], self.repeat["end"]["year"])
  132.             else:
  133.                 self.repeat["end"] = None
  134.                 self.repeat_until = " forever"
  135.  
  136.             # repeat every X
  137.             repeat_on = ConstBitArray(bytes=raw_record[offset + 5])
  138.             repeat_frequency = raw_record[offset + 4]
  139.             start_of_week = raw_record[offset + 6] # [offset + 7] is unused
  140.             # TODO: check if start of week doesn't shift the keys
  141.             # repeat_days = { 7: "Mon", 6: "Tue", 5: "Wed", 4: "Thu", 3: "Fri", 2: "Sat", 1: "Sun" }
  142.             repeat_days = { 7: "Sun", 6: "Mon", 5: "Tue", 4: "Wed", 3: "Thu", 2: "Fri", 1: "Sat" }
  143.  
  144.             if repeat_type == "weekly": # e.g. every Mon, Tue and Fri
  145.                 self.repeat["days"] = []
  146.                 for day in repeat_days.keys():
  147.                     if repeat_on[day]:
  148.                         self.repeat["days"].append( repeat_days[day] ) # FIXME - is ignored
  149.                 self.repeat_type += " " + str(self.repeat["days"])
  150.  
  151.             if repeat_type == "monthly by day": # e.g. every 2nd Fri
  152.                 if repeat_on == 5:
  153.                     self.repeat["week"] = "last"
  154.                 else:
  155.                     self.repeat["week"] = repeat_on[5:8].uint + 1 # every Xth weekday of month
  156.                 self.repeat["day"] = repeat_days[ repeat_on[0:5] ] # weekday
  157.                 self.repeat_type += " " + self.repeat["day"] + " " + self.repeat["week"] # FIXME - is ignored
  158.  
  159.             offset += 8
  160.  
  161.         # event occurance exceptions
  162.         self.exceptions = []
  163.         if raw_flags[4]: # has exceptions
  164.             num_exceptions = ConstBitArray(bytes=raw_record[offset:offset + 2]).uintbe
  165.             offset += 2
  166.             for exception in range(num_exceptions):
  167.                 raw_exception = ConstBitArray(bytes=raw_record[offset: offset + 2])
  168.                 day = raw_exception[11:16].uint
  169.                 month = raw_exception[7:11].uint
  170.                 year = raw_exception[0:7].uint + 1904 # Mac date
  171.                 self.exceptions.append( (day, month, year) ) # exceptions are list of tuples
  172.                 offset += 2
  173.  
  174.         # event description
  175.         if raw_flags[5]: # has description
  176.             self.text, ignore, raw_note = raw_record[offset:].partition('\0')
  177.         else:
  178.             self.note = "" # casem None
  179.  
  180.         # event note
  181.         if raw_flags[3]: # has note
  182.             self.note, ignore1, ignore2 = raw_note.partition('\0')
  183.         else:
  184.             self.note = "" # casem None
  185.  
  186.     def __str__(self):
  187.         return self.occurs + self.repeat_type + self.repeat_until + ": " + self.text + " (" + self.note + ") "
  188.  
  189. class PalmDB:
  190.  
  191.     def __init__(self):
  192.         self.raw_data = None # contans unparsed PDB file
  193.  
  194.     def _init_header(self):
  195.         self.header = self.raw_data[0:80] # fixed size byte array
  196.         self.dbname, ignore, ignore = self.header[0:32].partition('\0') # null-terminated string inside of fixed-size array
  197.         self.format_version = ConstBitArray(bytes=self.header[34:36]).uintbe # app-specific, big-endian
  198.         self.dbtype = self.header[60:64] # 4 char app-specific identifier
  199.         self.creator = self.header[64:68] # 4 char identifier assigned to the app
  200.  
  201.         # db attributes
  202.         raw_attributes = ConstBitArray(bytes=self.header[32:34]) # bit array, see below
  203.         self.attributes = {}
  204.         self.attributes["resource"] = raw_attributes[15]
  205.         self.attributes["readonly"] = raw_attributes[14]
  206.         self.attributes["dirty"] = raw_attributes[13]
  207.         self.attributes["archive"] = raw_attributes[12]
  208.         self.attributes["rewritable"] = raw_attributes[11] # PalmOS 2+
  209.         self.attributes["reset"] = raw_attributes[10] # PalmOS 2+
  210.         self.attributes["protected"] = raw_attributes[9]
  211.         self.attributes["syncable"] = not(raw_attributes[8]) # PalmOS 2+
  212.         self.attributes["busy"] = raw_attributes[0]
  213.  
  214.         MAC_EPOCH = 2082844800L # number of seconds between Jan 1 1904 and Jan 1 1970
  215.  
  216.         creation_time = ConstBitArray(bytes=self.header[36:40]).uintbe # seconds since Mac epoch, big-endian
  217.         modification_time = ConstBitArray(bytes=self.header[40:44]).uintbe # seconds since Mac epoch, big-endian (modification number [48:52] and seed [68:72] are unused)
  218.         backup_time = ConstBitArray(bytes=self.header[44:48]).uintbe # seconds since Mac epoch, big-endian
  219.  
  220.         if creation_time > MAC_EPOCH:
  221.             self.creation_time = datetime.datetime.fromtimestamp(creation_time - MAC_EPOCH)
  222.         else:
  223.             self.creation_time = None
  224.         if modification_time > MAC_EPOCH:
  225.             self.modification_time = datetime.datetime.fromtimestamp(modification_time - MAC_EPOCH)
  226.         else:
  227.             self.modification_time = None
  228.  
  229.         if backup_time > MAC_EPOCH:
  230.             self.backup_time = datetime.datetime.fromtimestamp(backup_time - MAC_EPOCH)
  231.         else:
  232.             self.backup_time = None # weird: in my case backup_time always is 28800
  233.  
  234.         # recordlist (chained record lists are deprecated as of PalmOS 4, have no real use and discouraged in lower PalmOS versions => next recordlist [72:75] is unused)
  235.         self.record_count = ConstBitArray(bytes=self.header[76:78]).uintbe # length of (first and the only) record list, big-endian
  236.         self.recordlist_offset = ConstBitArray(bytes=self.header[78:80]).uintbe # array of pointers to real data, may be set to 0x0000 if there are no records
  237.  
  238.         # appinfo
  239.         self.appinfo_offset = ConstBitArray(bytes=self.header[52:56]).uintbe # 0x0000 if not present, big-endian
  240.         self.sortinfo_offset = ConstBitArray(bytes=self.header[56:60]).uintbe # immediately after appinfo, 0x0000 if not present, big-endian
  241.         if self.appinfo_offset != 0:
  242.             if self.sortinfo_offset != 0:
  243.                 appinfo_end = self.sortinfo_offset
  244.             else:
  245.                 if self.recordlist_offset != 0: # no sortinfo
  246.                     appinfo_end = self.recordlist_offset
  247.                 else:
  248.                     appinfo_end = len(self.raw_data) # neither sortinfo nor records
  249.             self.raw_appinfo = self.raw_data[self.appinfo_offset:appinfo_end] # app-specific
  250.         else:
  251.             self.raw_appinfo = None
  252.  
  253.         # standard PalmOS categories (part of appinfo, not mandatory - apps may define a different format but builtin PIM apps use them)
  254.         self.categories = {} # this cannot be an array because records reference the categories via original position (and they don't have to be a contiguous sequence)
  255.         for category_num in range(16):
  256.             category_name, ignore1, ignore2 = self.raw_appinfo[2 + category_num * 16 : 18 + category_num * 16].partition('\0') # null-terminated string, max. 15 chars + \0 (renamed categories [0:2] are ignored)
  257.             if category_name:
  258.                 self.categories[category_num] = category_name # skip unused categories (with empty names) scattered among valid categories but preserve their original position as index
  259.                 # as the categories are referenced by records via order of appearance and not via category IDs, category IDs [258 + category_num] and last category ID [274] are ignored
  260.         if not self.categories:
  261.             self.categories[0] = "Unfiled" # fix for Datebook (has no category entries defined and last category ID is zero)
  262.  
  263.         # optional app-specific appinfo parsing (add your custom formats here)
  264.         if self.creator == "addr":
  265.             raw_labels = self.raw_appinfo[282:282+23*16] # some labels may be globally renamed, appinfo contains all their names (including defaults - renamed labels bitfield is ignored)
  266.             self.labels = {}
  267.             for label_num in range(22):
  268.                 label_name, ignore1, ignore2 = raw_labels[label_num * 16 : label_num * 16 + 16].partition('\0') # null-terminated string, max. 15 chars + \0
  269.                 self.labels[label_num] = label_name
  270.             # country = ConstBitArray(bytes=self.raw_appinfo[634:636]).uintbe
  271.             # sort_by_company = ConstBitArray(bytes=self.raw_appinfo[636:638])[0]
  272.  
  273.         # sortinfo
  274.         if self.sortinfo_offset != 0:
  275.             if self.recordlist_offset:
  276.                 sortinfo_end = self.recordlist_offset
  277.             else:
  278.                 sorinfo_end = len(self.raw_data) # no records
  279.             self.raw_sortinfo = self.raw_data[self.sortinfo_offset:sorinfo_end] # app-specific
  280.         else:
  281.             self.raw_sortinfo = None
  282.  
  283.     def _init_records(self):
  284.         self.raw_records = [] # app-specific, each record is stored as a dict
  285.         offset = 78 + self.recordlist_offset # pointer to pointer to first real data
  286.  
  287.         # find the real data
  288.         for record_num in range(self.record_count):
  289.             record_offset = ConstBitArray(bytes=self.raw_data[offset:offset + 4]).uintbe # pointer to real data
  290.             raw_record_attributes = ConstBitArray(bytes=self.raw_data[offset + 4]) # attributes of real data, bitarray, see below (record ID [offset + 5: offset + 8] is unused)
  291.  
  292.             # record attributes
  293.             record_attributes = {}
  294.             record_attributes["deleted"] = raw_record_attributes[0]
  295.             record_attributes["dirty"] = raw_record_attributes[1]
  296.             record_attributes["busy"] = raw_record_attributes[2]
  297.             record_attributes["secret"] = raw_record_attributes[3]
  298.             record_attributes["category"] = self.categories[ raw_record_attributes[4:8].uint ] # record category is a 4-bit number (not category ID)
  299.  
  300.             # length of real data
  301.             if record_num < self.record_count - 1:
  302.                 next_record_offset = ConstBitArray(bytes=self.raw_data[offset + 8:offset + 12]).uintbe # pointer to next data
  303.             else:
  304.                 next_record_offset = len(self.raw_data) # or pointer to EOF if this is the last record
  305.  
  306.             raw_record = self.raw_data[record_offset:next_record_offset] # get real data
  307.  
  308.             # app-specific raw record parsing (add your custom record formats here)
  309.             record = None
  310.             if self.creator == "todo":
  311.                 record = ToDoRecord(raw_record)
  312.             if self.creator == "memo":
  313.                 record = MemoRecord(raw_record)
  314.             if self.creator == "addr":
  315.                 record = AddressBookRecord(raw_record, self.labels) # label names may be customized
  316.             if self.creator == "date":
  317.                 record = DateBookRecord(raw_record)
  318.  
  319.             self.raw_records.append( { 'raw': raw_record, 'attributes': record_attributes, "record": record } )
  320.             offset += 8 # next record
  321.  
  322.     def __str__(self):
  323.         retval = "%s (%s, %s): %s records" % (self.dbname, self.creator, self.dbtype, self.record_count)
  324.         retval += ", categories: " + str(self.categories)
  325.         retval += ", attributes: " + str(self.attributes)
  326.         return retval
  327.  
  328.     def load(self, filename):
  329.         f = open(filename, 'r')
  330.         self.raw_data = f.read() # as PDB files are relatively small (<100KB) we don't care about RAM demands
  331.         f.close()
  332.         self._init_header()
  333.         self._init_records()
  334.  
  335.     def from_string(self, string):
  336.         self.raw_data = string
  337.         self._init_header()
  338.         self._init_records()
  339.  
  340. if __name__ == '__main__':
  341.  
  342.     import sys
  343.  
  344.     db = PalmDB()
  345.     db.load(sys.argv[1])
  346.  
  347.     print db
  348.  
  349.     for record in db.raw_records:
  350.         if record:
  351.             print record["record"]
  352.         else:
  353.             print record["raw_record"]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement