SHARE
TWEET

Reverse engineering Penultimate

alex_c_ccl Dec 7th, 2012 284 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2.  
  3. """
  4. For more details see: http://digitalinvestigation.wordpress.com/2012/12/05/reverse-engineering-evernote-penultimate-or-when-is-a-picture-not-a-picture/
  5. """
  6.  
  7. """
  8. Copyright (c) 2012, CCL Forensics
  9. All rights reserved.
  10.  
  11. Redistribution and use in source and binary forms, with or without
  12. modification, are permitted provided that the following conditions are met:
  13.    * Redistributions of source code must retain the above copyright
  14.      notice, this list of conditions and the following disclaimer.
  15.    * Redistributions in binary form must reproduce the above copyright
  16.      notice, this list of conditions and the following disclaimer in the
  17.      documentation and/or other materials provided with the distribution.
  18.    * Neither the name of the CCL Forensics nor the
  19.      names of its contributors may be used to endorse or promote products
  20.      derived from this software without specific prior written permission.
  21.  
  22. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  23. ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  24. WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  25. DISCLAIMED. IN NO EVENT SHALL CCL FORENSICS BE LIABLE FOR ANY
  26. DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  27. (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28. LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  29. ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  31. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. """
  33. import sys
  34. import struct
  35. import xml.etree.ElementTree as etree
  36. import os
  37. import datetime
  38.  
  39. # ccl_bplist can be obtained from http://code.google.com/p/ccl-bplist/
  40. import ccl_bplist
  41.  
  42. __version__ = "1.1"
  43. __description__ = "Parses the documents in the Private Documents folder of PenUltimate"
  44. __contact__ = "Alex Caithness"
  45. __outputtype__ = 1
  46. __outputext__ = None
  47.  
  48. IMAGE_DIR = "img"
  49. DATETIME_FORMAT = "%d/%m/%Y %H:%M:%S"
  50.  
  51. EPOCH = datetime.datetime(2001, 1, 1)
  52. def decode_cocoa_time(f):
  53.     return EPOCH + datetime.timedelta(seconds = f)
  54.  
  55. def unpack_float_data(data):
  56.     if len(data) % 4 != 0:
  57.         raise ValueError("floatData is not divisible by 4 (cannot contain a number of full 32-bit floats")
  58.     float_count = len(data) // 4
  59.     if float_count % 3 != 0:
  60.         raise ValueError("floatData does not contain a multiple of 3 floats")
  61.     floats = struct.unpack("<{0}f".format(float_count), data)
  62.     return tuple(floats[i:i+3] for i in range(0, len(floats), 3))
  63.  
  64. """
  65. work_input: tuple containing the path for the input folder (Private Documents)
  66. work_output: tuple containing the folder for the output
  67. """
  68. def __dowork__(work_input, work_output):
  69.     # Unpack input
  70.     if not isinstance(work_input, tuple) or len(work_input) < 1:
  71.         raise ValueError("work_input must be a tuple containing the path for the input database")
  72.    
  73.     if not isinstance(work_output, tuple) or len(work_output) < 1:
  74.         raise ValueError("work_output must be a tuple containing the path for the output")
  75.    
  76.     input_path = work_input[0]
  77.     output_path = work_output[0]
  78.  
  79.     if not os.path.exists(input_path):
  80.         print("ERROR: Cannot find input directory.")
  81.         exit()
  82.     if not os.path.exists(output_path):
  83.         print("ERROR: Cannot find output directory.")
  84.         exit()
  85.  
  86.     # set up output dirs
  87.     os.mkdir(os.path.join(output_path, IMAGE_DIR))
  88.  
  89.     # set up main report
  90.     out_tsv = open(os.path.join(output_path, "report.tsv"), "wt", encoding="utf-8")
  91.     out_tsv.write("\t".join(["book_id", "title", "created_timestamp", "modified_timestamp", "page_count", "change_count", "creating_device_id"]) + "\n")
  92.  
  93.     # Open the notebookList file and deserialize
  94.     notebooklist_path = os.path.join(input_path, "notebookList")
  95.     f = open(notebooklist_path, "br")
  96.     notebook_list_obj = ccl_bplist.deserialise_NsKeyedArchiver(ccl_bplist.load(f))
  97.     f.close()
  98.  
  99.     # Get list of notebooks
  100.     for notebook_details in notebook_list_obj["notebooks"]["NS.objects"]:
  101.  
  102.         title = notebook_details["title"]
  103.         book_id =  notebook_details["name"]
  104.         if isinstance(book_id, ccl_bplist.NsKeyedArchiverDictionary):
  105.             # Sometimes book_id is stored as an "NS.string" rather than straight in a string
  106.             book_id = book_id["NS.string"]
  107.         version = "{0}.{1}".format(notebook_details["versionMajor"], notebook_details["versionMinor"])
  108.         change_count = str(notebook_details["changeCount"])
  109.         page_count = len(notebook_details["pageNames"]["NS.objects"])
  110.         created_timestamp = decode_cocoa_time(notebook_details["created"]["NS.time"]).strftime(DATETIME_FORMAT)
  111.         modified_timestamp = decode_cocoa_time(notebook_details["modified"]["NS.time"]).strftime(DATETIME_FORMAT)
  112.         creating_device_id = notebook_details["creatingDeviceId"]
  113.  
  114.         print("Working on {0} ({1})".format(book_id, title))
  115.  
  116.         # Get the list of pages
  117.         pages_for_output = []
  118.         for page_details in  notebook_details["pageNames"]["NS.objects"]:
  119.             page_id = page_details["NS.string"]
  120.             print("\tPage ID: {0}".format(page_id))
  121.  
  122.             page_path = os.path.join(input_path, "notebooks", book_id, page_id)
  123.  
  124.             # Open file as bplist and deserialize
  125.             notebook_f = open(page_path, "rb")
  126.             document_obj = ccl_bplist.deserialise_NsKeyedArchiver(ccl_bplist.load(notebook_f))
  127.             notebook_f.close()
  128.            
  129.             report_svg = etree.Element("svg",
  130.                                         {"xmlns": "http://www.w3.org/2000/svg",
  131.                                         "version": "1.1",})
  132.            
  133.             image_width = 0
  134.             image_height = 0
  135.             # Iterate the layers
  136.             for layer_index, layer_obj in enumerate(document_obj["pageLayers"]["NS.objects"]):
  137.                 print("\t\tLayer {0}".format(layer_index))
  138.                
  139.                 # get layer size
  140.                 size_string = layer_obj["size"]
  141.                 layer_width, layer_height = (int(s) for s in size_string.strip("{}").split(","))
  142.  
  143.                 image_width = layer_width if image_width < layer_width else image_width
  144.                 image_height = layer_height if image_height < layer_height else image_height
  145.  
  146.                 # get colour for this layer
  147.                 red = int(layer_obj["color"]["red"] * 0xFF)
  148.                 green = int(layer_obj["color"]["green"] * 0xFF)
  149.                 blue = int(layer_obj["color"]["blue"] * 0xFF)
  150.  
  151.                 layer_color_code = "#{0:02x}{1:02x}{2:02x}".format(red,green,blue)
  152.  
  153.                 # Iterate the layer rectangles
  154.                 for layer_rect in layer_obj["layerRects"]["NS.objects"]:
  155.                     # Get the dimentions from the float data
  156.                     # the data is an array of floats grouped in threes:
  157.                     # (x, y, radius)
  158.  
  159.                     float_data = layer_rect["values"]["floatData"]
  160.                     co_ords = unpack_float_data(float_data)
  161.            
  162.                     for x,y,d in co_ords:
  163.                         etree.SubElement(report_svg, "circle",
  164.                                          {"cx" : str(x),
  165.                                           "cy" : str(layer_height - y),
  166.                                           "r" : str(d/4),
  167.                                           "fill" : layer_color_code })
  168.            
  169.             # set viewBox, height and width
  170.             report_svg.set("viewBox", "0 0 {0} {1}".format(image_width, image_height))
  171.             report_svg.set("height", str(image_height))
  172.             report_svg.set("width", str(image_width))
  173.  
  174.             # write out image
  175.             image_output_filename = "{0}_{1}.svg".format(book_id, page_id)
  176.             image_output_path = os.path.join(output_path, IMAGE_DIR, image_output_filename)
  177.             print("\t\tWriting to '{0}'".format(image_output_path))
  178.             with open(image_output_path, "wt", encoding="utf-8") as svg_out:
  179.                 svg_out.write("<!DOCTYPE svg>")
  180.                 svg_out.write(etree.tostring(report_svg, "utf-8", method="html").decode("utf-8"))
  181.             print("\t\tImage written")
  182.            
  183.         out_tsv.write("\t".join([book_id, title, created_timestamp, modified_timestamp, str(page_count), str(change_count), creating_device_id]) + "\n")
  184.  
  185.     out_tsv.close()
  186.  
  187.     print("Done!")
  188.  
  189. def __main__():
  190.     if len(sys.argv) < 3:
  191.         print()
  192.         print("Usage: {0} <Private Documents dir> <output dir>".format(os.path.basename(sys.argv[0])))
  193.         print()
  194.         sys.exit(1)
  195.     else:
  196.         work_input = (sys.argv[1],)
  197.         work_output = (sys.argv[2],)
  198.         __dowork__(work_input, work_output)
  199.  
  200. if __name__ == "__main__":
  201.     __main__()
RAW Paste Data
Top