# [Pastebin page residue captured with the paste; not part of the script]
# Advertisement
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
- #!/usr/bin/env python3
- """
- For more details see: http://digitalinvestigation.wordpress.com/2012/12/05/reverse-engineering-evernote-penultimate-or-when-is-a-picture-not-a-picture/
- """
- """
- Copyright (c) 2012, CCL Forensics
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the CCL Forensics nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL CCL FORENSICS BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- """
- import sys
- import struct
- import xml.etree.ElementTree as etree
- import os
- import datetime
- # ccl_bplist can be obtained from http://code.google.com/p/ccl-bplist/
- import ccl_bplist
# Module metadata.
__version__ = "1.1"
__description__ = "Parses the documents in the Private Documents folder of PenUltimate"
__contact__ = "Alex Caithness"
# NOTE(review): __outputtype__ / __outputext__ are not read anywhere in this
# file; presumably they are consumed by a host framework -- confirm.
__outputtype__ = 1
__outputext__ = None

# Name of the sub-folder (inside the output folder) that receives the SVGs.
IMAGE_DIR = "img"
# strftime pattern used for the timestamps written to the report.
DATETIME_FORMAT = "%d/%m/%Y %H:%M:%S"
# Reference instant that decode_cocoa_time() offsets from.
EPOCH = datetime.datetime(year=2001, month=1, day=1)
def decode_cocoa_time(f):
    """Return the datetime lying *f* seconds after the module-level ``EPOCH``.

    *f* is passed straight to ``datetime.timedelta(seconds=...)``, so it may
    be fractional or negative.
    """
    offset = datetime.timedelta(seconds=f)
    return EPOCH + offset
def unpack_float_data(data):
    """Decode a buffer of little-endian 32-bit floats into 3-float groups.

    Args:
        data: bytes-like object whose length is a multiple of 12
            (three 4-byte floats per group).

    Returns:
        A tuple of 3-tuples of floats, in the order they appear in *data*.
        Empty input yields an empty tuple.

    Raises:
        ValueError: if *data* does not hold a whole number of floats, or if
            the number of floats is not a multiple of three.
    """
    if len(data) % 4 != 0:
        raise ValueError("floatData is not divisible by 4 (cannot contain a number of full 32-bit floats)")
    float_count = len(data) // 4
    if float_count % 3 != 0:
        raise ValueError("floatData does not contain a multiple of 3 floats")
    # The length has been validated above, so iter_unpack cannot raise
    # struct.error for a partial trailing record.
    return tuple(struct.iter_unpack("<3f", data))
- """
- work_input: tuple containing the path for the input folder (Private Documents)
- work_output: tuple containing the folder for the output
- """
def __dowork__(work_input, work_output):
    """Export every PenUltimate page as SVG and summarise notebooks in a TSV.

    Reads ``notebookList`` and the page files under
    ``notebooks/<book_id>/<page_id>`` in the input folder. For each page an
    ``<book_id>_<page_id>.svg`` file is written to the ``IMAGE_DIR``
    sub-folder of the output folder, and one row per notebook is written to
    ``report.tsv`` in the output folder.

    Args:
        work_input: tuple whose first item is the path of the input folder
            (Private Documents).
        work_output: tuple whose first item is the path of the output folder.

    Raises:
        ValueError: if either argument is not a non-empty tuple.
        SystemExit: with status 1 if the input or output path does not exist.
    """
    # Unpack input
    if not isinstance(work_input, tuple) or len(work_input) < 1:
        raise ValueError("work_input must be a tuple containing the path for the input database")
    if not isinstance(work_output, tuple) or len(work_output) < 1:
        raise ValueError("work_output must be a tuple containing the path for the output")
    input_path = work_input[0]
    output_path = work_output[0]

    # sys.exit (not the interactive-only ``exit`` helper) with a non-zero
    # status so that callers and shells can see the run failed.
    if not os.path.exists(input_path):
        print("ERROR: Cannot find input directory.")
        sys.exit(1)
    if not os.path.exists(output_path):
        print("ERROR: Cannot find output directory.")
        sys.exit(1)

    # set up output dirs; exist_ok lets the tool be re-run on the same folder
    os.makedirs(os.path.join(output_path, IMAGE_DIR), exist_ok=True)

    # set up main report
    report_path = os.path.join(output_path, "report.tsv")
    with open(report_path, "wt", encoding="utf-8") as out_tsv:
        out_tsv.write("\t".join(["book_id", "title", "created_timestamp", "modified_timestamp", "page_count", "change_count", "creating_device_id"]) + "\n")

        # Open the notebookList file and deserialize
        notebooklist_path = os.path.join(input_path, "notebookList")
        with open(notebooklist_path, "rb") as f:
            notebook_list_obj = ccl_bplist.deserialise_NsKeyedArchiver(ccl_bplist.load(f))

        # Get list of notebooks
        for notebook_details in notebook_list_obj["notebooks"]["NS.objects"]:
            title = notebook_details["title"]
            book_id = notebook_details["name"]
            if isinstance(book_id, ccl_bplist.NsKeyedArchiverDictionary):
                # Sometimes book_id is stored as an "NS.string" rather than straight in a string
                book_id = book_id["NS.string"]
            change_count = str(notebook_details["changeCount"])
            page_names = notebook_details["pageNames"]["NS.objects"]
            page_count = len(page_names)
            created_timestamp = decode_cocoa_time(notebook_details["created"]["NS.time"]).strftime(DATETIME_FORMAT)
            modified_timestamp = decode_cocoa_time(notebook_details["modified"]["NS.time"]).strftime(DATETIME_FORMAT)
            creating_device_id = notebook_details["creatingDeviceId"]
            print("Working on {0} ({1})".format(book_id, title))

            # Render each page of the notebook
            for page_details in page_names:
                page_id = page_details["NS.string"]
                print("\tPage ID: {0}".format(page_id))
                page_path = os.path.join(input_path, "notebooks", book_id, page_id)

                # Open file as bplist and deserialize
                with open(page_path, "rb") as notebook_f:
                    document_obj = ccl_bplist.deserialise_NsKeyedArchiver(ccl_bplist.load(notebook_f))

                report_svg = _render_page_svg(document_obj)

                # write out image
                image_output_filename = "{0}_{1}.svg".format(book_id, page_id)
                image_output_path = os.path.join(output_path, IMAGE_DIR, image_output_filename)
                print("\t\tWriting to '{0}'".format(image_output_path))
                with open(image_output_path, "wt", encoding="utf-8") as svg_out:
                    svg_out.write("<!DOCTYPE svg>")
                    svg_out.write(etree.tostring(report_svg, "utf-8", method="html").decode("utf-8"))
                print("\t\tImage written")

            out_tsv.write("\t".join([book_id, title, created_timestamp, modified_timestamp, str(page_count), change_count, creating_device_id]) + "\n")

    print("Done!")


def _render_page_svg(document_obj):
    """Build an ``svg`` element holding one circle per point of every layer.

    The canvas size (``viewBox``, ``height``, ``width``) is the largest layer
    width and the largest layer height found on the page.
    """
    report_svg = etree.Element("svg",
                               {"xmlns": "http://www.w3.org/2000/svg",
                                "version": "1.1"})
    image_width = 0
    image_height = 0

    # Iterate the layers
    for layer_index, layer_obj in enumerate(document_obj["pageLayers"]["NS.objects"]):
        print("\t\tLayer {0}".format(layer_index))

        # The size is a brace-wrapped, comma-separated pair of integers.
        size_string = layer_obj["size"]
        layer_width, layer_height = (int(s) for s in size_string.strip("{}").split(","))
        image_width = max(image_width, layer_width)
        image_height = max(image_height, layer_height)

        # Colour components are scaled by 0xFF and formatted as #rrggbb.
        color = layer_obj["color"]
        red, green, blue = (int(color[channel] * 0xFF) for channel in ("red", "green", "blue"))
        layer_color_code = "#{0:02x}{1:02x}{2:02x}".format(red, green, blue)

        # Iterate the layer rectangles
        for layer_rect in layer_obj["layerRects"]["NS.objects"]:
            # The float data is an array of floats grouped in threes.
            # NOTE(review): the original comment calls the third value a
            # radius, yet a quarter of it is drawn -- confirm the unit.
            float_data = layer_rect["values"]["floatData"]
            for x, y, d in unpack_float_data(float_data):
                etree.SubElement(report_svg, "circle",
                                 {"cx": str(x),
                                  # y is subtracted from the layer height,
                                  # i.e. the vertical axis is flipped
                                  "cy": str(layer_height - y),
                                  "r": str(d / 4),
                                  "fill": layer_color_code})

    # set viewBox, height and width
    report_svg.set("viewBox", "0 0 {0} {1}".format(image_width, image_height))
    report_svg.set("height", str(image_height))
    report_svg.set("width", str(image_width))
    return report_svg
def __main__():
    """Command-line entry point.

    Expects the Private Documents folder and the output folder as the first
    two command-line arguments and hands them to ``__dowork__`` as 1-tuples.
    With fewer arguments it prints a usage line and exits with status 1.
    """
    argv = sys.argv
    if len(argv) >= 3:
        __dowork__((argv[1],), (argv[2],))
        return

    script_name = os.path.basename(argv[0])
    print()
    print("Usage: {0} <Private Documents dir> <output dir>".format(script_name))
    print()
    sys.exit(1)


if __name__ == "__main__":
    __main__()
# [Pastebin page residue captured with the paste; not part of the script]
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement