Advertisement
opexxx

ntfsUSN.py

May 14th, 2014
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 16.77 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # Copyright 2013 The Plaso Project Authors.
  4. # Please see the AUTHORS file for details on individual authors.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. #    http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17.  
  18. """Parser for the NTFS USN change journal.
  19.  
  20. More information about the format can be found here:
  21.  
  22. http://msdn.microsoft.com/en-us/library/windows/desktop/aa365720%28v=vs.85%29.aspx
  25. """
  26.  
  27. import struct
  28. from plaso.lib import event
  29. from plaso.lib import eventdata
  30. from plaso.lib import parser
  31. from plaso.lib import errors
  32. from plaso.lib import timelib
  33. from functools import partial
  34.  
  35. class UsnJrnlEvent(event.TimestampEvent):
  36.   """ USN change journal record / event """
  37.  
  38.   DATA_TYPE = 'windows:metadata:usnjrnl'
  39.  
  40.   def __init__(
  41.       self, target_file, majorvers, minorvers, mft, parentmft, usn, reasons,
  42.       attributes, securityid, timestamp, timestamptype
  43.       ) :
  44.     super(UsnJrnlEvent, self).__init__(timestamp, timestamptype,
  45.       'windows:metadata:usnjrnl')
  46.     self.data_type     = 'windows:metadata:usnjrnl'
  47.     self.target_file   = target_file
  48.     self.majorvers     = majorvers
  49.     self.minorvers     = minorvers
  50.     self.mft           = mft
  51.     self.parentmft     = parentmft
  52.     self.usn           = usn
  53.     self.reasons       = reasons
  54.     self.fileattrib    = attributes
  55.     self.security      = securityid
  56.     self.timestamp     = timestamp
  57.     self.timestamptype = timestamptype
  58.  
  59. class UsnJrnlParser(parser.PlasoParser):
  60.   """ Parses the NTFS change journal.
  61.  
  62.      The NTFS update sequence number (USN) change journal resides at
  63.      /$Extend/$UsnJrnl. It is present in NTFS since Windows XP. The most
  64.      relevant data is contained in the named alternate data stream $J, thus
  65.      this parsers looks for the file /$Extend/$UsnJrnl:$J.
  66.      Information about the journal itself will not be reconstructed.
  67.  
  68.      More details on the USN change journal can be found here:
  69.  
  70. http://msdn.microsoft.com/en-us/library/windows/desktop/aa365720%28v=vs.85%29.aspx
  71.  
  72.      Two versions of USN change journal records exist. Major version 2 was
  73.      used since Windows XP / Server 2003, Major version 3 was used since
  74.      Windows 8 / Windows Server 2012.
  75.      All records should be aligned at 64-bit boundaries.
  76.  
  77.      The two structures can be defined as follows:
  78.  
  79.      Major version 2: (Size in Byte)
  80.      Offset Type    Size  Record
  81.      0x00   DWORD     4    RecordLength
  82.      0x04   WORD      2    MajorVersion;
  83.      0x06   WORD      2    MinorVersion;
  84.      0x08   DWORDLONG 8    FileReferenceNumber;
  85.      0x10   DWORDLONG 8    ParentFileReferenceNumber;
  86.      0x18   USN       8    Usn;
  87.      0x20   LARGE_INTEGER 8 TimeStamp;
  88.      0x28   DWORD     4    Reason;
  89.      0x2B   DWORD     4    SourceInfo;
  90.      0x30   DWORD     4    SecurityId;
  91.      0x34   DWORD     4    FileAttributes;
  92.      0x38   WORD      2    FileNameLength;
  93.      0x3A   WORD      2    FileNameOffset;
  94.      0x3C   WCHAR     *    FileName[1];
  95.  
  96.      Major version 3: (Size in Byte)
  97.      Offset Type    Size  Record
  98.      0x00   DWORD     4    RecordLength
  99.      0x04   WORD      2    MajorVersion;
  100.      0x06   WORD      2    MinorVersion;
  101.      0x08   BYTE     16    FileReferenceNumber;
  102.      0x18   BYTE     16    ParentFileReferenceNumber;
  103.      0x28   USN       8    Usn;
  104.      0x30   LARGE_INTEGER 8 TimeStamp;
  105.      0x38   DWORD     4    Reason;
  106.      0x3B   DWORD     4    SourceInfo;
  107.      0x40   DWORD     4    SecurityId;
  108.      0x44   DWORD     4    FileAttributes;
  109.      0x48   WORD      2    FileNameLength;
  110.      0x4A   WORD      2    FileNameOffset;
  111.      0x4C   WCHAR     *    FileName[1];
  112.  
  113.      The major difference between the two versions is the size and type of
  114.      the two FileReferenceNumbers. With the minor difference of the version
  115.      number, the official reasonstrings are identical between the versions
  116.      as well. SourceInfo and File-Attributes are version indepent.
  117.      """
  118.  
  119.   REASONS = {
  120.     0x00000001 : 'The data in the file or directory is overwritten.',
  121.     0x00000002 : 'The file or directory is extended (added to).',
  122.     0x00000004 : 'The file or directory is truncated.',
  123.     0x00000010 : 'The data in one or more named data streams for a file is '
  124.                  'overwritten.',
  125.     0x00000020 : 'The one or more named data streams for a file are extended '
  126.                   '(added to).',
  127.     0x00000040 : 'The one or more named data streams for a file is truncated.',
  128.     0x00000100 : 'The file or directory is created for the first time.',
  129.     0x00000200 : 'The file or directory is deleted.',
  130.     0x00000400 : 'The user made a change to the extended attributes of a file'
  131.                  'or directory. These NTFS file system attributes are not '
  132.                  'accessible to Windows-based applications.',
  133.     0x00000800 : 'A change is made in the access rights to a file or '
  134.                  'directory.',
  135.     0x00001000 : 'The file or directory is renamed, and the file name in the '
  136.                  'USN_RECORD structure is the previous name.',
  137.     0x00002000 : 'A file or directory is renamed, and the file name in the '
  138.                  'USN_RECORD_V2 structure is the new name.',
  139.     0x00004000 : 'A user changes the FILE_ATTRIBUTE_NOT_CONTENT_INDEXED '
  140.                  'attribute. That is, the user changes the file or directory '
  141.                  'from one where content can be indexed to one where content '
  142.                  'cannot be indexed, or vice versa. Content indexing permits '
  143.                  'rapid searching of data by building a database of selected '
  144.                  'content.',
  145.     0x00008000 : 'A user has either changed one or more file or directory '
  146.                  'attributes (for example, the read-only, hidden, system, '
  147.                  'archive, or sparse attribute), or one or more time '
  148.                  'stamps.',
  149.     0x00010000 : 'An NTFS file system hard link is added to or removed from '
  150.                  'the file or directory. An NTFS file system hard link, '
  151.                  'similar to a POSIX hard link, is one of several directory '
  152.                  'entries that see the same file or directory.',
  153.     0x00020000 : 'The compression state of the file or directory is changed '
  154.                  'drom or to compressed.',
  155.     0x00040000 : 'The file or directory is encrypted or decrypted.',
  156.     0x00080000 : 'The object identifier of a file or directory is changed.',
  157.     0x00100000 : 'The reparse point that is contained in a file or directory '
  158.                  'is changed, or a reparse point is added to or deleted from a '
  159.                  'file or directory.',
  160.     0x00200000 : 'A named stream is added to or removed from a file, or a named'
  161.     'stream is renamed.',
  162.     0x80000000 : 'The file or directory is closed.'
  163.     }
  164.  
  165.   SOURCES = {
  166.     0x00000002 : 'USN_SOURCE_AUXILIARY_DATA',
  167.     0x00000001 : 'USN_SOURCE_DATA_MANAGEMENT',
  168.     0x00000004 : 'USN_SOURCE_REPLICATION_MANAGEMENT'
  169.   }
  170.   # See http://msdn.microsoft.com/en-us/library/ee332330%28VS.85%29.aspx
  171.  
  172.   ATTRIBUTES = {
  173.       1:'FILE_ATTRIBUTE_READONLY',
  174.       2:'FILE_ATTRIBUTE_HIDDEN',
  175.       4:'FILE_ATTRIBUTE_SYSTEM',
  176.       16:'FILE_ATTRIBUTE_DIRECTORY',
  177.       32:'FILE_ATTRIBUTE_ARCHIVE',
  178.       64:'FILE_ATTRIBUTE_DEVICE',
  179.       128:'FILE_ATTRIBUTE_NORMAL',
  180.       256:'FILE_ATTRIBUTE_TEMPORARY',
  181.       512:'FILE_ATTRIBUTE_SPARSE_FILE',
  182.       1024:'FILE_ATTRIBUTE_REPARSE_POINT',
  183.       2048:'FILE_ATTRIBUTE_COMPRESSED',
  184.       4096:'FILE_ATTRIBUTE_OFFLINE',
  185.       8192:'FILE_ATTRIBUTE_NOT_CONTENT_INDEXED',
  186.       16384:'FILE_ATTRIBUTE_ENCRYPTED',
  187.       65536:'FILE_ATTRIBUTE_VIRTUAL'
  188.     }
  189.  
  190.   def Parse(self, file_object):
  191.     """ Verifies the requested file as change journal and returns the parsed
  192.    events.
  193.  
  194.     As the journal has no magic bytes or unique recognizable byte patters,
  195.        verification is done by checking the filename for $UsnJrnl and $J.
  196.  
  197.     Args:
  198.     file_object: A filehandle/file-like-object that is seekable to the
  199.        file needed to be checked.
  200.     Raises:
  201.     UnableToParseFile when the file has the wrong name or cannot be
  202.        parsed """
  203.     # Check the given filename ( *$usnjrnl*$J ) at least basically
  204.     try:
  205.       name = file_object.name.lower()
  206.       if not name.endswith(u'$j') or not u'$usnjrnl' in name:
  207.         raise errors.UnableToParseFile(u'[%s] file %s not named *$UsnJrnl*$J'
  208.                                 % (self.parser_name, file_object.name))
  209.     except UnicodeEncodeError as error:
  210.       raise errors.UnableToParseFile(u'[%s] unable to read name of file %s: %s'
  211.                                 % (self.parser_name, file_object.name,error))
  212.     res = self.Scan(file_object)
  213.     return res
  214.  
  215.   def Scan(self, file_object):
  216.     """ Parses and returns change journal records from the given file.
  217.  
  218.     Args:
  219.     file_object: A filehandle/file-like-object that is seekable to the
  220.        file needed to be checked.
  221.     Raises:
  222.     UnableToParseFile when the file has the wrong name or cannot be
  223.        parsed """
  224.     try:
  225.       offset = self.ReadSparseOffset(file_object)
  226.       resultset = self.OffsetParse(file_object, offset)
  227.       return resultset
  228.     except Exception as error:
  229.       raise errors.UnableToParseFile(u'[%s] Exception with scan %s: %s'
  230.                                      % (self.parser_name, file_object,error))
  231.  
  232.   def ReadSparseOffset(self, file_object):
  233.     """Reads file_object and determines the offset of the first non-zero
  234.       byte.
  235.  
  236.    Reads the file_object in 1MB chunks, reading from the left and
  237.       reading till the first non-zero byte is reached. This is determined to
  238.       be the offset and returned.
  239.  
  240.    Args:
  241.      file_object: A filehandle/file-like-object that is seekable to the
  242.      file needed to be checked."""
  243.  
  244.     # $UsnJrnl may contain lots of leading zeros. Try to skip them
  245.     # fast by reading 1MB chunks and stripping zeros
  246.     chunksize = 1024*1024*1024
  247.     l = 0
  248.     for chunk in iter(partial(file_object.read, chunksize), ''):
  249.       chunk = chunk.lstrip('\x00')
  250.       l     = len(chunk)
  251.       if l > 0:
  252.         break
  253.     # The offset is the current file position minus the rest of the
  254.     # current chunk
  255.     offset = file_object.tell() - l
  256.     return offset
  257.  
  258.   def OffsetParse(self, file_object, offset):
  259.     """ Parses filesystem journal from file_object, omitting 'offset' bytes.
  260.  
  261.        $UsnJrnl:$J often contains a large number of leading zeroes, the
  262.        offset can be specified in order to skip over them. """
  263.     try:
  264.       file_object.seek(offset)
  265.     except Exception as error:
  266.       raise errors.UnableToParseFile(u'[%s] unable to seek offset %i in file'
  267.         '%s: %s' % (offset, self.parser_name, file_object.name,error))
  268.     # seek till non-sparse
  269.     # parse
  270.     while ( True ) :
  271.       try:
  272.         entry = self.readEntry(file_object)
  273.         yield UsnJrnlEvent(entry[0], entry[1], entry[2], entry[3], entry[4],
  274.                        entry[5], entry[6], entry[7], entry[8], entry[9],
  275.                        entry[10])
  276.       except EndOfFileError as error:
  277.         break
  278.       except SparseError as error:
  279.         # Try to jump over sparse parts
  280.                 # preoff = file_object.tell()
  281.         offset = self.ReadSparseOffset(file_object)
  282.         # We only want to skip to 64-bit boundaries
  283.         offset = offset - (offset % 8)
  284.         file_object.seek(offset)
  285.  
  286.   def readEntry(self, file_object):
  287.     currentOffset = file_object.tell()
  288.     # Read record size and version numbers only
  289.     data = file_object.read(0x08)
  290.     if len(data) < 0x08 :
  291.       # end of file
  292.       raise EndOfFileError("Reached end of file at offset %i" % currentOffset)
  293.     try:
  294.       formatstring = 'IHH'
  295.       sdata = struct.unpack_from(formatstring, data)
  296.     except struct.error as error:
  297.       raise error
  298.     recordsize = sdata[0]
  299.     if recordsize == 0:
  300.       file_object.seek(currentOffset+4)
  301.       raise SparseError('Reached 0 Byte in recordsize at offset %i - sparse'
  302.                      ' block?' % currentOffset)
  303.     majorversion = sdata[1]
  304.     minorversion = sdata[2]
  305.  
  306.     #Depending on the version, we need to read 52 or 68 bytes with slight
  307.     #differences in the formatstring. Everything else is identical.
  308.  
  309.     if majorversion == 2 :
  310.       rsize = 0x34
  311.       formatstring = 'QQQqIIIIHH'
  312.     else :
  313.       rsize = 0x44
  314.       formatstring = '16B16BQqIIIIHH'
  315.  
  316.     # Read and parse the rest of the non-variable record
  317.     try:
  318.       data = file_object.read(rsize)
  319.       if len(data) < rsize :
  320.         raise EndOfFileError("Unexpectetly reached end of file at offset %i" %
  321.                          currentOffset)
  322.       sdata = struct.unpack_from(formatstring, data)
  323.     except struct.error as error:
  324.       raise error
  325.     mftref = sdata[0]
  326.     mftparentref = sdata[1]
  327.     usn  = sdata[2]
  328.     timestamp = sdata[3]
  329.     reasonID = sdata[4]
  330.     sourceID = sdata[5]
  331.     securityID = sdata[6]
  332.     fileattrib = sdata[7]
  333.     sizefilename = sdata[8]
  334.     # provided for completeness
  335.         # pylint: disable-msg=unused-variable
  336.     offset = sdata[9]
  337.  
  338.     # And now read the filename
  339.     try:
  340.       data = file_object.read(sizefilename)
  341.       if len(data) < sizefilename :
  342.          raise EndOfFileError("Unexpectetly reached end of file at offset %i" % currentOffset)
  343. #      formatstring2 = '%is' % (sizefilename)
  344. #      sdata = struct.unpack_from(formatstring2, data)
  345. #      filename = sdata[0].decode('utf-16')
  346.        filename = data.decode('utf-16')
  347.        padding = recordsize - (rsize+8) - sizefilename
  348.        # Jump over Padding
  349.        data = file_object.read(padding)
  350.      except struct.error as error:
  351.        raise error
  352.      # Reasons, Sources and file attributes are (in essence) bit-arrays, so
  353.      # they can be read by checking which attribute-bits are set.
  354.      reasons = u''
  355.      reasonlist = self.REASONS.keys()
  356.      for r in reasonlist:
  357.        if r & reasonID > 0:
  358.         if len(reasons) > 0:
  359.           reasons = reasons + ' ' + self.REASONS[r]
  360.         else:
  361.           reasons = self.REASONS[r]
  362.  
  363.     sources = u''
  364.     sourcelist = self.SOURCES.keys()
  365.     for s in sourcelist:
  366.       if s & sourceID > 0:
  367.         if len(sources) > 0:
  368.           sources = self.SOURCES[s]
  369.         else:
  370.           sources = sources + ', ' + self.SOURCES[s]
  371.  
  372.     attributes = u''
  373.     attributelist = self.ATTRIBUTES.keys()
  374.     for a in attributelist:
  375.       if a & fileattrib > 0:
  376.         if len(attributes) > 0:
  377.           attributes = attributes + ', ' + self.ATTRIBUTES[a]
  378.         else:
  379.           attributes = self.ATTRIBUTES[a]
  380.  
  381.     # The timestamp is given in the NTFS filetime format
  382.     timestamp = timelib.Timestamp.FromFiletime(timestamp)
  383.  
  384.     # Event needs a type, so try to specify which reason maps to which type
  385.     MODIFICATION_TIME_LIST = [ 0x01, 0x02, 0x04, 0x10, 0x20, 0x40, 0x10000,
  386.                             0x40000, 0x200000 ]
  387.     CREATION_TIME_LIST = [ 0x100, 0x2000 ]
  388.     DELETED_TIME_LIST = [ 0x200, 0x1000 ]
  389.     CHANGE_TIME_LIST = [ 0x400, 0x800, 0x4000, 0x8000, 0x20000, 0x80000,
  390.                       0x100000 ]
  391.         # Provided for completeness
  392.     # pylint: disable-msg=unused-variable
  393.     ACCESS_TIME_LIST = [ 0x80000000 ]
  394.  
  395.     timestamptype = eventdata.EventTimestamp.ACCESS_TIME
  396.     for m in MODIFICATION_TIME_LIST:
  397.       if reasonID & m > 0:
  398.         timestamptype = eventdata.EventTimestamp.MODIFICATION_TIME
  399.         break
  400.     for m in CHANGE_TIME_LIST:
  401.       if reasonID & m > 0:
  402.         timestamptype = eventdata.EventTimestamp.CHANGE_TIME
  403.         break
  404.     for m in CREATION_TIME_LIST:
  405.       if reasonID & m > 0:
  406.         timestamptype = eventdata.EventTimestamp.CREATION_TIME
  407.         break
  408.     for m in DELETED_TIME_LIST:
  409.       if reasonID & m > 0:
  410.         timestamptype = eventdata.EventTimestamp.DELETED_TIME
  411.         break
  412.  
  413.     return [filename, majorversion, minorversion, mftref,
  414.                      mftparentref, usn, reasons, attributes, securityID,
  415.                      timestamp, timestamptype]
  416.  
  417. class EndOfFileError(Exception):
  418.   def __init__(self, value):
  419.     Exception.__init__(self, value)
  420.     self.value = value
  421.  
  422.   def __str__(self):
  423.     return repr(self.value)
  424.  
  425. class SparseError(Exception):
  426.   def __init__(self, value):
  427.     Exception.__init__(self, value)
  428.     self.value = value
  429.  
  430.   def __str__(self):
  431.     return repr(self.value)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement