Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: beenje on Aug 20th, 2012  |  syntax: Python  |  size: 11.84 KB  |  hits: 39  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. class AsterixParser(object):
  2.  
  3.     def __init__(self, categories_nb=None, start=None, end=None):
  4.         self.categories = category.get_asterix_categories(categories_nb)
  5.         self.start = start
  6.         self.end = end
  7.         self.cat_tables = {}
  8.  
  9.     def convert(self, filename, h5filename):
  10.         """Convert a final format file to HDF5 file"""
  11.         self.cat_tables = {}
  12.         with tables.openFile(h5filename, mode="w", title="ASTERIX Test file") as h5file:
  13.             group = h5file.createGroup("/", 'ASTERIX', 'ASTERIX records')
  14.             for number, category in self.categories.items():
  15.                 self.cat_tables[number] = h5file.createTable(group,
  16.                                                     category.name,
  17.                                                     category.create_class(),
  18.                                                     'ASTERIX %s' % category.name,
  19.                                                     tables.Filters(complevel=1))
  20.             self._read_final_format_file(filename)
  21.  
  22.     def _read_final_format_file(self, filename):
  23.         """Read all final format blocks of filename"""
  24.         with open(filename, 'rb') as f:
  25.             map = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
  26.             while True:
  27.                 try:
  28.                     self._read_final_format(map)
  29.                 except struct.error:
  30.                     break
  31.  
  32.     def _read_final_format(self, map):
  33.         """Read one final format block from map"""
  34.         # header: 8 bytes / data: length - 8 - 4 / padding: 4 bytes
  35.         length, board_nb, line_nb, timestamp = struct.unpack('>HBBI', map.read(8))
  36.         # Convert timestamp to seconds
  37.         timestamp = timestamp / 100.0
  38.         data_blocks = map.read(length - 12)
  39.         # Only parse blocks between start and end time
  40.         if ((self.start is None or timestamp >= self.start) and
  41.            (self.end is None or timestamp <= self.end)):
  42.             offset = 0
  43.             while offset < length - 12:
  44.                 offset += self._read_data_block(buffer(data_blocks, offset), timestamp)
  45.         # padding
  46.         map.read(4)
  47.  
  48.     def _read_data_block(self, data, timestamp):
  49.         """Read one data block from data"""
  50.         # header: 3 bytes / data: length - 3
  51.         category, length = struct.unpack_from('>BH', data)
  52.         # Check category
  53.         if category in self.cat_tables:
  54.             offset = 3
  55.             while offset < length - 3:
  56.                 offset += self._read_record(buffer(data, offset), timestamp, category)
  57.         return length
  58.  
  59.     def _get_uap(self, data, category):
  60.         """Return the UAP of the given category"""
  61.         uap_bit_set = self.categories[category].uap_bit_set
  62.         if uap_bit_set is not None:
  63.             # The UAP depends on a specific bit (ex: cat001)
  64.             # Check this bit to use the proper UAP
  65.             byte_nb, bit_nb = uap_bit_set
  66.             value, = struct.unpack('>B', data[byte_nb])
  67.             if (value >> bit_nb) & 0x01:
  68.                 uap_type = self.categories[category].uap_type_if_bit_set
  69.             else:
  70.                 uap_type = self.categories[category].uap_type_if_bit_not_set
  71.         else:
  72.             uap_type = ''
  73.         return self.categories[category].uaps[uap_type]
  74.  
  75.     def _read_record(self, data, timestamp, category):
  76.         """Read one record from data"""
  77.         fspec, offset = self._get_fspec(data)
  78.         uap = self._get_uap(buffer(data, offset), category)
  79.         row = self.cat_tables[category].row
  80.         row['ff_timestamp'] = timestamp
  81.         for index, presence in enumerate(fspec):
  82.             if presence:
  83.                 # UAP is a list with FRN - 1 as index
  84.                 data_item, data_item_format = uap[index]
  85.                 # Set the valid bool to True for this data item
  86.                 row['%s_valid' % data_item] = True
  87.                 subfields, size = self._read_data_item(buffer(data, offset), data_item_format)
  88.                 nv_dict = collections.defaultdict(list)
  89.                 for name, value in subfields:
  90.                     nv_dict[name].append(value)
  91.                 for name, values in nv_dict.items():
  92.                     try:
  93.                         if isinstance(row[name], np.ndarray):
  94.                             # We can only assign an array of identical size
  95.                             # to the table cell
  96.                             # So use an intermediate array
  97.                             array = np.copy(row[name])
  98.                             array[0:len(values)] = values
  99.                             row[name] = array
  100.                         else:
  101.                             # Check if len(values) == 1?
  102.                             row[name] = values[0]
  103.                     except ValueError:
  104.                         logging.error('Error while assigning %s to %s', str(values), name)
  105.                         if isinstance(row[name], np.ndarray) and len(values) > len(row[name]):
  106.                             logging.error('Try to increase max number of extents to %d', len(values))
  107.                         else:
  108.                             logging.error('%s is not a valid enumerated value', values)
  109.                         raise
  110.                     except KeyError:
  111.                         logging.error('Unknown key %s', name)
  112.                         raise
  113.                 offset += size
  114.         row.append()
  115.         return offset
  116.  
  117.     def _read_data_item(self, data, data_item_format):
  118.         """Return a tuple ([(name, value)], size) for all subfields of the data item"""
  119.         if data_item_format.type == 'Fixed':
  120.             # There is only one part
  121.             subfields, size = self._read_subfields(data, data_item_format.fields[0])
  122.         elif data_item_format.type == 'Variable':
  123.             # Read the first part
  124.             subfields, size = self._read_subfields(data, data_item_format.fields[0])
  125.             idx = 1
  126.             # FX is the last subfield
  127.             # subfields[-1] == ('FX', FX value)
  128.             while subfields[-1][1]:
  129.                 # Read the extents while FX == 1
  130.                 subfields_, size_ = self._read_subfields(buffer(data, size), data_item_format.fields[idx])
  131.                 subfields.extend(subfields_)
  132.                 size += size_
  133.                 if idx != -1 and idx < (len(data_item_format.fields) - 1):
  134.                     # Use the next extent format
  135.                     idx += 1
  136.                 else:
  137.                     # Use the last defined extent format
  138.                     idx = -1
  139.         elif data_item_format.type == 'Repetitive':
  140.             # Read the repetition indicator field
  141.             subfields, size = self._read_subfields(data, data_item_format.fields[0])
  142.             repetition = subfields[0][1]
  143.             while repetition:
  144.                 # Read the subfield "repetition" times
  145.                 subfields_, size_ = self._read_subfields(buffer(data, size), data_item_format.fields[1])
  146.                 subfields.extend(subfields_)
  147.                 size += size_
  148.                 repetition -= 1
  149.         elif data_item_format.type == 'Explicit':
  150.             # Read the length indicator field
  151.             subfields, size = self._read_subfields(data, data_item_format.fields[0])
  152.             length = subfields[0][1]
  153.             # Try to decode other subfields?
  154.             size = length
  155.         elif data_item_format.type == 'Compound':
  156.             # data_item_format.fields is a list of DataItemFormat elements
  157.             # The primary subfield determines the presence or absence of the
  158.             # subsequent data subfields
  159.             # Should we return primary_subfields?
  160.             # To remove, we need to remove it from get_data_item_cols or
  161.             # _get_format
  162.             primary_item_format = data_item_format.fields[0]
  163.             primary_subfields, size = self._read_data_item(data, primary_item_format)
  164.             subfields = primary_subfields
  165.             data_subfields_idx, offset = self._get_fspec(data)
  166.             # To debug
  167.             if size != offset:
  168.                 raise ValueError('Invalid size found when reading Compound primary field')
  169.             for index in data_subfields_idx:
  170.                 data_item_format = data_item_format.fields[index + 1]
  171.                 data_subfields, size_ = self._read_data_item(buffer(data, size), data_item_format)
  172.                 size += size_
  173.                 subfields.extend(data_subfields)
  174.         else:
  175.             raise ValueError('Unknown data item format %s' % data_item_format.type)
  176.         return (subfields, size)
  177.  
  178.     def _read_subfields(self, data, data_item_format_field):
  179.         """Return a tuple ([(name, value)], size) for the given subfields
  180.  
  181.        subfields is a list of namedtuple SubFieldFormat
  182.        struct_format is a Struct object initialized with the format needed
  183.        to read all subfields
  184.        """
  185.         subfields, struct_format = data_item_format_field
  186.         sizes = [subfield.size for subfield in subfields]
  187.         bits_size = sum(sizes)
  188.         bytes_size = bits_size / 8
  189.         types = [subfield.type for subfield in subfields]
  190.         if not frozenset(('bits', 'sbits', 'octal')).isdisjoint(frozenset(types)):
  191.             # Get the value
  192.             struct_values = struct_format.unpack_from(data)
  193.             if len(struct_values) == 1:
  194.                 # only one uint read
  195.                 value = struct_values[0]
  196.             else:
  197.                 # Reconstruct value
  198.                 value = 0
  199.                 # Reverse the list to get the LSB in idx == 0
  200.                 for idx, v in enumerate(reversed(struct_values)):
  201.                     value += v << (idx * 8)
  202.             # and split it
  203.             values = self._unpack_bitfields(value, data, bits_size, sizes, types)
  204.         else:
  205.             values = list(struct_format.unpack_from(data))
  206.             for idx, sf_type in enumerate(types):
  207.                 if sf_type == '6bitschar':
  208.                     values[idx] = base64.b64encode(values[idx])
  209.         name_values = [(subfield.name, values[idx] * subfield.scale)
  210.                 for idx, subfield in enumerate(subfields)]
  211.         return (name_values, bytes_size)
  212.  
  213.     def _get_fspec(self, data):
  214.         """Return a tuple (FSPEC, offset)
  215.  
  216.        FSPEC is a list of FRN (FX field is removed)
  217.        offset is the number of bytes read in data
  218.        """
  219.         fspec = []
  220.         offset = 0
  221.         fx = 1
  222.         while fx:
  223.             value, = struct.unpack('>B', data[offset])
  224.             # Add the 7 first bits value to the list
  225.             fspec.extend([(value >> i) & 0x01 for i in range(7, 0, -1)])
  226.             # Get fx (LSB)
  227.             fx = value & 0x01
  228.             offset += 1
  229.         return (fspec, offset)
  230.  
  231.     def _unpack_bitfields(self, value, data, size, bf_sizes, bf_types):
  232.         offset = 0
  233.         values = []
  234.         for (bf_size, bf_type) in zip(bf_sizes, bf_types):
  235.             if bf_type == '6bitschar':
  236.                 # TO check data index
  237.                 #tokens = '>%ds' % (bf_size / 8)
  238.                 #bitfield_val = base64.b64encode(tokens, struct.unpack_from(data[offset:]))
  239.                 bitfield_val = base64.b64encode(data[offset:offset + bf_size])
  240.             else:
  241.                 mask = pow(2, bf_size) - 1
  242.                 bitfield_val = (value >> (size - offset - bf_size)) & mask
  243.                 if bf_type == 'octal':
  244.                     # Convert to octal and format on the right number of
  245.                     # characters
  246.                     nb_char = bf_size / 3
  247.                     bitfield_val = oct(bitfield_val)[-nb_char:].zfill(nb_char)
  248.                 elif bf_type == 'sbits':
  249.                     # Convert to signed value
  250.                     if bitfield_val & pow(2, bf_size - 1) != 0:
  251.                         bitfield_val = bitfield_val - pow(2, bf_size)
  252.             offset += bf_size
  253.             values.append(bitfield_val)
  254.         return values