Pure Python
-----------
In [2]: %timeit asterix.convert('data/test', (1, 2, 30))
1 loops, best of 3: 10.1 s per loop
In [3]: %prun asterix.convert('data/test', (1, 2, 30))
3672253 function calls (3672245 primitive calls) in 14.959 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
15947 5.259 0.000 14.268 0.001 asterix.py:94(_read_record)
138515 2.209 0.000 4.007 0.000 asterix.py:197(_read_subfields)
300860 1.798 0.000 1.798 0.000 enum.py:286(__call__)
65267 0.863 0.000 1.358 0.000 asterix.py:250(_unpack_bitfields)
255458 0.821 0.000 0.821 0.000 {numpy.core.multiarray.array}
133432 0.573 0.000 4.587 0.000 asterix.py:136(_read_data_item)
427318 0.464 0.000 0.469 0.000 {isinstance}
724676 0.448 0.000 0.448 0.000 {method 'append' of 'list' objects}
223598 0.424 0.000 1.148 0.000 numeric.py:167(asarray)
15947 0.252 0.000 0.369 0.000 asterix.py:232(_get_fspec)
304564 0.183 0.000 0.183 0.000 {pow}
138515 0.178 0.000 0.178 0.000 {method 'unpack_from' of 'Struct' objects}
138731 0.129 0.000 0.129 0.000 {sum}
134033 0.128 0.000 0.128 0.000 {method 'items' of 'dict' objects}
65267 0.119 0.000 0.119 0.000 {zip}
13672 0.106 0.000 14.398 0.001 asterix.py:67(_read_data_block)
138731 0.095 0.000 0.095 0.000 {method 'isdisjoint' of 'frozenset' objects}
3 0.065 0.022 0.065 0.022 {method '_g_flush' of 'tables.hdf5Extension.Leaf' objects}
103994 0.061 0.000 0.061 0.000 {len}
30375 0.057 0.000 0.145 0.000 function_base.py:781(copy)
2438 0.056 0.000 14.466 0.006 asterix.py:51(_read_final_format)
41451 0.052 0.000 0.052 0.000 {range}
15947 0.041 0.000 0.049 0.000 asterix.py:78(_get_uap)
52305 0.041 0.000 0.041 0.000 {_struct.unpack}
46193 0.040 0.000 0.040 0.000 {method 'extend' of 'list' objects}
340 0.040 0.000 0.062 0.000 {method '_g_setAttr' of 'tables.hdf5Extension.AttributeSet' objects}
3 0.027 0.009 0.043 0.014 {method '_createTable' of 'tables.tableExtension.Table' objects}
13672 0.024 0.000 0.024 0.000 {_struct.unpack_from}
15984/15978 0.023 0.000 0.062 0.000 utils.py:220(newfget)
600 0.021 0.000 0.071 0.000 category.py:244(_get_subfield_format)
5 0.020 0.004 0.020 0.004 {method '_append_records' of 'tables.tableExtension.Table' objects}
15947 0.019 0.000 0.036 0.000 {method 'append' of 'tables.tableExtension.Row' objects}
701 0.016 0.000 0.016 0.000 {method 'sort' of 'list' objects}
In [4]: %lprun -f asterix.AsterixParser._read_record asterix.convert('data/test', (1, 2, 30))
File: pyasterix/asterix.py
Function: _read_record at line 94
Total time: 29.8317 s
Line # Hits Time Per Hit % Time Line Contents
==============================================================
94 def _read_record(self, data, timestamp, category):
95 """Read one record from data"""
96 15947 674443 42.3 2.3 fspec, offset = self._get_fspec(data)
97 15947 185953 11.7 0.6 uap = self._get_uap(buffer(data, offset), category)
98 15947 128522 8.1 0.4 row = self.cat_tables[category].row
99 15947 61719 3.9 0.2 row['ff_timestamp'] = timestamp
100 303717 864654 2.8 2.9 for index, presence in enumerate(fspec):
101 287770 758162 2.6 2.5 if presence:
102 # UAP is a list with FRN - 1 as index
103 133432 380531 2.9 1.3 data_item, data_item_format = uap[index]
104 # Set the valid bool to True for this data item
105 133432 609039 4.6 2.0 row['%s_valid' % data_item] = True
106 133432 9851746 73.8 33.0 subfields, size = self._read_data_item(buffer(data, offset), data_item_format)
107 133432 605438 4.5 2.0 nv_dict = collections.defaultdict(list)
108 556838 1582620 2.8 5.3 for name, value in subfields:
109 423406 1669555 3.9 5.6 nv_dict[name].append(value)
110 551841 1882226 3.4 6.3 for name, values in nv_dict.items():
111 418409 1114855 2.7 3.7 try:
112 418409 1860801 4.4 6.2 if isinstance(row[name], np.ndarray):
113 # We can only assign an array of identical size
114 # to the table cell
115 # So use an intermediate array
116 30375 352322 11.6 1.2 array = np.copy(row[name])
117 30375 614233 20.2 2.1 array[0:len(values)] = values
118 30375 1721861 56.7 5.8 row[name] = array
119 else:
120 # Check if len(values) == 1?
121 388034 4411702 11.4 14.8 row[name] = values[0]
122 except ValueError:
123 logging.error('Error while assigning %s to %s', str(values), name)
124 if isinstance(row[name], np.ndarray) and len(values) > len(row[name]):
125 logging.error('Try to increase max number of extents to %d', len(values))
126 else:
127 logging.error('%s is not a valid enumerated value', values)
128 raise
129 except KeyError:
130 logging.error('Unknown key %s', name)
131 raise
132 133432 376745 2.8 1.3 offset += size
133 15947 82151 5.2 0.3 row.append()
134 15947 42404 2.7 0.1 return offset
--------------------------------------------------------------------------------------------------------------
Cython
------
In [2]: %timeit asterix.convert('data/test', (1, 2, 30))
1 loops, best of 3: 8.58 s per loop
In [4]: %prun asterix.convert('data/test', (1, 2, 30))
1315698 function calls (1315690 primitive calls) in 10.825 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
15947 4.430 0.000 10.194 0.001 asterix.py:95(_read_record)
300860 1.823 0.000 1.823 0.000 enum.py:286(__call__)
138515 1.588 0.000 1.941 0.000 asterix.py:198(_read_subfields)
255458 0.803 0.000 0.803 0.000 {numpy.core.multiarray.array}
223598 0.462 0.000 1.166 0.000 numeric.py:167(asarray)
133432 0.459 0.000 2.400 0.000 asterix.py:137(_read_data_item)
65267 0.353 0.000 0.353 0.000 asterix.py:251(_unpack_bitfields)
15947 0.166 0.000 0.166 0.000 asterix.py:233(_get_fspec)
13672 0.079 0.000 10.273 0.001 asterix.py:68(_read_data_block)
3 0.065 0.022 0.065 0.022 {method '_g_flush' of 'tables.hdf5Extension.Leaf' objects}
30375 0.064 0.000 0.154 0.000 function_base.py:781(copy)
2438 0.054 0.000 10.328 0.004 asterix.py:52(_read_final_format)
340 0.041 0.000 0.064 0.000 {method '_g_setAttr' of 'tables.hdf5Extension.AttributeSet' objects}
15947 0.029 0.000 0.029 0.000 asterix.py:79(_get_uap)
3 0.028 0.009 0.043 0.014 {method '_createTable' of 'tables.tableExtension.Table' objects}
15984/15978 0.027 0.000 0.066 0.000 utils.py:220(newfget)
600 0.021 0.000 0.071 0.000 category.py:244(_get_subfield_format)
5 0.021 0.004 0.021 0.004 {method '_append_records' of 'tables.tableExtension.Table' objects}
701 0.016 0.000 0.016 0.000 {method 'sort' of 'list' objects}
1076 0.011 0.000 0.033 0.000 atom.py:483(__init__)
1002 0.011 0.000 0.011 0.000 {method 'prod' of 'numpy.ndarray' objects}
340 0.010 0.000 0.131 0.000 attributeset.py:351(_g__setattr)
8909 0.009 0.000 0.014 0.000 {isinstance}
1076 0.009 0.000 0.012 0.000 atom.py:109(_normalize_shape)
1002 0.008 0.000 0.048 0.000 fromnumeric.py:1931(prod)
600 0.008 0.000 0.023 0.000 collections.py:37(__init__)
345 0.008 0.000 0.008 0.000 {method '_g_getAttr' of 'tables.hdf5Extension.AttributeSet' objects}
1002 0.008 0.000 0.039 0.000 fromnumeric.py:32(_wrapit)
1025 0.007 0.000 0.056 0.000 attributeset.py:59(issysattrname)
5 0.006 0.001 0.006 0.001 {method '_close_append' of 'tables.tableExtension.Table' objects}
3 0.006 0.002 0.030 0.010 description.py:377(__init__)
3 0.006 0.002 0.006 0.002 {built-in method _parse}
714 0.006 0.000 0.033 0.000 atom.py:398(from_kind)