Advertisement
Dave_Rove

Benchmark STM32 ATMega328 vs Teensy4

Oct 28th, 2019
337
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.56 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. benchmark='''
  4.  
  5. STM32F103C8T6 72MHz (Cortex-M3)
  6. INT_LOOP(30000) bench...= 2924 microseconds 10.26MIPS
  7. LONG_LOOP(30000) bench...= 2926 microseconds 10.25MIPS
  8. FLOAT_DIV(30000) bench...= 27979 microseconds 1.20MFLOPS
  9. DOUBLE_DIV(30000) bench...= 38000 microseconds 0.86MFLOPS
  10. FLOAT_MUL(30000) bench...= 20463 microseconds 1.71MFLOPS
  11. DOUBLE_MUL(30000) bench...= 25891 microseconds 1.31MFLOPS
  12.  
  13. Arduino Nano (ATMega328 16MHz AVR)
  14. INT_LOOP(30000) bench...= 7544 microseconds 3.98MIPS
  15. LONG_LOOP(30000) bench...= 13408 microseconds 2.24MIPS
  16. FLOAT_DIV(30000) bench...= 154792 microseconds 0.21MFLOPS
  17. DOUBLE_DIV(30000) bench...= 154800 microseconds 0.21MFLOPS
  18. FLOAT_MUL(30000) bench...= 156744 microseconds 0.21MFLOPS
  19. DOUBLE_MUL(30000) bench...= 156736 microseconds 0.21MFLOPS
  20.  
  21. Arduino Zero (Atmel ATSAMD21G18 48MHz Cortex-M0+)
  22. INT_LOOP(30000) bench...= 116898 microseconds 11.92MIPS
  23. LONG_LOOP(30000) bench...= 116898 microseconds 11.93MIPS
  24. FLOAT_DIV(30000) bench...= 116898 microseconds 0.38MFLOPS
  25. DOUBLE_DIV(30000) bench...= 113126 microseconds 0.27MFLOPS
  26. FLOAT_MUL(30000) bench...= 92387 microseconds 0.33MFLOPS
  27. DOUBLE_MUL(30000) bench...= 116898 microseconds 0.26MFLOPS
  28.  
  29. Arduino Due (Atmel SAM3X8E 84 MHz Cortex-M3)
  30. INT_LOOP(30000) bench...= 1074 microseconds 27.93MIPS
  31. LONG_LOOP(30000) bench...= 1107 microseconds 27.10MIPS
  32. FLOAT_DIV(30000) bench...= 25859 microseconds 1.21MFLOPS
  33. DOUBLE_DIV(30000) bench...= 37966 microseconds 0.81MFLOPS
  34. FLOAT_MUL(30000) bench...= 18659 microseconds 1.71MFLOPS
  35. DOUBLE_MUL(30000) bench...= 25450 microseconds 1.23MFLOPS
  36.  
  37. Teensy LC (MKL26Z64 Cortex-M0 48MHz)
  38. INT_LOOP(30000) bench...= 2508 microseconds 11.96MIPS
  39. LONG_LOOP(30000) bench...= 2512 microseconds 11.94MIPS
  40. FLOAT_DIV(30000) bench...= 76705 microseconds 0.40MFLOPS
  41. DOUBLE_DIV(30000) bench...= 101840 microseconds 0.30MFLOPS
  42. FLOAT_MUL(30000) bench...= 80471 microseconds 0.38MFLOPS
  43. DOUBLE_MUL(30000) bench...= 106242 microseconds 0.29MFLOPS
  44.  
  45. Teensy 3.2 (MK20DX256 Cortex-M4 96 MHz)
  46. INT_LOOP(30000) bench...= 940 microseconds 31.91MIPS
  47. LONG_LOOP(30000) bench...= 944 microseconds 31.78MIPS
  48. FLOAT_DIV(30000) bench...= 10977 microseconds 2.99MFLOPS
  49. DOUBLE_DIV(30000) bench...= 21317 microseconds 1.47MFLOPS
  50. FLOAT_MUL(30000) bench...= 8463 microseconds 3.99MFLOPS
  51. DOUBLE_MUL(30000) bench...= 13162 microseconds 2.46MFLOPS
  52.  
  53. Teensy 3.2 (MK20DX256 Cortex-M4 72MHz)
  54. INT_LOOP(30000) bench...= 1253 microseconds 23.94MIPS
  55. LONG_LOOP(30000) bench...= 1256 microseconds 23.89MIPS
  56. FLOAT_DIV(30000) bench...= 14635 microseconds 2.24MFLOPS
  57. DOUBLE_DIV(30000) bench...= 25083 microseconds 1.26MFLOPS
  58. FLOAT_MUL(30000) bench...= 11288 microseconds 2.99MFLOPS
  59. DOUBLE_MUL(30000) bench...= 17551 microseconds 1.84MFLOPS
  60.  
  61. ESP8266 esp-12e 160MHz
  62. INT_LOOP(30000) bench...= 752 microseconds 39.89MIPS
  63. LONG_LOOP(30000) bench...= 751 microseconds 39.95MIPS
  64. FLOAT_DIV(30000) bench...= 7500 microseconds 4.45MFLOPS
  65. DOUBLE_DIV(30000) bench...= 8063 microseconds 4.10MFLOPS
  66. FLOAT_MUL(30000) bench...= 9938 microseconds 3.27MFLOPS
  67. DOUBLE_MUL(30000) bench...= 10688 microseconds 3.02MFLOPS
  68.  
  69. ESP8266 esp-12e 80MHz
  70. INT_LOOP(30000) bench...= 1504 microseconds 19.95MIPS
  71. LONG_LOOP(30000) bench...= 1501 microseconds 19.99MIPS
  72. FLOAT_DIV(30000) bench...= 15001 microseconds 2.22MFLOPS
  73. DOUBLE_DIV(30000) bench...= 16126 microseconds 2.05MFLOPS
  74. FLOAT_MUL(30000) bench...= 19876 microseconds 1.63MFLOPS
  75. DOUBLE_MUL(30000) bench...= 21377 microseconds 1.51MFLOPS
  76.  
  77. #From mantoui
  78.  
  79. teensy3.6 @180mhz
  80.      INT_LOOP(30000) bench...= 500 microseconds 60.00MIPS
  81.      LONG_LOOP(30000) bench...= 502 microseconds 59.76MIPS
  82.      FLOAT_DIV(30000) bench...= 2503 microseconds 14.99MFLOPS
  83.      DOUBLE_DIV(30000) bench...= 9343 microseconds 3.39MFLOPS
  84.      FLOAT_MUL(30000) bench...= 667 microseconds 181.82MFLOPS
  85.      DOUBLE_MUL(30000) bench...= 7008 microseconds 4.61MFLOPS
  86.  
  87. teensy3.6 @120mhz
  88.     INT_LOOP(30000) bench...= 752 microseconds 39.89MIPS
  89.     LONG_LOOP(30000) bench...= 753 microseconds 39.84MIPS
  90.     FLOAT_DIV(30000) bench...= 3756 microseconds 9.99MFLOPS
  91.     DOUBLE_DIV(30000) bench...= 14019 microseconds 2.26MFLOPS
  92.     FLOAT_MUL(30000) bench...= 1001 microseconds 120.97MFLOPS
  93.     DOUBLE_MUL(30000) bench...= 10514 microseconds 3.07MFLOPS
  94.  
  95. teensy3.5@120mhz
  96.     INT_LOOP(30000) bench...= 752 microseconds 39.89MIPS
  97.     LONG_LOOP(30000) bench...= 755 microseconds 39.74MIPS
  98.     FLOAT_DIV(30000) bench...= 3758 microseconds 9.99MFLOPS
  99.     DOUBLE_DIV(30000) bench...= 18797 microseconds 1.66MFLOPS
  100.     FLOAT_MUL(30000) bench...= 1003 microseconds 120.97MFLOPS
  101.     DOUBLE_MUL(30000) bench...= 10529 microseconds 3.07MFLOPS
  102.  
  103. teensy3.2@120mhz
  104.     INT_LOOP(30000) bench...= 751 microseconds 39.95MIPS
  105.     LONG_LOOP(30000) bench...= 755 microseconds 39.74MIPS
  106.     FLOAT_DIV(30000) bench...= 8784 microseconds 3.74MFLOPS
  107.     DOUBLE_DIV(30000) bench...= 17559 microseconds 1.79MFLOPS
  108.     FLOAT_MUL(30000) bench...= 6771 microseconds 4.99MFLOPS
  109.     DOUBLE_MUL(30000) bench...= 10533 microseconds 3.07MFLOPS
  110.  
  111. dragonfly@80MHz    
  112.    INT_LOOP(30000) bench...= 1129 microseconds 26.57MIPS
  113.    LONG_LOOP(30000) bench...= 1129 microseconds 26.57MIPS
  114.    FLOAT_DIV(30000) bench...= 5641 microseconds 6.65MFLOPS
  115.    DOUBLE_DIV(30000) bench...= 21813 microseconds 1.45MFLOPS
  116.    FLOAT_MUL(30000) bench...= 1883 microseconds 39.79MFLOPS
  117.    DOUBLE_MUL(30000) bench...= 16173 microseconds 1.99MFLOPS
  118.  
  119. #From Budvar10
  120.  Arduino-PRO 1284 (ATmega1284P 24MHz)
  121.  INT_LOOP(30000) bench...= 5024 microseconds 5.97MIPS
  122.  LONG_LOOP(30000) bench...= 8992 microseconds 3.34MIPS
  123.  FLOAT_DIV(30000) bench...= 96789 microseconds 0.34MFLOPS
  124.  DOUBLE_DIV(30000) bench...= 96800 microseconds 0.34MFLOPS
  125.  FLOAT_MUL(30000) bench...= 98058 microseconds 0.34MFLOPS
  126.  DOUBLE_MUL(30000) bench...= 98059 microseconds 0.34MFLOPS
  127.  
  128. #From gdsports
  129.  Adafruit Metro M4 Express (samd51 @120MHz) cache on
  130.  INT_LOOP(30000) bench...= 752 microseconds 39.89MIPS
  131.  LONG_LOOP(30000) bench...= 753 microseconds 39.84MIPS
  132.  FLOAT_DIV(30000) bench...= 3756 microseconds 9.99MFLOPS
  133.  DOUBLE_DIV(30000) bench...= 14022 microseconds 2.26MFLOPS
  134.  FLOAT_MUL(30000) bench...= 1002 microseconds 120.48MFLOPS
  135.  DOUBLE_MUL(30000) bench...= 10516 microseconds 3.07MFLOPS
  136.  
  137. Teensy 4.0 @600MHz
  138. FLOAT_DIV(30000) bench...= 200 microseconds 300.00MFLOPS
  139. DOUBLE_DIV(30000) bench...= 201 microseconds 297.03MFLOPS
  140. FLOAT_MUL(30000) bench...= 150 microseconds 600.00MFLOPS
  141. DOUBLE_MUL(30000) bench...= 300 microseconds 150.00MFLOPS
  142. Time (ms)...= 396577 ms
  143. INT_LOOP(30000) bench...= 300 microseconds 600.00MIPS
  144. LONG_LOOP(30000) bench...= 300 microseconds 300.00MIPS
  145. FLOAT_DIV(30000) bench...= 300 microseconds 300.00MFLOPS
  146. '''
  147.  
  148. import statistics as stat
  149. import matplotlib.pyplot as plt
  150.  
  151. # Create dictionaries with the 1/timing and MIPS&MOPS figures
  152. benchlist = benchmark.splitlines()
  153. timedict = {}
  154. mopsdict = {}
  155. for line in benchlist:
  156.     linelist = line.split()
  157.     if "microseconds" in linelist:
  158.         test = linelist[0].split('(')[0]
  159.         timedict[device][test] = 1 / int(linelist[2])
  160.         mopsdict[device][test] = float(linelist[4].split('M')[0])
  161.     elif line[0:3].isalpha():
  162.         device = line.split('(')[0].strip()
  163.         timedict[device] = {}
  164.         mopsdict[device] = {}
  165.  
  166. # Normalize the figures relative to the first device in the table
  167. devices = list(timedict)
  168. firstdevice = devices[0]
  169. tests = list(timedict[firstdevice])
  170. for test in tests:
  171.     time_divisor = timedict[firstdevice][test]
  172.     mops_divisor = mopsdict[firstdevice][test]
  173.     for device in devices:
  174.         timedict[device][test] /= time_divisor
  175.         mopsdict[device][test] /= mops_divisor
  176.  
  177. # Take the mean of the normalized figures for each device        
  178. time_means = {d: stat.mean(timedict[d].values()) for d in devices}
  179. mops_means = {d: stat.mean(mopsdict[d].values()) for d in devices}
  180.        
  181. # Plot one above the other for comparison
  182. fig, [ax0, ax1] = plt.subplots(2)
  183. time_performance = list(reversed(list(time_means.values())))
  184. mops_performance = list(reversed(list(mops_means.values())))
  185. objects = list(reversed(devices))
  186. y_pos = range(len(objects))
  187.  
  188. ax0.barh(y_pos, time_performance, align='center', log=True)
  189. ax0.set_yticks(y_pos)
  190. ax0.set_yticklabels(objects)
  191. ax0.set_xlim(0.1,200)
  192. ax0.set_xlabel('Performance relative to STM32')
  193. ax0.set_title('Log-scale Benchmarks (1/timing)')
  194.  
  195. ax1.barh(y_pos, mops_performance, align='center', log=True)
  196. ax1.set_yticks(y_pos)
  197. ax1.set_yticklabels(objects)
  198. ax1.set_xlim(0.1,200)
  199. ax1.set_xlabel('Performance relative to STM32')
  200. ax1.set_title('Log-scale Benchmarks (MIPS & MFLOPS)')
  201.  
  202. plt.tight_layout()
  203. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement