Advertisement
CodingComputing

Comparison of membership check speeds in lists vs sets

May 11th, 2024
506
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.29 KB | None | 0 0
  1. # Comparison of membership check speeds in lists vs sets
  2. # by @CodingComputing
  3.  
  4. # Run this in a Jupyter Notebook
  5.  
  6. # CELL 1 (setup stuff)
  7. import timeit
  8. import numpy as np
  9. import pandas as pd
  10. import matplotlib.pyplot as plt
  11. sizes = 10 ** np.arange(8)  # Different container sizes to time
  12. container_names = ("list", "set")
  13.  
  14. # CELL 2 (compare across different sizes)
  15. size_times_df = pd.DataFrame(columns=container_names)
  16. # Measure time for membership checks for different sizes
  17. for size in sizes:
  18.     test_containers = {
  19.         'list': list(range(size)),
  20.         'set': set(range(size))
  21.     }
  22.     target = np.floor(size/2)  # Choose middle element as target
  23.     for container in test_containers:
  24.         exec_time = timeit.Timer(
  25.             f'{target} in test_containers["{container}"]',
  26.             globals=globals()
  27.             ).timeit(number=10)
  28.         size_times_df.loc[size, container] = exec_time
  29. #
  30. size_times_df
  31.  
  32. # CELL 3 (graph for Cell 2 results)
  33. # Plot and customization
  34. fig, ax = plt.subplots()
  35. size_times_df.plot(ax=ax, marker='o')
  36. # Make log scales on axes, because our data points are magnitudes apart
  37. ax.set_xscale('log')
  38. ax.set_yscale('log')
  39. ax.set_xlabel('Container Size', size=12)
  40. ax.set_ylabel('Membership check time\n(for middle element)', size=12)
  41. ax.set_title('Comparison of Membership Check Times\nin lists vs sets')
  42. ax
  43.  
  44. # CELL 4 (compare across target positions)
  45. # Measure membership check times for different positions within the list
  46. # keeping container size constant
  47. size = 10_000_000
  48. positions = [0, 0.25, 0.5, 0.75, 1]
  49. pos_times_df = pd.DataFrame(columns=container_names)
  50. #
  51. for pos in positions:
  52.     target = round(size*pos)
  53.     for container in test_containers:
  54.         exec_time = timeit.Timer(
  55.             f'{target} in test_containers["{container}"]',
  56.             globals=globals()
  57.         ).timeit(number=10)
  58.         pos_times_df.loc[pos, container] = exec_time
  59. #
  60. pos_times_df
  61.  
  62. # CELL 5 (graph for Cell 4 results)
  63. fig, ax = plt.subplots()
  64. pos_times_df.plot(ax=ax, marker='o')
  65. ax.set_xlabel(f'Relative Position within list\n(Container size: {size})', size=12)
  66. ax.set_ylabel('Membership check time', size=12)
  67. ax.set_title('Comparison of Membership Check Times\nin lists vs sets')
  68. ax
  69.  
  70. # That's it! Follow x.com/CodingComputing for more on Python!
Tags: python
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement