Advertisement
Guest User

Untitled

a guest
Mar 19th, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.82 KB | None | 0 0
import argparse
import os
from math import pi

import numpy as np
import pandas as pd
  6.  
  7. MIN_RADIUS = 0.5
  8. MAX_RADIUS = 2.0
  9. MIN_HEIGHT = 0.5
  10. MAX_HEIGHT = 2.0
  11. ADDED_ERROR = 0.1
  12. NUM_DECIMALS = 1
  13.  
  14.  
  15. def generate_cylinder_df(size):
  16. """Generate a dataframe of cylinders where the radius and height of
  17. each cylinder are both in the range 0.5 to 2.0 and the volume equals
  18. (pi * r^2) * h. Then add up to a 10% error (uniformly distributed) to
  19. the volume, followed by rounding off radius, height and volume to the
  20. nearest 0.1.
  21. """
  22.  
  23. # Generate radiuses and heights
  24. radius = np.random.uniform(MIN_RADIUS, MAX_RADIUS, size=size)
  25. height = np.random.uniform(MIN_HEIGHT, MAX_HEIGHT, size=size)
  26.  
  27. # Calculate the correct volumes with those radiuses and heights
  28. volume = (pi * radius ** 2) * height
  29.  
  30. # Add the error to the volumes
  31. volume = volume * np.random.uniform(
  32. 1 - ADDED_ERROR,
  33. 1 + ADDED_ERROR,
  34. size=size)
  35.  
  36. # Then round off radius, height and volume
  37. radius = np.round(radius, decimals=NUM_DECIMALS)
  38. height = np.round(height, decimals=NUM_DECIMALS)
  39. volume = np.round(volume, decimals=NUM_DECIMALS)
  40.  
  41. df = pd.DataFrame({
  42. 'volume': volume,
  43. 'radius': radius,
  44. 'height': height, })
  45.  
  46. return df
  47.  
  48.  
  49. if __name__ == '__main__':
  50. if ('get_ipython' not in dir()) & ('PYCHARM_HOSTED' not in os.environ):
  51. # i.e. if run from the command line
  52.  
  53. # Handle command line arguments
  54. parser = argparse.ArgumentParser()
  55. parser.add_argument(
  56. '--filename',
  57. type=str,
  58. help='the filename to save the cylinders to',
  59. required=True)
  60. parser.add_argument(
  61. '--size',
  62. type=int,
  63. help='the number of cylinders to generate',
  64. required=True)
  65. # parser.add_argument(
  66. # '--job-dir',
  67. # help='this model ignores this field, but it is required by gcloud',
  68. # default='junk')
  69. args = parser.parse_args()
  70. arguments = args.__dict__
  71. # arguments.pop('job_dir', None)
  72.  
  73. # Generate cylinders and write them to file
  74. generate_cylinder_df(arguments['size']).to_csv(
  75. arguments['filename'],
  76. index=False)
  77.  
  78. print('saved {} cylinders to {}'.format(
  79. arguments['size'],
  80. arguments['filename']))
  81.  
  82. else: # if run from a notebook or IDE
  83. files_to_generate = {
  84. 'input/cylinders_train.csv': 8000,
  85. 'input/cylinders_eval.csv': 1000,
  86. 'input/cylinders_test.csv': 1000,}
  87.  
  88. for filename, size in files_to_generate.items():
  89. generate_cylinder_df(size=size).to_csv(
  90. filename,
  91. index=False)
  92.  
  93. print('saved {} cylinders to {}'.format(
  94. size,
  95. filename))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement