Advertisement
VSZM

Datagen

Sep 27th, 2019
227
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # To add a new cell, type '#%%'
  2. # To add a new markdown cell, type '#%% [markdown]'
  3.  
  4. #%%
  5. import pandas as pd
  6. import random
  7. from datetime import datetime
  8. from dateutil.relativedelta import relativedelta
  9. from faker import Faker
  10. import pandas as pd
  11.  
  12.  
  13. #%%
  14. fake = Faker()
  15.  
  16. def get_random_name(taken_names):
  17.     name = '.'.join(fake.sentence().lower()[:-1].split())
  18.     while name in taken_names:
  19.         name = '.'.join(fake.sentence().lower()[:-1].split())
  20.  
  21.     return name
  22.  
  23. ten_years_ago = (datetime.now() - relativedelta(years=10)).timestamp()
  24.  
  25. def get_random_dt_after(after = ten_years_ago):
  26.     now = datetime.now().timestamp()    
  27.    
  28.     return datetime.fromtimestamp(random.uniform(after, now))
  29.  
  30.  
  31. #%%
  32. rows = dict([])
  33. pk = 1
  34.  
  35. for i in range(10000):
  36.     identifier = get_random_name(rows.keys())
  37.     title = ' '.join([word.capitalize() for word in identifier.split('.')])
  38.     description = fake.text()
  39.     published_ts = get_random_dt_after()
  40.     last_updated_ts = get_random_dt_after(after = published_ts.timestamp())
  41.     rows[identifier] = (pk, identifier, title, description, published_ts, last_updated_ts)
  42.     pk = pk + 1
  43.  
  44.  
  45. #%%
  46. fake = Faker()
  47.  
  48. fake.sentence().lower()[:-1]
  49.  
  50.  
  51. #%%
  52. df = pd.DataFrame(rows.values(), columns =['PK', 'Id', 'Title', 'Description', 'Published_TS', 'Last_Updated_TS'])
  53.  
  54.  
  55. #%%
  56. df.head()
Advertisement
RAW Paste Data Copied
Advertisement