Guest User

Untitled

a guest
Dec 21st, 2021
2,763
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.11 KB | None | 0 0
  1. __AUTHOR__ = "Soumil Shah "
  2.  
  3. try:
  4. import json
  5. import httplib2
  6. import io
  7. import boto3
  8. import datetime
  9. from faker import Faker
  10. import uuid
  11. import re
  12. import os
  13. from apiclient.discovery import build
  14. from googleapiclient.http import MediaIoBaseDownload
  15. import shutil
  16. except Exception as e:
  17. print("Error : {}".format(e))
  18.  
  19.  
  20. class Settings(object):
  21. def __init__(self):
  22. self.__API_KEY = "XXXXXXXXXXXXXXX"
  23. self.AWS_ACCESS_KEY = "XXXXXXXXXXXXXXXXXXXXXX"
  24. self.AWS_SECRET_KEY = "XXXXXXXXXXXXXXXXXXXXX"
  25. self.AWS_REGION_NAME = "us-east-1"
  26. self.s3bucket_name = "XXXXXXX"
  27.  
  28. @property
  29. def API_KEY(self):
  30. return self.__API_KEY
  31.  
  32. @API_KEY.setter
  33. def API_KEY(self, value):
  34. self.__API_KEY = value
  35.  
  36.  
  37. class GoogleDrive(Settings):
  38. def __init__(self):
  39.  
  40. Settings.__init__(self)
  41. self.service = build("drive", "v3", developerKey=self.API_KEY)
  42.  
  43. def get_files(self, folder_id=""):
  44.  
  45. if folder_id == "":
  46. return "Folder ID cannot be None"
  47.  
  48. else:
  49. param = {
  50. "q": "'"
  51. + folder_id
  52. + "' in parents and mimeType != 'application/vnd.google-apps.folder'"
  53. }
  54. return [
  55. file
  56. for file in self.service.files().list(**param).execute().get("files")
  57. ]
  58.  
  59. def download_file(self, file_id, mime_type="", file_name=""):
  60.  
  61. request = self.service.files().get_media(fileId=file_id)
  62.  
  63. fh = io.BytesIO()
  64. downloader = MediaIoBaseDownload(fh, request)
  65. done = False
  66.  
  67. while done is False:
  68. status, done = downloader.next_chunk()
  69. print("Download %d%%." % int(status.progress() * 100))
  70.  
  71. fh.seek(0)
  72.  
  73. with open(file_name, "wb") as f:
  74. shutil.copyfileobj(fh, f, length=131072)
  75. return True
  76.  
  77.  
  78. class Datetime(object):
  79. @staticmethod
  80. def get_year_month_day():
  81. """
  82. Return Year month and day
  83. :return: str str str
  84. """
  85. dt = datetime.datetime.now()
  86. year = dt.year
  87. month = dt.month
  88. day = dt.day
  89. return year, month, day
  90.  
  91.  
  92. class AWSS3(Settings):
  93.  
  94. """Helper class to which add functionality on top of boto3"""
  95.  
  96. def __init__(self, bucket=None, **kwargs):
  97. Settings.__init__(self)
  98.  
  99. self.BucketName = self.s3bucket_name
  100.  
  101. self.client = boto3.client("s3",
  102. aws_access_key_id=self.AWS_ACCESS_KEY,
  103. aws_secret_access_key=self.AWS_SECRET_KEY,
  104. region_name=self.AWS_REGION_NAME
  105. )
  106.  
  107. def put_files(self, Response=None, Key=None, over_ride=True):
  108. """
  109. Put the File on S3
  110. :return: Bool
  111. """
  112. try:
  113. if over_ride:
  114. Response = bytes(json.dumps(Response).encode("UTF-8"))
  115.  
  116. response = self.client.put_object(
  117. ACL="private", Body=Response, Bucket=self.BucketName, Key=Key
  118. )
  119. return "ok"
  120. except Exception as e:
  121. print("Error : {} ".format(e))
  122. return "error"
  123.  
  124. def item_exists(self, Key):
  125. """Given key check if the items exists on AWS S3"""
  126. try:
  127. response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
  128. return True
  129. except Exception as e:
  130. return False
  131.  
  132. def get_item(self, Key):
  133.  
  134. """Gets the Bytes Data from AWS S3"""
  135.  
  136. try:
  137. response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
  138. return response_new["Body"].read()
  139. except Exception as e:
  140. return False
  141.  
  142. def find_one_update(self, data=None, key=None):
  143.  
  144. """
  145. This checks if Key is on S3 if it is return the data from s3
  146. else store on s3 and return it
  147. """
  148.  
  149. flag = self.item_exists(Key=key)
  150.  
  151. if flag:
  152. data = self.get_item(Key=key)
  153. return data
  154.  
  155. else:
  156. self.put_files(Key=key, Response=data)
  157. return data
  158.  
  159. def delete_object(self, Key):
  160.  
  161. response = self.client.delete_object(Bucket=self.BucketName, Key=Key,)
  162. return response
  163.  
  164. def get_all_keys(self, Prefix=""):
  165.  
  166. """
  167. :param Prefix: Prefix string
  168. :return: Keys List
  169. """
  170. try:
  171. paginator = self.client.get_paginator("list_objects_v2")
  172. pages = paginator.paginate(Bucket=self.BucketName, Prefix=Prefix)
  173.  
  174. tmp = []
  175.  
  176. for page in pages:
  177. for obj in page["Contents"]:
  178. tmp.append(obj["Key"])
  179.  
  180. return tmp
  181. except Exception as e:
  182. return []
  183.  
  184. def print_tree(self):
  185. keys = self.get_all_keys()
  186. for key in keys:
  187. print(key)
  188. return None
  189.  
  190. def find_one_similar_key(self, searchTerm=""):
  191. keys = self.get_all_keys()
  192. return [key for key in keys if re.search(searchTerm, key)]
  193.  
  194. def __repr__(self):
  195. return "AWS S3 Helper class "
  196.  
  197.  
  198. class Datalake(AWSS3):
  199.  
  200. def __init__(self, base_folder):
  201. self.base_folder = base_folder
  202. AWSS3.__init__(self)
  203.  
  204. def upload_json_data_lake(self, json_data, year="", month="", day=""):
  205.  
  206. if year != "" and month != "" and day != "":
  207.  
  208. """base_folder/YYYY/MM/DD"""
  209.  
  210. file_name = "{}_{}_{}_{}.json".format(
  211. year, month, day, uuid.uuid4().__str__()
  212. )
  213.  
  214. path = "{}/year={}/month={}/day={}/{}".format(
  215. self.base_folder, year, month, day, file_name
  216. )
  217.  
  218. self.put_files(Response=json_data, Key=path)
  219.  
  220. else:
  221.  
  222. year, month, day = Datetime.get_year_month_day()
  223.  
  224. """base_folder/YYYY/MM/DD"""
  225.  
  226. file_name = "{}_{}_{}_{}.json".format(
  227. year, month, day, uuid.uuid4().__str__()
  228. )
  229.  
  230. path = "{}/year={}/month={}/day={}/{}".format(
  231. self.base_folder, year, month, day, file_name
  232. )
  233.  
  234. self.put_files(Response=json_data, Key=path)
  235.  
  236. return True
  237.  
  238. def upload_raw_data_lake(self, data, year="", month="", day="", file_extension=''):
  239.  
  240. if year != "" and month != "" and day != "":
  241.  
  242. """base_folder/YYYY/MM/DD"""
  243.  
  244. file_name = "{}_{}_{}_{}.{}".format(
  245. year, month, day, uuid.uuid4().__str__(), file_extension
  246. )
  247.  
  248. path = "{}/year={}/month={}/day={}/{}".format(
  249. self.base_folder, year, month, day, file_name
  250. )
  251.  
  252. self.put_files(Response=data, Key=path, over_ride=False)
  253.  
  254. else:
  255.  
  256. year, month, day = Datetime.get_year_month_day()
  257.  
  258. """base_folder/YYYY/MM/DD"""
  259.  
  260. file_name = "{}_{}_{}_{}.{}".format(
  261. year, month, day, uuid.uuid4().__str__(), file_extension
  262. )
  263.  
  264. path = "{}/year={}/month={}/day={}/{}".format(
  265. self.base_folder, year, month, day, file_name
  266. )
  267.  
  268. self.put_files(Response=data, Key=path, over_ride=False)
  269.  
  270. return True
  271.  
  272.  
  273. if __name__ == "__main__":
  274.  
  275. helper = GoogleDrive()
  276. files = helper.get_files(folder_id="1HVMrSvxZDu88Xz7PT13W2fF0hd7bfNjW")
  277. for file in files:
  278. helper.download_file(
  279. file_id=file.get("id"),
  280. mime_type=file.get("mimeType"),
  281. file_name=file.get("name"),
  282. )
  283.  
  284. helper_data_lake = Datalake(base_folder='googleDriveFiles')
  285.  
  286. full_file_path = os.path.join(os.getcwd(), file.get("name"))
  287. file_extension = file.get("name").split(".")[1]
  288.  
  289. with open(full_file_path, "rb") as f:
  290. blob_data = f.read()
  291. helper_data_lake.upload_raw_data_lake(data=blob_data, file_extension=file_extension)
  292.  
  293. try:os.remove(full_file_path)
  294. except Exception as e:pass
  295.  
  296.  
  297.  
Advertisement
Add Comment
Please, Sign In to add comment