#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import os
import time
import base64  # kept for the commented-out base64 variants below
from hashlib import sha256


"""
subject: Experimental Fragments Processing (Experiment A)
description: A dispersal algorithm for splitting and stitching message fragments. This is a snippet for ANNNet.
author: Bryan Angelo Pedrosa
date: 9/10/2019


metadata information example:
*
chain-ini: 87428fc522803d31065e7bce3cf03fe475096631e5e07bbd7a0fde60c4cf25c7 // start of chain
chain-end: 0263829989b6fd954f72baaf2fc64bc2e2f01d692d4de72986ea808f6e99813f // end of chain
chain-sha: 539b9f77a4c9840da32d59e50ed831be29f06bedd25b62c21029078cfb1712a0 // chain batch's hash id
chain-set: 123409 // chain parts
chain-get: 4563 // total chained parts
*
"""

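# Per-chunk metadata layout actually used below (built by SPLITTING.regtr and
# consumed by STITCHING.merge), as opposed to the chain-* sketch above:
#   [batch_id_hash, prev_hash or "INIT", next_hash or "FINL", total_parts, parts_merged]
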
chunk_sz = 63000 # maximum chunk size; standard size, don't change!
qty_circuit = 13 # 13 circuits by default, you can change this
metadata_done = []
metadata = []



#### Cache File Management ####

inprocs = os.path.join("cache", "inprocess")
merged = os.path.join("cache", "merged")

for paths in ["cache", inprocs, merged]:
    # create path if it's non-existent
    if not os.path.exists(paths):
        print("[!] No {0} path found. Creating one.....".format(paths))
        os.mkdir(paths)

    # remove leftovers from cache folder
    for cachefl in os.listdir(paths):
        filename = os.path.join(paths, cachefl)
        if os.path.isfile(filename):
            print("deleting dirty cache: '{0}'".format(filename))
            os.remove(filename)



#### The Dispersal Algorithm (splitter) ####
class SPLITTING:
    def __init__(self):
        self.total_parts = 0 # parts to be registered
        self.regtred_parts = 0 # total registered parts
        self.BATCH_ID_HASH = sha256(os.urandom(32)).hexdigest() # batch hash id
        self.recent_EOC_hash = "" # recent end-of-chain hash


    def regtr(self, chopped_data=""):
        # generate the chain hash for the next chunk
        SEEDCHUNK_HASH = sha256("{0}{1}{2}".format(sha256(os.urandom(64)).hexdigest(), self.BATCH_ID_HASH, chopped_data).encode()).hexdigest()

        # increment the registered-chunks counter
        self.regtred_parts += 1

        # decide how the metadata stamp is added to each chopped chunk
        if not bool(self.recent_EOC_hash): # recent EOC is empty; initial chunk
            chunk_metadata = [self.BATCH_ID_HASH, "INIT", SEEDCHUNK_HASH, self.total_parts, 1] # batch_id, initial, next
        elif self.total_parts == self.regtred_parts: # total & registered parts match; final chunk
            chunk_metadata = [self.BATCH_ID_HASH, self.recent_EOC_hash, "FINL", self.total_parts, 1] # batch_id, prev, final
        elif bool(self.recent_EOC_hash): # EOC isn't empty; internal chunk
            chunk_metadata = [self.BATCH_ID_HASH, self.recent_EOC_hash, SEEDCHUNK_HASH, self.total_parts, 1] # batch_id, prev, next

        # push chunk_metadata to metadata
        metadata.append(chunk_metadata)

        # push the next hash to self.recent_EOC_hash
        self.recent_EOC_hash = SEEDCHUNK_HASH

        # split cache file... save chopped_data under its metadata-derived name
        filename_s = os.path.join("cache", "inprocess", " ".join([str(meta_d) for meta_d in chunk_metadata]))
        with open(filename_s, "wb") as cache_fs:
            #cache_fs.write(base64.b64encode(chopped_data.encode()))
            cache_fs.write(chopped_data.encode()) # encode the str so the binary-mode write works on Python 3


    def slash(self, data=""):
        # reject empty input up front (it would otherwise divide by zero below)
        if len(data) == 0:
            raise ValueError("class SPLITTING: Couldn't process 0 length data.")

        # how many bytes each of the 'qty_circuit' circuits would get
        circuit_chunk_sz = int(len(data) / qty_circuit)

        # slice by 'qty_circuit', then slice by chunk_sz;
        # split by 1 if the data length is lower than the circuit count
        if len(data) <= qty_circuit: # single buffer
            self.total_parts = len(data)
            chunk_buffer = 1
        elif circuit_chunk_sz > chunk_sz: # standard size buffer
            chunk_buffer = chunk_sz
            # determine total iterations
            self.total_parts = len(data) // chunk_sz
            if bool(len(data) % chunk_sz):
                self.total_parts += 1
        elif circuit_chunk_sz <= chunk_sz: # custom size buffer
            chunk_buffer = circuit_chunk_sz
            # determine total iterations
            self.total_parts = len(data) // circuit_chunk_sz
            if bool(len(data) % circuit_chunk_sz):
                self.total_parts += 1
        else:
            raise ValueError("class SPLITTING: Unknown error. datatype: {0} ; length: {1}".format(type(data), len(data)))

        # data chopping loop
        while bool(data):
            chopped_data = data[:chunk_buffer]
            data = data[chunk_buffer:]
            self.regtr(chopped_data=chopped_data)



#### The Dispersal Algorithm (merger) ####
class STITCHING:
    # merge two metadata entries (and, in build(), their cache data)
    def merge(self, metadata_a, metadata_b):
        merge_status = False
        merge_output = None
        merge_order = None
        if metadata_a[0] == metadata_b[0]:
            # merge only if the fragments share a batch id and their chain hashes line up
            if metadata_a[2] == metadata_b[1]: # merge order (ascending): metadata_a - metadata_b
                merge_status = True
                merge_order = "ASCD" # ascending
                merge_output = [metadata_a[0], metadata_a[1], metadata_b[2], metadata_a[3], (metadata_a[4] + metadata_b[4])]
            elif metadata_b[2] == metadata_a[1]: # merge order (descending): metadata_b - metadata_a
                merge_status = True
                merge_order = "DESC" # descending
                merge_output = [metadata_a[0], metadata_b[1], metadata_a[2], metadata_a[3], (metadata_a[4] + metadata_b[4])]

        return (merge_status, merge_output, merge_order)
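
    # Illustrative merge() example (hypothetical hash values, matching the sample
    # batches commented out at the bottom of this file):
    #   merge(["a_sample_batch", "a", "b", 4, 1], ["a_sample_batch", "b", "c", 4, 1])
    # hits the ascending case (the first fragment's next hash "b" equals the second's
    # previous hash), returning (True, ["a_sample_batch", "a", "c", 4, 2], "ASCD");
    # non-adjacent or cross-batch fragments return (False, None, None).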

    # building fragments
    def build(self, metadata=[]):
        #while 1: # infinite loop; rest mode
        while len(metadata):
            metadata_index_f = 0
            while len(metadata) and len(metadata) > metadata_index_f:
                metadata_index_s = 0
                while len(metadata) and len(metadata) > metadata_index_s:
                    metadata_frag_f = metadata[metadata_index_f]
                    metadata_frag_s = metadata[metadata_index_s]
                    metadata_resp = self.merge(metadata_frag_f, metadata_frag_s)

                    # if this fragment has completely merged
                    if metadata_frag_f[3] == metadata_frag_f[4]:
                        # move to metadata_done
                        metadata_done.append(metadata.pop(metadata_index_f))

                        # move its file to the merged folder
                        filename_fr = os.path.join("cache", "inprocess", " ".join([str(meta_d) for meta_d in metadata_frag_f]))
                        filename_to = os.path.join("cache", "merged", " ".join([str(meta_d) for meta_d in metadata_frag_f]))
                        os.rename(filename_fr, filename_to)

                        break

                    elif metadata_resp[0]:
                        # register the metadata of the newly merged fragment
                        metadata.append(metadata_resp[1])

                        # remove the two metadata entries that have merged
                        metadata.remove(metadata_frag_f)
                        metadata.remove(metadata_frag_s)

                        # merge the two cache files to form a new entity...
                        filename_m = os.path.join("cache", "inprocess", " ".join([str(meta_d) for meta_d in metadata_resp[1]]))
                        filename_a = os.path.join("cache", "inprocess", " ".join([str(meta_d) for meta_d in metadata_frag_f]))
                        filename_b = os.path.join("cache", "inprocess", " ".join([str(meta_d) for meta_d in metadata_frag_s]))

                        # merge the two via file i/o (binary mode, since the chunks were written as bytes)
                        with open(filename_m, "wb") as cache_fm, open(filename_a, "rb") as cache_fa, open(filename_b, "rb") as cache_fb:
                            if metadata_resp[2] == "ASCD":
                                #cache_fm.write(base64.b64encode(base64.b64decode(cache_fa.read()) + base64.b64decode(cache_fb.read())))
                                cache_fm.write(cache_fa.read() + cache_fb.read()) # no encoding
                            elif metadata_resp[2] == "DESC":
                                #cache_fm.write(base64.b64encode(base64.b64decode(cache_fa.read()) + base64.b64decode(cache_fb.read())))
                                cache_fm.write(cache_fb.read() + cache_fa.read()) # no encoding

                        # delete the two cache files that have merged
                        os.remove(filename_a)
                        os.remove(filename_b)

                        break

                    metadata_index_s += 1
                metadata_index_f += 1
            #time.sleep(.2)



# I'll just leave these here for demonstration purposes.
data = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"*999999
"""
metadata = [
    # Sample batch A
    ["a_sample_batch", "a", "b", 4, 1],
    ["a_sample_batch", "c", "d", 4, 1],
    ["a_sample_batch", "b", "c", 4, 1],
    ["a_sample_batch", "d", "e", 4, 1],
    # Sample batch B
    ["b_sample_batch", "a", "b", 4, 1],
    ["b_sample_batch", "c", "d", 4, 1],
    ["b_sample_batch", "b", "c", 4, 1],
    ["b_sample_batch", "d", "e", 4, 1]
]
"""
uh = time.time()
if __name__ == "__main__":
    SPLITTING().slash(data=data)
    print(time.time() - uh)
    #print(metadata)

    STITCHING().build(metadata=metadata)
    print(time.time() - uh)

    #print(metadata_done)
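
    # Illustrative sanity-check sketch (assumes the demo payload above and the
    # default cache/merged layout): after build() finishes, cache/merged should
    # hold a single file whose bytes equal the original payload.
    for merged_name in os.listdir(os.path.join("cache", "merged")):
        with open(os.path.join("cache", "merged", merged_name), "rb") as merged_f:
            print("round trip intact:", merged_f.read() == data.encode())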