Advertisement
Guest User

Search best series of chemicals

a guest
Sep 10th, 2018
482
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 19.90 KB | None | 0 0
  1. import copy
  2. import argparse
  3. import time
  4.  
  5. """
  6. chem_domain.py
  7. """
  8.  
  9. class Chem(object):
  10. """
  11. Def a chem, which is unique with a unique identifier,
  12. blocks a series of other chems,
  13. requirements is a series of chem required
  14. """
  15.  
  16. def __init__(self, identifier, requirements, blocks, attr_values):
  17. self.identifier = identifier
  18. self.requirements = requirements
  19. self.blocks = blocks
  20. self.attr_values =attr_values
  21.  
  22. def __eq__(self, chem):
  23. return self.identifier == chem.identifier
  24. def __ne__(self, chem):
  25. return self.identifier != chem.identifier
  26. def __lt__(self, chem):
  27. return self.identifier < chem.identifier
  28. def __hash__(self):
  29. return self.identifier.__hash__()
  30.  
  31. def is_chem_allowed_by(self, chems):
  32. for chem in self.requirements:
  33. if (chem not in chems):
  34. return False
  35. return True
  36.  
  37. class AttrValues(object):
  38. """
  39. List of values generated by a chem,
  40. total is the sum of others attributes,
  41. the values can be negatives.
  42. """
  43.  
  44. def __init__(self, circulatory, sensory, respiratory, motoric):
  45. self.circulatory = circulatory
  46. self.sensory = sensory
  47. self.respiratory = respiratory
  48. self.motoric = motoric
  49. self.total = circulatory + sensory + respiratory + motoric
  50.  
  51. def copy(self):
  52. return AttrValues(self.circulatory, self.sensory, self.respiratory, self.motoric)
  53. __copy__ = copy
  54.  
  55. def __eq__(self, attr_values):
  56. return self.circulatory == attr_values.circulatory \
  57. and self.sensory == attr_values.sensory \
  58. and self.respiratory == attr_values.respiratory \
  59. and self.motoric == attr_values.motoric
  60.  
  61. def __ne__(self, attr_values):
  62. return self.circulatory != attr_values.circulatory \
  63. or self.sensory != attr_values.sensory \
  64. or self.respiratory != attr_values.respiratory \
  65. or self.motoric != attr_values.motoric
  66.  
  67. def add_values(self, attr_values):
  68. self.circulatory = self.circulatory + attr_values.circulatory
  69. self.sensory = self.sensory + attr_values.sensory
  70. self.respiratory = self.respiratory + attr_values.respiratory
  71. self.motoric = self.motoric + attr_values.motoric
  72. self.total = self.total + attr_values.total
  73.  
  74. class ChemSeries(object):
  75. """
  76. Def a series of chem,
  77. current_values is the sum of values from the chems,
  78. block is all the chems blocked by chems already in the list
  79.  
  80. Can be used as a key or in a set,
  81. but then the series shouldn't be modified anymore, see __hash__ and use freeze
  82. """
  83.  
  84. def __init__(self):
  85. self.current_values = AttrValues(0, 0, 0, 0)
  86. self.chems_ordered = []
  87. self.chems = set()
  88. self.blocks = set()
  89.  
  90. def copy(self):
  91. my_copy = ChemSeries()
  92. my_copy.current_values = copy.copy(self.current_values)
  93. my_copy.chems_ordered = copy.copy(self.chems_ordered)
  94. my_copy.chems = set(copy.copy(self.chems))
  95. my_copy.blocks = set(copy.copy(self.blocks))
  96. return my_copy
  97. __copy__ = copy
  98.  
  99. def __eq__(self, chem_series):
  100. return self.chems == chem_series.chems
  101.  
  102. def __ne__(self, chem_series):
  103. return self.chems != chem_series.chems
  104.  
  105. def __hash__(self):
  106. """
  107. Won't work if self not freezed before
  108. """
  109. return self.chems.__hash__()
  110.  
  111. def freeze(self):
  112. # ignores chems_ordered and current_values, bother only for hash and ==
  113. self.chems = frozenset(self.chems)
  114. self.blocks = frozenset(self.blocks)
  115.  
  116. def can_add_chem(self, chem):
  117. return chem not in self.blocks and chem.is_chem_allowed_by(self.chems)
  118.  
  119. def add_chem_forced(self, chem):
  120. self.chems.add(chem)
  121. self.chems_ordered.append(chem)
  122. self.blocks.update(chem.blocks)
  123. self.current_values.add_values(chem.attr_values)
  124.  
  125. def add_chem(self, chem):
  126. if self.can_add_chem(chem):
  127. self.add_chem_forced(chem)
  128. return True
  129. else:
  130. return False
  131.  
  132. def get_chems_in_canonical_order(self):
  133. """
  134. Returns the list of chems sorted by alphabetic order for parts where the order doesn't matter
  135. """
  136. sorted_chems = copy.copy(self.chems_ordered)
  137. ln = len(sorted_chems)
  138. for pos in range(ln):
  139. for i in range(0, ln-pos-1):
  140. if sorted_chems[i].identifier > sorted_chems[i+1].identifier\
  141. and sorted_chems[i] not in sorted_chems[i+1].blocks\
  142. and sorted_chems[i] not in sorted_chems[i+1].requirements:
  143. temp = sorted_chems[i]
  144. sorted_chems[i] = sorted_chems[i+1]
  145. sorted_chems[i+1] = temp
  146. return sorted_chems
  147.  
  148. """
  149. chem_series_search.py
  150. """
  151.  
  152. class ChemDataInit(object):
  153. __all_chems_by_id = {}
  154.  
  155. @classmethod
  156. def get_chem(cls, chem_id):
  157. chem = cls.__all_chems_by_id.get(chem_id) or Chem(chem_id, [], [], None)
  158. cls.__all_chems_by_id[chem_id] = chem
  159. return chem
  160.  
  161. @classmethod
  162. def get_chems(cls, chem_ids):
  163. chems = []
  164. for chem_id in chem_ids:
  165. chems.append(cls.get_chem(chem_id))
  166. return chems
  167.  
  168. @classmethod
  169. def create_chem(cls, identifier, requirement_ids, block_ids, attr_values):
  170. new_chem = cls.get_chem(identifier)
  171. new_chem.attr_values = attr_values
  172. new_chem.requirements = cls.get_chems(requirement_ids)
  173. new_chem.blocks = cls.get_chems(block_ids)
  174.  
  175. @classmethod
  176. def create_all_chems(cls):
  177. if not cls.__all_chems_by_id:
  178. cls.create_chem('AMPEA', [], ['Calusterone'], AttrValues(14, 7, 15, 8))
  179. cls.create_chem('BMA', [], ['Danazol', 'AMPK'], AttrValues(9, 15, 10, 16))
  180. cls.create_chem('DXAMPEA', [], ['XENOXY', 'Albumin'], AttrValues(8, 7, 11, 11))
  181. cls.create_chem('TST', [], ['Gonadorelin'], AttrValues(7, 13, 15, 9))
  182. cls.create_chem('THG', [], ['ARGOXY', 'Raloxifene'], AttrValues(17, 17, 14, 8))
  183. cls.create_chem('EPO', ['AMPEA'], ['THG'], AttrValues(13, 0, 0, -2))
  184. cls.create_chem('ARGOXY', ['EPO'], ['Formebolone'], AttrValues(10, -1, 0, 0))
  185. cls.create_chem('Bolasterone', [], ['Stanozolol', 'Formoterol'], AttrValues(-1, 11, 0, -1))
  186. cls.create_chem('Bolandiol', ['Bolasterone'], ['FGF'], AttrValues(-2, 8, 1, -3))
  187. cls.create_chem('Danazol', ['DXAMPEA'], ['FGF'], AttrValues(-2, 0, 15, 2))
  188. cls.create_chem('Formebolone', ['Danazol'], ['DHEA'], AttrValues(-1, 2, 10, 3))
  189. cls.create_chem('Gonadorelin', [], ['Anadrol'], AttrValues(2, 1, -4, 16))
  190. cls.create_chem('FGF', ['Gonadorelin'], ['Raloxifene', 'Stanozolol'], AttrValues(-5, 1, 3, 8))
  191. cls.create_chem('Raloxifene', ['ARGOXY'], ['DHEA'], AttrValues(0, 19, -15, -10))
  192. cls.create_chem('Cyclofenil', ['Bolandiol'], ['Anadrol'], AttrValues(18, -19, 29, 20))
  193. cls.create_chem('AMPK', ['Raloxifene'], ['BMA'], AttrValues(22, 0, -10, 32))
  194. cls.create_chem('Calusterone', ['Bolandiol'], ['Cyclofenil'], AttrValues(0, 10, 0, -2))
  195. cls.create_chem('DHEA', ['BMA'], ['Formebolone'], AttrValues(13, 12, 9, 19))
  196. cls.create_chem('Albumin', ['AMPK'], ['TST'], AttrValues(-10, 24, 30, 6))
  197. cls.create_chem('XENOXY', ['ARGOXY'], ['Formoterol', 'TST'], AttrValues(17, 0, 0, -4))
  198. cls.create_chem('Stanozolol', ['Formebolone'], ['Albumin'], AttrValues(0, 0, 9, 5))
  199. cls.create_chem('Anadrol', ['TST'], ['Mannitol'], AttrValues(17, 14, 11, 14))
  200. cls.create_chem('Formoterol', ['Formebolone', 'FGF'], ['AMPK'], AttrValues(2, 0, 2, 14))
  201. cls.create_chem('Mannitol', ['Danazol'], ['XENOXY'], AttrValues(38, 24, 0, -10))
  202. cls.create_chem('IGF-1', ['Albumin'], ['Formoterol'], AttrValues(-10, 19, 28, 31))
  203. return cls.__all_chems_by_id
  204.  
  205. class ChemSeriesSearch(object):
  206. """
  207. Search Chem series, no instance, not for multithread
  208. """
  209. search_branches_skipped = 0
  210. search_branches_skipped_local = 0
  211. search_chems_skipped = 0
  212. search_chems_skipped_local = 0
  213. current_chems_series_set = set()
  214.  
  215. @classmethod
  216. def get_all_chems_series(cls, chem_ids, chems_dic):
  217. cls.current_chems_series_set = set()
  218. cls.search_branches_skipped = 0
  219. cls.search_branches_skipped_local = 0
  220. cls.search_chems_skipped = 0
  221. cls.search_chems_skipped_local = 0
  222. empty_list = ChemSeries()
  223. empty_list.freeze()
  224. cls.current_chems_series_set = set([empty_list])
  225. chems = set()
  226. for chem_id in chem_ids:
  227. chems.add(chems_dic[chem_id])
  228. cls.add_all_chems_series(chems, {empty_list})
  229. return cls.current_chems_series_set
  230.  
  231. @classmethod
  232. def add_all_chems_series(cls, chems, chem_series_set):
  233. for chem in chems:
  234. new_chem_series_set = set()
  235. new_chems = copy.copy(chems)
  236. new_chems.remove(chem)
  237. for chem_series in chem_series_set:
  238. if chem_series.can_add_chem(chem):
  239. new_chem_series = copy.copy(chem_series)
  240. new_chem_series.add_chem_forced(chem)
  241. new_chem_series.freeze()
  242. if cls.add_chems_series(new_chem_series):
  243. new_chem_series_set.add(new_chem_series)
  244. if new_chems:
  245. cls.add_all_chems_series(new_chems, new_chem_series_set)
  246. else:
  247. cls.search_branches_skipped += 1
  248. cls.search_chems_skipped += len(new_chems) - 1
  249. else:
  250. cls.search_branches_skipped_local += 1
  251. cls.search_chems_skipped_local += len(new_chems) - 1
  252.  
  253. @classmethod
  254. def add_all_chems_series_brutal(cls, chems, chem_series_set):
  255. """
  256. Not used, but allows commpare results to "validate" add_all_chems_series
  257. cf. tu_select_best_series.py
  258. """
  259. for chem in chems:
  260. new_chem_series_set = set()
  261. new_chems = copy.copy(chems)
  262. new_chems.remove(chem)
  263. for chem_series in chem_series_set:
  264. if chem_series.can_add_chem(chem):
  265. new_chem_series = copy.copy(chem_series)
  266. new_chem_series.add_chem_forced(chem)
  267. new_chem_series.freeze()
  268. cls.add_chems_series(new_chem_series)
  269. new_chem_series_set.add(new_chem_series)
  270. if new_chems:
  271. cls.add_all_chems_series_brutal(new_chems, new_chem_series_set)
  272.  
  273. @classmethod
  274. def add_chems_series(cls, chem_series):
  275. ln = len(cls.current_chems_series_set)
  276. cls.current_chems_series_set.add(chem_series)
  277. return ln != len(cls.current_chems_series_set)
  278.  
  279. @classmethod
  280. def get_n_best_chem_series(cls, num_to_find, is_strictly_higher, chem_series_list):
  281. best_chem_series_list = [next(iter(chem_series_list))]
  282. for chem_series in chem_series_list:
  283. ln = len(best_chem_series_list)
  284. for i, best_chem_series in reversed(list(enumerate(best_chem_series_list))):
  285. if is_strictly_higher(chem_series, best_chem_series):
  286. best_chem_series_list.insert(i+1, chem_series)
  287. if (ln == num_to_find):
  288. del best_chem_series_list[0]
  289. break
  290. return best_chem_series_list
  291.  
  292. """
  293. select_best_series.py
  294.  
  295. Example of windows command line:
  296. select_best_series.py -c DXAMPEA TST THG EPO ARGOXY Bolasterone Gonadorelin BMA AMPEA
  297.  
  298. Windows command line to get the best series with all chems found:
  299. select_best_series.py
  300. """
  301.  
  302. class ChemTracing(object):
  303. @classmethod
  304. def print_chems(cls, chems):
  305. for chem in chems:
  306. print (chem.identifier, chem.attr_values.circulatory, chem.attr_values.circulatory,\
  307. chem.attr_values.respiratory, chem.attr_values.motoric, chem.attr_values.total)
  308.  
  309. @classmethod
  310. def print_results(cls, chem_series_list):
  311. print('found')
  312. for chem_series in chem_series_list:
  313. cls.print_chem_series(chem_series)
  314. print('end')
  315.  
  316. @classmethod
  317. def print_chem_series(cls, chem_series):
  318. if chem_series.chems_ordered:
  319. for chem in chem_series.get_chems_in_canonical_order():
  320. print(chem.identifier, end=' ')
  321. else:
  322. print('No chem', end=' ')
  323. print('[', end='')
  324. print(chem_series.current_values.circulatory, chem_series.current_values.sensory,\
  325. chem_series.current_values.respiratory, chem_series.current_values.motoric,\
  326. chem_series.current_values.total, sep=',', end='')
  327. print(']')
  328.  
  329. @classmethod
  330. def print_chem_series_results(cls, title, chem_series_list):
  331. print(len(chem_series_list), title)
  332. for chem_series in reversed(chem_series_list):
  333. cls.print_chem_series(chem_series)
  334. print()
  335.  
  336. class SelectBestSeriesMain(object):
  337.  
  338. number_of_best_results = 3
  339.  
  340. def get_best_chem_series_by_circulatory(self, chem_series_list):
  341. return ChemSeriesSearch.get_n_best_chem_series(self.number_of_best_results,
  342. lambda chem_ser1, chem_ser2: chem_ser1.current_values.circulatory > chem_ser2.current_values.circulatory,
  343. chem_series_list)
  344.  
  345. def get_best_chem_series_by_sensory(self, chem_series_list):
  346. return ChemSeriesSearch.get_n_best_chem_series(self.number_of_best_results,
  347. lambda chem_ser1, chem_ser2: chem_ser1.current_values.sensory > chem_ser2.current_values.sensory,
  348. chem_series_list)
  349.  
  350. def get_best_chem_series_by_respiratory(self, chem_series_list):
  351. return ChemSeriesSearch.get_n_best_chem_series(self.number_of_best_results,
  352. lambda chem_ser1, chem_ser2: chem_ser1.current_values.respiratory > chem_ser2.current_values.respiratory,
  353. chem_series_list)
  354.  
  355. def get_best_chem_series_by_motoric(self, chem_series_list):
  356. return ChemSeriesSearch.get_n_best_chem_series(self.number_of_best_results,
  357. lambda chem_ser1, chem_ser2: chem_ser1.current_values.motoric > chem_ser2.current_values.motoric,
  358. chem_series_list)
  359.  
  360. def get_best_chem_series_by_most_average(self, chem_series_list):
  361. return ChemSeriesSearch.get_n_best_chem_series(self.number_of_best_results,
  362. self.has_more_even_attributes,
  363. chem_series_list)
  364.  
  365. def get_best_chem_series_by_total(self, chem_series_list):
  366. return ChemSeriesSearch.get_n_best_chem_series(self.number_of_best_results,
  367. lambda chem_ser1, chem_ser2: chem_ser1.current_values.total > chem_ser2.current_values.total,
  368. chem_series_list)
  369.  
  370. def has_more_even_attributes(self, chem_ser1, chem_ser2):
  371. a1 = (self.get_attributes_ecartype(chem_ser2) + 1) / (self.get_attributes_ecartype(chem_ser1) + 1)
  372. a2 = (chem_ser1.current_values.total + 1) / (chem_ser2.current_values.total + 1)
  373. return a1 + a2 > 2
  374.  
  375. def moyenne(self, tableau):
  376. return sum(tableau, 0.0) / len(tableau)
  377.  
  378. def variance(self, tableau):
  379. m = self.moyenne(tableau)
  380. return self.moyenne([(x-m)**2 for x in tableau])
  381.  
  382. def ecartype(self, tableau):
  383. return self.variance(tableau)**0.5
  384.  
  385. def get_attributes_ecartype(self, chem_series):
  386. return (self.ecartype([chem_series.current_values.circulatory,
  387. chem_series.current_values.sensory,
  388. chem_series.current_values.respiratory,
  389. chem_series.current_values.motoric,
  390. ]))
  391.  
  392. def get_weight_from_average(self, average, chem_series):
  393. return -abs(average - chem_series.current_values.circulatory)\
  394. - abs(average - chem_series.current_values.sensory)\
  395. - abs(average - chem_series.current_values.respiratory)\
  396. - abs(average - chem_series.current_values.motoric)
  397.  
  398. def run(self, chem_ids, results_number):
  399. """
  400. Search all chem series for the given list of chem identifiers
  401. Print the best series found for each category
  402. """
  403. chems = ChemDataInit.create_all_chems()
  404. t0_get_all_chems_serie = time.time()
  405. # ChemSeriesSearch.add_all_chems_series = ChemSeriesSearch.add_all_chems_series_brutal
  406. chem_series_list = ChemSeriesSearch.get_all_chems_series(chem_ids, chems)
  407. t1_get_all_chems_serie = time.time()
  408. print('Local search branches avoided:', ChemSeriesSearch.search_branches_skipped_local)
  409. print('Global search branches avoided:', ChemSeriesSearch.search_branches_skipped)
  410. print('Local search elements avoided:', ChemSeriesSearch.search_chems_skipped_local)
  411. print('Global search elements avoided:', ChemSeriesSearch.search_chems_skipped)
  412. print('Number of different series found:', len(ChemSeriesSearch.current_chems_series_set))
  413. print('Search time:', t1_get_all_chems_serie - t0_get_all_chems_serie)
  414. t0_select_chems_serie = time.time()
  415. self.number_of_best_results = results_number
  416. best_chem_series_by_circulatory_list = self.get_best_chem_series_by_circulatory(chem_series_list)
  417. best_chem_series_by_sensory_list = self.get_best_chem_series_by_sensory(chem_series_list)
  418. best_chem_series_by_respiratory_list = self.get_best_chem_series_by_respiratory(chem_series_list)
  419. best_chem_series_by_motoric_list = self.get_best_chem_series_by_motoric(chem_series_list)
  420. best_chem_series_by_most_average_list = self.get_best_chem_series_by_most_average(chem_series_list)
  421. best_chem_series_by_total_list = self.get_best_chem_series_by_total(chem_series_list)
  422. t1_select_chems_serie = time.time()
  423. print('Selects time:', t1_select_chems_serie - t0_select_chems_serie)
  424. print()
  425. ChemTracing.print_chem_series_results('best circulatory series:', best_chem_series_by_circulatory_list)
  426. ChemTracing.print_chem_series_results('best sensory series:', best_chem_series_by_sensory_list)
  427. ChemTracing.print_chem_series_results('best respiratory series:', best_chem_series_by_respiratory_list)
  428. ChemTracing.print_chem_series_results('best motoric series:', best_chem_series_by_motoric_list)
  429. ChemTracing.print_chem_series_results('most average attributes series:', best_chem_series_by_most_average_list)
  430. ChemTracing.print_chem_series_results('best total series:', best_chem_series_by_total_list)
  431.  
  432. def main():
  433. """
  434. Go!
  435. """
  436. parser = argparse.ArgumentParser(
  437. description='Print the best series of chems for a list of chems already found',\
  438. epilog='Example: >select_best_series.py -c DXAMPEA TST THG EPO ARGOXY Bolasterone Gonadorelin BMA AMPEA')
  439. parser.add_argument('-c', '--chemids', nargs='+')
  440. parser.add_argument('-n', '--resultsnumber', type=int, default=5)
  441. arguments = parser.parse_args()
  442. chem_ids = arguments.chemids
  443. if not chem_ids:
  444. # if no argument search for all the chems
  445. all_chems = ChemDataInit.create_all_chems()
  446. chem_ids = []
  447. for chem_id in all_chems:
  448. chem_ids.append(chem_id)
  449. # chem_ids = ['DXAMPEA', 'TST', 'THG', 'EPO', 'ARGOXY', 'Bolasterone']
  450. SelectBestSeriesMain().run(chem_ids, arguments.resultsnumber)
  451.  
  452. if __name__=='__main__':
  453. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement