Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import copy
- import argparse
- import time
- """
- chem_domain.py
- """
class Chem(object):
    """
    A chem, identified by a unique identifier.

    Equality, ordering and hashing rely on the identifier only, so two Chem
    objects sharing an identifier are interchangeable in sets and dicts.

    Attributes:
        identifier: unique name of the chem.
        requirements: chems that must all be present before this chem is allowed.
        blocks: chems blocked by this chem.
        attr_values: attribute values attached to the chem, stored as given.
    """

    def __init__(self, identifier, requirements, blocks, attr_values):
        self.identifier = identifier
        self.requirements = requirements
        self.blocks = blocks
        self.attr_values = attr_values

    def __repr__(self):
        return '{}({!r})'.format(type(self).__name__, self.identifier)

    def __eq__(self, chem):
        # Returning NotImplemented lets Python fall back to its default
        # comparison, so `chem == "text"` is False instead of raising
        # AttributeError on the missing `identifier`.
        if not isinstance(chem, Chem):
            return NotImplemented
        return self.identifier == chem.identifier

    def __ne__(self, chem):
        if not isinstance(chem, Chem):
            return NotImplemented
        return self.identifier != chem.identifier

    def __lt__(self, chem):
        if not isinstance(chem, Chem):
            return NotImplemented
        return self.identifier < chem.identifier

    def __hash__(self):
        # Must stay consistent with __eq__: same identifier, same hash.
        return hash(self.identifier)

    def is_chem_allowed_by(self, chems):
        """Return True when every required chem is contained in `chems`."""
        return all(required in chems for required in self.requirements)
class AttrValues(object):
    """
    Values generated by a chem, one per attribute.

    `total` is the sum of the four attributes; individual values may be
    negative. Instances are mutable (see add_values) and therefore unhashable.
    """

    def __init__(self, circulatory, sensory, respiratory, motoric):
        self.circulatory = circulatory
        self.sensory = sensory
        self.respiratory = respiratory
        self.motoric = motoric
        self.total = circulatory + sensory + respiratory + motoric

    def __repr__(self):
        return 'AttrValues({!r}, {!r}, {!r}, {!r})'.format(
            self.circulatory, self.sensory, self.respiratory, self.motoric)

    def copy(self):
        """Return a new AttrValues holding the same four values."""
        return AttrValues(self.circulatory, self.sensory, self.respiratory, self.motoric)

    # Lets the standard copy.copy() use the method above.
    __copy__ = copy

    def _as_tuple(self):
        # `total` is derived from the four attributes, so it is not compared.
        return (self.circulatory, self.sensory, self.respiratory, self.motoric)

    def __eq__(self, attr_values):
        # NotImplemented makes comparison with a foreign object (for example
        # None) evaluate to False instead of raising AttributeError.
        if not isinstance(attr_values, AttrValues):
            return NotImplemented
        return self._as_tuple() == attr_values._as_tuple()

    def __ne__(self, attr_values):
        if not isinstance(attr_values, AttrValues):
            return NotImplemented
        return self._as_tuple() != attr_values._as_tuple()

    # Defining __eq__ already disables hashing; stated explicitly because the
    # object is mutable and must not be used as a dict key or set member.
    __hash__ = None

    def add_values(self, attr_values):
        """Add the values of `attr_values` to this object, in place."""
        self.circulatory += attr_values.circulatory
        self.sensory += attr_values.sensory
        self.respiratory += attr_values.respiratory
        self.motoric += attr_values.motoric
        self.total += attr_values.total
class ChemSeries(object):
    """
    A series of chems.

    current_values is the sum of the values of the chems in the series and
    blocks holds every chem blocked by a chem already in the series.

    Equality and hashing only consider the set of chems, not their order.
    A series can be used as a dict key or in a set, but only after freeze()
    has been called, and it must not be modified afterwards.
    """

    def __init__(self):
        self.current_values = AttrValues(0, 0, 0, 0)
        self.chems_ordered = []
        self.chems = set()
        self.blocks = set()

    def copy(self):
        """Return an independent, modifiable (not frozen) copy of the series."""
        my_copy = ChemSeries()
        my_copy.current_values = copy.copy(self.current_values)
        my_copy.chems_ordered = list(self.chems_ordered)
        # set(...) also turns the frozensets of a frozen series back into sets.
        my_copy.chems = set(self.chems)
        my_copy.blocks = set(self.blocks)
        return my_copy

    # Lets the standard copy.copy() use the method above.
    __copy__ = copy

    def __eq__(self, chem_series):
        # NotImplemented makes comparison with a foreign object evaluate to
        # False instead of raising AttributeError on the missing `chems`.
        if not isinstance(chem_series, ChemSeries):
            return NotImplemented
        return self.chems == chem_series.chems

    def __ne__(self, chem_series):
        if not isinstance(chem_series, ChemSeries):
            return NotImplemented
        return self.chems != chem_series.chems

    def __hash__(self):
        """
        Hash of the set of chems.

        Raises TypeError ("unhashable type") when the series has not been
        frozen, because a plain set cannot be hashed.
        """
        return hash(self.chems)

    def freeze(self):
        """Make the series hashable by freezing its chems and blocks sets."""
        # chems_ordered and current_values are left alone: only the sets
        # matter for hashing and equality.
        self.chems = frozenset(self.chems)
        self.blocks = frozenset(self.blocks)

    def can_add_chem(self, chem):
        """Return True when `chem` is not blocked and its requirements are met."""
        return chem not in self.blocks and chem.is_chem_allowed_by(self.chems)

    def add_chem_forced(self, chem):
        """Add `chem` without checking blocks or requirements."""
        self.chems.add(chem)
        self.chems_ordered.append(chem)
        self.blocks.update(chem.blocks)
        self.current_values.add_values(chem.attr_values)

    def add_chem(self, chem):
        """Add `chem` if allowed; return True when it was added, else False."""
        if not self.can_add_chem(chem):
            return False
        self.add_chem_forced(chem)
        return True

    def get_chems_in_canonical_order(self):
        """
        Return the chems sorted alphabetically wherever the order does not matter.

        This is a bubble sort on the identifier in which two neighbours are
        only swapped when the second one neither blocks nor requires the
        first one. chems_ordered itself is left untouched.
        """
        sorted_chems = list(self.chems_ordered)
        ln = len(sorted_chems)
        for pos in range(ln):
            for i in range(0, ln - pos - 1):
                first, second = sorted_chems[i], sorted_chems[i + 1]
                if first.identifier > second.identifier \
                        and first not in second.blocks \
                        and first not in second.requirements:
                    sorted_chems[i], sorted_chems[i + 1] = second, first
        return sorted_chems
- """
- chem_series_search.py
- """
class ChemDataInit(object):
    """
    Registry of all known chems, indexed by identifier.

    Chems reference each other through requirements and blocks, so a chem
    may be registered as an empty placeholder before it is fully described.
    """

    __all_chems_by_id = {}

    @classmethod
    def get_chem(cls, chem_id):
        """Return the registered chem for `chem_id`, registering a placeholder if needed."""
        registry = cls.__all_chems_by_id
        chem = registry.get(chem_id)
        if not chem:
            chem = Chem(chem_id, [], [], None)
        registry[chem_id] = chem
        return chem

    @classmethod
    def get_chems(cls, chem_ids):
        """Return the list of registered chems matching `chem_ids`, in the same order."""
        return [cls.get_chem(chem_id) for chem_id in chem_ids]

    @classmethod
    def create_chem(cls, identifier, requirement_ids, block_ids, attr_values):
        """Fill in the values, requirements and blocks of the chem `identifier`."""
        described = cls.get_chem(identifier)
        described.attr_values = attr_values
        described.requirements = cls.get_chems(requirement_ids)
        described.blocks = cls.get_chems(block_ids)

    @classmethod
    def create_all_chems(cls):
        """
        Register every chem on first call and return the identifier -> chem dict.

        Nothing is created when the registry already contains something.
        """
        if cls.__all_chems_by_id:
            return cls.__all_chems_by_id

        # (identifier, requirement ids, block ids,
        #  (circulatory, sensory, respiratory, motoric))
        definitions = (
            ('AMPEA', [], ['Calusterone'], (14, 7, 15, 8)),
            ('BMA', [], ['Danazol', 'AMPK'], (9, 15, 10, 16)),
            ('DXAMPEA', [], ['XENOXY', 'Albumin'], (8, 7, 11, 11)),
            ('TST', [], ['Gonadorelin'], (7, 13, 15, 9)),
            ('THG', [], ['ARGOXY', 'Raloxifene'], (17, 17, 14, 8)),
            ('EPO', ['AMPEA'], ['THG'], (13, 0, 0, -2)),
            ('ARGOXY', ['EPO'], ['Formebolone'], (10, -1, 0, 0)),
            ('Bolasterone', [], ['Stanozolol', 'Formoterol'], (-1, 11, 0, -1)),
            ('Bolandiol', ['Bolasterone'], ['FGF'], (-2, 8, 1, -3)),
            ('Danazol', ['DXAMPEA'], ['FGF'], (-2, 0, 15, 2)),
            ('Formebolone', ['Danazol'], ['DHEA'], (-1, 2, 10, 3)),
            ('Gonadorelin', [], ['Anadrol'], (2, 1, -4, 16)),
            ('FGF', ['Gonadorelin'], ['Raloxifene', 'Stanozolol'], (-5, 1, 3, 8)),
            ('Raloxifene', ['ARGOXY'], ['DHEA'], (0, 19, -15, -10)),
            ('Cyclofenil', ['Bolandiol'], ['Anadrol'], (18, -19, 29, 20)),
            ('AMPK', ['Raloxifene'], ['BMA'], (22, 0, -10, 32)),
            ('Calusterone', ['Bolandiol'], ['Cyclofenil'], (0, 10, 0, -2)),
            ('DHEA', ['BMA'], ['Formebolone'], (13, 12, 9, 19)),
            ('Albumin', ['AMPK'], ['TST'], (-10, 24, 30, 6)),
            ('XENOXY', ['ARGOXY'], ['Formoterol', 'TST'], (17, 0, 0, -4)),
            ('Stanozolol', ['Formebolone'], ['Albumin'], (0, 0, 9, 5)),
            ('Anadrol', ['TST'], ['Mannitol'], (17, 14, 11, 14)),
            ('Formoterol', ['Formebolone', 'FGF'], ['AMPK'], (2, 0, 2, 14)),
            ('Mannitol', ['Danazol'], ['XENOXY'], (38, 24, 0, -10)),
            ('IGF-1', ['Albumin'], ['Formoterol'], (-10, 19, 28, 31)),
        )
        for identifier, requirement_ids, block_ids, values in definitions:
            cls.create_chem(identifier, requirement_ids, block_ids, AttrValues(*values))
        return cls.__all_chems_by_id
class ChemSeriesSearch(object):
    """
    Search chem series.

    Works on class-level state only (no instance) and is not meant to be
    used from several threads.
    """

    # Statistics of the last search: "global" counts series skipped because
    # they had already been found, "local" counts chems that could not be
    # added to a series.
    search_branches_skipped = 0
    search_branches_skipped_local = 0
    search_chems_skipped = 0
    search_chems_skipped_local = 0
    # Every distinct (frozen) series found by the last search.
    current_chems_series_set = set()

    @classmethod
    def get_all_chems_series(cls, chem_ids, chems_dic):
        """
        Find every distinct series that can be built from the given chems.

        Args:
            chem_ids: identifiers of the chems available for the search.
            chems_dic: dict mapping an identifier to its chem.

        Returns:
            The set of series found, including the empty series.

        Raises:
            KeyError: when an identifier is missing from `chems_dic`.
        """
        cls.search_branches_skipped = 0
        cls.search_branches_skipped_local = 0
        cls.search_chems_skipped = 0
        cls.search_chems_skipped_local = 0
        empty_list = ChemSeries()
        empty_list.freeze()
        cls.current_chems_series_set = {empty_list}
        chems = {chems_dic[chem_id] for chem_id in chem_ids}
        cls.add_all_chems_series(chems, {empty_list})
        return cls.current_chems_series_set

    @classmethod
    def add_all_chems_series(cls, chems, chem_series_set):
        """
        Recursively extend each series of `chem_series_set` with each chem of `chems`.

        A series whose set of chems was already recorded is not explored
        again, and the skip counters are updated accordingly.
        """
        for chem in chems:
            new_chem_series_set = set()
            new_chems = copy.copy(chems)
            new_chems.remove(chem)
            for chem_series in chem_series_set:
                if chem_series.can_add_chem(chem):
                    new_chem_series = copy.copy(chem_series)
                    new_chem_series.add_chem_forced(chem)
                    new_chem_series.freeze()
                    if cls.add_chems_series(new_chem_series):
                        new_chem_series_set.add(new_chem_series)
                        if new_chems:
                            cls.add_all_chems_series(new_chems, new_chem_series_set)
                    else:
                        # Same set of chems already found through another order.
                        cls.search_branches_skipped += 1
                        cls.search_chems_skipped += len(new_chems) - 1
                else:
                    # The chem is blocked or misses a requirement in this series.
                    cls.search_branches_skipped_local += 1
                    cls.search_chems_skipped_local += len(new_chems) - 1

    @classmethod
    def add_all_chems_series_brutal(cls, chems, chem_series_set):
        """
        Same search as add_all_chems_series without skipping known series.

        Not used, but allows comparing results to "validate" add_all_chems_series
        cf. tu_select_best_series.py
        """
        for chem in chems:
            new_chem_series_set = set()
            new_chems = copy.copy(chems)
            new_chems.remove(chem)
            for chem_series in chem_series_set:
                if chem_series.can_add_chem(chem):
                    new_chem_series = copy.copy(chem_series)
                    new_chem_series.add_chem_forced(chem)
                    new_chem_series.freeze()
                    cls.add_chems_series(new_chem_series)
                    new_chem_series_set.add(new_chem_series)
                    if new_chems:
                        cls.add_all_chems_series_brutal(new_chems, new_chem_series_set)

    @classmethod
    def add_chems_series(cls, chem_series):
        """Record `chem_series`; return True when it was not already recorded."""
        ln = len(cls.current_chems_series_set)
        cls.current_chems_series_set.add(chem_series)
        return ln != len(cls.current_chems_series_set)

    @classmethod
    def get_n_best_chem_series(cls, num_to_find, is_strictly_higher, chem_series_list):
        """
        Return at most `num_to_find` best elements of `chem_series_list`.

        Args:
            num_to_find: maximum number of elements to return.
            is_strictly_higher: callable(a, b) returning True when a ranks
                strictly above b.
            chem_series_list: iterable of candidates.

        Returns:
            A list sorted from worst to best (the best element is last).
            It is empty when there is no candidate or `num_to_find` < 1.
        """
        if num_to_find < 1:
            return []
        best_chem_series_list = []
        for chem_series in chem_series_list:
            # Scan from best to worst for the first kept element the
            # candidate beats; it belongs right above that element.
            insert_at = 0
            for i in range(len(best_chem_series_list) - 1, -1, -1):
                if is_strictly_higher(chem_series, best_chem_series_list[i]):
                    insert_at = i + 1
                    break
            if len(best_chem_series_list) < num_to_find:
                # Still room: keep the candidate even when it beats nothing,
                # otherwise the result would depend on the iteration order.
                best_chem_series_list.insert(insert_at, chem_series)
            elif insert_at > 0:
                # Full: the candidate pushes out the current worst element.
                best_chem_series_list.insert(insert_at, chem_series)
                del best_chem_series_list[0]
        return best_chem_series_list
- """
- select_best_series.py
- Example of windows command line:
- select_best_series.py -c DXAMPEA TST THG EPO ARGOXY Bolasterone Gonadorelin BMA AMPEA
- Windows command line to get the best series with all chems found:
- select_best_series.py
- """
class ChemTracing(object):
    """Console output helpers for chems and chem series."""

    @classmethod
    def print_chems(cls, chems):
        """Print one line per chem: identifier, the four values and the total."""
        for chem in chems:
            values = chem.attr_values
            # The second column is `sensory`; it used to repeat `circulatory`.
            print(chem.identifier, values.circulatory, values.sensory,
                  values.respiratory, values.motoric, values.total)

    @classmethod
    def print_results(cls, chem_series_list):
        """Print every series of `chem_series_list` between 'found' and 'end'."""
        print('found')
        for chem_series in chem_series_list:
            cls.print_chem_series(chem_series)
        print('end')

    @classmethod
    def print_chem_series(cls, chem_series):
        """
        Print one series on one line.

        The line holds the identifiers in canonical order (or 'No chem'),
        followed by [circulatory,sensory,respiratory,motoric,total].
        """
        if chem_series.chems_ordered:
            for chem in chem_series.get_chems_in_canonical_order():
                print(chem.identifier, end=' ')
        else:
            print('No chem', end=' ')
        values = chem_series.current_values
        print('[', end='')
        print(values.circulatory, values.sensory, values.respiratory,
              values.motoric, values.total, sep=',', end='')
        print(']')

    @classmethod
    def print_chem_series_results(cls, title, chem_series_list):
        """Print the number of series and `title`, then the series in reverse order."""
        print(len(chem_series_list), title)
        for chem_series in reversed(chem_series_list):
            cls.print_chem_series(chem_series)
        # Blank line separating this group from the next one.
        print()
class SelectBestSeriesMain(object):
    """
    Search every chem series and report the best ones for each category.
    """

    # Number of series kept per category; overwritten by run().
    number_of_best_results = 3

    def _select_best(self, is_strictly_higher, chem_series_list):
        # Shared call used by every get_best_chem_series_by_* method.
        return ChemSeriesSearch.get_n_best_chem_series(
            self.number_of_best_results, is_strictly_higher, chem_series_list)

    def _select_best_by_value(self, value_name, chem_series_list):
        # Rank the series on a single attribute of their current_values.
        def is_strictly_higher(chem_ser1, chem_ser2):
            return getattr(chem_ser1.current_values, value_name) \
                > getattr(chem_ser2.current_values, value_name)
        return self._select_best(is_strictly_higher, chem_series_list)

    def get_best_chem_series_by_circulatory(self, chem_series_list):
        """Return the best series ranked on the circulatory value."""
        return self._select_best_by_value('circulatory', chem_series_list)

    def get_best_chem_series_by_sensory(self, chem_series_list):
        """Return the best series ranked on the sensory value."""
        return self._select_best_by_value('sensory', chem_series_list)

    def get_best_chem_series_by_respiratory(self, chem_series_list):
        """Return the best series ranked on the respiratory value."""
        return self._select_best_by_value('respiratory', chem_series_list)

    def get_best_chem_series_by_motoric(self, chem_series_list):
        """Return the best series ranked on the motoric value."""
        return self._select_best_by_value('motoric', chem_series_list)

    def get_best_chem_series_by_most_average(self, chem_series_list):
        """Return the best series ranked with has_more_even_attributes."""
        return self._select_best(self.has_more_even_attributes, chem_series_list)

    def get_best_chem_series_by_total(self, chem_series_list):
        """Return the best series ranked on the total value."""
        return self._select_best_by_value('total', chem_series_list)

    def has_more_even_attributes(self, chem_ser1, chem_ser2):
        """
        Return True when chem_ser1 ranks above chem_ser2 for "even" attributes.

        Two ratios are added: the standard deviation of chem_ser2 over the one
        of chem_ser1, and the total of chem_ser1 over the one of chem_ser2
        (each term shifted by 1). The sum must exceed 2.
        """
        spread_ratio = (self.get_attributes_ecartype(chem_ser2) + 1) \
            / (self.get_attributes_ecartype(chem_ser1) + 1)
        total_ratio = (chem_ser1.current_values.total + 1) \
            / (chem_ser2.current_values.total + 1)
        return spread_ratio + total_ratio > 2

    def moyenne(self, tableau):
        """Return the arithmetic mean of `tableau` as a float."""
        return sum(tableau, 0.0) / len(tableau)

    def variance(self, tableau):
        """Return the population variance of `tableau`."""
        mean = self.moyenne(tableau)
        squared_gaps = [(value - mean) ** 2 for value in tableau]
        return self.moyenne(squared_gaps)

    def ecartype(self, tableau):
        """Return the population standard deviation of `tableau`."""
        return self.variance(tableau) ** 0.5

    def get_attributes_ecartype(self, chem_series):
        """Return the standard deviation of the four attribute values of a series."""
        values = chem_series.current_values
        return self.ecartype([values.circulatory, values.sensory,
                              values.respiratory, values.motoric])

    def get_weight_from_average(self, average, chem_series):
        """Return minus the sum of the absolute gaps between `average` and each attribute."""
        values = chem_series.current_values
        return -abs(average - values.circulatory) \
            - abs(average - values.sensory) \
            - abs(average - values.respiratory) \
            - abs(average - values.motoric)

    def run(self, chem_ids, results_number):
        """
        Search all chem series for the given list of chem identifiers.

        Prints the search statistics and timings, then the best series found
        for each category, keeping `results_number` series per category.
        """
        all_chems = ChemDataInit.create_all_chems()

        # To cross-check the search, ChemSeriesSearch.add_all_chems_series can
        # be replaced with ChemSeriesSearch.add_all_chems_series_brutal here.
        search_started = time.time()
        chem_series_list = ChemSeriesSearch.get_all_chems_series(chem_ids, all_chems)
        search_ended = time.time()

        statistics = (
            ('Local search branches avoided:', ChemSeriesSearch.search_branches_skipped_local),
            ('Global search branches avoided:', ChemSeriesSearch.search_branches_skipped),
            ('Local search elements avoided:', ChemSeriesSearch.search_chems_skipped_local),
            ('Global search elements avoided:', ChemSeriesSearch.search_chems_skipped),
            ('Number of different series found:', len(ChemSeriesSearch.current_chems_series_set)),
        )
        for label, count in statistics:
            print(label, count)
        print('Search time:', search_ended - search_started)

        selection_started = time.time()
        self.number_of_best_results = results_number
        categories = (
            ('best circulatory series:', self.get_best_chem_series_by_circulatory),
            ('best sensory series:', self.get_best_chem_series_by_sensory),
            ('best respiratory series:', self.get_best_chem_series_by_respiratory),
            ('best motoric series:', self.get_best_chem_series_by_motoric),
            ('most average attributes series:', self.get_best_chem_series_by_most_average),
            ('best total series:', self.get_best_chem_series_by_total),
        )
        # Every selection is computed before anything is printed so that the
        # reported time only covers the selections.
        selections = [(title, select(chem_series_list)) for title, select in categories]
        selection_ended = time.time()
        print('Selects time:', selection_ended - selection_started)
        print()

        for title, best_chem_series_list in selections:
            ChemTracing.print_chem_series_results(title, best_chem_series_list)
def main():
    """
    Command-line entry point.

    Reads the chem identifiers (-c/--chemids) and the number of results per
    category (-n/--resultsnumber), then runs the search. When no identifier
    is given, every known chem is used.
    """
    parser = argparse.ArgumentParser(
        description='Print the best series of chems for a list of chems already found',
        epilog='Example: >select_best_series.py -c DXAMPEA TST THG EPO ARGOXY Bolasterone Gonadorelin BMA AMPEA',
    )
    parser.add_argument('-c', '--chemids', nargs='+')
    parser.add_argument('-n', '--resultsnumber', type=int, default=5)
    arguments = parser.parse_args()

    chem_ids = arguments.chemids
    if not chem_ids:
        # No identifier on the command line: search with all the chems.
        chem_ids = list(ChemDataInit.create_all_chems())

    SelectBestSeriesMain().run(chem_ids, arguments.resultsnumber)
# Run the command-line entry point only when the file is executed as a
# script, not when it is imported as a module.
if __name__=='__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement