Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Wed Jun 22 16:42:45 2016
- @author: Krishna
- """
- import re
# cl: runs a sequence of regex rules over s1 (and over br0 / br / br3) and, for
# each rule that matches, multiplies the score k by a fixed factor, sets vv to 1
# and/or sets d to 1 (commented below as "UP") or -1 (commented as "DOWN").
#
# Names used as inputs in the visible lines:
#   s1    - text searched by the "!Series_" rules, the "(control)" rule and the
#           "golden line" rules
#   gmk1  - keyword string; compared with literals such as 'activation', '-/-'
#           and tested for membership in the two keyword lists
#   gs1   - string interpolated into most of the regex patterns
#   data1 - only len(data1) is used here, as the bound of the `ii` loop
#
# NOTE(review): the leading "- " on every line and the flat indentation look
# like paste artefacts; the intended nesting cannot be read from this text.
# NOTE(review): k, vv, d, beg_gmk_up, beg_gmk_down, beg_other, br0, br and br3
# are used but never defined in the visible lines, and no return statement is
# visible although the header comment says the output is the confidence level.
# The listing appears to be abridged -- confirm against the complete file.
- def cl(s1, gmk1, gs1, data1): # output will be the confidence level
# Rule group 1: s1 starts with "!Series_" and later contains whitespace, a
# double quote, one of the beg_gmk_up phrases and another whitespace character.
# The phrase is interpolated unescaped, so regex metacharacters in it are live.
- for i in range(0,len(beg_gmk_up)):
- if re.search(r'(^!Series_.*?\s"%s\s)' %beg_gmk_up[i], s1, re.I|re.S): # manipulation - UP
- k=k*0.63
- vv=1
- d= 1
- # print ('gmks start7')
# Rule group 2: same pattern, phrases taken from beg_gmk_down; sets d to -1.
- for i in range(0,len(beg_gmk_down)):
- if re.search(r'(^!Series_.*?\s"%s\s)' %beg_gmk_down[i], s1, re.I|re.S): # manipulation - DOWN
- k=k*1.99
- vv=1
- d= -1
- # print ('gmks start7')
# Rule group 3: same pattern, phrases taken from beg_other; d is not touched.
- for i in range(0,len(beg_other)):
- if re.search(r'(^!Series_.*?\s"%s\s)' %beg_other[i], s1, re.I|re.S): # line starts with a golden PHRASE - no manipulation info
- k=k*3.71
- vv=1
- # print ('line start7')
# Keywords rule: s1 is split on commas and each piece is inspected.
- if re.search(r'(^!Series_\w.*?\s"Keywords:)', s1, re.I): # line starts with KEYWORDS - manipulation dependent on the gmk linked
- ll=s1.split(',')
- v=0
- # print (321321)
- for i in range(0,len(ll)):
# NOTE(review): `gmk1 and gs1 in ll[i]` is evaluated as
# `gmk1 and (gs1 in ll[i])`: only gs1 is tested for membership, gmk1 is only
# tested for truthiness. Confirm whether `gmk1 in ll[i]` was also intended.
- if gmk1 and gs1 in ll[i]:
- v=4
- k=k*4.43
- # print (123123123)
- # print (k)
- vv=1
# Direction comes from which literal list contains gmk1 (exact, case-sensitive
# list membership): first list -> d=-1, second list -> d=1.
- if gmk1 in ['loss of', 'deficient', 'knockout', 'haploinsufficiency', 'haploin-sufficiency', 'inactivation', 'knock-out', 'deletion', 'inhibition', 'silencing', '-/-', 'null', 'KO', 'knockdown', 'ko', 'lacking', 'mutant']:
- d=-1
- # print ('keyword start7_1')
- if gmk1 in ['treated', 'exposure', 'activation', 'induced', 'expressing', 'overexpression', 'overexpressing', 'stimulated', 'stimulation', 'over-activation', '+', 'treatment']:
- d=1
- # print ('keyword start7_2')
# v stays 0 unless the comma-separated loop above set it to 4. v is only
# assigned in the "Keywords:" branch, so this check presumably sits inside that
# branch -- TODO confirm nesting.
- if v!=4: # THINK OF REMOVING THIS RULE
- k=k*1.83
- # print ('no keywords')
- vv=1
# NOTE(review): unlike the other debug prints, this one is not commented out.
- print ('opposite of keyword start7')
# "(control)" rule: matches gs1 immediately followed by "(control)"; the body is
# a bare `None` expression, i.e. a no-op. gs1 is interpolated unescaped.
- if re.search(r'(%s\(control\))' %gs1, s1, re.I|re.S): #gs1(control) - no manipulation info
- # print ('control rule') ###############################
- None
# "Golden line" rules: no flags are passed, so these three searches are
# case-sensitive. The first is a no-op; the next two set d. They are separate
# `if` statements, so a later match can overwrite d set by an earlier one.
- if re.search(r'(The object of this study was to identify genes transcriptionally upregulated.*?downregulated)', s1):
- None
- # print ('golden line rule')
- if re.search(r'(The object of this study was to identify genes transcriptionally upregulated)', s1):
- # print ('golden line rule')
- d=1
- if re.search(r'(The object of this study was to identify genes transcriptionally downregulated)', s1):
- # print ('golden line rule')
- d=-1
# Loop over data1 by index. NOTE(review): `ii` is not referenced in the visible
# lines; br0, br and br3 are presumably derived from data1[ii] in lines missing
# from this listing -- verify.
- for ii in range(0,len(data1)):
# Full-stop rule: gs1 and gmk1 (in either order) separated by text containing a
# literal '.'. This is the only rule here that passes both through re.escape.
- if (re.search(r'(%s.*?\..*?%s)' % (re.escape(gs1), re.escape(gmk1)), br0, re.I|re.S)) or (re.search(r'(%s.*?\..*?%s)' % (re.escape(gmk1), re.escape(gs1)), br0, re.I|re.S)): #gs1 fullstop gmk1 and viceversa - no manipulation info
- k=k*0.87
- # print ('--fullstop')
# Rules specific to gmk1 == 'activation'. From here on gs1 is interpolated
# without re.escape, so metacharacters in gs1 act as regex syntax.
- if gmk1 == 'activation':
- if re.search(r'(activation by %s)' %gs1, br0, re.I|re.S): # this was meant to be a negative rule
- k=k*0.21
- vv=1
- d=1
- # print ('activation one') ##########################
# re.S and re.DOTALL are two names for the same flag; combining them is
# redundant but harmless.
- if re.search(r'(probably.*?%s activation)' %gs1, br, re.I|re.S|re.DOTALL):
# Multiplying by 1 leaves the numeric value of k unchanged.
- k=k*1
- vv=1
- d=1
- # print ('activation two') ##############################
- if re.search(r'(critical for.*?%s activation)' %gs1, br, re.I|re.S):
- k=k*0.1
- vv=1
- d=1
- # print ('activation three') ################################
- if re.search(r'(%s.*?activation of)' %gs1, br3, re.I|re.S):
- k=k*0.73
- vv=1
- d=1
- # print ('activation four') ################################
# Rules specific to gmk1 == 'deficient'; both set d to -1.
- if gmk1 == 'deficient':
- if re.search(r'(%s deficient)' %gs1, br0, re.I|re.S): # modification is DOWN
- k=k*3.45
- vv=1
- d=-1
- # print ('deficient one')
- if re.search(r'(deficient.+?exhibited.+?%s)' %gs1, br0, re.I|re.S|re.DOTALL):
- k=k*0.1
- vv=1
- d=-1
- # print ('deficient two') ###########################
# Rules specific to gmk1 == '-/-': "-/-" directly before gs1 only sets d;
# "-/-" directly after gs1 also scales k and sets vv.
- if gmk1 == '-/-':
- if re.search(r'(-/-%s)' %gs1, br0, re.I|re.S): #
- # print ('minus one rule') ###########################
- d=-1
- if re.search(r'(%s-/-)' %gs1, br0, re.I|re.S):
- k=k*2.78
- vv=1
- d=-1
- # print ('minus two')
# Rules specific to gmk1 == '+'.
- if gmk1 == '+':
# NOTE(review): in r'(%s+)' the '+' is a regex quantifier applied to the last
# element of gs1, not a literal plus sign (the next rule escapes it as \+).
# If a literal '+' after gs1 is intended, this needs confirming/fixing.
- if re.search(r'(%s+)' %gs1, br0, re.I|re.S):
- # print ('plus one rule')
- d=1
- if re.search(r'(%s\(\+\))' %gs1, br3, re.I|re.S):
- k=k*0.1
- vv=1
- d=1
- # print ('plus two')
# NOTE(review): this `else:` is followed only by comments (its `None`
# placeholder is commented out), so it has no visible body. With indentation
# lost it is also unclear which `if` it pairs with -- resolve against the
# original file.
- else:
- # print ('plus last option')
- # None
# Rules specific to gmk1 == 'induced'.
- if gmk1 == 'induced':
- if re.search(r'(%s.*?was shown.+?induced)' %gs1, br0, re.I|re.S|re.DOTALL):
- k=k*0.1
- vv=1
- d=1
- # print ('induced one') ###########################
- if re.search(r'(%s.*?-induced)' %gs1, br0, re.I|re.S):
- # print ('induced two rule') ###########################
- d=1
- if re.search(r'(induced.*?to %s)' %gs1, br0, re.I|re.S):
- k=k*0.41
- vv=1
- d=1
- # print ('induced three') ###########################
# Unlike the other 'induced' rules, this one does not assign d.
- if re.search(r'(induced.*?while %s)' %gs1, br0, re.I|re.S):
- k=k*0.1
- vv=1
- # print ('induced four') ###########################
# Rule specific to gmk1 == 'inhibition': only sets d; the pattern requires a
# trailing space after gs1.
- if gmk1 == 'inhibition':
- if re.search(r'(hypothesized\s.*?\sinhibition\s.*?\s%s )' %gs1, br, re.I|re.S):
- # print ('inhibition one rule') ###########################
- d=-1
Add Comment
Please, Sign In to add comment