Package bpm :: Module prune
[hide private]
[frames] | no frames]

Source Code for Module bpm.prune

 1  ''' 
 2  'prune.py' provides all the functions required to prune a set of BPMs after 
 3  they have been generated. 
 4  ''' 
 5  from functools import partial 
 6  from itertools import combinations, product 
 7   
 8  from bpm import conf, geneinter, parallel 
 9   
10 -def prune(bpms):
11 ''' 12 After all BPMs are generated, two different pruning mechanisms are applied. 13 14 The first is pruning all BPMs that have a module less than the minimum size 15 or greater than the maximum size. If either is 0, then the pruning for that 16 constraint is skipped. 17 18 The second pruning mechanism is more complex. Essentially, the interaction 19 weight of each BPM is calculated (see 'interweight') and the list of BPMs 20 are then sorted by that interaction weight in descending order. Starting 21 from the beginning, BPMs are then added to final set of BPMs if and only if 22 its Jaccard index with every BPM already in the final set is less than 23 the threshold. 24 ''' 25 if conf.min_size > 0 or conf.max_size > 0: 26 bpms = filter(lambda (A, B): satisfy_min_max(A, B), bpms) 27 28 # If pruning is disabled, exit now. 29 if not conf.pruning: 30 return bpms 31 32 withI = parallel.pmap(interweight, bpms) 33 withI = sorted(withI, key=lambda (iw, (A, B)): iw, reverse=True) 34 35 pruned = [] 36 for iw, (A, B) in withI: 37 jind = partial(jaccard_index, A.union(B)) 38 if all(map(lambda ji: ji < conf.jaccard, 39 [jind(S1.union(S2)) for S1, S2 in pruned])): 40 pruned.append((A, B)) 41 42 return pruned
43
44 -def interweight((A, B)):
45 ''' 46 Calculates the interaction weight of a BPM. It is defined as the difference 47 of sums of interaction scores within each module and the sum of interaction 48 scores between each module divided by the number of genes in the entire BPM. 49 50 The value returned is a BPM "decorated" with the interaction weight for 51 sorting purposes. This roundabout means of decoration is used so that 52 parallelism can be used for calculating the interaction weights. (As 53 opposed to using a higher order function with 'sorted'.) 54 ''' 55 # For converting a tuple to two arguments 56 gitup = lambda (g1, g2): geneinter.gi(g1, g2) 57 58 def sum_within(S): 59 return sum(map(gitup, combinations(S, 2)))
60 61 within = sum_within(A) + sum_within(B) 62 between = sum(map(gitup, product(A, B))) 63 64 iweight = (within - between) / float(len(A) + len(B)) 65 66 return (iweight, (A, B)) 67
68 -def jaccard_index(bpm1, bpm2):
69 ''' 70 The Jaccard index of a BPM: the number of genes in the intersection of bpm1 71 and bpm2 divided by the number of genes in the union of bpm1 and bpm2. 72 73 A BPM is simply the union of its corresponding modules. 74 ''' 75 return len(bpm1.intersection(bpm2)) / float(len(bpm1.union(bpm2)))
76
77 -def constraint_min(A, B):
78 ''' 79 Whether a BPM's modules BOTH satisfy the minimum size constraint. 80 ''' 81 return len(A) >= conf.min_size and len(B) >= conf.min_size
82
83 -def constraint_max(A, B):
84 ''' 85 Whether a BPM's modules BOTH satisfy the maximum size constraint. 86 ''' 87 return len(A) <= conf.max_size and len(B) <= conf.max_size
88
89 -def satisfy_min_max(A, B):
90 ''' 91 Whether a BPM's modules satisfy both the min and max size constraints. 92 (Including if the constraints are disabled.) 93 ''' 94 return ((conf.min_size == 0 or constraint_min(A, B)) 95 and 96 (conf.max_size == 0 or constraint_max(A, B)))
97