1 '''
2 'partition.py' is a module that parallelizes the process of partitioning random
3 sets of genes. This is where most of the complexity of Genecentric is,
4 particularly in the 'localmaxcut' function.
5 '''
6 import random
7
8 from bpm import conf, geneinter, parallel
9
10
11 happyparts = None
12
27
29 '''
30 group_genes is applied to every gene, and a BPM is generated from *every*
31 gene. In particular, given M happy bipartitions, generate a BPM where
32 the first module contains all genes that appeared in the same set in the M
33 bipartitions C% of the time and the second module contains all genes
34 that appeared in the opposite set in the M bipartitions C% of the time.
35 '''
36 mod1, mod2 = [], []
37
38 for g2 in geneinter.genes:
39
40 freqsame = sum([1 for A, B in happyparts
41 if (g1 in A and g2 in A) or (g1 in B and g2 in B)])
42
43 ratio = float(freqsame) / conf.M
44 if ratio >= conf.C:
45 mod1.append(g2)
46 elif (1 - ratio) >= conf.C:
47 mod2.append(g2)
48
49 parallel.inc_counter()
50 parallel.print_progress()
51
52 return set(mod1), set(mod2)
53
55 '''
56 Generates a random bipartition and makes the bipartition 'happy' by
57 applying 'Weighted-Flip' (from Leiserson et al., 2011) until there are no
58 unhappy genes left.
59 '''
60 A, B = random_bipartition()
61
62 same_set = lambda g1, g2: (g1 in A and g2 in A) or (g1 in B and g2 in B)
63 def weights(g1):
64 '''
65 Calculates the total neighboring weight of 'g1'. The total
66 neighboring weight is a tuple of the sum of interactions in the same
67 set as g1 and the sum of interactions in the opposite set as g1.
68
69 The tuple in this case is represented by a dictionary with keys
70 'same' and 'other'. I'm using a dictionary because the values need
71 to be mutable; they change as we move vertices between the partitions.
72 '''
73 ws = { 'same': 0, 'other': 0 }
74 for g2 in geneinter.genes:
75 w = geneinter.gi(g1, g2)
76 if same_set(g1, g2):
77 ws['same'] += w
78 else:
79 ws['other'] += w
80 return ws
81
82 nweights = { g: weights(g) for g in geneinter.genes }
83 unhappy = get_unhappy(nweights)
84
85 while unhappy:
86 v = random.choice(unhappy)
87
88 if v in A:
89 A.remove(v)
90 B.add(v)
91 else:
92 A.add(v)
93 B.remove(v)
94
95
96
97
98
99
100
101
102 for g, nw in nweights.iteritems():
103 if g == v:
104 nw['same'], nw['other'] = nw['other'], nw['same']
105 continue
106
107
108
109 w = geneinter.gi(v, g)
110
111
112
113 if same_set(v, g):
114 nw['same'] += w
115 nw['other'] -= w
116 else:
117 nw['same'] -= w
118 nw['other'] += w
119
120
121 unhappy = get_unhappy(nweights)
122
123 parallel.inc_counter()
124 parallel.print_progress()
125
126 return A, B
127
129 '''
130 Returns all of the genes that are unhappy given a dictionary of
131 gene ids mapped to its total neighboring weights.
132 '''
133 return [ g for (g, _) in filter(lambda (g, w): w['same'] < w['other'],
134 nweights.items()) ]
135
137 '''
138 Creates two random sets of genes from the genetic interaction data.
139 '''
140 A, B = set(), set()
141 for g in geneinter.genes:
142 if random.random() < 0.5:
143 A.add(g)
144 else:
145 B.add(g)
146
147 return A, B
148