1 '''
2 'faread.py' is a wrapper around 'faclient.py' that handles the nastiness of
3 the data we get back.
4 '''
5 import sys
6
7 from bpm import conf, geneinter, faclient
8
# NOTE(review): the ``def`` line was not present in the reviewed text. The
# parameter names come from the docstring and body; the function name is
# assumed from the faclient method this wraps -- confirm against callers.
def functionate(genes, modulecnt):
    '''
    Sends a geneset to Funcassociate and returns its GO enrichment results.

    Handles a lot of the nastiness of parsing the return results. Namely,
    using the entity attribute table to actually get the names of genes that
    correspond to each GO enrichment.

    :param genes: A set of gene identifiers.
    :param modulecnt: The total number of modules in the BPM file. This
                      corresponds to the 'reps' (repetitions) Funcassociate
                      parameter.
    :return: A dictionary keyed by GO accession. Each value is a dictionary
             with the keys 'name', 'p' (the 'adj_p' column as a float),
             'num_genespace_with', 'num_with', 'num_query' (integers) and
             'genes' (the set of genes that the entity attribute table
             associates with that accession).

    If the entity attribute table refers to a gene or a GO term that cannot
    be resolved, a message is written to stderr and the process exits with
    status 1. An AssertionError is raised if an accession is repeated in
    the response or if the number of genes collected for a term differs
    from its 'num_with' count.
    '''
    # An explicit genespace is only sent when the configuration does not
    # ask for the species-wide one.
    if conf.fa_species_genespace:
        genespace = None
    else:
        genespace = list(geneinter.genespace)

    c = faclient.FuncassociateClient()
    response = c.functionate(query=genes,
                             species=conf.fa_species,
                             namespace=conf.fa_namespace,
                             genespace=genespace,
                             reps=modulecnt,
                             cutoff=conf.fa_cutoff)

    # Rows of 'over' are positional; give each column a name.
    names = ['num_with', 'num_query', 'num_genespace_with',
             'log_odds', 'unadj_p', 'adj_p', 'accession', 'goname']
    overrep = [dict(zip(names, row)) for row in response['over']]

    # Index the enrichment rows by accession, with an empty gene set that
    # is filled in from the entity attribute table below.
    goterms = {}
    for row in overrep:
        assert row['accession'] not in goterms
        goterms[row['accession']] = {
            'name': row['goname'],
            'p': float(row['adj_p']),
            'num_genespace_with': int(row['num_genespace_with']),
            'num_with': int(row['num_with']),
            'num_query': int(row['num_query']),
            'genes': set(),
        }

    # Row i of 'table' lists indices into 'column_headers' (accessions) for
    # the gene named by 'row_headers'[i].
    enttable = response['entity_attribute_table']
    for geneind, goinds in enumerate(enttable['table']):
        if not goinds:
            # Nothing to attribute to this gene, so its name is not needed.
            continue

        try:
            gene = enttable['row_headers'][geneind]
        except IndexError:
            sys.stderr.write('A gene in the Funcassociate response'
                             ' could not be found.\n')
            sys.exit(1)

        for goind in goinds:
            try:
                # IndexError: bad column index. KeyError: the accession is
                # not among the over-represented terms (dict lookup).
                accession = enttable['column_headers'][goind]
                goterm = goterms[accession]
            except (IndexError, KeyError):
                sys.stderr.write('A GO term in the Funcassociate response'
                                 ' could not be found.\n')
                sys.exit(1)
            goterm['genes'].add(gene)

    # Sanity check: the genes gathered for each term must match the count
    # Funcassociate reported for it.
    for accession, goterm in goterms.items():
        assert len(goterm['genes']) == goterm['num_with'], \
                '%s: %s' % (accession, str(goterm))

    return goterms
91