Package bpm :: Module faread
[hide private]
[frames] | no frames]

Source Code for Module bpm.faread

 1  ''' 
 2  'faread.py' is a wrapper around 'faclient.py' that handles the nastiness of 
 3  the data we get back. 
 4  ''' 
 5  import sys 
 6   
 7  from bpm import conf, geneinter, faclient 
 8   
9 -def functionate(genes, modulecnt):
10 ''' 11 Sends a geneset to Funcassociate and returns its Go enrichment results. 12 13 Handles a lot of the nastiness of parsing the return results. Namely, 14 using the entity attribute table to actually get the names of genes that 15 correspond to each GO enrichment. 16 17 :param genes: A set of gene identifiers. 18 :param modulecnt: The total number of modules in the BPM file. This 19 corresponds to the 'reps' (reptitions) Funcassociate 20 parameter. 21 ''' 22 if conf.fa_species_genespace: 23 genespace = None 24 else: 25 genespace = list(geneinter.genespace) 26 27 c = faclient.FuncassociateClient() 28 response = c.functionate(query=genes, 29 species=conf.fa_species, 30 namespace=conf.fa_namespace, 31 genespace=genespace, 32 reps=modulecnt, 33 cutoff=conf.fa_cutoff) 34 35 # Lets label the info for each GO term, shall we? 36 # Names ending in 'with' should be read as, for example: 37 # 'Number of genes in genespace *with* this GO term annotation' 38 names = ['num_with', 'num_query', 'num_genespace_with', 39 'log_odds', 'unadj_p', 'adj_p', 'accession', 'goname'] 40 overrep = [dict(zip(names, row)) for row in response['over']] 41 42 # Let's make a dict keyed by GO term 43 goterms = {} 44 for row in overrep: 45 assert row['accession'] not in goterms 46 goterms[row['accession']] = { 47 'name': row['goname'], 48 'p': float(row['adj_p']), 49 'num_genespace_with': int(row['num_genespace_with']), 50 'num_with': int(row['num_with']), 51 'num_query': int(row['num_query']), 52 'genes': set(), # see below 53 } 54 55 # Now go back and annotate the GO terms with the genes that were 56 # enriched for that GO term. 57 # 58 # 'column_headers' are the GO terms 59 # 'row_headers' are the genes 60 # 'table' is a list of lists of indexes into 'column_headers' where 61 # the index of each list corresponds to an index in 'row_headers'. 62 # Thus, we have a mapping from GO term to gene :-) 63 enttable = response['entity_attribute_table'] 64 for geneind, goinds in enumerate(enttable['table']): 65 for accession in [enttable['column_headers'][i] for i in goinds]: 66 try: 67 gene = enttable['row_headers'][geneind] 68 except IndexError: 69 print >> sys.stderr, 'A gene in the Funcassociate response' \ 70 ' could not be found.' 71 sys.exit(1) 72 73 try: 74 goterms[accession]['genes'].add(gene) 75 except IndexError: 76 print >> sys.stderr, 'A GO term in the Funcassociate response' \ 77 ' could not be found.' 78 sys.exit(1) 79 80 # This is commented out because there is a bug in Funcassociate 81 # where the enrichment counts don't match the entity-attribute 82 # correspondence. 83 # N.B. This was actually a performance limitation that has since 84 # been lifted at my request. If it comes back, this assertion 85 # will have to be removed. 86 for accession, goterm in goterms.iteritems(): 87 assert len(goterm['genes']) == goterm['num_with'], \ 88 '%s: %s' % (accession, str(goterm)) 89 90 return goterms
91