Package bpm :: Package cmdargs :: Module go
[hide private]
[frames] | no frames]

Source Code for Module bpm.cmdargs.go

  1  ''' 
  2  'go.py' sets up the command line arguments for the 'genecentric-go'  
  3  program. 
  4   
  5  It can send requests to Funcassociate in parallel, so it includes some
  6  preprocessing to determine sane defaults. (It also doesn't let the user set
  7  the total number of parallel processes too high; we want to be nice to
     Funcassociate.)
  8  ''' 
  9  import argparse 
 10  import multiprocessing as mp 
 11   
 12  import bpm 
 13  from bpm.cmdargs import assert_read_access 
 14   
 15  try: 
 16      __cpus = mp.cpu_count() 
 17   
 18      # be nice to Funcassociate by default 
 19      __default_cpus = min(6, mp.cpu_count()) 
 20  except NotImplementedError: 
 21      __cpus = 1 
 22      __default_cpus = 1 
 23   
 24  parser = argparse.ArgumentParser( 
 25      description='GO enrichment for BPMs', 
 26      formatter_class=argparse.ArgumentDefaultsHelpFormatter) 
 27  aa = parser.add_argument 
 28  aa('geneinter', type=str, 
 29     metavar='INPUT_GENETIC_INTERACTION_FILE', help='Location of the GI file.') 
 30  aa('bpm', type=str, 
 31     metavar='INPUT_BPM_FILE', help='Location of the BPM file.') 
 32  aa('enrichment', type=str, 
 33     metavar='OUTPUT_ENRICHMENT_FILE', help='Output file for GO enrichment.') 
 34  aa('-e', '--ignore-list', dest='ignore', type=str, default=None, 
 35     metavar='IGNORE_FILE', 
 36     help='The location of an ignore gene list file. (One gene per line.) ' 
 37          'Any genes in this file will be excluded from the set of genes used ' 
 38          'to generate BPMs.') 
 39  aa('-s', '--sort-go-by', dest='sort_go_by', type=str, default='p', 
 40     choices=['p', 'accession', 'name', 'num_genes_with'], 
 41     metavar='GO_SORT', 
 42     help='The field to sort GO enrichment by. "p" is the p-value of the ' 
 43          'GO term. "accession" is the GO identifier, i.e., "GO:...". "name" ' 
 44          'is the GO short description, i.e., "histone exchange". And ' 
 45          '"num_genes_with" is the number of genes in the BPM module that are ' 
 46          'enriched with a particular GO term.') 
 47  aa('-t', '--order-go', dest='order_go', type=str, default='asc', 
 48     choices=['asc', 'desc'], metavar='GO_ORDER', 
 49     help='The order in which to sort GO enrichment. "asc" for ascending, ' 
 50          'and "desc" for descending.') 
 51  aa('-p', '--processes', dest='processes', type=int, default=__default_cpus, 
 52     metavar='PROCESSES', 
 53     help='The number of processes to run in parallel. If set to ' 
 54          '1, the multiprocessing module will not be used. ' 
 55          'You should also be nice to Funcassociate and not set ' 
 56          'this too high.') 
 57  aa('--hide-enriched-genes', dest='hide_enriched_genes', action='store_true', 
 58     help='If set, the enriched genes for each GO term will not be written ' 
 59          'to the output file. This may (modestly) cut down on file size if ' 
 60          'there is a lot of enrichment.') 
 61   
 62  aa('--fa-species', dest='fa_species', type=str,  
 63     default='Saccharomyces cerevisiae', metavar='FA_SPECIES', 
 64     help='The species to be used by Funcassociate. Use the ' 
 65          '\'funcassociate-info\' command to get a list of available species.') 
 66  aa('--fa-namespace', dest='fa_namespace', type=str, 
 67     default='sgd_systematic', metavar='FA_NAMESPACE', 
 68     help='The namespace to be used by Funcassociate. This can vary depending ' 
 69          'upon the gene identifiers used in your E-MAP/SGA file. ' 
 70          'Use the \'funcassociate-info\' command to get a list of available ' 
 71          'namespaces for a given species.') 
 72  aa('--fa-cutoff', dest='fa_cutoff', type=float, default=0.05,  
 73     metavar='FA_CUTOFF', 
 74     help='The p-value cutoff for GO enrichment to be used with Funcassociate. ' 
 75          'It should be in the interval (0, 1].') 
 76  aa('--fa-species-genespace', dest='fa_species_genespace', action='store_true', 
 77     help='If set, FuncAssociate will be instructed to use all genes from the ' 
 78          'species in the background genespace. If not set, only the genes in ' 
 79          'the genetic interaction data will be used as the genespace.') 
 80   
 81  aa('--no-progress', dest='progress', action='store_false', 
 82     help='If set, the progress bar will not be shown.') 
 83  aa('-v', '--verbose', dest='verbose', action='store_true', 
 84     help='If set, more output will be shown.') 
 85   
 86  conf = parser.parse_args() 
 87   
 88  # Protect the user from themselves. 
 89  # If the provided number of processes is larger than the detected number of 
 90  # CPUs, forcefully lower it to the number of CPUs. 
 91  if conf.processes > __cpus: 
 92      conf.processes = __cpus 
 93   
 94  # Nice error messages if files don't exist... 
 95  assert_read_access(conf.geneinter) 
 96  assert_read_access(conf.bpm) 
 97  if conf.ignore: # optional file 
 98      assert_read_access(conf.ignore) 
 99   
100  # We don't care about squaring when doing GO enrichment. 
101  conf.squaring = False 
102   
103  # Set the global conf variable 
104  bpm.conf = conf 
105