1 '''
2 'go.py' sets up the command line arguments for the 'genecentric-go'
3 program.
4
It can send requests to Funcassociate in parallel, so it includes some
preprocessing to determine sane defaults. (And we don't let the user set the
total number of parallel processes too high; we want to be nice to
Funcassociate.)
8 '''
9 import argparse
10 import multiprocessing as mp
11
12 import bpm
13 from bpm.cmdargs import assert_read_access
14
# Detect how many CPUs are available. mp.cpu_count() raises
# NotImplementedError when the count cannot be determined; in that case
# assume a single CPU.
try:
    __cpus = mp.cpu_count()
except NotImplementedError:
    __cpus = 1

# Default level of parallelism: never more than 6 processes (to be nice to
# Funcassociate) and never more than the detected CPU count.
__default_cpus = min(6, __cpus)
23
# Command line interface for the GO enrichment program. The
# ArgumentDefaultsHelpFormatter adds each option's default value to its help
# text.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='GO enrichment for BPMs',
)

# Short module-level alias for registering arguments.
aa = parser.add_argument

# ---------------------------------------------------------------------------
# Positional arguments: two input files followed by the output file.
# ---------------------------------------------------------------------------
parser.add_argument(
    'geneinter',
    type=str,
    metavar='INPUT_GENETIC_INTERACTION_FILE',
    help='Location of the GI file.',
)
parser.add_argument(
    'bpm',
    type=str,
    metavar='INPUT_BPM_FILE',
    help='Location of the BPM file.',
)
parser.add_argument(
    'enrichment',
    type=str,
    metavar='OUTPUT_ENRICHMENT_FILE',
    help='Output file for GO enrichment.',
)

# ---------------------------------------------------------------------------
# General options: gene filtering, output ordering, parallelism.
# ---------------------------------------------------------------------------
parser.add_argument(
    '-e', '--ignore-list',
    dest='ignore',
    type=str,
    default=None,
    metavar='IGNORE_FILE',
    help=(
        'The location of an ignore gene list file. (One gene per line.) '
        'Any genes in this file will be excluded from the set of genes used '
        'to generate BPMs.'
    ),
)
parser.add_argument(
    '-s', '--sort-go-by',
    dest='sort_go_by',
    type=str,
    default='p',
    choices=['p', 'accession', 'name', 'num_genes_with'],
    metavar='GO_SORT',
    help=(
        'The field to sort GO enrichment by. "p" is the p-value of the '
        'GO term. "accession" is the GO identifier, i.e., "GO:...". "name" '
        'is the GO short description, i.e., "histone exchange". And '
        '"num_genes_with" is the number of genes in the BPM module that are '
        'enriched with a particular GO term.'
    ),
)
parser.add_argument(
    '-t', '--order-go',
    dest='order_go',
    type=str,
    default='asc',
    choices=['asc', 'desc'],
    metavar='GO_ORDER',
    help=(
        'The order in which to sort GO enrichment. "asc" for ascending, '
        'and "desc" for descending.'
    ),
)
parser.add_argument(
    '-p', '--processes',
    dest='processes',
    type=int,
    default=__default_cpus,
    metavar='PROCESSES',
    help=(
        'The number of processes to run in parallel. If set to '
        '1, the multiprocessing module will not be used. '
        'You should also be nice to Funcassociate and not set '
        'this too high.'
    ),
)
parser.add_argument(
    '--hide-enriched-genes',
    dest='hide_enriched_genes',
    action='store_true',
    help=(
        'If set, the enriched genes for each GO term will not be written '
        'to the output file. This may (modestly) cut down on file size if '
        'there is a lot of enrichment.'
    ),
)

# ---------------------------------------------------------------------------
# Funcassociate options (all prefixed with --fa-).
# ---------------------------------------------------------------------------
parser.add_argument(
    '--fa-species',
    dest='fa_species',
    type=str,
    default='Saccharomyces cerevisiae',
    metavar='FA_SPECIES',
    help=(
        'The species to be used by Funcassociate. Use the '
        '\'funcassociate-info\' command to get a list of available species.'
    ),
)
parser.add_argument(
    '--fa-namespace',
    dest='fa_namespace',
    type=str,
    default='sgd_systematic',
    metavar='FA_NAMESPACE',
    help=(
        'The namespace to be used by Funcassociate. This can vary depending '
        'upon the gene identifiers used in your E-MAP/SGA file. '
        'Use the \'funcassociate-info\' command to get a list of available '
        'namespaces for a given species.'
    ),
)
parser.add_argument(
    '--fa-cutoff',
    dest='fa_cutoff',
    type=float,
    default=0.05,
    metavar='FA_CUTOFF',
    help=(
        'The p-value cutoff for GO enrichment to be used with Funcassociate. '
        'It should be in the interval (0, 1].'
    ),
)
parser.add_argument(
    '--fa-species-genespace',
    dest='fa_species_genespace',
    action='store_true',
    help=(
        'If set, FuncAssociate will be instructed to use all genes from the '
        'species in the background genespace. If not set, only the genes in '
        'the genetic interaction data will be used as the genespace.'
    ),
)

# ---------------------------------------------------------------------------
# Output verbosity. Note that --no-progress stores False into 'progress',
# so 'progress' is True unless the flag is given.
# ---------------------------------------------------------------------------
parser.add_argument(
    '--no-progress',
    dest='progress',
    action='store_false',
    help='If set, the progress bar will not be shown.',
)
parser.add_argument(
    '-v', '--verbose',
    dest='verbose',
    action='store_true',
    help='If set, more output will be shown.',
)
85
# Parse the command line; argparse exits with a usage message on bad input.
conf = parser.parse_args()

# Protect the user from themselves: keep the number of processes within
# [1, __cpus]. Values above the detected CPU count are lowered to it, and
# zero or negative values are raised to 1 (a process pool cannot be created
# with fewer than one worker).
conf.processes = max(1, min(conf.processes, __cpus))

# Check the input files up front, before any real work starts.
# NOTE(review): assert_read_access comes from bpm.cmdargs; presumably it
# reports an error when the path is not readable -- confirm there.
assert_read_access(conf.geneinter)
assert_read_access(conf.bpm)
if conf.ignore:
    # The ignore list is optional; only check it when one was given.
    assert_read_access(conf.ignore)

# This program has no command line option for 'squaring'; the attribute is
# set to False unconditionally.
# NOTE(review): presumably so shared code that reads conf.squaring finds it
# -- verify against the rest of the bpm package.
conf.squaring = False

# Publish the parsed configuration as the package-wide 'bpm.conf'.
bpm.conf = conf
105