@@ -25,95 +25,95 @@ org.biojava.nbio.core.util.ConcurrencyTools;
2525
2626public class CookbookMSAProfiler {
2727
28- ` private static class Profiler {`
29-
30- ` private long maxMemoryUsed, timeCheckpoint;`
31- ` private final long timeStart;`
32-
33- ` private Profiler() {`
34- ` maxMemoryUsed = Runtime . getRuntime(). totalMemory();`
35- ` timeStart = timeCheckpoint = System . nanoTime();`
36- ` }`
37-
38- ` private long getMaxMemoryUsed() {`
39- ` return maxMemoryUsed = Math . max(maxMemoryUsed, Runtime . getRuntime(). totalMemory());`
40- ` }`
41-
42- ` private long getTimeSinceCheckpoint() {`
43- ` return System . nanoTime() - timeCheckpoint;`
44- ` }`
45-
46- ` private long getTimeSinceStart() {`
47- ` return System . nanoTime() - timeStart;`
48- ` }`
49-
50- ` private void setCheckpoint() {`
51- ` maxMemoryUsed = Math . max(maxMemoryUsed, Runtime . getRuntime(). totalMemory());`
52- ` timeCheckpoint = System . nanoTime();`
53- ` }`
54-
55- ` }`
56-
57- ` public static void main (String [] args ) throws Exception {`
58-
59- ` if (args. length < 1 ) {`
60- ` System . err. println(" The first argument must be a fasta file of protein sequences." );`
61- ` return ;`
62- ` }`
63-
64- ` // ConcurrencyTools.setThreadPoolSingle();`
65-
66- ` PrintStream fout = new PrintStream (" msa.txt" );`
67- ` Profiler profiler = new Profiler ();`
68-
69- ` System . out. printf(" Loading sequences from %s... " , args[0 ]);`
70- ` List ` < ProteinSequence > ` list = new ArrayList ` <ProteinSequence >` ();`
71- ` list. addAll(FastaReaderHelper . readFastaProteinSequence(new File (args[0 ])). values());`
72- ` if (args. length > 1 && Integer . parseInt(args[1 ]) < list. size()) {`
73- ` System . out. printf(" %s/%d" , args[1 ], list. size());`
74- ` list = list. subList(0 , Integer . parseInt(args[1 ]));`
75- ` } else {`
76- ` System . out. printf(" %d" , list. size());`
77- ` }`
78- ` System . out. printf(" sequences in %d ms using %d kB%n%n" , profiler. getTimeSinceCheckpoint()/ 1000000 ,`
79- ` profiler. getMaxMemoryUsed()/ 1024 );`
80-
81- ` profiler. setCheckpoint();`
82-
83- ` System . out. print(" Stage 1: pairwise similarity calculation... " );`
84- ` GapPenalty gaps = new SimpleGapPenalty ();`
85- ` SubstitutionMatrix ` < AminoAcidCompound > ` blosum62 = new SimpleSubstitutionMatrix ` <AminoAcidCompound >` ();`
86- ` List ` < PairwiseSequenceScorer<ProteinSequence , AminoAcidCompound > ` > scorers = Alignments . getAllPairsScorers(list,`
87- ` PairwiseSequenceScorerType . GLOBAL_IDENTITIES , gaps, blosum62);`
88- ` Alignments . runPairwiseScorers(scorers);`
89- ` System . out. printf(" %d scores in %d ms using %d kB%n%n" , scorers. size(),`
90- ` profiler. getTimeSinceCheckpoint()/ 1000000 , profiler. getMaxMemoryUsed()/ 1024 );`
91-
92- ` profiler. setCheckpoint();`
93-
94- ` System . out. print(" Stage 2: hierarchical clustering into a guide tree... " );`
95- ` GuideTree ` < ProteinSequence , AminoAcidCompound > ` tree = new GuideTree ` <ProteinSequence , AminoAcidCompound >` (list,`
96- ` scorers);`
97- ` scorers = null ;`
98- ` System . out. printf(" %d ms using %d kB%n%n%s%n%n" , profiler. getTimeSinceCheckpoint()/ 1000000 ,`
99- ` profiler. getMaxMemoryUsed()/ 1024 , tree);`
100-
101- ` profiler. setCheckpoint();`
102-
103- ` System . out. print(" Stage 3: progressive alignment... " );`
104- ` Profile ` < ProteinSequence , AminoAcidCompound > ` msa = Alignments . getProgressiveAlignment(tree,`
105- ` ProfileProfileAlignerType . GLOBAL , gaps, blosum62);`
106- ` System . out. printf(" %d profile-profile alignments in %d ms using %d kB%n%n" , list. size() - 1 ,`
107- ` profiler. getTimeSinceCheckpoint()/ 1000000 , profiler. getMaxMemoryUsed()/ 1024 );`
108- ` fout. print(msa);`
109- ` fout. close();`
110-
111- ` ConcurrencyTools . shutdown();`
112-
113- ` System . out. printf(" Total time: %d ms%nMemory use: %d kB%n" , profiler. getTimeSinceStart()/ 1000000 ,`
114- ` profiler. getMaxMemoryUsed()/ 1024 );`
115-
116- ` }`
28+ private static class Profiler {
29+
30+ private long maxMemoryUsed, timeCheckpoint;
31+ private final long timeStart;
32+
33+ private Profiler() {
34+ maxMemoryUsed = Runtime . getRuntime(). totalMemory();
35+ timeStart = timeCheckpoint = System . nanoTime();
36+ }
37+
38+ private long getMaxMemoryUsed() {
39+ return maxMemoryUsed = Math . max(maxMemoryUsed, Runtime . getRuntime(). totalMemory());
40+ }
41+
42+ private long getTimeSinceCheckpoint() {
43+ return System . nanoTime() - timeCheckpoint;
44+ }
45+
46+ private long getTimeSinceStart() {
47+ return System . nanoTime() - timeStart;
48+ }
49+
50+ private void setCheckpoint() {
51+ maxMemoryUsed = Math . max(maxMemoryUsed, Runtime . getRuntime(). totalMemory());
52+ timeCheckpoint = System . nanoTime();
53+ }
54+
55+ }
56+
57+ public static void main (String [] args ) throws Exception {
58+
59+ if (args. length < 1 ) {
60+ System . err. println(" The first argument must be a fasta file of protein sequences." );
61+ return ;
62+ }
63+
64+ // ConcurrencyTools.setThreadPoolSingle();
65+
66+ PrintStream fout = new PrintStream (" msa.txt" );
67+ Profiler profiler = new Profiler ();
68+
69+ System . out. printf(" Loading sequences from %s... " , args[0 ]);
70+ List<ProteinSequence > list = new ArrayList<ProteinSequence > ();
71+ list. addAll(FastaReaderHelper . readFastaProteinSequence(new File (args[0 ])). values());
72+ if (args. length > 1 && Integer . parseInt(args[1 ]) < list. size()) {
73+ System . out. printf(" %s/%d" , args[1 ], list. size());
74+ list = list. subList(0 , Integer . parseInt(args[1 ]));
75+ } else {
76+ System . out. printf(" %d" , list. size());
77+ }
78+ System . out. printf(" sequences in %d ms using %d kB%n%n" , profiler. getTimeSinceCheckpoint()/ 1000000 ,
79+ profiler. getMaxMemoryUsed()/ 1024 );
80+
81+ profiler. setCheckpoint();
82+
83+ System . out. print(" Stage 1: pairwise similarity calculation... " );
84+ GapPenalty gaps = new SimpleGapPenalty ();
85+ SubstitutionMatrix<AminoAcidCompound > blosum62 = new SimpleSubstitutionMatrix<AminoAcidCompound > ();
86+ List<PairwiseSequenceScorer<ProteinSequence , AminoAcidCompound > > scorers = Alignments . getAllPairsScorers(list,
87+ PairwiseSequenceScorerType . GLOBAL_IDENTITIES , gaps, blosum62);
88+ Alignments . runPairwiseScorers(scorers);
89+ System . out. printf(" %d scores in %d ms using %d kB%n%n" , scorers. size(),
90+ profiler. getTimeSinceCheckpoint()/ 1000000 , profiler. getMaxMemoryUsed()/ 1024 );
91+
92+ profiler. setCheckpoint();
93+
94+ System . out. print(" Stage 2: hierarchical clustering into a guide tree... " );
95+ GuideTree<ProteinSequence , AminoAcidCompound > tree = new GuideTree<ProteinSequence , AminoAcidCompound > (list,
96+ scorers);
97+ scorers = null ;
98+ System . out. printf(" %d ms using %d kB%n%n%s%n%n" , profiler. getTimeSinceCheckpoint()/ 1000000 ,
99+ profiler. getMaxMemoryUsed()/ 1024 , tree);
100+
101+ profiler. setCheckpoint();
102+
103+ System . out. print(" Stage 3: progressive alignment... " );
104+ Profile<ProteinSequence , AminoAcidCompound > msa = Alignments . getProgressiveAlignment(tree,
105+ ProfileProfileAlignerType . GLOBAL , gaps, blosum62);
106+ System . out. printf(" %d profile-profile alignments in %d ms using %d kB%n%n" , list. size() - 1 ,
107+ profiler. getTimeSinceCheckpoint()/ 1000000 , profiler. getMaxMemoryUsed()/ 1024 );
108+ fout. print(msa);
109+ fout. close();
110+
111+ ConcurrencyTools . shutdown();
112+
113+ System . out. printf(" Total time: %d ms%nMemory use: %d kB%n" , profiler. getTimeSinceStart()/ 1000000 ,
114+ profiler. getMaxMemoryUsed()/ 1024 );
115+
116+ }
117117
118118}
119119
0 commit comments