diff --git a/src/HtmlMaker.cpp b/src/HtmlMaker.cpp
index 1dc8ca6..ffa789d 100644
--- a/src/HtmlMaker.cpp
+++ b/src/HtmlMaker.cpp
@@ -20,8 +20,8 @@
#include
#include
#include
-#include
#include
+#include
void
HtmlMaker::put_data(const std::string &placeholder, const std::string &data) {
@@ -41,11 +41,11 @@ HtmlMaker::put_data(const std::string &placeholder, const std::string &data) {
void
HtmlMaker::put_comment(std::string &comment_begin, std::string &comment_end,
const bool done) {
- if (!done) { // put html comments if analysis was skipped
+ if (!done) { // put html comments if analysis was skipped
put_data(comment_begin, "");
}
- else { // otherwise delete placeholder
+ else { // otherwise delete placeholder
put_data(comment_begin, "");
put_data(comment_end, "");
}
@@ -54,20 +54,22 @@ HtmlMaker::put_comment(std::string &comment_begin, std::string &comment_end,
void
HtmlMaker::put_file_details(const FalcoConfig &falco_config) {
using namespace std::string_literals;
- static const auto left_tag = "\\{\\{"s;
- static const auto right_tag = "\\}\\}"s;
+ static constexpr auto left_tag = "\\{\\{";
+ static constexpr auto right_tag = "\\}\\}";
+ const auto &filename_formatted = falco_config.filename_stripped;
std::regex filename_re(left_tag + "filename"s + right_tag);
- std::regex_replace(html_boilerplate, filename_re,
- falco_config.filename_stripped);
+ html_boilerplate =
+ std::regex_replace(html_boilerplate, filename_re, filename_formatted);
using system_clock = std::chrono::system_clock;
auto time_unformatted = system_clock::to_time_t(system_clock::now());
std::string time_formatted = std::string(ctime(&time_unformatted));
std::regex date_re(left_tag + "date"s + right_tag);
- std::regex_replace(html_boilerplate, date_re, time_formatted);
+ html_boilerplate =
+ std::regex_replace(html_boilerplate, date_re, time_formatted);
std::regex version_re(left_tag + "version"s + right_tag);
- std::regex_replace(html_boilerplate, version_re, VERSION);
+ html_boilerplate = std::regex_replace(html_boilerplate, version_re, VERSION);
}
diff --git a/src/Module.cpp b/src/Module.cpp
index c7223c9..b3365a2 100644
--- a/src/Module.cpp
+++ b/src/Module.cpp
@@ -44,22 +44,24 @@ make_default_base_groups(std::vector &base_groups,
const std::size_t num_bases) {
base_groups.clear();
for (std::size_t i = 0; i < num_bases; ++i)
- base_groups.push_back(BaseGroup(i, i));
+ base_groups.push_back({i, i});
}
/************* EXP BASE GROUP **************/
void
make_exponential_base_groups(std::vector &base_groups,
const std::size_t &num_bases) {
- std::size_t starting_base = 0, end_base, interval = 1;
+ std::size_t starting_base{};
+ std::size_t end_base{};
+ std::size_t interval{1};
base_groups.clear();
- for (; starting_base < num_bases;) {
+ while (starting_base < num_bases) {
end_base = starting_base + interval - 1;
if (end_base >= num_bases)
end_base = num_bases;
- base_groups.push_back(BaseGroup(starting_base, end_base));
+ base_groups.push_back({starting_base, end_base});
starting_base += interval;
if (starting_base == 9 && num_bases > 75)
interval = 5;
@@ -107,37 +109,40 @@ void
make_linear_base_groups(std::vector &base_groups,
const std::size_t num_bases) {
- // For lengths below 75bp we just return everything.
+ // lengths not larger than 75bp just return everything
if (num_bases <= 75) {
make_default_base_groups(base_groups, num_bases);
return;
}
- // We need to work out what interval we're going to use.
+ // determine the interval to use
const std::size_t interval = get_linear_interval(num_bases);
- std::size_t starting_base = 1;
+ std::size_t starting_base{1};
while (starting_base <= num_bases) {
- std::size_t end_base = starting_base + interval - 1;
-
- if (starting_base < 10)
- end_base = starting_base;
-
- if (starting_base == 10 && interval > 10)
- end_base = interval - 1;
-
- if (end_base > num_bases)
- end_base = num_bases;
-
- BaseGroup bg = BaseGroup(starting_base - 1, end_base - 1);
- base_groups.push_back(bg);
-
- if (starting_base < 10)
- starting_base++;
- else if (starting_base == 10 && interval > 10)
- starting_base = interval;
- else
- starting_base += interval;
+ const auto end_base = [&] {
+ std::size_t end_base = starting_base + interval - 1;
+ if (starting_base < 10)
+ end_base = starting_base;
+ if (starting_base == 10 && interval > 10)
+ end_base = interval - 1;
+ if (end_base > num_bases)
+ end_base = num_bases;
+ return end_base;
+ }();
+
+ assert(starting_base > 0u && end_base > 0u);
+ base_groups.push_back({starting_base - 1ul, end_base - 1ul});
+
+ starting_base = [&] {
+ if (starting_base < 10)
+ starting_base++;
+ else if (starting_base == 10 && interval > 10)
+ starting_base = interval;
+ else
+ starting_base += interval;
+ return starting_base;
+ }();
}
}
@@ -165,33 +170,31 @@ get_corrected_count(std::size_t count_at_limit, std::size_t num_reads,
if (num_reads - num_obs < count_at_limit)
return num_obs;
- // If not then we need to see what the likelihood is that we had
- // another sequence with this number of observations which we would
- // have missed. We'll start by working out the probability of NOT seeing a
- // sequence with this duplication level within the first count_at_limit
- // sequences of num_obs. This is easier than calculating
- // the probability of seeing it.
+ // If not then we need to see what the likelihood is that we had another
+ // sequence with this number of observations which we would have missed. We'll
+ // start by working out the probability of NOT seeing a sequence with this
+ // duplication level within the first count_at_limit sequences of num_obs.
+ // This is easier than calculating the probability of seeing it.
double p_not_seeing = 1.0;
// To save doing long calculations which are never going to produce anything
// meaningful we'll set a limit to our p-value calculation. This is the
// probability below which we won't increase our count by 0.01 of an
// observation. Once we're below this we stop caring about the corrected
- // value since it's going to be so close to the observed value thatwe can
- // just return that instead.
- double limit_of_caring = 1.0 - (num_obs / (num_obs + 0.01));
+ // value since it's going to be so close to the observed value that we can
+ // just return that instead.
+ const double limit_of_caring = 1.0 - (num_obs / (num_obs + 0.01));
for (std::size_t i = 0; i < count_at_limit; ++i) {
p_not_seeing *= static_cast((num_reads - i) - dup_level) /
static_cast(num_reads - i);
-
if (p_not_seeing < limit_of_caring) {
p_not_seeing = 0;
break;
}
}
- // Now we can assume that the number we observed can be
- // scaled up by this proportion
+ // Now we can assume that the number we observed can be scaled up by this
+ // proportion
return num_obs /
std::max(std::numeric_limits::min(), 1.0 - p_not_seeing);
}
@@ -377,8 +380,9 @@ ModuleBasicStatistics::ModuleBasicStatistics(const FalcoConfig &config) :
void
ModuleBasicStatistics::summarize_module(FastqStats &stats) {
- // Total sequences
+ // total sequences and bases
total_sequences = stats.num_reads;
+ total_bases = stats.total_bases;
// min and max read length
min_read_length = stats.min_read_length;
@@ -430,49 +434,56 @@ ModuleBasicStatistics::summarize_module(FastqStats &stats) {
// Average read length
avg_read_length = 0;
- std::size_t total_bases = 0;
+ std::size_t total_bases_for_mean = 0;
for (std::size_t i = 0; i < max_read_length; ++i) {
if (i < FastqStats::SHORT_READ_THRESHOLD)
- total_bases += i * stats.read_length_freq[i];
+ total_bases_for_mean += i * stats.read_length_freq[i];
else
- total_bases +=
+ total_bases_for_mean +=
i * stats.long_read_length_freq[i - FastqStats::SHORT_READ_THRESHOLD];
}
-
- avg_read_length =
- total_bases / std::max(static_cast(1), total_sequences);
+ avg_read_length = total_bases_for_mean / std::max(std::size_t{1}, total_sequences); // size_t literal: 1ul fails deduction where size_t != unsigned long
// counts bases G and C in each base position
- avg_gc = 0;
// GC %
// GS: TODO delete gc calculation during stream and do it using the total G
// counts in all bases
avg_gc =
- 100 * stats.total_gc / std::max(1.0, static_cast(total_bases));
+ 100.0 * stats.total_gc / std::max(1.0, static_cast(total_bases));
}
-// It's always a pass
void
-ModuleBasicStatistics::make_grade() {}
+ModuleBasicStatistics::make_grade() {} // always a pass
void
ModuleBasicStatistics::write_module(std::ostream &os) {
+ static constexpr std::size_t mega = 1'000'000; // same type as total_bases
os << "#Measure\tValue\n";
os << "Filename\t" << filename_stripped << "\n";
os << "File type\t" << file_type << "\n";
os << "Encoding\t" << file_encoding << "\n";
os << "Total Sequences\t" << total_sequences << "\n";
+ // clang-format off
+ os << "Total Bases\t"
+ << (total_bases > mega ? total_bases / mega : total_bases)
+ << (total_bases > mega ? " Mbp\n" : " bp\n");
+ // clang-format on
os << "Sequences flagged as poor quality\t" << num_poor << "\n";
os << "Sequence length\t";
- if (min_read_length == max_read_length) {
- os << min_read_length;
- }
- else {
- os << min_read_length << "-" << max_read_length;
- }
+ os << min_read_length;
+ if (min_read_length != max_read_length)
+ os << "-" << max_read_length;
os << "\n";
- os << "%GC\t" << static_cast(avg_gc) << "\n";
+ const auto default_precision{os.precision()};
+ // clang-format off
+ os << "%GC\t"
+ << std::setprecision(1)
+ << std::fixed
+ << avg_gc << '\n'
+ << std::defaultfloat
+ << std::setprecision(default_precision);
+ // clang-format on
}
std::string
@@ -515,9 +526,9 @@ ModuleBasicStatistics::read_data_line(const std::string &line) {
else if (lhs == "Encoding")
file_encoding = rhs;
else if (lhs == "Total Sequences")
- total_sequences = atoi(rhs.c_str());
+ total_sequences = std::atoi(std::data(rhs));
else if (lhs == "Sequences flagged as poor quality")
- num_poor = atoi(rhs.c_str());
+ num_poor = std::atoi(std::data(rhs));
else if (lhs == "Sequence length") {
// non-constant sequence length
@@ -526,12 +537,12 @@ ModuleBasicStatistics::read_data_line(const std::string &line) {
std::string min_l, max_l;
std::getline(seq_iss, min_l, '-');
std::getline(seq_iss, max_l, '-');
- min_read_length = atoi(min_l.c_str());
- max_read_length = atoi(max_l.c_str());
+ min_read_length = std::atoi(std::data(min_l));
+ max_read_length = std::atoi(std::data(max_l));
}
}
else if (lhs == "%GC")
- avg_gc = atoi(rhs.c_str());
+ avg_gc = std::atof(std::data(rhs)); // %GC is written with one decimal
else {
throw std::runtime_error("malformed basic statistic" + lhs);
}
@@ -819,7 +830,7 @@ ModulePerTileSequenceQuality::summarize_module(FastqStats &stats) {
for (std::size_t i = 0; i < lim; ++i) {
// transform sum of all qualities in mean
const auto itr = stats.tile_position_count.find(v.first);
- if (itr == cend(stats.tile_position_count))
+ if (itr == std::cend(stats.tile_position_count))
throw std::runtime_error(
"failure ModulePerTileSequenceQuality::summarize_module");
const std::size_t count_at_pos = itr->second[i];
@@ -1787,19 +1798,15 @@ void
ModuleOverrepresentedSequences::summarize_module(FastqStats &stats) {
// Keep only sequences that pass the input cutoff
num_reads = stats.num_reads;
- for (auto it = stats.sequence_count.begin(); it != stats.sequence_count.end();
- ++it) {
+ for (auto it = std::cbegin(stats.sequence_count);
+ it != std::cend(stats.sequence_count); ++it) {
if (it->second > num_reads * min_fraction_to_overrepresented) {
overrep_sequences.push_back(*it);
}
}
-
- // Sort strings by frequency
- std::sort(begin(overrep_sequences), end(overrep_sequences),
- [](const std::pair &a,
- const std::pair &b) {
- return a.second > b.second;
- });
+ // sort strings by frequency
+ std::sort(std::begin(overrep_sequences), std::end(overrep_sequences),
+ [](const auto &a, const auto &b) { return a.second > b.second; });
}
void
diff --git a/src/Module.hpp b/src/Module.hpp
index 331d50e..f80ed16 100644
--- a/src/Module.hpp
+++ b/src/Module.hpp
@@ -28,17 +28,15 @@
/* base groups for longer reads, copied from FastQC*/
struct BaseGroup {
- size_t start, end;
- BaseGroup(size_t _start, size_t _end) : start(_start), end(_end) {}
+ std::size_t start{};
+ std::size_t end{};
};
-class Module {
-private:
- const std::string module_name;
+struct Module {
+ const std::string module_name{};
-public:
// avoid writing things prior to summarizing
- bool summarized;
+ bool summarized{};
// The module name displayed in outputs and html
// GS TODO: automate placing it in html too
@@ -67,9 +65,7 @@ class Module {
Module(const std::string &module_name);
virtual ~Module() = 0;
- /*********************************************/
- /*****Abstract functions to be implemented****/
- /*********************************************/
+ // functions to be implemented in child classes
// Summarize the module
virtual void
@@ -85,9 +81,6 @@ class Module {
virtual std::string
make_html_data() = 0;
- /*********************************************/
- /**************Visible functions**************/
- /*********************************************/
// Summarizes and registers that it summarized
void
summarize(FastqStats &stats);
@@ -106,19 +99,19 @@ class Module {
put_data_on_html(HtmlMaker &html_maker);
};
-class ModuleBasicStatistics : public Module {
-public:
- bool is_nanopore;
+struct ModuleBasicStatistics : public Module {
+ static const std::string module_name;
+ bool is_nanopore{};
std::string file_type;
std::string file_encoding;
std::string filename_stripped;
- size_t avg_read_length;
- size_t avg_gc;
- size_t num_poor;
- size_t min_read_length;
- size_t max_read_length;
- size_t total_sequences;
- static const std::string module_name;
+ std::size_t avg_read_length{};
+ double avg_gc{};
+ std::size_t num_poor{};
+ std::size_t min_read_length{};
+ std::size_t max_read_length{};
+ std::size_t total_bases{};
+ std::size_t total_sequences{};
ModuleBasicStatistics(const FalcoConfig &config);
~ModuleBasicStatistics() {}
void
@@ -129,27 +122,30 @@ class ModuleBasicStatistics : public Module {
write_module(std::ostream &os);
std::string
make_html_data();
-
void
read_data_line(const std::string &line);
};
-class ModulePerBaseSequenceQuality : public Module {
-private:
+struct ModulePerBaseSequenceQuality : public Module {
+ static const std::string module_name;
// from FastQC: whether to group bases
- bool do_group;
- size_t num_bases;
- size_t num_groups;
- // grade criteria
- size_t base_lower_warn, base_lower_error, base_median_warn, base_median_error;
- size_t num_warn, num_error;
+ bool do_group{};
+ std::size_t num_bases{};
+ std::size_t num_groups{};
+ std::size_t base_lower_warn{}; // grade criteria
+ std::size_t base_lower_error{}; // grade criteria
+ std::size_t base_median_warn{};
+ std::size_t base_median_error{};
+ std::size_t num_warn{};
+ std::size_t num_error{};
std::vector group_mean;
- std::vector group_ldecile, group_lquartile, group_median,
- group_uquartile, group_udecile;
+ std::vector group_ldecile;
+ std::vector group_lquartile;
+ std::vector group_median;
+ std::vector group_uquartile;
+ std::vector group_udecile;
std::vector base_groups;
-public:
- static const std::string module_name;
ModulePerBaseSequenceQuality(const FalcoConfig &config);
~ModulePerBaseSequenceQuality() {}
void
@@ -164,14 +160,13 @@ class ModulePerBaseSequenceQuality : public Module {
make_html_data();
};
-class ModulePerTileSequenceQuality : public Module {
-private:
- double grade_warn, grade_error;
- size_t max_read_length;
- std::unordered_map> tile_position_quality;
- std::vector tiles_sorted;
+struct ModulePerTileSequenceQuality : public Module {
+ double grade_warn{};
+ double grade_error{};
+ std::size_t max_read_length{};
+ std::unordered_map> tile_position_quality;
+ std::vector tiles_sorted;
-public:
static const std::string module_name;
ModulePerTileSequenceQuality(const FalcoConfig &config);
~ModulePerTileSequenceQuality() {}
@@ -185,17 +180,15 @@ class ModulePerTileSequenceQuality : public Module {
make_html_data();
};
-class ModulePerSequenceQualityScores : public Module {
-private:
- size_t mode_val;
- size_t mode_ind;
- size_t offset;
- std::array quality_count;
+struct ModulePerSequenceQualityScores : public Module {
+ std::size_t mode_val{};
+ std::size_t mode_ind{};
+ std::size_t offset{};
+ std::array quality_count{};
// grade criteria
- size_t mode_warn;
- size_t mode_error;
+ std::size_t mode_warn{};
+ std::size_t mode_error{};
-public:
static const std::string module_name;
ModulePerSequenceQualityScores(const FalcoConfig &config);
~ModulePerSequenceQualityScores() {}
@@ -209,27 +202,25 @@ class ModulePerSequenceQualityScores : public Module {
make_html_data();
};
-class ModulePerBaseSequenceContent : public Module {
-private:
- bool do_group;
- std::vector a_pct, c_pct, t_pct, g_pct;
- double max_diff;
- size_t num_bases;
-
- // flag as to whether or not dataset is WGBS
- bool is_bisulfite;
+struct ModulePerBaseSequenceContent : public Module {
+ static const std::string module_name;
+ bool do_group{};
+ std::vector a_pct;
+ std::vector c_pct;
+ std::vector t_pct;
+ std::vector g_pct;
+ double max_diff{};
+ std::size_t num_bases{};
- // if so we have to test T vs C instead of A vs G
- bool is_reverse_complement;
+ bool is_bisulfite{}; // flag for dataset is WGBS
+ bool is_reverse_complement{}; // indicates test T vs C instead of A vs G
- // for grade
- double sequence_error, sequence_warn;
+ double sequence_error{}; // for grade
+ double sequence_warn{}; // for grade
- size_t num_groups;
+ std::size_t num_groups{};
std::vector base_groups;
-public:
- static const std::string module_name;
ModulePerBaseSequenceContent(const FalcoConfig &config);
~ModulePerBaseSequenceContent() {}
void
@@ -242,15 +233,14 @@ class ModulePerBaseSequenceContent : public Module {
make_html_data();
};
-class ModulePerSequenceGCContent : public Module {
-private:
- double gc_warn, gc_error;
- double gc_deviation;
+struct ModulePerSequenceGCContent : public Module {
+ static const std::string module_name;
+ double gc_warn{};
+ double gc_error{};
+ double gc_deviation{};
std::array gc_count;
std::array theoretical_gc_count;
-public:
- static const std::string module_name;
ModulePerSequenceGCContent(const FalcoConfig &config);
~ModulePerSequenceGCContent() {}
void
@@ -263,26 +253,21 @@ class ModulePerSequenceGCContent : public Module {
make_html_data();
};
-class ModulePerBaseNContent : public Module {
-private:
- size_t num_bases;
- // for grade
- size_t grade_n_warn;
- size_t grade_n_error;
-
- double max_n_pct;
- std::array gc_count;
- std::array theoretical_gc_count;
+struct ModulePerBaseNContent : public Module {
+ static const std::string module_name;
+ std::size_t num_bases{};
+ std::size_t grade_n_warn{}; // for grade
+ std::size_t grade_n_error{}; // for grade
+ double max_n_pct{};
+ std::array gc_count;
+ std::array theoretical_gc_count;
std::vector n_pct;
- // grade vars
- size_t gc_warn, gc_error;
-
- bool do_group;
- size_t num_groups;
+ std::size_t gc_warn{}; // grade vars
+ std::size_t gc_error{}; // grade vars
+ bool do_group{};
+ std::size_t num_groups{};
std::vector base_groups;
-public:
- static const std::string module_name;
ModulePerBaseNContent(const FalcoConfig &config);
~ModulePerBaseNContent() {}
void
@@ -295,23 +280,21 @@ class ModulePerBaseNContent : public Module {
make_html_data();
};
-class ModuleSequenceLengthDistribution : public Module {
-private:
- bool do_grade_error;
- bool do_grade_warn;
- size_t max_read_length;
- std::vector sequence_lengths;
+struct ModuleSequenceLengthDistribution : public Module {
+ static const std::string module_name;
+ bool do_grade_error{};
+ bool do_grade_warn{};
+ std::size_t max_read_length{};
+ std::vector sequence_lengths;
// warn and fail criteria
- bool is_all_same_length;
- size_t empty_reads;
+ bool is_all_same_length{};
+ std::size_t empty_reads{};
- bool do_group;
- size_t num_groups;
+ bool do_group{};
+ std::size_t num_groups{};
std::vector base_groups;
-public:
- static const std::string module_name;
ModuleSequenceLengthDistribution(const FalcoConfig &config);
~ModuleSequenceLengthDistribution() {}
void
@@ -324,19 +307,18 @@ class ModuleSequenceLengthDistribution : public Module {
make_html_data();
};
-class ModuleSequenceDuplicationLevels : public Module {
-private:
- double seq_total, seq_dedup;
+struct ModuleSequenceDuplicationLevels : public Module {
+ static const std::string module_name;
+ double seq_total{};
+ double seq_dedup{};
- double grade_dup_warn;
- double grade_dup_error;
- double total_deduplicated_pct;
- std::array percentage_deduplicated;
- std::array percentage_total;
- std::unordered_map counts_by_freq;
+ double grade_dup_warn{};
+ double grade_dup_error{};
+ double total_deduplicated_pct{};
+ std::array percentage_deduplicated{};
+ std::array percentage_total{};
+ std::unordered_map counts_by_freq;
-public:
- static const std::string module_name;
ModuleSequenceDuplicationLevels(const FalcoConfig &config);
~ModuleSequenceDuplicationLevels() {}
void
@@ -349,20 +331,20 @@ class ModuleSequenceDuplicationLevels : public Module {
make_html_data();
};
-class ModuleOverrepresentedSequences : public Module {
-private:
- size_t num_reads;
- std::vector> overrep_sequences;
- double grade_warn, grade_error;
- const double min_fraction_to_overrepresented = 0.001;
+struct ModuleOverrepresentedSequences : public Module {
+ static constexpr auto min_fraction_to_overrepresented = 0.001;
+ static const std::string module_name;
+
+ std::size_t num_reads{};
+ std::vector> overrep_sequences;
+ double grade_warn{};
+ double grade_error{};
std::vector> contaminants;
// Function to find the matching contaminant within the list
std::string
get_matching_contaminant(const std::string &seq);
-public:
- static const std::string module_name;
ModuleOverrepresentedSequences(const FalcoConfig &config);
~ModuleOverrepresentedSequences() {}
void
@@ -375,36 +357,31 @@ class ModuleOverrepresentedSequences : public Module {
make_html_data();
};
-class ModuleAdapterContent : public Module {
-private:
- // Number of adapters to test
- size_t num_adapters;
-
- // number of bases to report
- size_t num_bases;
-
+struct ModuleAdapterContent : public Module {
+ static const std::string module_name;
+ std::size_t num_adapters{}; // Number of adapters to test
+ std::size_t num_bases{}; // number of bases to report
// adapter size to know how many bases to report
- size_t adapter_size;
+ std::size_t adapter_size{};
- // Information from config
+ // info from config
std::vector adapter_names;
std::vector adapter_seqs;
- std::vector adapter_hashes;
- size_t shortest_adapter_size;
+ std::vector adapter_hashes;
+ std::size_t shortest_adapter_size{};
- // vector to be reported
+ // to be reported
std::vector> adapter_pos_pct;
- // minimum percentages for warn/fail
- double grade_warn, grade_error;
+ // minimum cutoffs for warn/fail (percentages)
+ double grade_warn{};
+ double grade_error{};
// Aux function to count adapter in a position
double
- count_adapter(const std::vector &kmer_count, const size_t pos,
- const size_t adapter_hash, const size_t adapter_size,
- const size_t kmer_size);
+ count_adapter(const std::vector &kmer_count,
+ const std::size_t pos, const std::size_t adapter_hash,
+ const std::size_t adapter_size, const std::size_t kmer_size);
-public:
- static const std::string module_name;
ModuleAdapterContent(const FalcoConfig &config);
~ModuleAdapterContent() {}
void
@@ -417,25 +394,25 @@ class ModuleAdapterContent : public Module {
make_html_data();
};
-class ModuleKmerContent : public Module {
-private:
- size_t num_kmer_bases;
- size_t kmer_size;
- size_t num_kmers;
- size_t num_seen_kmers;
+struct ModuleKmerContent : public Module {
+ static constexpr std::size_t MIN_OBS_EXP_TO_REPORT = 5;
+ static constexpr std::size_t MAX_KMERS_TO_REPORT = 20;
+ static constexpr std::size_t MAX_KMERS_TO_PLOT = 10;
+ static const std::string module_name;
+
+ std::size_t num_kmer_bases{};
+ std::size_t kmer_size{};
+ std::size_t num_kmers{};
+ std::size_t num_seen_kmers{};
- double grade_warn, grade_error;
- std::array pos_kmer_count;
- std::vector total_kmer_counts;
+ double grade_warn{};
+ double grade_error{};
+ std::array pos_kmer_count{};
+ std::vector total_kmer_counts;
std::vector obs_exp_max;
- std::vector where_obs_exp_is_max;
- std::vector> kmers_to_report;
+ std::vector where_obs_exp_is_max;
+ std::vector> kmers_to_report;
-public:
- static const std::string module_name;
- static const size_t MIN_OBS_EXP_TO_REPORT = 5;
- static const size_t MAX_KMERS_TO_REPORT = 20;
- static const size_t MAX_KMERS_TO_PLOT = 10;
ModuleKmerContent(const FalcoConfig &config);
~ModuleKmerContent() {}
void
diff --git a/src/StreamReader.cpp b/src/StreamReader.cpp
index fb1f769..40b5ffc 100644
--- a/src/StreamReader.cpp
+++ b/src/StreamReader.cpp
@@ -30,20 +30,21 @@ min8(const T a, const T b) {
/****************************************************/
/***************** STREAMREADER *********************/
/****************************************************/
-size_t
+std::size_t
get_tile_split_position(FalcoConfig &config) {
const std::string &filename = config.filename;
// Count colons to know the formatting pattern
- size_t num_colon = 0;
+ std::size_t num_colon{};
if (config.is_sam) {
std::ifstream sam_file(filename);
if (!sam_file)
throw std::runtime_error("cannot load sam file : " + filename);
std::string line;
- while (std::getline(sam_file, line) && line.size() > 0 && line[0] == '@')
+ while (std::getline(sam_file, line) && std::size(line) > 0 &&
+ line[0] == '@')
continue;
- size_t tabPos = line.find('\t');
+ std::size_t tabPos = line.find('\t');
line = line.substr(0, tabPos);
for (char c : line)
num_colon += (c == ':');
@@ -113,26 +114,24 @@ get_tile_split_position(FalcoConfig &config) {
return 0; // no tile information on read name
}
-// function to turn a vector into array for adapter hashes and fast lookup
-std::array
-make_adapters(const std::vector &adapter_hashes) {
- if (adapter_hashes.size() > Constants::max_adapters)
- throw std::runtime_error(
- "Number of adapters is larger than 128, which hinders "
- "visualziation and speed of falco. Please keep it to "
- "under 128");
-
- std::array ans;
- for (size_t i = 0; i < adapter_hashes.size(); ++i)
- ans[i] = adapter_hashes[i];
-
+// function to turn a vector into array for adapter hashes and faster lookup
+std::array
+make_adapters(const std::vector &adapter_hashes) {
+ static constexpr auto error_message =
+ "Number of adapters is larger than 128, which hinders visualization and "
+ "speed of falco. Please keep it to under 128";
+ if (std::size(adapter_hashes) > Constants::max_adapters)
+ throw std::runtime_error(error_message);
+ std::array ans{}; // zero-fill slots beyond the configured adapters
+ std::copy(std::cbegin(adapter_hashes), std::cend(adapter_hashes),
+ std::begin(ans));
return ans;
}
-StreamReader::StreamReader(FalcoConfig &config, const size_t _buffer_size,
+StreamReader::StreamReader(FalcoConfig &config, const std::size_t _buffer_size,
const char _field_separator,
const char _line_separator) :
- // I have to pass the config skips as const to read them fast
+ // must pass the config skips as const to read them fast
do_sequence_hash(config.do_duplication || config.do_overrepresented),
do_kmer(config.do_kmer), do_adapter(config.do_adapter),
do_adapter_optimized(config.do_adapter_optimized),
@@ -152,7 +151,8 @@ StreamReader::StreamReader(FalcoConfig &config, const size_t _buffer_size,
do_adapters_slow(config.do_adapter && !config.do_adapter_optimized),
adapter_seqs(config.adapter_seqs),
- num_adapters(config.adapter_hashes.size()), adapter_size(config.adapter_size),
+ num_adapters(std::size(config.adapter_hashes)),
+ adapter_size(config.adapter_size),
// for case size == 32 expr (1ull << 64) -1 gives 0.
// We need to set mask as all 64 bits 1 => use SIZE_MAX in this case
adapter_mask(adapter_size == 32 ? SIZE_MAX
@@ -193,7 +193,7 @@ StreamReader::put_base_in_buffer() {
buffer[read_pos] = base_from_buffer;
}
else {
- if (leftover_ind == leftover_buffer.size())
+ if (leftover_ind == std::size(leftover_buffer))
leftover_buffer.push_back(base_from_buffer);
else
leftover_buffer[leftover_ind] = base_from_buffer;
@@ -241,7 +241,7 @@ StreamReader::read_fast_forward_line_eof() {
void
StreamReader::get_tile_value() {
tile_cur = 0;
- size_t num_colon = 0;
+ std::size_t num_colon = 0;
for (; *cur_char != field_separator; ++cur_char) {
num_colon += (*cur_char == ':');
if (num_colon == tile_split_point) {
@@ -289,7 +289,7 @@ StreamReader::read_tile_line(FastqStats &stats) {
std::vector(stats.max_read_length, 0.0);
// stats.tile_position_quality.find(tile_cur)->second[0] = 0;
stats.tile_position_count[tile_cur] =
- std::vector(stats.max_read_length, 0);
+ std::vector(stats.max_read_length, 0);
}
}
@@ -301,48 +301,44 @@ StreamReader::read_tile_line(FastqStats &stats) {
// optimized at all times
void
StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
- // I will count the Ns even if asked to ignore, as checking ifs take time
+ // count Ns even if asked not to report them
if (base_from_buffer == 'N') {
++stats.n_base_count[read_pos];
num_bases_after_n = 1; // start over the current kmer
+ return;
}
+ const auto two_bit = actg_to_2bit(base_from_buffer);
+
// ATGC bases
- else {
- // increments basic statistic counts
- cur_gc_count += (actg_to_2bit(base_from_buffer) & 1);
- ++stats.base_count[(read_pos << Constants::bit_shift_base) |
- actg_to_2bit(base_from_buffer)];
-
- if (do_sliding_window) {
- // Update k-mer sequence
- cur_kmer = ((cur_kmer << Constants::bit_shift_base) |
- actg_to_2bit(base_from_buffer));
-
- // registers k-mer if seen at least k nucleotides since the last n
- if (do_kmer && do_kmer_read &&
- (num_bases_after_n >= Constants::kmer_size)) {
-
- ++stats.kmer_count[(read_pos << Constants::bit_shift_kmer) |
- (cur_kmer & Constants::kmer_mask)];
- ++stats.pos_kmer_count[read_pos];
- }
+ // increments basic statistic counts
+ cur_gc_count += (two_bit & 1);
+ ++stats.base_count[(read_pos << Constants::bit_shift_base) | two_bit];
+
+ if (do_sliding_window) {
+ // Update k-mer sequence
+ cur_kmer = (cur_kmer << Constants::bit_shift_base) | two_bit;
+ // registers k-mer if seen at least k nucleotides since the last n
+ if (do_kmer && do_kmer_read &&
+ (num_bases_after_n >= Constants::kmer_size)) {
+
+ ++stats.kmer_count[(read_pos << Constants::bit_shift_kmer) |
+ (cur_kmer & Constants::kmer_mask)];
+ ++stats.pos_kmer_count[read_pos];
+ }
- // GS: slow, need to use fsm
- if (do_adapter_optimized && (num_bases_after_n == adapter_size)) {
- cur_kmer &= adapter_mask;
- for (size_t i = 0; i != num_adapters; ++i) {
- if (cur_kmer == adapters[i] && !adapters_found[i]) {
- ++stats
- .pos_adapter_count[(read_pos << Constants::bit_shift_adapter) |
- i];
- adapters_found[i] = true;
- }
+ // GS: slow, need to use fsm
+ if (do_adapter_optimized && (num_bases_after_n == adapter_size)) {
+ cur_kmer &= adapter_mask;
+ for (std::size_t i = 0; i != num_adapters; ++i) {
+ if (cur_kmer == adapters[i] && !adapters_found[i]) {
+ ++stats.pos_adapter_count[(read_pos << Constants::bit_shift_adapter) |
+ i];
+ adapters_found[i] = true;
}
}
-
- num_bases_after_n += (num_bases_after_n != adapter_size);
}
+ num_bases_after_n += (num_bases_after_n != adapter_size);
}
}
@@ -353,17 +349,15 @@ StreamReader::process_sequence_base_from_leftover(FastqStats &stats) {
if (base_from_buffer == 'N') {
++stats.long_n_base_count[leftover_ind];
num_bases_after_n = 1; // start over the current kmer
+ return;
}
-
// ATGC bases
- else {
- // increments basic statistic counts
- cur_gc_count += (actg_to_2bit(base_from_buffer) & 1);
- ++stats.long_base_count[(leftover_ind << Constants::bit_shift_base) |
- actg_to_2bit(base_from_buffer)];
-
- // WE WILL NOT DO KMER STATS OUTSIDE OF BUFFER
- }
+ // increments basic statistic counts
+ const auto two_bit = actg_to_2bit(base_from_buffer);
+ cur_gc_count += (two_bit & 1);
+ const auto idx = (leftover_ind << Constants::bit_shift_base) | two_bit;
+ ++stats.long_base_count[idx];
+ // WE WILL NOT DO KMER STATS OUTSIDE OF BUFFER
}
// Gets statistics after reading the entire sequence line
@@ -385,7 +379,8 @@ StreamReader::postprocess_sequence_line(FastqStats &stats) {
// Updates maximum read length if applicable
stats.max_read_length =
- ((read_pos > stats.max_read_length) ? (read_pos) : (stats.max_read_length));
+ read_pos > stats.max_read_length ? read_pos : stats.max_read_length;
+ stats.total_bases += read_pos;
// FastQC's gc model summarized, if requested
if (do_gc_sequence && read_pos != 0) {
@@ -429,8 +424,8 @@ StreamReader::read_sequence_line(FastqStats &stats) {
if (do_adapters_slow) {
const std::string seq_line_str = cur_char;
- for (size_t i = 0; i != num_adapters; ++i) {
- const size_t adapt_index = seq_line_str.find(adapter_seqs[i], 0);
+ for (std::size_t i = 0; i != num_adapters; ++i) {
+ const std::size_t adapt_index = seq_line_str.find(adapter_seqs[i], 0);
if (adapt_index < stats.SHORT_READ_THRESHOLD) {
++stats.pos_adapter_count[((adapt_index + adapter_seqs[i].length() - 1)
<< Constants::bit_shift_adapter) |
@@ -498,7 +493,7 @@ StreamReader::process_quality_base_from_buffer(FastqStats &stats) {
// Tile processing
if (!tile_ignore && do_tile_read && tile_cur != 0) {
// allocate more base space if necessary
- if (stats.tile_position_quality[tile_cur].size() == read_pos) {
+ if (std::size(stats.tile_position_quality[tile_cur]) == read_pos) {
stats.tile_position_quality[tile_cur].push_back(0.0);
stats.tile_position_count[tile_cur].push_back(0);
}
@@ -591,8 +586,8 @@ StreamReader::read_quality_line(FastqStats &stats) {
/*******************************************************/
/*************** THIS IS VERY SLOW ********************/
// if reads are >75pb, truncate to 50 akin to FastQC
-inline size_t
-get_truncate_point(const size_t read_pos) {
+inline std::size_t
+get_truncate_point(const std::size_t read_pos) {
return (read_pos <= Constants::unique_reads_max_length)
? read_pos
: Constants::unique_reads_truncate;
@@ -630,7 +625,7 @@ StreamReader::postprocess_fastq_record(FastqStats &stats) {
}
inline bool
-StreamReader::check_bytes_read(const size_t read_num) {
+StreamReader::check_bytes_read(const std::size_t read_num) {
return ((read_num & check_bytes_read_mask) == 0);
}
@@ -639,7 +634,7 @@ StreamReader::check_bytes_read(const size_t read_num) {
/*******************************************************/
char
get_line_separator(const std::string &filename) {
- FILE *fp = fopen(filename.c_str(), "r");
+ FILE *fp = fopen(std::data(filename), "r");
if (fp == NULL)
throw std::runtime_error("bad input file: " + filename);
@@ -655,29 +650,29 @@ get_line_separator(const std::string &filename) {
}
// Set fastq field_separator as line_separator
-FastqReader::FastqReader(FalcoConfig &_config, const size_t _buffer_size) :
+FastqReader::FastqReader(FalcoConfig &_config, const std::size_t _buffer_size) :
StreamReader(_config, _buffer_size, get_line_separator(_config.filename),
get_line_separator(_config.filename)) {
filebuf = new char[RESERVE_SIZE];
}
-size_t
+std::size_t
get_file_size(const std::string &filename) {
- FILE *fp = fopen(filename.c_str(), "r");
+ FILE *fp = fopen(std::data(filename), "r");
if (fp == NULL)
throw std::runtime_error("bad input file: " + filename);
fseek(fp, 0L, SEEK_END);
- const size_t ret = static_cast(ftell(fp));
+ const std::size_t ret = static_cast(ftell(fp));
fclose(fp);
return ret;
}
// Load fastq with zlib
-size_t
+std::size_t
FastqReader::load() {
- fileobj = fopen(filename.c_str(), "r");
+ fileobj = fopen(std::data(filename), "r");
if (fileobj == NULL)
throw std::runtime_error("Cannot open FASTQ file : " + filename);
return get_file_size(filename);
@@ -696,7 +691,7 @@ FastqReader::~FastqReader() {
// Parses fastq gz by reading line by line into the gzbuf
bool
-FastqReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
+FastqReader::read_entry(FastqStats &stats, std::size_t &num_bytes_read) {
cur_char = fgets(filebuf, RESERVE_SIZE, fileobj);
// need to check here if we did not hit eof
@@ -710,6 +705,7 @@ FastqReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
cur_char = fgets(filebuf, RESERVE_SIZE, fileobj);
read_sequence_line(stats);
+
skip_separator();
cur_char = fgets(filebuf, RESERVE_SIZE, fileobj);
@@ -731,6 +727,7 @@ FastqReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
// Returns if file should keep being checked
if (check_bytes_read(stats.num_reads))
num_bytes_read = ftell(fileobj);
+
return (!is_eof() && cur_char != 0);
}
@@ -738,15 +735,16 @@ FastqReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
/*************** READ FASTQ GZ RCORD *******************/
/*******************************************************/
// the gz fastq constructor is the same as the fastq
-GzFastqReader::GzFastqReader(FalcoConfig &_config, const size_t _buffer_size) :
+GzFastqReader::GzFastqReader(FalcoConfig &_config,
+ const std::size_t _buffer_size) :
StreamReader(_config, _buffer_size, '\n', '\n') {
gzbuf = new char[RESERVE_SIZE];
}
// Load fastq with zlib
-size_t
+std::size_t
GzFastqReader::load() {
- fileobj = gzopen(filename.c_str(), "r");
+ fileobj = gzopen(std::data(filename), "r");
if (fileobj == Z_NULL)
throw std::runtime_error("Cannot open gzip FASTQ file : " + filename);
@@ -766,7 +764,7 @@ GzFastqReader::~GzFastqReader() {
// Parses fastq gz by reading line by line into the gzbuf
bool
-GzFastqReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
+GzFastqReader::read_entry(FastqStats &stats, std::size_t &num_bytes_read) {
cur_char = gzgets(fileobj, gzbuf, RESERVE_SIZE);
// need to check here if we did not hit eof
@@ -808,15 +806,15 @@ GzFastqReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
/*************** READ SAM RECORD ***********************/
/*******************************************************/
// set sam separator as tab
-SamReader::SamReader(FalcoConfig &_config, const size_t _buffer_size) :
+SamReader::SamReader(FalcoConfig &_config, const std::size_t _buffer_size) :
StreamReader(_config, _buffer_size, '\t',
get_line_separator(_config.filename)) {
filebuf = new char[RESERVE_SIZE];
}
-size_t
+std::size_t
SamReader::load() {
- fileobj = fopen(filename.c_str(), "r");
+ fileobj = fopen(std::data(filename), "r");
if (fileobj == NULL)
throw std::runtime_error("Cannot open SAM file : " + filename);
@@ -835,7 +833,7 @@ SamReader::is_eof() {
}
bool
-SamReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
+SamReader::read_entry(FastqStats &stats, std::size_t &num_bytes_read) {
cur_char = fgets(filebuf, RESERVE_SIZE, fileobj);
if (is_eof())
@@ -845,7 +843,7 @@ SamReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
read_tile_line(stats);
skip_separator();
- for (size_t i = 0; i < 8; ++i) {
+ for (std::size_t i = 0; i < 8; ++i) {
read_fast_forward_line();
skip_separator();
}
@@ -881,13 +879,13 @@ SamReader::~SamReader() {
// puts base either on buffer or leftover
void
-BamReader::put_base_in_buffer(const size_t pos) {
+BamReader::put_base_in_buffer(const std::size_t pos) {
base_from_buffer = seq_nt16_str[bam_seqi(cur_char, pos)];
if (still_in_buffer) {
buffer[read_pos] = base_from_buffer;
}
else {
- if (leftover_ind == leftover_buffer.size())
+ if (leftover_ind == std::size(leftover_buffer))
leftover_buffer.push_back(base_from_buffer);
else
leftover_buffer[leftover_ind] = base_from_buffer;
@@ -910,15 +908,15 @@ BamReader::read_sequence_line(FastqStats &stats) {
do_kmer_read = (stats.num_reads == next_kmer_read);
adapters_found.reset();
- const size_t seq_len = b->core.l_qseq;
+ const std::size_t seq_len = b->core.l_qseq;
// MN: TODO: make sure everything works in this scope
if (do_adapters_slow) {
std::string seq_line_str(seq_len, '\0');
- for (size_t i = 0; i < seq_len; i++) {
+ for (std::size_t i = 0; i < seq_len; i++) {
seq_line_str[i] = seq_nt16_str[bam_seqi(cur_char, i)];
}
- for (size_t i = 0; i != num_adapters; ++i) {
- const size_t adapt_index = seq_line_str.find(adapter_seqs[i], 0);
+ for (std::size_t i = 0; i != num_adapters; ++i) {
+ const std::size_t adapt_index = seq_line_str.find(adapter_seqs[i], 0);
if (adapt_index < stats.SHORT_READ_THRESHOLD) {
++stats.pos_adapter_count[((adapt_index + adapter_seqs[i].length() - 1)
<< Constants::bit_shift_adapter) |
@@ -933,7 +931,7 @@ BamReader::read_sequence_line(FastqStats &stats) {
// In the following loop, cur_char does not change, but rather i changes
// and we access bases using bam_seqi(cur_char, i) in
// put_base_in_buffer.
- for (size_t i = 0; i < seq_len; i++, ++read_pos) {
+ for (std::size_t i = 0; i < seq_len; i++, ++read_pos) {
// if we reached the buffer size, stop using it and start using leftover
if (read_pos == buffer_size) {
still_in_buffer = false;
@@ -988,8 +986,8 @@ BamReader::read_quality_line(FastqStats &stats) {
cur_quality = 0;
still_in_buffer = true;
- const size_t seq_len = b->core.l_qseq;
- for (size_t i = 0; i < seq_len; ++cur_char, i++) {
+ const std::size_t seq_len = b->core.l_qseq;
+ for (std::size_t i = 0; i < seq_len; ++cur_char, i++) {
if (read_pos == buffer_size) {
still_in_buffer = false;
@@ -1117,14 +1115,14 @@ reverse_quality_scores(bam1_t *aln) {
}
// set sam separator as tab
-BamReader::BamReader(FalcoConfig &_config, const size_t _buffer_size) :
+BamReader::BamReader(FalcoConfig &_config, const std::size_t _buffer_size) :
StreamReader(_config, _buffer_size, '\t', '\n') {
rd_ret = 0;
}
-size_t
+std::size_t
BamReader::load() {
- if (!(hts = hts_open(filename.c_str(), "r")))
+ if (!(hts = hts_open(std::data(filename), "r")))
throw std::runtime_error("cannot load bam file : " + filename);
if (!(hdr = sam_hdr_read(hts)))
@@ -1144,30 +1142,29 @@ BamReader::is_eof() {
}
bool
-BamReader::read_entry(FastqStats &stats, size_t &num_bytes_read) {
+BamReader::read_entry(FastqStats &stats, std::size_t &num_bytes_read) {
static const uint16_t not_reverse = ~BAM_FREVERSE;
if ((rd_ret = sam_read1(hts, hdr, b)) >= 0) {
-
if (bam_is_rev(b)) {
revcomp_seq_by_byte(b);
reverse_quality_scores(b);
b->core.flag &= not_reverse;
}
-
num_bytes_read = 0;
do_read = (stats.num_reads == next_read);
// Read tile line
cur_char = bam_get_qname(b);
last = cur_char + b->m_data;
- const size_t first_padding_null = b->core.l_qname - b->core.l_extranul - 1;
+ const std::size_t first_padding_null =
+ b->core.l_qname - b->core.l_extranul - 1;
// Turn "QUERYNAME\0\0\0" into "QUERYNAME\t\0\0" (assuming
// field_separtor = '\t') to be compatible with read_fast_forward_line().
cur_char[first_padding_null] = field_separator;
read_tile_line(stats);
// Read sequence line
- size_t seq_len = b->core.l_qseq;
+ std::size_t seq_len = b->core.l_qseq;
cur_char = reinterpret_cast(bam_get_seq(b));
BamReader::read_sequence_line(stats);
diff --git a/test/md5sum.txt b/test/md5sum.txt
index faac2a1..c1564b8 100644
--- a/test/md5sum.txt
+++ b/test/md5sum.txt
@@ -1,30 +1,30 @@
-fe1d421b95e1289f62d5f60157b8bed0 test_output/SRR1853178_1/fastqc_data.txt
+37014d8b6e5338c54f6e39a9a65448da test_output/SRR1853178_1/fastqc_data.txt
36df1dcab539ba4ef885239fc8524636 test_output/SRR1853178_1/summary.txt
-19c984bdddd5d656e8bb9f50bb08fc5a test_output/SRR3897196_2/fastqc_data.txt
+ce1121532724dbfc9f75408f3b6bb1b8 test_output/SRR3897196_2/fastqc_data.txt
80cd130958bcb2219f1e5a07d06a3b6e test_output/SRR3897196_2/summary.txt
-9bb60254ebbca76328a0fc5c7d35d38f test_output/SRR10143153_2/fastqc_data.txt
+189608afba8bc88f237259a8801417c4 test_output/SRR9624732_1/fastqc_data.txt
+c94f94bdfbed9b83f156c15ffea84127 test_output/SRR9624732_1/summary.txt
+a96c16a6ac4cce541f745b25d5b04fef test_output/SRR10143153_2/fastqc_data.txt
19f1811f324e4c44154f659bb6e22806 test_output/SRR10143153_2/summary.txt
-b5b45d94670c42ddff565d53ff9b34e8 test_output/SRR1772703.lite.1/fastqc_data.txt
+1991f6ba435c23642d3141856864ad99 test_output/SRR1772703.lite.1/fastqc_data.txt
ad5727295e7c8de6eb6874837bf1518f test_output/SRR1772703.lite.1/summary.txt
-3844262fde0e6c1bfeb6f3d12da3e483 test_output/SRR9624732_2/fastqc_data.txt
+657b3a5b77d8a7b9d5e8ff3c08dae765 test_output/SRR9624732_2/fastqc_data.txt
fefc5d746f853c14b5e00421ad1ec260 test_output/SRR9624732_2/summary.txt
-e7b534295a334f21f143daf0209708a1 test_output/SRR10124060/fastqc_data.txt
+e4df74ec74cf5ee69dd76d08825d1b2c test_output/SRR10124060/fastqc_data.txt
776f7d1b53bbed8683de9ca1d2529f1e test_output/SRR10124060/summary.txt
-38556ea8d058797f2e30b48c30fe77c6 test_output/SRR891268_2/fastqc_data.txt
+e5e62fbaefdf730452e0133eca248f69 test_output/SRR891268_2/fastqc_data.txt
20a8e50baace4c672622793874a3d7de test_output/SRR891268_2/summary.txt
-3cc125e2b29921e9369194616941b9e3 test_output/SRR9878537.lite.1/fastqc_data.txt
+401d183c10a50bdf0eac497c63630918 test_output/SRR9878537.lite.1/fastqc_data.txt
e5c40997d4993c07e164ee5598c39cf9 test_output/SRR9878537.lite.1/summary.txt
-f9ebe6a18e4438a79d535baa59d6a629 test_output/SRR891268_1/fastqc_data.txt
+8334959b07f8baa92548ad939aff5df0 test_output/SRR891268_1/fastqc_data.txt
69e7d0c53cd2e67117637c408b65333a test_output/SRR891268_1/summary.txt
-f11a39a545f2469161c7502c56082ee3 test_output/SRR6059706.lite.1/fastqc_data.txt
+985da7c31b11a4cb40686bf20d0df9cd test_output/SRR6059706.lite.1/fastqc_data.txt
e348e4bcc7fc6f05e989ac7858d2b287 test_output/SRR6059706.lite.1/summary.txt
-753bb1af5e2cf52a38e5d7b3d2c2f39b test_output/SRR6387347/fastqc_data.txt
+2eb1acd772bf29031455cb38072a396f test_output/SRR6387347/fastqc_data.txt
a61f65047e76f93300967cf399d044de test_output/SRR6387347/summary.txt
-513957b15a848d5eb29ca8e5c2ed0c45 test_output/SRR3897196_1/fastqc_data.txt
+d79d49f81677f19247bbd9cd6021f903 test_output/SRR3897196_1/fastqc_data.txt
b736ee95d5c450ef5c0dda31957b6818 test_output/SRR3897196_1/summary.txt
-2297ae14b668630cb547aacfd2da992f test_output/SRR10143153_1/fastqc_data.txt
+c93b8e4f2f14664419f57fb33edf5b64 test_output/SRR10143153_1/fastqc_data.txt
9ad191925d47a57d4f8b12f21ba0a7c3 test_output/SRR10143153_1/summary.txt
-255171890adc7117a5c4fae6d355091e test_output/SRR1853178_2/fastqc_data.txt
+940b43bfbddd9d22cac65895c6d37ae8 test_output/SRR1853178_2/fastqc_data.txt
c331d0f7a6aa9d72be41ac531f9ba269 test_output/SRR1853178_2/summary.txt
-c94f94bdfbed9b83f156c15ffea84127 test_output/SRR9624732_1/summary.txt
-b433a0d30f3952f2f5f94cd90ecc6939 test_output/SRR9624732_1/fastqc_data.txt