// ==========================================================================
// quality_trimmer
// ==========================================================================
// Copyright (c) 2006-2013, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Knut Reinert or the FU Berlin nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Your Name <your.email@example.net>
// ==========================================================================
#include <seqan/basic.h>
#include <seqan/sequence.h>
#include <seqan/seq_io.h>
#include <seqan/arg_parse.h>
// ==========================================================================
// Classes
// ==========================================================================
// --------------------------------------------------------------------------
// Class QualityTrimmerOptions
// --------------------------------------------------------------------------
// This struct stores the options from the command line.
using namespace seqan;
struct QualityTrimmerOptions
{
CharString inputReads;
CharString outputFile;
unsigned treshold;
QualityTrimmerOptions() :
treshold(0)
{};
};
// ==========================================================================
// Functions
// ==========================================================================
// --------------------------------------------------------------------------
// Function parseCommandLine()
// --------------------------------------------------------------------------
// Sets up the argument parser for quality_trimmer, parses the command line
// and, on success, copies the option values into `options`.
//
// options  Receives inputReads, outputFile and treshold. Left untouched
//          unless parsing returns PARSE_OK.
// argc     Argument count as passed to main().
// argv     Argument vector as passed to main().
//
// Returns the parser's result: PARSE_OK when the program should continue,
// otherwise the non-OK result reported by seqan::parse() (e.g. a parse error
// or a built-in action such as printing the help).
seqan::ArgumentParser::ParseResult
parseCommandLine(QualityTrimmerOptions & options, int argc, char const ** argv)
{
    // Setup ArgumentParser.
    seqan::ArgumentParser parser("quality_trimmer");

    // Set short description, version, and date.
    setShortDescription(parser, "Trim bases from the end of reads");
    setVersion(parser, "0.1");
    setDate(parser, "July 2012");

    // Define usage line and long description. The tool takes no positional
    // arguments; both files are passed as (required) options.
    addUsageLine(parser, "[\\fIOPTIONS\\fP] \\fB-is\\fP \\fIIN\\fP \\fB-o\\fP \\fIOUT\\fP");
    addDescription(parser, "This tool removes x bases from the end of each read.");

    // Two options are required (input and output file); the threshold is
    // optional and keeps its default when omitted.

    // The reads
    addOption(parser, ArgParseOption("is", "inputReads", "Name of the FASTQ input file.", ArgParseArgument::INPUTFILE, "IN"));
    setRequired(parser, "is");

    // The output file
    addOption(parser, ArgParseOption("o", "outputFile", "Name of the FASTQ output file.", ArgParseArgument::OUTPUTFILE, "OUT"));
    setRequired(parser, "o");

    // The number of bases to trim. The value is stored in an unsigned
    // field, so negative input is rejected by the parser.
    addOption(parser, ArgParseOption("t", "threshold", "Number of bases to remove from the end of each read.", ArgParseArgument::INTEGER, "NUM"));
    setMinValue(parser, "t", "0");

    // Add Examples Section.
    addTextSection(parser, "Examples");
    addListItem(parser, "\\fBquality_trimmer\\fP \\fB-is\\fP \\fIread_file\\fP \\fB-o\\fP \\fIoutput_file\\fP \\fB-t\\fP \\fIthreshold\\fP",
                "");

    // Parse command line.
    seqan::ArgumentParser::ParseResult res = seqan::parse(parser, argc, argv);

    // Only extract options if the program will continue after parseCommandLine()
    if (res != seqan::ArgumentParser::PARSE_OK)
        return res;

    getOptionValue(options.inputReads, parser, "inputReads");
    getOptionValue(options.outputFile, parser, "outputFile");
    getOptionValue(options.treshold, parser, "threshold");

    return seqan::ArgumentParser::PARSE_OK;
}
// --------------------------------------------------------------------------
// Function readFiles()
// --------------------------------------------------------------------------
// Loads the reads from `filename` into `meta`, `seq` and `qual` by calling
// read2() with the Fastq tag.
//
// meta      Receives the record identifiers.
// seq       Receives the sequences.
// qual      Receives the quality strings.
// filename  Path of the file to read.
//
// Returns 1 if the file cannot be opened; otherwise returns the result of
// read2() (callers in this file treat 0 as success).
template <typename TChar, typename TValue, typename TQual>
int readFiles(StringSet<String<TChar> > & meta, StringSet<String<TValue> > & seq, StringSet<String<TQual> > & qual, char const * filename)
{
    std::fstream stream(filename, std::ios::binary | std::ios::in);

    // Fail early instead of handing an unopened stream to the reader.
    if (!stream.is_open())
        return 1;

    RecordReader<std::fstream, DoublePass<> > reader(stream);
    return read2(meta, seq, qual, reader, Fastq());
}
// --------------------------------------------------------------------------
// Function writeFiles()
// --------------------------------------------------------------------------
// Writes `meta`, `seq` and `qual` to `filename` by calling write2() with the
// Fastq tag.
//
// meta      Record identifiers to write.
// seq       Sequences to write.
// qual      Quality strings to write.
// filename  Path of the file to create or overwrite.
//
// Returns 1 if the file cannot be opened for writing; otherwise returns the
// result of write2() (callers in this file treat 0 as success).
template <typename TChar, typename TValue, typename TQual>
int writeFiles(StringSet<String<TChar> > & meta, StringSet<String<TValue> > & seq, StringSet<String<TQual> > & qual, char const * filename)
{
    std::fstream stream(filename, std::ios::binary | std::ios::out);

    // Fail early instead of writing into an unopened stream.
    if (!stream.is_open())
        return 1;

    // No RecordReader is needed here: write2() writes directly to the stream.
    return write2(stream, meta, seq, qual, Fastq());
}
// --------------------------------------------------------------------------
// Function trim()
// --------------------------------------------------------------------------
// Removes the last `options.treshold` positions from every sequence and from
// its quality string.
//
// seq      Sequences to shorten in place.
// qual     Quality strings to shorten in place; entry i belongs to seq[i].
// options  Supplies `treshold`, the number of trailing positions to drop.
//
// A read that is shorter than the threshold becomes empty (the subtraction
// is unsigned and would otherwise wrap around to a huge length).
template <typename TValue, typename TQual>
void trim(StringSet<String<TValue> > & seq, StringSet<String<TQual> > & qual, QualityTrimmerOptions const & options)
{
    // Stop at the shorter of the two sets so neither is indexed out of range.
    for (unsigned i = 0; i < length(qual) && i < length(seq); ++i)
    {
        resize(qual[i], length(qual[i]) > options.treshold ? length(qual[i]) - options.treshold : 0u);
        resize(seq[i], length(seq[i]) > options.treshold ? length(seq[i]) - options.treshold : 0u);
    }
}
// --------------------------------------------------------------------------
// Function main()
// --------------------------------------------------------------------------
// Program entry point: parses the command line, loads the reads, trims them
// and writes the result.
//
// Returns 0 on success (or when built-in parser functionality such as the
// help was triggered), 1 on a command-line error or when reading or writing
// the reads fails.
int main(int argc, char const ** argv)
{
    // Parse the command line.
    QualityTrimmerOptions options;
    seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv);

    // If there was an error parsing or built-in argument parser functionality
    // was triggered then we exit the program. The return code is 1 if there
    // were errors and 0 if there were none.
    if (res != seqan::ArgumentParser::PARSE_OK)
        return res == seqan::ArgumentParser::PARSE_ERROR;

    std::cout << "Quality Trimming\n"
              << "================\n\n";

    // Init variables for read ids, read sequences and qualities.
    StringSet<String<char> > meta;
    StringSet<String<Dna5> > seq;
    StringSet<String<char> > qual;

    // Load the reads
    if (readFiles(meta, seq, qual, toCString(options.inputReads)) != 0)
    {
        std::cerr << "ERROR: Could not read reads from " << toCString(options.inputReads) << "\n";
        return 1;
    }

    // Trim the reads
    trim(seq, qual, options);

    // Write the output
    if (writeFiles(meta, seq, qual, toCString(options.outputFile)) != 0)
    {
        std::cerr << "ERROR: Could not write reads to " << toCString(options.outputFile) << "\n";
        return 1;
    }

    return 0;
}