vg
tools for working with variation graphs
|
#include <aligner.hpp>
Public Member Functions | |
double | max_possible_mapping_quality (int length) const |
double | estimate_max_possible_mapping_quality (int length, double min_diffs, double next_min_diffs) const |
virtual void | align_pinned (Alignment &alignment, const HandleGraph &g, bool pin_left) const =0 |
virtual void | align_pinned (Alignment &alignment, const HandleGraph &g, const vector< handle_t > &topological_order, bool pin_left) const =0 |
virtual void | align_pinned_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, bool pin_left, int32_t max_alt_alns) const =0 |
virtual void | align_pinned_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, const vector< handle_t > &topological_order, bool pin_left, int32_t max_alt_alns) const =0 |
virtual void | align_global_banded (Alignment &alignment, const HandleGraph &g, int32_t band_padding=0, bool permissive_banding=true) const =0 |
virtual void | align_global_banded_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, int32_t max_alt_alns, int32_t band_padding=0, bool permissive_banding=true) const =0 |
virtual void | align_xdrop (Alignment &alignment, const HandleGraph &g, const vector< MaximalExactMatch > &mems, bool reverse_complemented) const =0 |
virtual void | align_xdrop_multi (Alignment &alignment, const HandleGraph &g, const vector< MaximalExactMatch > &mems, bool reverse_complemented, int32_t max_alt_alns) const =0 |
virtual const XdropAligner & | get_xdrop () const =0 |
virtual int32_t | score_exact_match (const Alignment &aln, size_t read_offset, size_t length) const =0 |
virtual int32_t | score_exact_match (const string &sequence, const string &base_quality) const =0 |
virtual int32_t | score_exact_match (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0 |
virtual int32_t | score_partial_alignment (const Alignment &alignment, const HandleGraph &graph, const Path &path, string::const_iterator seq_begin) const =0 |
Compute the score of a path against the given range of subsequence with the given qualities. More... | |
int32_t | score_gap (size_t gap_length) const |
Returns the score of an insert or deletion of the given length. More... | |
void | compute_mapping_quality (vector< Alignment > &alignments, int max_mapping_quality, bool fast_approximation, double cluster_mq, bool use_cluster_mq, int overlap_count, double mq_estimate, double maybe_mq_threshold, double identity_weight) const |
void | compute_paired_mapping_quality (pair< vector< Alignment >, vector< Alignment >> &alignment_pairs, const vector< double > &frag_weights, int max_mapping_quality1, int max_mapping_quality2, bool fast_approximation, double cluster_mq, bool use_cluster_mq, int overlap_count1, int overlap_count2, double mq_estimate1, double mq_estimate2, double maybe_mq_threshold, double identity_weight) const |
Same function for paired reads; mapping qualities are stored in both alignments in the pair. More... | |
int32_t | compute_mapping_quality (const vector< double > &scores, bool fast_approximation, const vector< double > *multiplicities=nullptr) const |
int32_t | compute_group_mapping_quality (const vector< double > &scores, const vector< size_t > &group, const vector< double > *multiplicities=nullptr) const |
double | mapping_quality_score_diff (double mapping_quality) const |
double | score_to_unnormalized_likelihood_ln (double score) const |
size_t | longest_detectable_gap (const Alignment &alignment, const string::const_iterator &read_pos) const |
The longest gap detectable from a read position without soft-clipping. More... | |
size_t | longest_detectable_gap (size_t read_length, size_t read_pos) const |
The longest gap detectable from a read position without soft-clipping, for a generic read. More... | |
size_t | longest_detectable_gap (const Alignment &alignment) const |
The longest gap detectable from any read position without soft-clipping. More... | |
virtual int32_t | score_gappy_alignment (const Alignment &aln, const function< size_t(pos_t, pos_t, size_t)> &estimate_distance, bool strip_bonuses=false) const |
virtual int32_t | score_ungapped_alignment (const Alignment &aln, bool strip_bonuses=false) const |
virtual void | load_scoring_matrix (std::istream &matrix_stream) |
virtual int32_t | remove_bonuses (const Alignment &aln, bool pinned=false, bool pin_left=false) const |
![]() | |
virtual void | align (Alignment &alignment, const HandleGraph &g, bool traceback_aln, bool print_score_matrices) const =0 |
virtual void | align (Alignment &alignment, const HandleGraph &g, const vector< handle_t > &topological_order, bool traceback_aln, bool print_score_matrices) const =0 |
Same as previous, but takes advantage of a pre-computed topological order. More... | |
Static Public Member Functions | |
static double | maximum_mapping_quality_exact (const vector< double > &scaled_scores, size_t *max_idx_out, const vector< double > *multiplicities=nullptr) |
static double | maximum_mapping_quality_approx (const vector< double > &scaled_scores, size_t *max_idx_out, const vector< double > *multiplicities=nullptr) |
Public Attributes | |
int8_t * | nt_table = nullptr |
int8_t * | score_matrix = nullptr |
int8_t | match |
int8_t | mismatch |
int8_t | gap_open |
int8_t | gap_extension |
int8_t | full_length_bonus |
double | log_base = 0.0 |
Protected Member Functions | |
GSSWAligner ()=default | |
~GSSWAligner () | |
gssw_graph * | create_gssw_graph (const HandleGraph &g) const |
gssw_graph * | create_gssw_graph (const HandleGraph &g, const vector< handle_t > &topological_order) const |
unordered_set< id_t > | identify_pinning_points (const HandleGraph &graph) const |
void | unreverse_graph_mapping (gssw_graph_mapping *gm) const |
void | unreverse_graph (gssw_graph *graph) const |
void | gssw_mapping_to_alignment (gssw_graph *graph, gssw_graph_mapping *gm, Alignment &alignment, bool pinned, bool pin_left, bool print_score_matrices=false) const |
string | graph_cigar (gssw_graph_mapping *gm) const |
double | group_mapping_quality_exact (const vector< double > &scaled_scores, const vector< size_t > &group, const vector< double > *multiplicities=nullptr) const |
double | estimate_next_best_score (int length, double min_diffs) const |
void | init_mapping_quality (double gc_content) |
The basic GSSW-based core aligner implementation, which can then be quality-adjusted or not.
|
protected default |
|
protected |
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Same as previous, but takes advantage of a pre-computed topological order. The topological order MUST be left to right, no matter whether you are pinning left or right. If alignment needs to proceed backward, it will be reversed internally.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Same as previous, but takes advantage of a pre-computed topological order. The topological order MUST be left to right, no matter whether you are pinning left or right. If alignment needs to proceed backward, it will be reversed internally.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
int32_t GSSWAligner::compute_group_mapping_quality | ( | const vector< double > & | scores, |
const vector< size_t > & | group, | ||
const vector< double > * | multiplicities = nullptr |
||
) | const |
Computes mapping quality for a group of scores in a vector of scores (group given by indexes). Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if each member of the group has multiplicity 1.
int32_t GSSWAligner::compute_mapping_quality | ( | const vector< double > & | scores, |
bool | fast_approximation, | ||
const vector< double > * | multiplicities = nullptr |
||
) | const |
Computes mapping quality for the optimal score in a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.
void GSSWAligner::compute_mapping_quality | ( | vector< Alignment > & | alignments, |
int | max_mapping_quality, | ||
bool | fast_approximation, | ||
double | cluster_mq, | ||
bool | use_cluster_mq, | ||
int | overlap_count, | ||
double | mq_estimate, | ||
double | maybe_mq_threshold, | ||
double | identity_weight | ||
) | const |
Stores -10 * log_10(P_err) in the mapping_quality field of each alignment, where P_err is the probability that the alignment is not the correct one (assuming that one of the alignments in the vector is correct). The alignments must have been created with this Aligner for the quality score to be valid.
void GSSWAligner::compute_paired_mapping_quality | ( | pair< vector< Alignment >, vector< Alignment >> & | alignment_pairs, |
const vector< double > & | frag_weights, | ||
int | max_mapping_quality1, | ||
int | max_mapping_quality2, | ||
bool | fast_approximation, | ||
double | cluster_mq, | ||
bool | use_cluster_mq, | ||
int | overlap_count1, | ||
int | overlap_count2, | ||
double | mq_estimate1, | ||
double | mq_estimate2, | ||
double | maybe_mq_threshold, | ||
double | identity_weight | ||
) | const |
Same function for paired reads; mapping qualities are stored in both alignments in the pair.
|
protected |
|
protected |
double GSSWAligner::estimate_max_possible_mapping_quality | ( | int | length, |
double | min_diffs, | ||
double | next_min_diffs | ||
) | const |
|
protected |
|
pure virtual |
Get a fresh XdropAligner instance to align with. TODO: make XdropAligner thread safe, and make it a thing you can get from GetAligner.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
virtual |
Reads a 5x5 substitution scoring matrix from an input stream (can be an ifstream), expecting 5 whitespace-separated 8-bit integers per line.
size_t GSSWAligner::longest_detectable_gap | ( | const Alignment & | alignment | ) | const |
The longest gap detectable from any read position without soft-clipping.
size_t GSSWAligner::longest_detectable_gap | ( | const Alignment & | alignment, |
const string::const_iterator & | read_pos | ||
) | const |
The longest gap detectable from a read position without soft-clipping.
size_t GSSWAligner::longest_detectable_gap | ( | size_t | read_length, |
size_t | read_pos | ||
) | const |
The longest gap detectable from a read position without soft-clipping, for a generic read.
double GSSWAligner::mapping_quality_score_diff | ( | double | mapping_quality | ) | const |
Returns the difference between the optimal and second-best alignment scores that would result in this mapping quality using the fast mapping quality approximation.
double GSSWAligner::max_possible_mapping_quality | ( | int | length | ) | const |
|
static |
Given a nonempty vector of nonnegative scaled alignment scores, approximate the mapping quality of the maximal score in the vector. Sets max_idx_out to the index of that score in the vector. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.
|
static |
Given a nonempty vector of nonnegative scaled alignment scores, compute the mapping quality of the maximal score in the vector. Sets max_idx_out to the index of that score in the vector. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.
|
virtual |
Without necessarily rescoring the entire alignment, return the score of the given alignment with bonuses removed. Assumes that bonuses are actually included in the score. Needs to know if the alignment was pinned-end or not, and, if so, which end was pinned.
|
pure virtual |
Compute the score of an exact match in the given alignment, from the given offset, of the given length.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Compute the score of an exact match of the given sequence with the given qualities. Qualities may be ignored by some implementations.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Compute the score of an exact match of the given range of sequence with the given qualities. Qualities may be ignored by some implementations.
Implemented in vg::QualAdjAligner, and vg::Aligner.
int32_t GSSWAligner::score_gap | ( | size_t | gap_length | ) | const |
Returns the score of an insert or deletion of the given length.
|
virtual |
Use the score values in the aligner to score the given alignment, scoring gaps caused by jumping between nodes using a custom gap length estimation function (which takes the from position, the to position, and a search limit in bp that happens to be the read length).
May include full length bonus or not. TODO: bool flags are bad.
|
pure virtual |
Compute the score of a path against the given range of subsequence with the given qualities.
Implemented in vg::QualAdjAligner, and vg::Aligner.
double GSSWAligner::score_to_unnormalized_likelihood_ln | ( | double | score | ) | const |
Convert a score to an unnormalized log likelihood for the sequence. Requires log_base to have been set.
|
virtual |
Use the score values in the aligner to score the given alignment, assuming that there are no gaps between Mappings in the Path.
|
protected |
|
protected |
int8_t vg::GSSWAligner::full_length_bonus |
int8_t vg::GSSWAligner::gap_extension |
int8_t vg::GSSWAligner::gap_open |
double vg::GSSWAligner::log_base = 0.0 |
int8_t vg::GSSWAligner::match |
int8_t vg::GSSWAligner::mismatch |
int8_t* vg::GSSWAligner::nt_table = nullptr |
int8_t* vg::GSSWAligner::score_matrix = nullptr |