|
| MinimizerMapper (const gbwtgraph::GBWTGraph &graph, const std::vector< std::unique_ptr< gbwtgraph::DefaultMinimizerIndex >> &minimizer_indexes, MinimumDistanceIndex &distance_index, const PathPositionHandleGraph *path_graph=nullptr) |
|
void | map (Alignment &aln, AlignmentEmitter &alignment_emitter) |
|
vector< Alignment > | map (Alignment &aln) |
|
pair< vector< Alignment >, vector< Alignment > > | map_paired (Alignment &aln1, Alignment &aln2, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer) |
|
pair< vector< Alignment >, vector< Alignment > > | map_paired (Alignment &aln1, Alignment &aln2) |
|
void | attempt_rescue (const Alignment &aligned_read, Alignment &rescued_alignment, bool rescue_forward) |
|
bool | fragment_distr_is_finalized () |
|
void | finalize_fragment_length_distr () |
|
int64_t | distance_between (const Alignment &aln1, const Alignment &aln2) |
|
void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, uint32_t xdrop_max_gap_length=default_xdrop_max_gap_length) |
| Set all the aligner scoring parameters and create the stored aligner instances. More...
|
|
void | load_scoring_matrix (std::ifstream &matrix_stream) |
| Load a scoring matrix from a file to set scores. More...
|
|
|
std::vector< Minimizer > | find_minimizers (const std::string &sequence, Funnel &funnel) const |
|
std::pair< std::vector< pos_t >, std::vector< size_t > > | find_seeds (const std::vector< Minimizer > &minimizers, const Alignment &aln, Funnel &funnel) const |
|
std::pair< double, double > | score_cluster (const std::vector< size_t > &cluster, size_t i, const std::vector< Minimizer > &minimizers, const std::vector< size_t > &seed_to_source, size_t seq_length, Funnel &funnel) const |
|
void | find_optimal_tail_alignments (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, Alignment &best, Alignment &second_best) const |
|
unordered_map< size_t, unordered_map< size_t, vector< Path > > > | find_connecting_paths (const vector< GaplessExtension > &extended_seeds, size_t read_length) const |
|
vector< TreeSubgraph > | get_tail_forest (const GaplessExtension &extended_seed, size_t read_length, bool left_tails) const |
|
pair< Path, size_t > | get_best_alignment_against_any_tree (const vector< TreeSubgraph > &trees, const string &sequence, const Position &default_position, bool pin_left) const |
|
void | dfs_gbwt (const Position &from, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const |
|
void | dfs_gbwt (handle_t from_handle, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const |
|
void | dfs_gbwt (const gbwt::SearchState &start_state, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const |
|
template<typename Item , typename Score = double> |
void | process_until_threshold (const vector< Item > &items, const function< Score(size_t)> &get_score, double threshold, size_t min_count, size_t max_count, const function< bool(size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const |
|
template<typename Item , typename Score = double> |
void | process_until_threshold (const vector< Item > &items, const vector< Score > &scores, double threshold, size_t min_count, size_t max_count, const function< bool(size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const |
|
template<typename Item , typename Score = double> |
void | process_until_threshold (const vector< Item > &items, const vector< Score > &scores, const function< bool(size_t, size_t)> &comparator, double threshold, size_t min_count, size_t max_count, const function< bool(size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const |
|
| AlignerClient (double gc_content_estimate=vg::default_gc_content) |
|
const GSSWAligner * | get_aligner (bool have_qualities=true) const |
|
const QualAdjAligner * | get_qual_adj_aligner () const |
|
const Aligner * | get_regular_aligner () const |
|
void vg::MinimizerMapper::dfs_gbwt |
( |
const Position & |
from, |
|
|
size_t |
walk_distance, |
|
|
const function< void(const handle_t &)> & |
enter_handle, |
|
|
const function< void(void)> |
exit_handle |
|
) |
| const |
|
protected |
Run a DFS on valid haplotypes in the GBWT starting from the given Position, and continuing up to the given number of bases.
Calls enter_handle when the DFS enters a haplotype visit to a particular handle, and exit_handle when it exits a visit. These let the caller maintain a stack and track the traversals.
The starting node is only entered if its offset isn't equal to its length (i.e. bases remain to be visited).
Stopping early is not permitted.
unordered_map<size_t, unordered_map<size_t, vector<Path> > > vg::MinimizerMapper::find_connecting_paths |
( |
const vector< GaplessExtension > & |
extended_seeds, |
|
|
size_t |
read_length |
|
) |
| const |
|
protected |
Find for each pair of extended seeds all the haplotype-consistent graph paths against which the intervening read sequence needs to be aligned.
Limits walks from each extended seed end to the longest detectable gap plus the remaining to-be-aligned sequence, both computed using the read length.
extended_seeds must be sorted by read start position. Any extended seeds that overlap in the read will be precluded from connecting.
numeric_limits<size_t>::max() is used to store sufficiently long Paths ending before sources (which cannot be reached from other extended seeds) and starting after sinks (which cannot reach any other extended seeds). Only sources and sinks have these "tail" paths.
Tail paths are only calculated if the MinimizerMapper has linear_tails set to true.
pair< Path, size_t > vg::MinimizerMapper::get_best_alignment_against_any_tree |
( |
const vector< TreeSubgraph > & |
trees, |
|
|
const string & |
sequence, |
|
|
const Position & |
default_position, |
|
|
bool |
pin_left |
|
) |
| const |
|
protected |
Find the best alignment of the given sequence against any of the trees provided in trees, where each tree is a TreeSubgraph over the GBWT graph. Each tree subgraph is rooted at the left in its own local coordinate space, even if we are pinning on the right.
If no mapping is possible (for example, because there are no trees), produce a pure insert at default_position.
Alignment is always pinned.
If pin_left is true, pin the alignment on the left to the root of each tree. Otherwise pin it on the right to the root of each tree.
Returns alignments in gbwt_graph space.
vector< TreeSubgraph > vg::MinimizerMapper::get_tail_forest |
( |
const GaplessExtension & |
extended_seed, |
|
|
size_t |
read_length, |
|
|
bool |
left_tails |
|
) |
| const |
|
protected |
Get all the trees defining tails off the specified side of the specified gapless extension. Should only be called if a tail on that side exists, or this is a waste of time.
If the gapless extension starts or ends at a node boundary, there may be multiple trees produced, each with a distinct root.
If the gapless extension abuts the edge of the read, an empty forest will be produced.
Each tree is represented as a TreeSubgraph over our gbwt_graph.
If left_tails is true, the trees read out of the left side of the gapless extension. Otherwise they read out of the right side.
template<typename Item , typename Score >
void vg::MinimizerMapper::process_until_threshold |
( |
const vector< Item > & |
items, |
|
|
const function< Score(size_t)> & |
get_score, |
|
|
double |
threshold, |
|
|
size_t |
min_count, |
|
|
size_t |
max_count, |
|
|
const function< bool(size_t)> & |
process_item, |
|
|
const function< void(size_t)> & |
discard_item_by_count, |
|
|
const function< void(size_t)> & |
discard_item_by_score |
|
) |
| const |
|
protected |
Given a vector of items, a function to get the score of each, a score-difference-from-the-best cutoff, and a min and max processed item count, process items in descending score order by calling process_item with the item's number, until min_count items are processed and either max_count items are processed or the score difference threshold is hit (or we run out of items).
If process_item returns false, the item is skipped and does not count against min_count or max_count.
Call discard_item_by_count with the item's number for all remaining items that would pass the score threshold.
Call discard_item_by_score with the item's number for all remaining items that would fail the score threshold.