SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
pairwise_alignment_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <concepts>
16 #include <ranges>
17 
24 
25 namespace seqan3::detail
26 {
27 
44 template <typename alignment_configuration_t, typename... policies_t>
45  requires is_type_specialisation_of_v<alignment_configuration_t, configuration>
46 class pairwise_alignment_algorithm : protected policies_t...
47 {
48 protected:
50  using traits_type = alignment_configuration_traits<alignment_configuration_t>;
52  using score_type = typename traits_type::score_type;
54  using alignment_result_type = typename traits_type::alignment_result_type;
55 
56  static_assert(!std::same_as<alignment_result_type, empty_type>, "Alignment result type was not configured.");
57 
58 public:
62  pairwise_alignment_algorithm() = default;
63  pairwise_alignment_algorithm(pairwise_alignment_algorithm const &) = default;
64  pairwise_alignment_algorithm(pairwise_alignment_algorithm &&) = default;
65  pairwise_alignment_algorithm & operator=(pairwise_alignment_algorithm const &) = default;
66  pairwise_alignment_algorithm & operator=(pairwise_alignment_algorithm &&) = default;
67  ~pairwise_alignment_algorithm() = default;
68 
76  pairwise_alignment_algorithm(alignment_configuration_t const & config) : policies_t(config)...
77  {}
79 
125  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
126  requires std::invocable<callback_t, alignment_result_type>
127  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
128  {
129  using std::get;
130 
131  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
132  {
133  size_t const sequence1_size = std::ranges::distance(get<0>(sequence_pair));
134  size_t const sequence2_size = std::ranges::distance(get<1>(sequence_pair));
135 
136  auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size);
137 
138  compute_matrix(get<0>(sequence_pair), get<1>(sequence_pair), alignment_matrix, index_matrix);
139  this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
140  std::move(idx),
141  this->optimal_score,
142  this->optimal_coordinate,
143  alignment_matrix,
144  callback);
145  }
146  }
148 
150  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
151  requires traits_type::is_vectorised && std::invocable<callback_t, alignment_result_type>
152  auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
153  {
154  using simd_collection_t = std::vector<score_type, aligned_allocator<score_type, alignof(score_type)>>;
155  using original_score_t = typename traits_type::original_score_type;
156 
157  // Extract the batch of sequences for the first and the second sequence.
158  auto seq1_collection = indexed_sequence_pairs | views::elements<0> | views::elements<0>;
159  auto seq2_collection = indexed_sequence_pairs | views::elements<0> | views::elements<1>;
160 
161  this->initialise_tracker(seq1_collection, seq2_collection);
162 
163  // Convert batch of sequences to sequence of simd vectors.
164  thread_local simd_collection_t simd_seq1_collection{};
165  thread_local simd_collection_t simd_seq2_collection{};
166 
167  convert_batch_of_sequences_to_simd_vector(simd_seq1_collection,
168  seq1_collection,
169  this->scoring_scheme.padding_symbol);
170  convert_batch_of_sequences_to_simd_vector(simd_seq2_collection,
171  seq2_collection,
172  this->scoring_scheme.padding_symbol);
173 
174  size_t const sequence1_size = std::ranges::distance(simd_seq1_collection);
175  size_t const sequence2_size = std::ranges::distance(simd_seq2_collection);
176 
177  auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size);
178 
179  compute_matrix(simd_seq1_collection, simd_seq2_collection, alignment_matrix, index_matrix);
180 
181  size_t index = 0;
182  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
183  {
184  original_score_t score = this->optimal_score[index]
185  - (this->padding_offsets[index] * this->scoring_scheme.padding_match_score());
186  matrix_coordinate coordinate{row_index_type{size_t{this->optimal_coordinate.row[index]}},
187  column_index_type{size_t{this->optimal_coordinate.col[index]}}};
188  this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
189  std::move(idx),
190  std::move(score),
191  std::move(coordinate),
192  alignment_matrix,
193  callback);
194  ++index;
195  }
196  }
197 
198 protected:
218  template <typename simd_sequence_t, std::ranges::forward_range sequence_collection_t, arithmetic padding_symbol_t>
219  requires std::ranges::output_range<simd_sequence_t, score_type>
220  void convert_batch_of_sequences_to_simd_vector(simd_sequence_t & simd_sequence,
221  sequence_collection_t & sequences,
222  padding_symbol_t const & padding_symbol)
223  {
224  assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_type::alignments_per_vector);
225 
226  simd_sequence.clear();
227  for (auto && simd_vector_chunk : sequences | views::to_simd<score_type>(padding_symbol))
228  std::ranges::move(simd_vector_chunk, std::back_inserter(simd_sequence));
229  }
230 
244  template <std::ranges::forward_range sequence1_t,
245  std::ranges::forward_range sequence2_t,
246  std::ranges::input_range alignment_matrix_t,
247  std::ranges::input_range index_matrix_t>
248  requires std::ranges::forward_range<std::ranges::range_reference_t<alignment_matrix_t>>
249  && std::ranges::forward_range<std::ranges::range_reference_t<index_matrix_t>>
250  void compute_matrix(sequence1_t && sequence1,
251  sequence2_t && sequence2,
252  alignment_matrix_t && alignment_matrix,
253  index_matrix_t && index_matrix)
254  {
255  // ---------------------------------------------------------------------
256  // Initialisation phase: allocate memory and initialise first column.
257  // ---------------------------------------------------------------------
258 
259  this->reset_optimum(); // Reset the tracker for the new alignment computation.
260 
261  auto alignment_matrix_it = alignment_matrix.begin();
262  auto indexed_matrix_it = index_matrix.begin();
263 
264  initialise_column(*alignment_matrix_it, *indexed_matrix_it, sequence2);
265 
266  // ---------------------------------------------------------------------
267  // Iteration phase: compute column-wise the alignment matrix.
268  // ---------------------------------------------------------------------
269 
270  for (auto alphabet1 : sequence1)
271  compute_column(*++alignment_matrix_it,
272  *++indexed_matrix_it,
273  this->scoring_scheme_profile_column(alphabet1),
274  sequence2);
275 
276  // ---------------------------------------------------------------------
277  // Final phase: track score of last column
278  // ---------------------------------------------------------------------
279 
280  auto && alignment_column = *alignment_matrix_it;
281  auto && cell_index_column = *indexed_matrix_it;
282 
283  auto alignment_column_it = alignment_column.begin();
284  auto cell_index_column_it = cell_index_column.begin();
285 
286  this->track_last_column_cell(*alignment_column_it, *cell_index_column_it);
287 
288  for ([[maybe_unused]] auto && unused : sequence2)
289  this->track_last_column_cell(*++alignment_column_it, *++cell_index_column_it);
290 
291  this->track_final_cell(*alignment_column_it, *cell_index_column_it);
292  }
293 
312  template <std::ranges::input_range alignment_column_t,
313  std::ranges::input_range cell_index_column_t,
314  std::ranges::input_range sequence2_t>
315  void initialise_column(alignment_column_t && alignment_column,
316  cell_index_column_t && cell_index_column,
317  sequence2_t && sequence2)
318  {
319  // ---------------------------------------------------------------------
320  // Initial phase: prepare column and initialise first cell
321  // ---------------------------------------------------------------------
322 
323  auto first_column_it = alignment_column.begin();
324  auto cell_index_column_it = cell_index_column.begin();
325  *first_column_it = this->track_cell(this->initialise_origin_cell(), *cell_index_column_it);
326 
327  // ---------------------------------------------------------------------
328  // Iteration phase: iterate over column and compute each cell
329  // ---------------------------------------------------------------------
330 
331  for ([[maybe_unused]] auto const & unused : sequence2)
332  {
333  ++first_column_it;
334  *first_column_it =
335  this->track_cell(this->initialise_first_column_cell(*first_column_it), *++cell_index_column_it);
336  }
337 
338  // ---------------------------------------------------------------------
339  // Final phase: track last cell of initial column
340  // ---------------------------------------------------------------------
341 
342  this->track_last_row_cell(*first_column_it, *cell_index_column_it);
343  }
344 
363  template <std::ranges::input_range alignment_column_t,
364  std::ranges::input_range cell_index_column_t,
365  typename alphabet1_t,
366  std::ranges::input_range sequence2_t>
367  requires semialphabet<alphabet1_t> || simd_concept<alphabet1_t>
368  void compute_column(alignment_column_t && alignment_column,
369  cell_index_column_t && cell_index_column,
370  alphabet1_t const & alphabet1,
371  sequence2_t && sequence2)
372  {
373  using score_type = typename traits_type::score_type;
374 
375  // ---------------------------------------------------------------------
376  // Initial phase: prepare column and initialise first cell
377  // ---------------------------------------------------------------------
378 
379  auto alignment_column_it = alignment_column.begin();
380  auto cell_index_column_it = cell_index_column.begin();
381 
382  auto cell = *alignment_column_it;
383  score_type diagonal = cell.best_score();
384  *alignment_column_it = this->track_cell(this->initialise_first_row_cell(cell), *cell_index_column_it);
385 
386  // ---------------------------------------------------------------------
387  // Iteration phase: iterate over column and compute each cell
388  // ---------------------------------------------------------------------
389 
390  for (auto const & alphabet2 : sequence2)
391  {
392  auto cell = *++alignment_column_it;
393  score_type next_diagonal = cell.best_score();
394  *alignment_column_it = this->track_cell(
395  this->compute_inner_cell(diagonal, cell, this->scoring_scheme.score(alphabet1, alphabet2)),
396  *++cell_index_column_it);
397  diagonal = next_diagonal;
398  }
399 
400  // ---------------------------------------------------------------------
401  // Final phase: track last cell
402  // ---------------------------------------------------------------------
403 
404  this->track_last_row_cell(*alignment_column_it, *cell_index_column_it);
405  }
406 };
407 
408 } // namespace seqan3::detail
Provides seqan3::aligned_allocator.
Provides helper type traits for the configuration and execution of the alignment algorithm.
T back_inserter(T... args)
The <concepts> header from C++20's standard library.
Provides seqan3::views::elements.
Provides seqan3::detail::empty_type.
T forward(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
The basis for seqan3::alphabet, but requires only rank interface (not char).
matrix_index< size_t > matrix_coordinate
A coordinate type to access an element inside of a two-dimensional matrix.
Definition: matrix_coordinate.hpp:178
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
The <ranges> header from C++20's standard library.
Provides seqan3::detail::to_simd view.
Provides traits to inspect some information of a type, for example its name.