SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
unidirectional_search_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
14 #pragma once
15 
16 #include <ranges>
17 #include <type_traits>
18 
24 
25 namespace seqan3::detail
26 {
27 
30 enum class error_type : uint8_t
31 {
32  deletion,
33  insertion,
34  matchmm,
35  none
36 };
37 
43 template <typename configuration_t, typename index_t, typename... policies_t>
44 class unidirectional_search_algorithm : protected policies_t...
45 {
46 private:
48  using traits_t = search_traits<configuration_t>;
50  using search_result_type = typename traits_t::search_result_type;
51 
52  static_assert(!std::same_as<search_result_type, empty_type>, "The search result type was not configured.");
53 
54 public:
58  unidirectional_search_algorithm() = default;
59  unidirectional_search_algorithm(unidirectional_search_algorithm const &) = default;
60  unidirectional_search_algorithm(unidirectional_search_algorithm &&) = default;
61  unidirectional_search_algorithm & operator=(unidirectional_search_algorithm const &) = default;
62  unidirectional_search_algorithm & operator=(unidirectional_search_algorithm &&) = default;
63  ~unidirectional_search_algorithm() = default;
64 
75  unidirectional_search_algorithm(configuration_t const & cfg, index_t const & index) : policies_t{cfg}...
76  {
77  stratum = cfg.get_or(search_cfg::hit_strata{0}).stratum;
78  index_ptr = &index;
79  }
81 
102  template <typename indexed_query_t, typename callback_t>
103  requires (std::tuple_size_v<indexed_query_t> == 2)
104  && std::ranges::forward_range<std::tuple_element_t<1, indexed_query_t>>
105  && std::invocable<callback_t, search_result_type>
106  void operator()(indexed_query_t && indexed_query, callback_t && callback)
107  {
108  auto && [query_idx, query] = indexed_query;
109  auto error_state = this->max_error_counts(query); // see policy_max_error
110 
111  // construct internal delegate for collecting hits for later filtering (if necessary)
113  delegate = [&internal_hits](auto const & it)
114  {
115  internal_hits.push_back(it);
116  };
117 
118  perform_search_by_hit_strategy(internal_hits, query, error_state);
119 
120  this->make_results(std::move(internal_hits), query_idx, callback); // see policy_search_result_builder
121  }
122 
123 private:
125  index_t const * index_ptr{nullptr};
126 
128  std::function<void(typename index_t::cursor_type const &)> delegate;
129 
131  uint8_t stratum{};
132 
133  // forward declaration
134  template <bool abort_on_hit, typename query_t>
135  bool search_trivial(typename index_t::cursor_type cur,
136  query_t & query,
137  typename index_t::cursor_type::size_type const query_pos,
138  search_param const error_left,
139  error_type const prev_error);
140 
147  template <typename query_t>
148  void perform_search_by_hit_strategy(std::vector<typename index_t::cursor_type> & internal_hits,
149  query_t & query,
150  search_param error_state)
151  {
152  if constexpr (!traits_t::search_all_hits)
153  {
154  auto max_total = error_state.total;
155  error_state.total = 0; // start search with less errors
156 
157  while (internal_hits.empty() && error_state.total <= max_total)
158  {
159  // * If you only want the best hit (traits_t::search_single_best_hit), you stop after finding the
160  // first hit, the hit with the least errors (`abort_on_hit` is true).
161  // * If you are in strata mode (traits_t::search_strata_hits), you do the same as with best hits,
162  // but then do the extra step afterwards (`abort_on_hit` is true).
163  // * If you want all best hits (traits_t::search_all_best_hits), you do not stop after the first
164  // hit but continue the current search algorithm/max_error pattern (`abort_on_hit` is true).
165  constexpr bool abort_on_hit = !traits_t::search_all_best_hits;
166  search_trivial<abort_on_hit>(index_ptr->cursor(), query, 0, error_state, error_type::none);
167  ++error_state.total;
168  }
169 
170  if constexpr (traits_t::search_strata_hits)
171  {
172  if (!internal_hits.empty())
173  {
174  internal_hits.clear();
175  error_state.total += stratum - 1;
176  search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
177  }
178  }
179  }
180  else // traits_t::search_all
181  {
182  // If you want to find all hits, you cannot stop once you found any hit (<false>)
183  // since you have to find all paths in the search tree that satisfy the hit condition.
184  search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
185  }
186  }
187 };
188 
208 template <typename configuration_t, typename index_t, typename... policies_t>
209 template <bool abort_on_hit, typename query_t>
210 inline bool unidirectional_search_algorithm<configuration_t, index_t, policies_t...>::search_trivial(
211  typename index_t::cursor_type cur,
212  query_t & query,
213  typename index_t::cursor_type::size_type const query_pos,
214  search_param const error_left,
215  error_type const prev_error)
216 {
217  // Exact case (end of query sequence or no errors left)
218  if (query_pos == std::ranges::size(query) || error_left.total == 0)
219  {
220  // If not at end of query sequence, try searching the remaining suffix without any errors.
221  using drop_size_t = std::ranges::range_difference_t<query_t>;
222  if (query_pos == std::ranges::size(query)
223  || cur.extend_right(std::views::drop(query, static_cast<drop_size_t>(query_pos))))
224  {
225  delegate(cur);
226  return true;
227  }
228  }
229  // Approximate case
230  else
231  {
232  // Insertion
233  // Only allow insertions if there is no match and we are not at the beginning of the query.
234  bool const allow_insertion =
235  (cur.query_length() > 0) ? cur.last_rank() != seqan3::to_rank(query[query_pos]) : true;
236 
237  if (allow_insertion && (prev_error != error_type::deletion || error_left.substitution == 0)
238  && error_left.insertion > 0)
239  {
240  search_param error_left2{error_left};
241  error_left2.insertion--;
242  error_left2.total--;
243 
244  // Always perform a recursive call. Abort recursion if and only if recursive call found a hit and
245  // abort_on_hit is set to true.
246  if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left2, error_type::insertion)
247  && abort_on_hit)
248  {
249  return true;
250  }
251  }
252 
253  // Do not allow deletions at the beginning of the query sequence
254  if (((query_pos > 0 && error_left.deletion > 0) || error_left.substitution > 0) && cur.extend_right())
255  {
256  do
257  {
258  // Match (when error_left.substitution > 0) and Mismatch
259  if (error_left.substitution > 0)
260  {
261  bool delta = cur.last_rank() != seqan3::to_rank(query[query_pos]);
262  search_param error_left2{error_left};
263  error_left2.total -= delta;
264  error_left2.substitution -= delta;
265 
266  if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left2, error_type::matchmm)
267  && abort_on_hit)
268  {
269  return true;
270  }
271  }
272 
273  // Deletion (Do not allow deletions at the beginning of the query sequence.)
274  if (query_pos > 0)
275  {
276  // Match (when error_left.substitution == 0)
277  if (error_left.substitution == 0 && cur.last_rank() == seqan3::to_rank(query[query_pos]))
278  {
279  if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left, error_type::matchmm)
280  && abort_on_hit)
281  {
282  return true;
283  }
284  }
285 
286  // Deletions at the end of the sequence are not allowed. When the algorithm
287  // arrives here, it cannot be at the end of the query and since deletions do not touch the query
288  // (i.e. increase query_pos) it won't be at the end of the query after the deletion.
289  // Do not allow deletions after an insertion.
290  if ((prev_error != error_type::insertion || error_left.substitution == 0)
291  && error_left.deletion > 0)
292  {
293  search_param error_left2{error_left};
294  error_left2.total--;
295  error_left2.deletion--;
296  // Only search for characters different from the corresponding query character.
297  // (Same character is covered by a match.)
298  if (cur.last_rank() != seqan3::to_rank(query[query_pos]))
299  {
300  if (search_trivial<abort_on_hit>(cur, query, query_pos, error_left2, error_type::deletion)
301  && abort_on_hit)
302  {
303  return true;
304  }
305  }
306  }
307  }
308  }
309  while (cur.cycle_back());
310  }
311  else
312  {
313  // Match (when error_left.substitution == 0)
314  if (cur.extend_right(query[query_pos]))
315  {
316  if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left, error_type::matchmm)
317  && abort_on_hit)
318  {
319  return true;
320  }
321  }
322  }
323  }
324 
325  return false;
326 }
327 
328 } // namespace seqan3::detail
Core alphabet concept and free function/type trait wrappers.
T clear(T... args)
T empty(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: alphabet/concept.hpp:155
@ none
No flag is set.
Definition: debug_stream_type.hpp:32
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
T push_back(T... args)
The <ranges> header from C++20's standard library.
Provides the concept for seqan3::detail::sdsl_index.
Provides data structures used by different search algorithms.
Provides seqan3::detail::search_traits.
Forward declares seqan3::detail::test_accessor.