BamTools 2.5.2
BamConstants.h
Go to the documentation of this file.
1// ***************************************************************************
2// BamConstants.h (c) 2011 Derek Barnett
3// Marth Lab, Department of Biology, Boston College
4// ---------------------------------------------------------------------------
5// Last modified: 16 October 2011 (DB)
6// ---------------------------------------------------------------------------
7// Provides basic constants for handling BAM files.
8// ***************************************************************************
9
10#ifndef BAM_CONSTANTS_H
11#define BAM_CONSTANTS_H
12
13#include <cassert>
14#include <string>
15#include "api/api_global.h"
16
21namespace BamTools {
namespace Constants {

const uint8_t BAM_SIZEOF_INT = 4;

// header magic number: the four bytes 'B', 'A', 'M', 0x01
const char* const BAM_HEADER_MAGIC = "BAM\1";
const uint8_t BAM_HEADER_MAGIC_LENGTH = 4;

// BAM alignment core size
const uint8_t BAM_CORE_SIZE = 32;
const uint8_t BAM_CORE_BUFFER_SIZE = 8;

// BAM alignment flags (one bit each, so they can be OR-ed together and
// tested with a bitwise AND)
const int BAM_ALIGNMENT_PAIRED = 0x0001;
const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002;
const int BAM_ALIGNMENT_UNMAPPED = 0x0004;
const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008;
const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010;
const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020;
const int BAM_ALIGNMENT_READ_1 = 0x0040;
const int BAM_ALIGNMENT_READ_2 = 0x0080;
const int BAM_ALIGNMENT_SECONDARY = 0x0100;
const int BAM_ALIGNMENT_QC_FAILED = 0x0200;
const int BAM_ALIGNMENT_DUPLICATE = 0x0400;
const int BAM_ALIGNMENT_SUPPLEMENTARY = 0x0800;

// CIGAR constants
// BAM_CIGAR_LOOKUP[op] is the character for numeric op code 'op'; the
// BAM_CIGAR_* codes below are the indices into it, and the *_CHAR constants
// repeat the same characters individually.
const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
const uint8_t BAM_CIGAR_MATCH = 0;
const uint8_t BAM_CIGAR_INS = 1;
const uint8_t BAM_CIGAR_DEL = 2;
const uint8_t BAM_CIGAR_REFSKIP = 3;
const uint8_t BAM_CIGAR_SOFTCLIP = 4;
const uint8_t BAM_CIGAR_HARDCLIP = 5;
const uint8_t BAM_CIGAR_PAD = 6;
const uint8_t BAM_CIGAR_SEQMATCH = 7;
const uint8_t BAM_CIGAR_MISMATCH = 8;

const char BAM_CIGAR_MATCH_CHAR = 'M';
const char BAM_CIGAR_INS_CHAR = 'I';
const char BAM_CIGAR_DEL_CHAR = 'D';
const char BAM_CIGAR_REFSKIP_CHAR = 'N';
const char BAM_CIGAR_SOFTCLIP_CHAR = 'S';
const char BAM_CIGAR_HARDCLIP_CHAR = 'H';
const char BAM_CIGAR_PAD_CHAR = 'P';
const char BAM_CIGAR_SEQMATCH_CHAR = '=';
const char BAM_CIGAR_MISMATCH_CHAR = 'X';

// BAM_CIGAR_MASK has the low BAM_CIGAR_SHIFT bits set (0xF)
const int BAM_CIGAR_SHIFT = 4;
const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);

// BAM tag types & sizes
const char BAM_TAG_TYPE_ASCII = 'A';
const char BAM_TAG_TYPE_INT8 = 'c';
const char BAM_TAG_TYPE_UINT8 = 'C';
const char BAM_TAG_TYPE_INT16 = 's';
const char BAM_TAG_TYPE_UINT16 = 'S';
const char BAM_TAG_TYPE_INT32 = 'i';
const char BAM_TAG_TYPE_UINT32 = 'I';
const char BAM_TAG_TYPE_FLOAT = 'f';
const char BAM_TAG_TYPE_STRING = 'Z';
const char BAM_TAG_TYPE_HEX = 'H';
const char BAM_TAG_TYPE_ARRAY = 'B';

const uint8_t BAM_TAG_TAGSIZE = 2;
const uint8_t BAM_TAG_TYPESIZE = 1;
const uint8_t BAM_TAG_ARRAYBASE_SIZE = 8;

// DNA bases
// BAM_DNA_LOOKUP[code] is the character for numeric base code 'code'; the
// BAM_BASECODE_* values below are the indices into it.
const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
const uint8_t BAM_BASECODE_EQUAL = 0;
const uint8_t BAM_BASECODE_A = 1;
const uint8_t BAM_BASECODE_C = 2;
const uint8_t BAM_BASECODE_M = 3;
const uint8_t BAM_BASECODE_G = 4;
const uint8_t BAM_BASECODE_R = 5;
const uint8_t BAM_BASECODE_S = 6;
const uint8_t BAM_BASECODE_V = 7;
const uint8_t BAM_BASECODE_T = 8;
const uint8_t BAM_BASECODE_W = 9;
const uint8_t BAM_BASECODE_Y = 10;
const uint8_t BAM_BASECODE_H = 11;
const uint8_t BAM_BASECODE_K = 12;
const uint8_t BAM_BASECODE_D = 13;
const uint8_t BAM_BASECODE_B = 14;
const uint8_t BAM_BASECODE_N = 15;

const char BAM_DNA_EQUAL = '=';
const char BAM_DNA_A = 'A';
const char BAM_DNA_C = 'C';
const char BAM_DNA_M = 'M';
const char BAM_DNA_G = 'G';
const char BAM_DNA_R = 'R';
const char BAM_DNA_S = 'S';
const char BAM_DNA_V = 'V';
const char BAM_DNA_T = 'T';
const char BAM_DNA_W = 'W';
const char BAM_DNA_Y = 'Y';
const char BAM_DNA_H = 'H';
const char BAM_DNA_K = 'K';
const char BAM_DNA_D = 'D';
const char BAM_DNA_B = 'B';
const char BAM_DNA_N = 'N';
const char BAM_DNA_DEL = '-';
const char BAM_DNA_PAD = '*';

// zlib & BGZF constants
// 139 and 255 are outside the range of a signed char, hence the explicit
// casts on GZIP_ID2 and OS_UNKNOWN.
const char GZIP_ID1 = 31;
const char GZIP_ID2 = static_cast<char>(139);
const char CM_DEFLATE = 8;
const char FLG_FEXTRA = 4;
const char OS_UNKNOWN = static_cast<char>(255);
const char BGZF_XLEN = 6;
const char BGZF_ID1 = 66;
const char BGZF_ID2 = 67;
const char BGZF_LEN = 2;

const int8_t GZIP_WINDOW_BITS = -15;
const int8_t Z_DEFAULT_MEM_LEVEL = 8;
const uint8_t BGZF_BLOCK_HEADER_LENGTH = 18;
const uint8_t BGZF_BLOCK_FOOTER_LENGTH = 8;
const uint32_t BGZF_MAX_BLOCK_SIZE = 65536;
const uint32_t BGZF_DEFAULT_BLOCK_SIZE = 65536;

}  // namespace Constants
147
149// -------------------------
150// tag-type helper structs
151// -------------------------
152
153// fail on any types not specified below
// Primary TagTypeHelper template: the fallback used for any T that has no
// explicit specialization. Every member trips assert(false) to flag the
// unsupported type; the return statements after the asserts are only reached
// when assertions are compiled out (NDEBUG), in which case the conversion
// queries answer false and TypeCode() yields 0.
template <typename T>
struct TagTypeHelper
{
    // Unsupported T: asserts, then reports that no tag type converts to T.
    static bool CanConvertFrom(const char)
    {
        assert(false);
        return false;
    }

    // Unsupported T: asserts, then reports that T converts to no tag type.
    static bool CanConvertTo(const char)
    {
        assert(false);
        return false;
    }

    // Unsupported T: asserts, then returns 0 as a "no type code" value.
    static char TypeCode()
    {
        assert(false);
        return 0;
    }
};
173
// Explicit specialization of TagTypeHelper for uint8_t.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the lines between each '{' and '}' below, so the return statement of every
// member is absent and what each one returns cannot be determined from here.
// Confirm against the upstream header before relying on or compiling this.
174template <>
175struct TagTypeHelper<uint8_t>
176{
 // Takes a tag type character 'c'; body not shown in this listing.
177 static bool CanConvertFrom(const char c)
178 {
180 }
 // Takes a tag type character 'c'; body not shown in this listing.
181 static bool CanConvertTo(const char c)
182 {
185 }
186
 // Body not shown in this listing.
187 static char TypeCode()
188 {
190 }
191};
192
// Explicit specialization of TagTypeHelper for int8_t.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the lines between each '{' and '}' below, so the return statement of every
// member is absent and what each one returns cannot be determined from here.
// Confirm against the upstream header before relying on or compiling this.
193template <>
194struct TagTypeHelper<int8_t>
195{
 // Takes a tag type character 'c'; body not shown in this listing.
196 static bool CanConvertFrom(const char c)
197 {
199 }
 // Takes a tag type character 'c'; body not shown in this listing.
200 static bool CanConvertTo(const char c)
201 {
204 }
 // Body not shown in this listing.
205 static char TypeCode()
206 {
208 }
209};
210
// Explicit specialization of TagTypeHelper for uint16_t.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the lines between each '{' and '}' below, so the return statement of every
// member is absent and what each one returns cannot be determined from here.
// Confirm against the upstream header before relying on or compiling this.
211template <>
212struct TagTypeHelper<uint16_t>
213{
 // Takes a tag type character 'c'; body not shown in this listing.
214 static bool CanConvertFrom(const char c)
215 {
218 }
 // Takes a tag type character 'c'; body not shown in this listing.
219 static bool CanConvertTo(const char c)
220 {
222 }
 // Body not shown in this listing.
223 static char TypeCode()
224 {
226 }
227};
228
// Explicit specialization of TagTypeHelper for int16_t.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the lines between each '{' and '}' below, so the return statement of every
// member is absent and what each one returns cannot be determined from here.
// Confirm against the upstream header before relying on or compiling this.
229template <>
230struct TagTypeHelper<int16_t>
231{
 // Takes a tag type character 'c'; body not shown in this listing.
232 static bool CanConvertFrom(const char c)
233 {
236 }
 // Takes a tag type character 'c'; body not shown in this listing.
237 static bool CanConvertTo(const char c)
238 {
240 }
 // Body not shown in this listing.
241 static char TypeCode()
242 {
244 }
245};
246
// Explicit specialization of TagTypeHelper for uint32_t.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the bodies of CanConvertFrom() and TypeCode(), so only CanConvertTo() can
// be described from here. Confirm the other two against the upstream header.
247template <>
248struct TagTypeHelper<uint32_t>
249{
 // Takes a tag type character 'c'; body not shown in this listing.
250 static bool CanConvertFrom(const char c)
251 {
254 }
 // Returns true only when 'c' equals Constants::BAM_TAG_TYPE_UINT32.
255 static bool CanConvertTo(const char c)
256 {
257 return (c == Constants::BAM_TAG_TYPE_UINT32);
258 }
 // Body not shown in this listing.
259 static char TypeCode()
260 {
262 }
263};
264
// Explicit specialization of TagTypeHelper for int32_t.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the bodies of CanConvertFrom() and TypeCode(), so only CanConvertTo() can
// be described from here. Confirm the other two against the upstream header.
265template <>
266struct TagTypeHelper<int32_t>
267{
 // Takes a tag type character 'c'; body not shown in this listing.
268 static bool CanConvertFrom(const char c)
269 {
272 }
 // Returns true only when 'c' equals Constants::BAM_TAG_TYPE_INT32.
273 static bool CanConvertTo(const char c)
274 {
275 return (c == Constants::BAM_TAG_TYPE_INT32);
276 }
 // Body not shown in this listing.
277 static char TypeCode()
278 {
280 }
281};
282
// Explicit specialization of TagTypeHelper for float.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the bodies of CanConvertFrom() and TypeCode(), so only CanConvertTo() can
// be described from here. Confirm the other two against the upstream header.
283template <>
284struct TagTypeHelper<float>
285{
 // Takes a tag type character 'c'; body not shown in this listing.
286 static bool CanConvertFrom(const char c)
287 {
292 }
 // Returns true only when 'c' equals Constants::BAM_TAG_TYPE_FLOAT.
293 static bool CanConvertTo(const char c)
294 {
295 return (c == Constants::BAM_TAG_TYPE_FLOAT);
296 }
 // Body not shown in this listing.
297 static char TypeCode()
298 {
300 }
301};
302
// Explicit specialization of TagTypeHelper for std::string.
// NOTE(review): this listing is incomplete -- its embedded line numbers skip
// the lines between each '{' and '}' below, so the return statement of every
// member is absent and what each one returns cannot be determined from here.
// Confirm against the upstream header before relying on or compiling this.
303template <>
304struct TagTypeHelper<std::string>
305{
 // Takes a tag type character 'c'; body not shown in this listing.
306 static bool CanConvertFrom(const char c)
307 {
309 }
 // Takes a tag type character 'c'; body not shown in this listing.
310 static bool CanConvertTo(const char c)
311 {
313 }
 // Body not shown in this listing.
314 static char TypeCode()
315 {
317 }
318};
319
321
322} // namespace BamTools
323
324#endif // BAM_CONSTANTS_H
const int BAM_ALIGNMENT_UNMAPPED
Definition: BamConstants.h:37
const char BAM_TAG_TYPE_UINT8
Definition: BamConstants.h:76
const char CM_DEFLATE
Definition: BamConstants.h:131
const uint8_t BAM_BASECODE_S
Definition: BamConstants.h:98
const char BAM_DNA_N
Definition: BamConstants.h:124
const uint8_t BAM_CIGAR_MISMATCH
Definition: BamConstants.h:58
const char BAM_DNA_H
Definition: BamConstants.h:120
const char BAM_TAG_TYPE_HEX
Definition: BamConstants.h:83
const uint32_t BGZF_DEFAULT_BLOCK_SIZE
Definition: BamConstants.h:144
const char BAM_TAG_TYPE_INT32
Definition: BamConstants.h:79
const char BAM_DNA_D
Definition: BamConstants.h:122
const char BAM_TAG_TYPE_ASCII
Definition: BamConstants.h:74
const char BAM_DNA_V
Definition: BamConstants.h:116
const uint8_t BAM_BASECODE_M
Definition: BamConstants.h:95
const int BAM_CIGAR_SHIFT
Definition: BamConstants.h:70
const char BAM_DNA_T
Definition: BamConstants.h:117
const uint8_t BAM_CIGAR_REFSKIP
Definition: BamConstants.h:53
const uint8_t BAM_CIGAR_SEQMATCH
Definition: BamConstants.h:57
const uint8_t BAM_CIGAR_PAD
Definition: BamConstants.h:56
const uint8_t BGZF_BLOCK_FOOTER_LENGTH
Definition: BamConstants.h:142
const char BAM_CIGAR_INS_CHAR
Definition: BamConstants.h:61
const int BAM_ALIGNMENT_DUPLICATE
Definition: BamConstants.h:45
const char BAM_DNA_G
Definition: BamConstants.h:113
const char BAM_DNA_B
Definition: BamConstants.h:123
const uint8_t BAM_BASECODE_N
Definition: BamConstants.h:107
const char BGZF_XLEN
Definition: BamConstants.h:134
const char BAM_CIGAR_SEQMATCH_CHAR
Definition: BamConstants.h:67
const int BAM_ALIGNMENT_PAIRED
Definition: BamConstants.h:35
const uint8_t BAM_BASECODE_C
Definition: BamConstants.h:94
const char BGZF_LEN
Definition: BamConstants.h:137
const char BAM_CIGAR_MATCH_CHAR
Definition: BamConstants.h:60
const uint8_t BAM_BASECODE_B
Definition: BamConstants.h:106
const char BAM_DNA_DEL
Definition: BamConstants.h:125
const uint8_t BAM_BASECODE_T
Definition: BamConstants.h:100
const char BAM_CIGAR_SOFTCLIP_CHAR
Definition: BamConstants.h:64
const uint8_t BAM_TAG_TAGSIZE
Definition: BamConstants.h:86
const char *const BAM_HEADER_MAGIC
Definition: BamConstants.h:27
const uint8_t BAM_CORE_BUFFER_SIZE
Definition: BamConstants.h:32
const uint8_t BAM_BASECODE_W
Definition: BamConstants.h:101
const char BAM_TAG_TYPE_ARRAY
Definition: BamConstants.h:84
const int BAM_ALIGNMENT_SECONDARY
Definition: BamConstants.h:43
const uint8_t BAM_CIGAR_INS
Definition: BamConstants.h:51
const char BAM_DNA_R
Definition: BamConstants.h:114
const char BAM_DNA_K
Definition: BamConstants.h:121
const char *const BAM_CIGAR_LOOKUP
Definition: BamConstants.h:49
const char BAM_TAG_TYPE_FLOAT
Definition: BamConstants.h:81
const uint8_t BAM_BASECODE_V
Definition: BamConstants.h:99
const char BAM_TAG_TYPE_UINT32
Definition: BamConstants.h:80
const uint8_t BAM_CORE_SIZE
Definition: BamConstants.h:31
const uint8_t BAM_BASECODE_EQUAL
Definition: BamConstants.h:92
const char BAM_DNA_A
Definition: BamConstants.h:110
const int BAM_ALIGNMENT_QC_FAILED
Definition: BamConstants.h:44
const char FLG_FEXTRA
Definition: BamConstants.h:132
const uint8_t BAM_CIGAR_DEL
Definition: BamConstants.h:52
const int8_t Z_DEFAULT_MEM_LEVEL
Definition: BamConstants.h:140
const char BAM_TAG_TYPE_STRING
Definition: BamConstants.h:82
const char BAM_CIGAR_MISMATCH_CHAR
Definition: BamConstants.h:68
const char *const BAM_DNA_LOOKUP
Definition: BamConstants.h:91
const char BAM_DNA_S
Definition: BamConstants.h:115
const uint32_t BGZF_MAX_BLOCK_SIZE
Definition: BamConstants.h:143
const char BAM_CIGAR_PAD_CHAR
Definition: BamConstants.h:66
const uint8_t BGZF_BLOCK_HEADER_LENGTH
Definition: BamConstants.h:141
const char BAM_TAG_TYPE_INT8
Definition: BamConstants.h:75
const int BAM_CIGAR_MASK
Definition: BamConstants.h:71
const int BAM_ALIGNMENT_READ_1
Definition: BamConstants.h:41
const char BGZF_ID1
Definition: BamConstants.h:135
const int BAM_ALIGNMENT_REVERSE_STRAND
Definition: BamConstants.h:39
const char BAM_DNA_C
Definition: BamConstants.h:111
const uint8_t BAM_BASECODE_K
Definition: BamConstants.h:104
const char BAM_TAG_TYPE_UINT16
Definition: BamConstants.h:78
const uint8_t BAM_HEADER_MAGIC_LENGTH
Definition: BamConstants.h:28
const int BAM_ALIGNMENT_READ_2
Definition: BamConstants.h:42
const char BAM_TAG_TYPE_INT16
Definition: BamConstants.h:77
const uint8_t BAM_BASECODE_Y
Definition: BamConstants.h:102
const uint8_t BAM_CIGAR_MATCH
Definition: BamConstants.h:50
const uint8_t BAM_SIZEOF_INT
Definition: BamConstants.h:24
const uint8_t BAM_BASECODE_D
Definition: BamConstants.h:105
const char BGZF_ID2
Definition: BamConstants.h:136
const char BAM_CIGAR_DEL_CHAR
Definition: BamConstants.h:62
const int BAM_ALIGNMENT_MATE_REVERSE_STRAND
Definition: BamConstants.h:40
const int BAM_ALIGNMENT_SUPPLEMENTARY
Definition: BamConstants.h:46
const char BAM_DNA_M
Definition: BamConstants.h:112
const int8_t GZIP_WINDOW_BITS
Definition: BamConstants.h:139
const char BAM_DNA_PAD
Definition: BamConstants.h:126
const uint8_t BAM_BASECODE_A
Definition: BamConstants.h:93
const int BAM_ALIGNMENT_PROPER_PAIR
Definition: BamConstants.h:36
const uint8_t BAM_TAG_TYPESIZE
Definition: BamConstants.h:87
const char BAM_DNA_W
Definition: BamConstants.h:118
const uint8_t BAM_CIGAR_SOFTCLIP
Definition: BamConstants.h:54
const char OS_UNKNOWN
Definition: BamConstants.h:133
const char BAM_CIGAR_REFSKIP_CHAR
Definition: BamConstants.h:63
const uint8_t BAM_BASECODE_G
Definition: BamConstants.h:96
const char BAM_DNA_EQUAL
Definition: BamConstants.h:109
const int BAM_ALIGNMENT_MATE_UNMAPPED
Definition: BamConstants.h:38
const char BAM_CIGAR_HARDCLIP_CHAR
Definition: BamConstants.h:65
const char GZIP_ID1
Definition: BamConstants.h:129
const uint8_t BAM_BASECODE_H
Definition: BamConstants.h:103
const char GZIP_ID2
Definition: BamConstants.h:130
const uint8_t BAM_BASECODE_R
Definition: BamConstants.h:97
const char BAM_DNA_Y
Definition: BamConstants.h:119
const uint8_t BAM_CIGAR_HARDCLIP
Definition: BamConstants.h:55
const uint8_t BAM_TAG_ARRAYBASE_SIZE
Definition: BamConstants.h:88
Contains all BamTools classes & methods.
Definition: Sort.h:24