ContextSV v1.0.0
Loading...
Searching...
No Matches
sv_caller.h
Go to the documentation of this file.
1// SVCaller: Detect SVs from long read alignments
2
3#ifndef SV_CALLER_H
4#define SV_CALLER_H
5
6#include "cnv_caller.h"
7#include "input_data.h"
8#include "sv_object.h"
9#include "fasta_query.h"
10
11#include <htslib/sam.h>
12
14// #include <mutex>
15#include <shared_mutex>
16#include <unordered_map>
17#include <future>
19
20
21class SVCaller {
22 private:
23 struct GenomicRegion {
24 int tid;
25 int start;
26 int end;
27 int query_start;
28 int query_end;
29 bool strand;
30 int cluster_size; // Number of alignments used for this region
31 };
32
33 struct PrimaryAlignment {
34 int start;
35 int end;
36 int query_start;
37 int query_end;
38 bool strand;
39 int cluster_size; // Number of alignments used for this region
40 };
41
42 struct SuppAlignment {
43 int tid;
44 int start;
45 int end;
46 int query_start;
47 int query_end;
48 bool strand;
49 };
50
51 struct SplitSignature {
52 int tid;
53 int start;
54 int end;
55 bool strand;
56 int query_start;
57 int query_end;
58 };
59
60 // Interval Tree Node
61 struct IntervalNode {
62 PrimaryAlignment region;
63 std::string qname;
64 int max_end; // To optimize queries
65 std::unique_ptr<IntervalNode> left;
66 std::unique_ptr<IntervalNode> right;
67
68 IntervalNode(PrimaryAlignment r, std::string name)
69 : region(r), qname(name), max_end(r.end), left(nullptr), right(nullptr) {}
70 };
71
72 int min_mapq = 20; // Minimum mapping quality to be considered
73 mutable std::shared_mutex shared_mutex; // Shared mutex for thread safety
74
75 std::vector<std::string> getChromosomes(const std::string& bam_filepath);
76
77 void findSplitSVSignatures(std::unordered_map<std::string, std::vector<SVCall>>& sv_calls, const InputData& input_data);
78
79 // Process a single CIGAR record and find candidate SVs
80 void processCIGARRecord(bam_hdr_t* header, bam1_t* alignment, std::vector<SVCall>& sv_calls, const std::vector<uint32_t>& pos_depth_map);
81
82 std::pair<int, int> getAlignmentReadPositions(bam1_t* alignment);
83
84 void processChromosome(const std::string& chr, std::vector<SVCall>& combined_sv_calls, const InputData& input_data, const std::vector<uint32_t>& chr_pos_depth_map, double mean_chr_cov);
85
86 void findCIGARSVs(samFile* fp_in, hts_idx_t* idx, bam_hdr_t* bamHdr, const std::string& region, std::vector<SVCall>& sv_calls, const std::vector<uint32_t>& pos_depth_map);
87
88 // Read the next alignment from the BAM file in a thread-safe manner
89 int readNextAlignment(samFile *fp_in, hts_itr_t *itr, bam1_t *bam1);
90
91 void runSplitReadCopyNumberPredictions(const std::string& chr, std::vector<SVCall>& split_sv_calls, const CNVCaller &cnv_caller, const CHMM &hmm, double mean_chr_cov, const std::vector<uint32_t> &pos_depth_map, const InputData &input_data);
92
93 void saveToVCF(const std::unordered_map<std::string, std::vector<SVCall>> &sv_calls, const InputData &input_data, const ReferenceGenome &ref_genome, const std::unordered_map<std::string, std::vector<uint32_t>> &chr_pos_depth_map) const;
94 // void saveToVCF(const std::unordered_map<std::string, std::vector<SVCall>> &sv_calls, const std::string &output_dir, const ReferenceGenome &ref_genome, const std::unordered_map<std::string, std::vector<uint32_t>>& chr_pos_depth_map) const;
95
96 // Query the read depth (INFO/DP) at a position
97 int getReadDepth(const std::vector<uint32_t>& pos_depth_map, uint32_t start) const;
98
99 public:
100 SVCaller() = default;
101
102 // Detect SVs and predict SV type from long read alignments and CNV calls
103 void run(const InputData& input_data);
104
105 // Interval tree
106 void findOverlaps(const std::unique_ptr<IntervalNode>& root, const PrimaryAlignment& query, std::vector<std::string>& result);
107
108 void insert(std::unique_ptr<IntervalNode>& root, const PrimaryAlignment& region, std::string qname);
109};
110
111#endif // SV_CALLER_H
Definition cnv_caller.h:48
Definition input_data.h:22
Definition fasta_query.h:16
Definition sv_caller.h:21
void insert(std::unique_ptr< IntervalNode > &root, const PrimaryAlignment &region, std::string qname)
Definition sv_caller.cpp:988
SVCaller()=default
void findOverlaps(const std::unique_ptr< IntervalNode > &root, const PrimaryAlignment &query, std::vector< std::string > &result)
Definition sv_caller.cpp:972
void run(const InputData &input_data)
Definition sv_caller.cpp:771