ContextSV v1.0.0
Loading...
Searching...
No Matches
input_data.h
Go to the documentation of this file.
1//
2// input_data.h:
3// Declares the InputData class, which stores the input file paths and analysis parameters
4
5#ifndef INPUT_DATA_H
6#define INPUT_DATA_H
7
8#include "fasta_query.h"
9
11#include <string>
12#include <vector>
13// #include <map>
14#include <map>
15#include <unordered_map>
16#include <mutex>
18
19// Type definition for B-allele population frequency map (chr -> pos -> pfb)
20// using PFBMap = std::unordered_map<std::string, std::map<int, double>>;
21
22class InputData { // Holds the input file paths and analysis parameters, accessed through the setters/getters declared below.
23 public:
24 InputData(); // Default constructor; defined out-of-line (not visible in this header).
25
26 void printParameters() const; // Const member; presumably prints the stored parameters - confirm in the implementation.
27
28 std::string getLongReadBam() const; // Presumably returns the long-read BAM filepath (see long_read_bam) - confirm.
29
30 void setLongReadBam(std::string filepath); // Presumably sets the long-read BAM filepath (see long_read_bam) - confirm.
31
32 // Set/get the filepath to the HMM parameters.
33 void setHMMFilepath(std::string filepath);
34 std::string getHMMFilepath() const;
35
36 // Set/get the filepath to the reference genome FASTA file.
37 void setRefGenome(std::string filepath);
38 std::string getRefGenome() const;
39
40 // Set the filepath to the text file containing the locations of the
41 // VCF files with population frequencies for each chromosome.
42 void setAlleleFreqFilepaths(std::string filepath);
43 std::string getAlleleFreqFilepath(std::string chr) const; // Takes a chromosome name; presumably looks it up in pfb_filepaths - confirm.
44
45 // Set/get the filepath to the VCF file with SNP calls used for CNV
46 // detection with the HMM.
47 void setSNPFilepath(std::string filepath);
48 std::string getSNPFilepath() const;
49
50 // Set/get the ethnicity for SNP population frequencies.
51 void setEthnicity(std::string ethnicity);
52 std::string getEthnicity() const;
53
54 // Set/get the assembly gaps file.
55 void setAssemblyGaps(std::string filepath);
56 std::string getAssemblyGaps() const;
57
58 // Set/get the sample size for HMM predictions.
59 void setSampleSize(int sample_size);
60 int getSampleSize() const;
61
62 // Set/get the minimum CNV length to use for copy number predictions.
63 void setMinCNVLength(int min_cnv_length); // NOTE(review): takes a signed int while the getter and the member are uint32_t; a negative argument would wrap if stored directly - confirm validation in the implementation.
64 uint32_t getMinCNVLength() const; // NOTE(review): uint32_t needs <cstdint>, which is not among the includes visible in this header (may arrive transitively).
65
66 // Set/get the epsilon parameter for DBSCAN clustering.
67 void setDBSCAN_Epsilon(double epsilon);
68 double getDBSCAN_Epsilon() const;
69
70 // Set/get the percentage of mean chromosome coverage to use for DBSCAN
71 // minimum points.
72 void setDBSCAN_MinPtsPct(double min_pts_pct);
73 double getDBSCAN_MinPtsPct() const;
74
75 // Set/get the chromosome to analyze.
76 void setChromosome(std::string chr);
77 std::string getChromosome() const;
78 bool isSingleChr() const; // Presumably reports the single_chr flag - confirm.
79
80 // Set/get the region to analyze.
81 void setRegion(std::string region); // Takes the region as a string; the expected format is not visible in this header.
82 std::pair<int32_t, int32_t> getRegion() const; // Returns a pair of int32_t (presumably start and end, see start_end). NOTE(review): std::pair needs <utility>, not directly included here.
83 bool isRegionSet() const; // Presumably reports the region_set flag - confirm.
84
85 // Set/get the output directory where the results will be written.
86 void setOutputDir(std::string dirpath);
87 std::string getOutputDir() const;
88
89 // Set/get the number of threads to use when parallelization is possible.
90 void setThreadCount(int thread_count);
91 int getThreadCount() const;
92
93 // Set the verbose flag to true if verbose output is desired.
94 void setVerbose(bool verbose);
95 bool getVerbose(); // NOTE(review): the only getter in this class not declared const; it cannot be called on a const InputData.
96
97 // Set whether to extend the SNP CNV regions around the SV breakpoints
98 // (+/- 1/2 SV length), save a TSV file, and generate HTML reports.
99 void saveCNVData(bool save_cnv_data); // Setter despite lacking the "set" prefix; paired with getSaveCNVData() below.
100 bool getSaveCNVData() const;
101
102 void setCNVOutputFile(std::string filepath); // Presumably sets the CNV output filepath (see cnv_output_file) - confirm.
103 std::string getCNVOutputFile() const;
104
105 private:
106 std::string long_read_bam;
107 std::string ref_filepath;
108 std::string snp_vcf_filepath;
109 std::string ethnicity;
110 std::unordered_map<std::string, std::string> pfb_filepaths; // Map of population frequency VCF filepaths by chromosome
111 std::string output_dir;
112 int sample_size;
113 uint32_t min_cnv_length;
114 int min_reads; // NOTE(review): no setter or getter for this member is declared in this class.
115 double dbscan_epsilon;
116 double dbscan_min_pts_pct;
117 std::string chr; // Chromosome to analyze
118 std::pair<int32_t, int32_t> start_end; // Region to analyze
119 bool region_set; // True if a region is set
120 int thread_count;
121 std::string hmm_filepath;
122 std::string cnv_filepath; // NOTE(review): separate from cnv_output_file below; which member backs set/getCNVOutputFile is not visible here.
123 std::string assembly_gaps; // Assembly gaps file
124 bool verbose; // True if verbose output is enabled
125 bool save_cnv_data; // True if SNP CNV regions should be extended around SV breakpoints, and saved to a TSV file (Large performance hit)
126 bool single_chr; // Presumably true when a single chromosome is being analyzed (see isSingleChr) - confirm.
127 std::string cnv_output_file;
128};
129
130#endif // INPUT_DATA_H
Definition input_data.h:22
void saveCNVData(bool save_cnv_data)
Definition input_data.cpp:411
bool isSingleChr() const
Definition input_data.cpp:216
void setAssemblyGaps(std::string filepath)
Definition input_data.cpp:147
void setChromosome(std::string chr)
Definition input_data.cpp:205
void setEthnicity(std::string ethnicity)
Definition input_data.cpp:142
std::string getSNPFilepath() const
Definition input_data.cpp:127
std::string getCNVOutputFile() const
Definition input_data.cpp:426
bool isRegionSet() const
Definition input_data.cpp:257
std::string getRefGenome() const
Definition input_data.cpp:93
void setOutputDir(std::string dirpath)
Definition input_data.cpp:103
void setAlleleFreqFilepaths(std::string filepath)
Definition input_data.cpp:262
std::string getAssemblyGaps() const
Definition input_data.cpp:170
void setDBSCAN_Epsilon(double epsilon)
Definition input_data.cpp:185
double getDBSCAN_MinPtsPct() const
Definition input_data.cpp:200
void setThreadCount(int thread_count)
Definition input_data.cpp:364
std::string getChromosome() const
Definition input_data.cpp:211
uint32_t getMinCNVLength() const
Definition input_data.cpp:175
std::string getHMMFilepath() const
Definition input_data.cpp:374
void printParameters() const
Definition input_data.cpp:41
std::string getLongReadBam() const
Definition input_data.cpp:62
void setRefGenome(std::string filepath)
Definition input_data.cpp:88
void setLongReadBam(std::string filepath)
Definition input_data.cpp:67
std::string getEthnicity() const
Definition input_data.cpp:137
bool getSaveCNVData() const
Definition input_data.cpp:416
double getDBSCAN_Epsilon() const
Definition input_data.cpp:190
int getSampleSize() const
Definition input_data.cpp:117
std::string getAlleleFreqFilepath(std::string chr) const
Definition input_data.cpp:346
std::string getOutputDir() const
Definition input_data.cpp:98
void setSampleSize(int sample_size)
Definition input_data.cpp:122
std::pair< int32_t, int32_t > getRegion() const
Definition input_data.cpp:252
void setHMMFilepath(std::string filepath)
Definition input_data.cpp:379
void setMinCNVLength(int min_cnv_length)
Definition input_data.cpp:180
void setCNVOutputFile(std::string filepath)
Definition input_data.cpp:421
void setRegion(std::string region)
Definition input_data.cpp:221
void setDBSCAN_MinPtsPct(double min_pts_pct)
Definition input_data.cpp:195
void setVerbose(bool verbose)
Definition input_data.cpp:401
int getThreadCount() const
Definition input_data.cpp:369
void setSNPFilepath(std::string filepath)
Definition input_data.cpp:132
bool getVerbose()
Definition input_data.cpp:406
InputData()
Definition input_data.cpp:18