ContextSV v1.0.0
Loading...
Searching...
No Matches
input_data.h
Go to the documentation of this file.
1//
2// input_data.h:
3// Manage the input file paths and analysis parameters (InputData class)
4
5#ifndef INPUT_DATA_H
6#define INPUT_DATA_H
7
8#include "fasta_query.h"
9
11#include <string>
12#include <vector>
13// #include <map>
14#include <map>
15#include <unordered_map>
16#include <mutex>
18
19// Type definition for B-allele population frequency map (chr -> pos -> pfb)
20// using PFBMap = std::unordered_map<std::string, std::map<int, double>>;
21
// InputData: declares setter/getter pairs for the input file paths and
// analysis parameters held in its private members. Only declarations are
// visible here; the definitions live in input_data.cpp.
22class InputData {
23 public:
    // Default constructor. NOTE(review): the members below have no in-class
    // initializers, so any default values must come from the constructor
    // definition, which is not visible here.
24 InputData();
25
    // NOTE(review): presumably prints the current parameter values; declared
    // const. Confirm the output destination against the definition.
26 void printParameters() const;
27
    // Get/set the long-read BAM filepath (presumably backed by long_read_bam;
    // confirm against the definition).
28 std::string getLongReadBam() const;
29
30 void setLongReadBam(std::string filepath);
31
32 // Set the filepath to the HMM parameters.
33 void setHMMFilepath(std::string filepath);
34 std::string getHMMFilepath() const;
35
36 // Set the filepath to the reference genome FASTA file.
37 void setRefGenome(std::string filepath);
38 std::string getRefGenome() const;
39
40 // Set the filepath to the text file containing the locations of the
41 // VCF files with population frequencies for each chromosome.
42 void setAlleleFreqFilepaths(std::string filepath);
    // NOTE(review): presumably returns the pfb_filepaths entry for `chr`; the
    // behavior for a chromosome with no entry is not visible here.
43 std::string getAlleleFreqFilepath(std::string chr) const;
44
45 // Set the filepath to the VCF file with SNP calls used for CNV
46 // detection with the HMM.
47 void setSNPFilepath(std::string filepath);
48 std::string getSNPFilepath() const;
49
50 // Set the ethnicity for SNP population frequencies.
51 void setEthnicity(std::string ethnicity);
52 std::string getEthnicity() const;
53
54 // Set the assembly gaps file.
55 void setAssemblyGaps(std::string filepath);
56 std::string getAssemblyGaps() const;
57
58 // Set/get a target chromosome for single-chromosome analysis.
59 void setChromosome(std::string chr);
60 std::string getChromosome() const;
61
62 // Set the epsilon parameter for DBSCAN clustering.
63 void setDBSCAN_Epsilon(double epsilon);
64 double getDBSCAN_Epsilon() const;
65
66 // Set the percentage of mean chromosome coverage to use for DBSCAN
67 // minimum points.
68 void setDBSCAN_MinPtsPct(double min_pts_pct);
69 double getDBSCAN_MinPtsPct() const;
70
71 // Set the output directory where the results will be written.
72 void setOutputDir(std::string dirpath);
73 std::string getOutputDir() const;
74
75 // Set the number of threads to use when parallelization is possible.
76 void setThreadCount(int thread_count);
77 int getThreadCount() const;
78
79 // Set the verbose flag to true if verbose output is desired.
80 void setVerbose(bool verbose);
    // NOTE(review): the only getter in this class that is not declared const,
    // so it cannot be called through a const InputData.
81 bool getVerbose();
82
83 // Set whether to extend the SNP CNV regions around the SV breakpoints
84 // (+/- 1/2 SV length), save a TSV file, and generate HTML reports.
    // NOTE(review): this setter lacks the "set" prefix used by the other
    // setters in this class.
85 void saveCNVData(bool save_cnv_data);
86 bool getSaveCNVData() const;
87
    // Set/get the CNV output filepath (presumably backed by cnv_output_file;
    // confirm against the definition).
88 void setCNVOutputFile(std::string filepath);
89 std::string getCNVOutputFile() const;
90
91 private:
    // NOTE(review): min_reads, start_end, single_chr, and cnv_filepath have no
    // obviously matching accessor declared above; how they are used is not
    // visible here.
    // NOTE(review): start_end uses std::pair and int32_t, but <utility> and
    // <cstdint> are not among the #include lines visible in this listing;
    // they may be arriving transitively.
92 std::string long_read_bam;
93 std::string ref_filepath;
94 std::string snp_vcf_filepath;
95 std::string ethnicity;
96 std::unordered_map<std::string, std::string> pfb_filepaths; // Map of population frequency VCF filepaths by chromosome
97 std::string output_dir;
98 int min_reads;
99 double dbscan_epsilon;
100 double dbscan_min_pts_pct;
101 std::string chr; // Chromosome to analyze
102 std::pair<int32_t, int32_t> start_end; // Region to analyze
103 int thread_count;
104 std::string hmm_filepath;
105 std::string cnv_filepath;
106 std::string assembly_gaps; // Assembly gaps file
107 bool verbose; // True if verbose output is enabled
108 bool save_cnv_data; // True if SNP CNV regions should be extended around SV breakpoints, and saved to a TSV file (Large performance hit)
109 bool single_chr;
110 std::string cnv_output_file;
111};
112
113#endif // INPUT_DATA_H
Definition input_data.h:22
void saveCNVData(bool save_cnv_data)
Definition input_data.cpp:363
void setAssemblyGaps(std::string filepath)
Definition input_data.cpp:158
void setChromosome(std::string chr)
Definition input_data.cpp:186
void setEthnicity(std::string ethnicity)
Definition input_data.cpp:153
std::string getSNPFilepath() const
Definition input_data.cpp:138
std::string getCNVOutputFile() const
Definition input_data.cpp:378
std::string getRefGenome() const
Definition input_data.cpp:114
void setOutputDir(std::string dirpath)
Definition input_data.cpp:124
void setAlleleFreqFilepaths(std::string filepath)
Definition input_data.cpp:216
std::string getAssemblyGaps() const
Definition input_data.cpp:181
void setDBSCAN_Epsilon(double epsilon)
Definition input_data.cpp:196
double getDBSCAN_MinPtsPct() const
Definition input_data.cpp:211
void setThreadCount(int thread_count)
Definition input_data.cpp:317
std::string getChromosome() const
Definition input_data.cpp:191
std::string getHMMFilepath() const
Definition input_data.cpp:327
void printParameters() const
Definition input_data.cpp:38
std::string getLongReadBam() const
Definition input_data.cpp:49
void setRefGenome(std::string filepath)
Definition input_data.cpp:109
void setLongReadBam(std::string filepath)
Definition input_data.cpp:54
std::string getEthnicity() const
Definition input_data.cpp:148
bool getSaveCNVData() const
Definition input_data.cpp:368
double getDBSCAN_Epsilon() const
Definition input_data.cpp:201
std::string getAlleleFreqFilepath(std::string chr) const
Definition input_data.cpp:299
std::string getOutputDir() const
Definition input_data.cpp:119
void setHMMFilepath(std::string filepath)
Definition input_data.cpp:332
void setCNVOutputFile(std::string filepath)
Definition input_data.cpp:373
void setDBSCAN_MinPtsPct(double min_pts_pct)
Definition input_data.cpp:206
void setVerbose(bool verbose)
Definition input_data.cpp:353
int getThreadCount() const
Definition input_data.cpp:322
void setSNPFilepath(std::string filepath)
Definition input_data.cpp:143
bool getVerbose()
Definition input_data.cpp:358
InputData()
Definition input_data.cpp:19