vg Internal API Reference¶
Below is an index of all classes, files, and namespaces in vg, in alphabetical order.
Useful starting points include vg::Node, vg::Edge, vg::Path, and vg::Graph, which define the Protobuf graph data model, and vg::VG, which is the main graph class with all the useful graph methods on it.
- struct
- #include <genotyper.hpp>
- class
- #include <gssw_aligner.hpp>
Subclassed by vg::QualAdjAligner
Public Functions
-
Aligner::
Aligner
(int32_t _match = default_match, int32_t _mismatch = default_mismatch, int32_t _gap_open = default_gap_open, int32_t _gap_extension = default_gap_extension)¶
-
Aligner::
~Aligner
(void)¶
-
void
Aligner::
align
(Alignment &alignment, Graph &g, bool print_score_matrices = false)¶
-
void
Aligner::
align_pinned
(Alignment &alignment, Graph &g, int64_t pinned_node_id, bool pin_left)¶
-
void
Aligner::
align_pinned_multi
(Alignment &alignment, vector<Alignment> &alt_alignments, Graph &g, int64_t pinned_node_id, bool pin_left, int32_t max_alt_alns)¶
-
void
Aligner::
align_global_banded
(Alignment &alignment, Graph &g, int32_t band_padding = 0, bool permissive_banding = true)¶
-
void
Aligner::
align_global_banded_multi
(Alignment &alignment, vector<Alignment> &alt_alignments, Graph &g, int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true)¶
-
void
Aligner::
init_mapping_quality
(double gc_content)¶
-
bool
Aligner::
is_mapping_quality_initialized
()¶
-
void
Aligner::
compute_mapping_quality
(vector<Alignment> &alignments, bool fast_approximation)¶
-
void
Aligner::
compute_paired_mapping_quality
(pair<vector<Alignment>, vector<Alignment>> &alignment_pairs, bool fast_approximation)¶
-
double
Aligner::
score_to_unnormalized_likelihood_ln
(double score)¶
-
int32_t
Aligner::
score_exact_match
(const string &sequence)¶
Public Members
-
int8_t *
vg::Aligner::
nt_table
¶
-
int8_t *
vg::Aligner::
score_matrix
¶
-
int32_t
vg::Aligner::
match
¶
-
int32_t
vg::Aligner::
mismatch
¶
-
int32_t
vg::Aligner::
gap_open
¶
-
int32_t
vg::Aligner::
gap_extension
¶
Protected Functions
-
gssw_graph *
Aligner::
create_gssw_graph
(Graph &g, int64_t pinned_node_id, gssw_node **gssw_pinned_node_out)¶
-
void
vg::Aligner::
topological_sort
(list<gssw_node *> &sorted_nodes)¶
-
void
vg::Aligner::
visit_node
(gssw_node *node, list<gssw_node *> &sorted_nodes, set<gssw_node *> &unmarked_nodes, set<gssw_node *> &temporary_marks)¶
-
void
Aligner::
reverse_graph
(Graph &g, Graph &reversed_graph_out)¶
-
void
Aligner::
unreverse_graph
(Graph &graph)¶
-
void
Aligner::
unreverse_graph_mapping
(gssw_graph_mapping *gm)¶
-
void
Aligner::
gssw_mapping_to_alignment
(gssw_graph *graph, gssw_graph_mapping *gm, Alignment &alignment, bool print_score_matrices = false)¶
-
string
Aligner::
graph_cigar
(gssw_graph_mapping *gm)¶
-
void
Aligner::
align_internal
(Alignment &alignment, vector<Alignment> *multi_alignments, Graph &g, int64_t pinned_node_id, bool pin_left, int32_t max_alt_alns, bool print_score_matrices = false)¶
-
double
Aligner::
maximum_mapping_quality_exact
(vector<double> &scaled_scores, size_t *max_idx_out)¶
-
double
Aligner::
maximum_mapping_quality_approx
(vector<double> &scaled_scores, size_t *max_idx_out)¶
Protected Attributes
-
double
vg::Aligner::
log_base
¶
-
- struct
Alignments link query strings, such as other genomes or reads, to Paths.
Public Members
-
string
vg::Alignment::
sequence
¶ The sequence that has been aligned.
-
Path
vg::Alignment::
path
¶ The Path that the sequence follows in the graph it has been aligned to, containing the Edits that modify the graph to produce the sequence.
-
string
vg::Alignment::
name
¶ The name of the sequence that has been aligned. Similar to read name in BAM.
-
bytes
vg::Alignment::
quality
¶ The quality scores for the sequence, as values on a 0-255 scale.
-
int32
vg::Alignment::
mapping_quality
¶ The mapping quality score for the alignment, in Phreds.
-
int32
vg::Alignment::
score
¶ The score for the alignment, in points.
-
string
vg::Alignment::
sample_name
¶ The name of the sample that produced the aligned read.
-
string
vg::Alignment::
read_group
¶ The name of the read group to which the aligned read belongs.
-
Alignment
vg::Alignment::
fragment_prev
¶ The previous Alignment in the fragment. Contains just enough information to locate the full Alignment; e.g. contains an Alignment with only a name, or only a graph mapping position.
-
bool
vg::Alignment::
is_secondary
¶ Flag marking the Alignment as secondary. All but one maximal-scoring alignment of a given read in a GAM file must be secondary.
-
double
vg::Alignment::
identity
¶ Portion of aligned bases that are perfect matches, or 0 if no bases are aligned.
-
string
- class
- #include <banded_global_aligner.hpp>
Public Functions
-
BandedGlobalAligner::AltTracebackStack::
AltTracebackStack
(int64_t max_multi_alns, vector<BAMatrix *> sink_node_matrices)¶
-
BandedGlobalAligner::AltTracebackStack::
~AltTracebackStack
()¶
-
void
BandedGlobalAligner::AltTracebackStack::
get_alignment_start
(int64_t &node_id, matrix_t &matrix)¶
-
void
BandedGlobalAligner::AltTracebackStack::
next
()¶
-
bool
BandedGlobalAligner::AltTracebackStack::
has_next
()¶
-
void
BandedGlobalAligner::AltTracebackStack::
propose_deflection
(const IntType score, const int64_t from_node_id, const int64_t row_idx, const int64_t col_idx, const int64_t to_node_id, const matrix_t to_matrix)¶
-
IntType
BandedGlobalAligner::AltTracebackStack::
current_traceback_score
()¶
-
bool
BandedGlobalAligner::AltTracebackStack::
at_next_deflection
(int64_t node_id, int64_t row_idx, int64_t col_idx)¶
-
BandedGlobalAligner<IntType>::matrix_t
BandedGlobalAligner::AltTracebackStack::
deflect_to_matrix
()¶
-
BandedGlobalAligner<IntType>::matrix_t
BandedGlobalAligner::AltTracebackStack::
deflect_to_matrix
(int64_t &to_node_id)¶
Private Functions
-
void
BandedGlobalAligner::AltTracebackStack::
insert_traceback
(const vector<Deflection> &traceback_prefix, const IntType score, const int64_t from_node_id, const int64_t row_idx, const int64_t col_idx, const int64_t to_node_id, const matrix_t to_matrix)¶
Private Members
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::AltTracebackStack::
max_multi_alns
¶
-
template<>
list<pair<vector<Deflection>, IntType>>vg::BandedGlobalAligner<IntType>::AltTracebackStack::
alt_tracebacks
¶
-
template<>
list<pair<vector<Deflection>, IntType>>::iteratorvg::BandedGlobalAligner<IntType>::AltTracebackStack::
curr_traceback
¶
-
template<>
vector<Deflection>::iteratorvg::BandedGlobalAligner<IntType>::AltTracebackStack::
curr_deflxn
¶
-
- class
- #include <banded_global_aligner.hpp>
Public Functions
-
BandedGlobalAligner::BABuilder::
BABuilder
(Alignment &alignment)¶
-
BandedGlobalAligner::BABuilder::
~BABuilder
()¶
-
void
BandedGlobalAligner::BABuilder::
update_state
(matrix_t matrix, Node *node, int64_t read_idx, int64_t node_idx)¶
-
void
BandedGlobalAligner::BABuilder::
finalize_alignment
()¶
Private Functions
-
void
BandedGlobalAligner::BABuilder::
finish_current_edit
()¶
-
void
BandedGlobalAligner::BABuilder::
finish_current_node
()¶
Private Members
-
template<>
matrix_tvg::BandedGlobalAligner<IntType>::BABuilder::
matrix_state
¶
-
template<>
boolvg::BandedGlobalAligner<IntType>::BABuilder::
matching
¶
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::BABuilder::
edit_length
¶
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::BABuilder::
edit_read_end_idx
¶
-
- class
- #include <banded_global_aligner.hpp>
Public Functions
-
BandedGlobalAligner::BAMatrix::
BAMatrix
(Alignment &alignment, Node *node, int64_t top_diag, int64_t bottom_diag, BAMatrix **seeds, int64_t num_seeds, int64_t cumulative_seq_len)¶
-
BandedGlobalAligner::BAMatrix::
~BAMatrix
()¶
-
void
BandedGlobalAligner::BAMatrix::
fill_matrix
(int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, bool qual_adjusted, IntType min_inf)¶
-
void
BandedGlobalAligner::BAMatrix::
traceback
(BABuilder &builder, AltTracebackStack &traceback_stack, matrix_t start_mat, int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, bool qual_adjusted, IntType min_inf)¶
-
void
BandedGlobalAligner::BAMatrix::
print_full_matrices
()¶
-
void
BandedGlobalAligner::BAMatrix::
print_rectangularized_bands
()¶
Private Functions
-
void
BandedGlobalAligner::BAMatrix::
traceback_internal
(BABuilder &builder, AltTracebackStack &traceback_stack, int64_t start_row, int64_t start_col, matrix_t start_mat, bool in_lead_gap, int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, bool qual_adjusted, IntType min_inf)¶
-
void
BandedGlobalAligner::BAMatrix::
print_matrix
(matrix_t which_mat)¶
-
void
BandedGlobalAligner::BAMatrix::
print_band
(matrix_t which_mat)¶
Private Members
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::BAMatrix::
top_diag
¶
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::BAMatrix::
bottom_diag
¶
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::BAMatrix::
cumulative_seq_len
¶
-
template<>
BAMatrix **vg::BandedGlobalAligner<IntType>::BAMatrix::
seeds
¶
-
template<>
int64_tvg::BandedGlobalAligner<IntType>::BAMatrix::
num_seeds
¶
-
template<>
IntType *vg::BandedGlobalAligner<IntType>::BAMatrix::
match
¶
-
template<>
IntType *vg::BandedGlobalAligner<IntType>::BAMatrix::
insert_col
¶
-
template<>
IntType *vg::BandedGlobalAligner<IntType>::BAMatrix::
insert_row
¶
Friends
-
friend
vg::BandedGlobalAligner::BAMatrix::BABuilder
-
friend
vg::BandedGlobalAligner::BAMatrix::AltTracebackStack
-
- template <class IntType>
- class
- #include <banded_global_aligner.hpp>
Public Functions
-
BandedGlobalAligner::
BandedGlobalAligner
(Alignment &alignment, Graph &g, int64_t band_padding, bool permissive_banding = false, bool adjust_for_base_quality = false)¶
-
BandedGlobalAligner::
BandedGlobalAligner
(Alignment &alignment, Graph &g, vector<Alignment> &alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding = false, bool adjust_for_base_quality = false)¶
-
BandedGlobalAligner::
~BandedGlobalAligner
()¶
-
void
BandedGlobalAligner::
align
(int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend)¶
Private Types
-
enum type
vg::BandedGlobalAligner::
matrix_t
¶ Values:
Private Functions
-
BandedGlobalAligner::
BandedGlobalAligner
(Alignment &alignment, Graph &g, vector<Alignment> *alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding = false, bool adjust_for_base_quality = false)¶
-
void
BandedGlobalAligner::
traceback
(int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, IntType min_inf)¶
-
void
BandedGlobalAligner::
graph_edge_lists
(Graph &g, bool outgoing_edges, vector<vector<int64_t>> &out_edge_list)¶
-
void
BandedGlobalAligner::
topological_sort
(Graph &g, vector<vector<int64_t>> &node_edges_out, vector<Node *> &out_topological_order)¶
-
void
BandedGlobalAligner::
path_lengths_to_sinks
(const string &read, vector<vector<int64_t>> &node_edges_in, vector<int64_t> &shortest_path_to_sink, vector<int64_t> &longest_path_to_sink)¶
-
void
BandedGlobalAligner::
find_banded_paths
(const string &read, bool permissive_banding, vector<vector<int64_t>> &node_edges_in, vector<vector<int64_t>> &node_edges_out, int64_t band_padding, vector<bool> &node_masked, vector<pair<int64_t, int64_t>> &band_ends)¶
-
void
BandedGlobalAligner::
shortest_seq_paths
(vector<vector<int64_t>> &node_edges_out, vector<int64_t> &seq_lens_out, unordered_set<Node *> source_nodes)¶
-
- struct
Summarizes reads that map to a single position in the graph. This structure is pretty much identical to a line in Samtools pileup format. If qualities are set, the qualities field must have size = num_bases.
- struct
- #include <bubbles.hpp>
- struct
- class
- #include <genotypekit.hpp>
This site finder finds sites with Cactus.
Inherits from vg::SiteFinder
Public Functions
-
vg::CactusSiteFinder::
CactusSiteFinder
(VG &graph, const string &hint_path_name)¶ Make a new CactusSiteFinder to find sites in the given graph.
-
virtual
vg::CactusSiteFinder::
~CactusSiteFinder
()¶
-
void
vg::CactusSiteFinder::
for_each_site_parallel
(const function<void(NestedSite)> &lambda)¶ Find all the sites in parallel with Cactus, make the site tree, and call the given function on all the top-level sites.
-
- class
- #include <caller.hpp>
Public Types
- typedef
- typedef
- typedef
- typedef
- typedef
Public Functions
-
vg::Caller::
Caller
(VG *graph, double het_prior = Default_het_prior, int min_depth = Default_min_depth, int max_depth = Default_max_depth, int min_support = Default_min_support, double min_frac = Default_min_frac, double min_log_likelihood = Default_min_log_likelihood, bool leave_uncalled = false, int default_quality = Default_default_quality, double max_strand_bias = Default_max_strand_bias, ostream *text_calls = NULL, bool bridge_alts = false)¶
-
vg::Caller::
~Caller
()¶
-
void
vg::Caller::
clear
()¶
-
void
vg::Caller::
write_call_graph
(ostream &out, bool json)¶
-
void
vg::Caller::
call_node_pileup
(const NodePileup &pileup)¶
-
void
vg::Caller::
call_edge_pileup
(const EdgePileup &pileup)¶
-
void
vg::Caller::
update_call_graph
()¶
-
void
vg::Caller::
map_paths
()¶
-
void
vg::Caller::
call_base_pileup
(const NodePileup &np, int64_t offset, bool insertions)¶
-
void
vg::Caller::
compute_top_frequencies
(const BasePileup &bp, const vector<pair<int64_t, int64_t>> &base_offsets, string &top_base, int &top_count, int &top_rev_count, string &second_base, int &second_count, int &second_rev_count, int &total_count, bool inserts)¶
-
pair<double, int>
vg::Caller::
base_log_likelihood
(const BasePileup &pb, const vector<pair<int64_t, int64_t>> &base_offsets, const string &val, const string &first, const string &second)¶
-
void
vg::Caller::
create_node_calls
(const NodePileup &np)¶
-
void
vg::Caller::
create_augmented_edge
(Node *node1, int from_offset, bool left_side1, bool aug1, Node *node2, int to_offset, bool left_side2, bool aug2, char cat, StrandSupport support)¶
-
void
vg::Caller::
write_node_tsv
(Node *node, char call, StrandSupport support, int64_t orig_id, int orig_offset)¶
-
void
vg::Caller::
write_edge_tsv
(Edge *edge, char call, StrandSupport support)¶
-
void
vg::Caller::
write_nd_tsv
()¶
Public Members
-
ostream *
vg::Caller::
_text_calls
¶
-
vector<pair<StrandSupport, StrandSupport>>
vg::Caller::
_node_supports
¶
-
vector<pair<StrandSupport, StrandSupport>>
vg::Caller::
_insert_supports
¶
-
int64_t
vg::Caller::
_max_id
¶
-
NodeDivider
vg::Caller::
_node_divider
¶
-
unordered_set<int64_t>
vg::Caller::
_visited_nodes
¶
-
unordered_map<pair<NodeSide, NodeSide>, StrandSupport>
vg::Caller::
_called_edges
¶
-
InsertionHash
vg::Caller::
_inserted_nodes
¶
-
EdgeSupHash
vg::Caller::
_insertion_supports
¶
-
EdgeSupHash
vg::Caller::
_deletion_supports
¶
-
double
vg::Caller::
_het_log_prior
¶
-
double
vg::Caller::
_hom_log_prior
¶
-
int
vg::Caller::
_buffer_size
¶
-
int
vg::Caller::
_min_depth
¶
-
int
vg::Caller::
_max_depth
¶
-
int
vg::Caller::
_min_support
¶
-
double
vg::Caller::
_min_frac
¶
-
double
vg::Caller::
_min_log_likelihood
¶
-
bool
vg::Caller::
_leave_uncalled
¶
-
char
vg::Caller::
_default_quality
¶
-
double
vg::Caller::
_max_strand_bias
¶
-
bool
vg::Caller::
_bridge_alts
¶
Public Static Functions
-
static double
vg::Caller::
safe_log
(double v)¶
Public Static Attributes
-
const double
vg::Caller::
Log_zero
¶
-
const double
vg::Caller::
Default_het_prior
¶
-
const int
vg::Caller::
Default_min_depth
¶
-
const int
vg::Caller::
Default_max_depth
¶
-
const int
vg::Caller::
Default_min_support
¶
-
const double
vg::Caller::
Default_min_frac
¶
-
const double
vg::Caller::
Default_min_log_likelihood
¶
-
const char
vg::Caller::
Default_default_quality
¶
-
const double
vg::Caller::
Default_max_strand_bias
¶
- class
- #include <colors.hpp>
Public Functions
-
vg::Colors::
Colors
(void)¶
-
vg::Colors::
Colors
(int seed_val)¶
-
vg::Colors::
~Colors
(void)¶
-
string
vg::Colors::
hashed
(const string &str)¶
-
string
vg::Colors::
random
(void)¶
Public Members
-
const vector<string>
vg::Colors::
colors
¶
Private Members
-
mt19937
vg::Colors::
rng
¶
-
- class
- #include <genotypekit.hpp>
Represents a strategy for computing consistency between Alignments and SiteTraversals. Determines whether a read is consistent with a SiteTraversal or not (but has access to all the SiteTraversals). Polymorphic base class/interface.
Public Functions
-
virtual
vg::ConsistencyCalculator::
~ConsistencyCalculator
()¶
-
virtual vector<bool>
vg::ConsistencyCalculator::
calculate_consistency
(const NestedSite &site, const vector<SiteTraversal> &traversals, const Alignment &read) const¶
= 0 Return true or false for each traversal of the site, depending on whether the read is consistent with it or not.
-
virtual
- struct
- #include <constructor.hpp>
Represents a constructed region of the graph along a single linear sequence. Contains the protobuf Graph holding all the created components (which may be too large to serialize), a set of node IDs whose left sides need to be connected to when you connect to the start of the chunk, and a set of node IDs whose right sides need to be connected to when you connect to the end of the chunk.
Node ordering is restricted: if there is a single source, it must be the very first node in the graph with ID 1, and if there is a single sink it must be the very last node in the graph with ID max_id. Additionally, single sources and single sinks must be visited by only a single path, the reference path.
The overall reference path must also always be path 0. Also, all mappings in all paths must be full-length matches on the forward strand, and they must be sorted by rank. Ranks must be filled and start with rank 1 in each path.
- class
- #include <constructor.hpp>
Inherits from vg::Progressive
Public Functions
-
void
vg::Constructor::
add_name_mapping
(const string &vcf_name, const string &fasta_name)¶ Add a name mapping between a VCF contig name and a FASTA sequence name. Both must be unique.
-
string
vg::Constructor::
vcf_to_fasta
(const string &vcf_name) const¶ Convert the given VCF contig name to a FASTA sequence name, through the rename mappings.
-
string
vg::Constructor::
fasta_to_vcf
(const string &fasta_name) const¶ Convert the given FASTA sequence name to a VCF contig name, through the rename mappings.
-
ConstructedChunk
vg::Constructor::
construct_chunk
(string reference_sequence, string reference_path_name, vector<vcflib::Variant> variants, size_t chunk_offset) const¶ Construct a ConstructedChunk of graph from the given piece of sequence, with the given name, applying the given variants. The variants need to be sorted by start position, and have their start positions set to be ZERO-BASED. However, they also need to have their start positions relative to the global start of the contig, so that hash-based names come out right for them. They also need to not overlap with any variants not in the vector we have (i.e. we need access to all overlapping variants for this region). The variants must not extend beyond the given sequence, though they can abut its edges.
Variants in the vector may not use symbolic alleles.
chunk_offset gives the global 0-based position at which this chunk starts in the reference contig it is part of, which is used to correctly place variants.
-
void
vg::Constructor::
construct_graph
(string vcf_contig, FastaReference &reference, VcfBuffer &variant_source, function<void(Graph&)> callback)¶ Construct a graph for the given VCF contig name, using the given reference and the variants from the given buffered VCF file. Emits a sequence of Graph chunks, which may be too big to serialize directly.
Doesn’t handle any of the setup for VCF indexing. Just scans all the variants that can come out of the buffer, so make sure indexing is set on the file first before passing it in.
-
void
vg::Constructor::
construct_graph
(const vector<FastaReference *> &references, const vector<vcflib::VariantCallFile *> &variant_files, function<void(Graph&)> callback)¶ Construct a graph using the given FASTA references and VCFlib VCF files. The VCF files are assumed to be grouped by contig and then sorted by position within the contig, such that each contig is present in only one file. If multiple FASTAs are used, each contig must be present in only one FASTA file. Reference and VCF vectors may not contain nulls.
Public Members
-
bool
vg::Constructor::
flat
¶
-
bool
vg::Constructor::
alt_paths
¶
-
bool
vg::Constructor::
greedy_pieces
¶
-
bool
vg::Constructor::
chain_deletions
¶
-
size_t
vg::Constructor::
max_node_size
¶
-
size_t
vg::Constructor::
vars_per_chunk
¶
-
size_t
vg::Constructor::
bases_per_chunk
¶
-
set<string>
vg::Constructor::
allowed_vcf_names
¶
-
map<string, pair<size_t, size_t>>
vg::Constructor::
allowed_vcf_regions
¶
-
void
- struct
- #include <readfilter.hpp>
Public Functions
-
vg::ReadFilter::Counts::
Counts
()¶
Public Members
-
vector<size_t>
vg::ReadFilter::Counts::
read
¶
-
vector<size_t>
vg::ReadFilter::Counts::
filtered
¶
-
vector<size_t>
vg::ReadFilter::Counts::
min_score
¶
-
vector<size_t>
vg::ReadFilter::Counts::
max_overhang
¶
-
vector<size_t>
vg::ReadFilter::Counts::
min_mapq
¶
-
vector<size_t>
vg::ReadFilter::Counts::
split
¶
-
vector<size_t>
vg::ReadFilter::Counts::
repeat
¶
-
vector<size_t>
vg::ReadFilter::Counts::
defray
¶
-
- class
- #include <deconstructor.hpp>
Public Functions
-
vg::Deconstructor::
Deconstructor
()¶
-
vg::Deconstructor::
~Deconstructor
()¶
-
void
vg::Deconstructor::
set_xg
(xg::XG *xindex)¶
-
void
vg::Deconstructor::
unroll_my_vg
(int steps)¶
-
void
vg::Deconstructor::
dagify_my_vg
(int steps)¶
-
vg::VG *
vg::Deconstructor::
compact
(int compact_steps)¶ For each superbubble in the graph: If a superbubble is nested and simple (contains no superbubbles), transform it into a node. Record the translation from the new node in the graph to the old superbubble (map<id_t, SuperBubble>).
At each step, find the new superbubbles of the graph and continue with this process.
-
bool
vg::Deconstructor::
is_nested
(SuperBubble sb)¶
-
bool
vg::Deconstructor::
contains_nested
(pair<int64_t, int64_t> start_and_end)¶ detect if there are superbubbles contained within the current superbubble (defined by Start and End)
This is most easily done using a simple linear search between the nodes in topological order.
-
SuperBubble
vg::Deconstructor::
report_superbubble
(int64_t start, int64_t end)¶ BFS through a superbubble and fill out the corresponding SuperBubble struct.
-
map<pair<id_t, id_t>, vector<id_t>>
vg::Deconstructor::
get_all_superbubbles
()¶ Uses a BFS between nodes in the graph labeled as the endpoints of superbubbles to enumerate the nodes between them. TODO: the dagify transform records the node translation
IDEALLY: return the topological order, the starts/ends of superbubbles, and an index from node -> location in topo order. This makes checking if things are nested trivial.
-
void
vg::Deconstructor::
sb2vcf
(string outfile)¶
Private Functions
-
vector<int64_t>
vg::Deconstructor::
nt_to_ids
(deque<NodeTraversal> &nt)¶
-
SuperBubble
vg::Deconstructor::
translate_id
(id_t id)¶
-
void
vg::Deconstructor::
init
()¶
Private Members
-
xg::XG *
vg::Deconstructor::
my_xg
¶
-
map<id_t, SuperBubble>
vg::Deconstructor::
id_to_bub
¶
-
string
vg::Deconstructor::
mask_file
¶
-
vector<SuperBubble>
vg::Deconstructor::
my_superbubbles
¶
-
size_t
vg::Deconstructor::
my_max_length
¶
-
size_t
vg::Deconstructor::
my_max_component_length
¶
-
- class
- #include <banded_global_aligner.hpp>
Public Functions
-
BandedGlobalAligner::AltTracebackStack::Deflection::
Deflection
(const int64_t from_node_id, const int64_t row_idx, const int64_t col_idx, const int64_t to_node_id, const matrix_t to_matrix)¶
-
BandedGlobalAligner::AltTracebackStack::Deflection::
~Deflection
()¶
Public Members
-
template<>
const int64_tvg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::
from_node_id
¶
-
template<>
const int64_tvg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::
row_idx
¶
-
template<>
const int64_tvg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::
col_idx
¶
-
template<>
const int64_tvg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::
to_node_id
¶
-
template<>
const matrix_tvg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::
to_matrix
¶
-
- struct
Edges describe linkages between nodes. They are bidirected, connecting the end (default) or start of the “from” node to the start (default) or end of the “to” node.
- struct
Keep pileup-like record for reads that span edges.
- struct
Edits describe how to generate a new string from elements in the graph. To determine the new string, just walk the series of edits, stepping from_length distance in the basis node, and to_length in the novel element, replacing from_length in the basis node with the sequence.
There are several types of Edit:
- matches: from_length == to_length; sequence is empty
- snps: from_length == to_length; sequence = alt
- deletions: to_length == 0 && from_length > to_length; sequence is empty
- insertions: from_length < to_length; sequence = alt
- struct
- #include <caller.hpp>
Public Functions
-
vg::NodeDivider::Entry::
Entry
(Node *r = 0, vector<StrandSupport> sup_r = vector< StrandSupport >(), Node *a1 = 0, vector<StrandSupport> sup_a1 = vector< StrandSupport >(), Node *a2 = 0, vector<StrandSupport> sup_a2 = vector< StrandSupport >())¶
-
vector<StrandSupport> &
vg::NodeDivider::Entry::
sup
(int i)¶
Public Members
-
vector<StrandSupport>
vg::NodeDivider::Entry::
sup_ref
¶
-
vector<StrandSupport>
vg::NodeDivider::Entry::
sup_alt1
¶
-
vector<StrandSupport>
vg::NodeDivider::Entry::
sup_alt2
¶
-
- class
- #include <filter.hpp>
Public Functions
-
vg::Filter::
Filter
()¶
-
vg::Filter::
~Filter
()¶
-
Alignment
vg::Filter::
depth_filter
(Alignment &aln)¶ Looks for Alignments that have large overhangs at the end of them.
Default behavior: if an alignment has a right- or left- clip that is longer than the maximum allowed, return an empty alignment.
Inverse Behavior: if the alignment has a clip that is larger than the maximum allowed at either end, return the alignment. CLI: vg filter -d 10 -q 40 -r -R. -r: track depth of both novel variants and those in the graph. -R: remove edits that fail the filter (otherwise toss the whole alignment).
-
Alignment
vg::Filter::
percent_identity_filter
(Alignment &aln)¶ Filter reads that are less than <PCTID> identical to the reference. I.e., if a read matches the reference along 80% of its length, and your cutoff is 90% PCTID, throw it out.
-
Alignment
vg::Filter::
split_read_filter
(Alignment &aln)¶ Split reads map to two separate paths in the graph OR vastly separated non-consecutive nodes in a single path.
They’re super important for detecting structural variants, so we may want to filter them out or collect only split reads.
-
Alignment
vg::Filter::
path_divergence_filter
(Alignment &aln)¶ Looks for alignments that transition from one path to another over their length. This may occur for one of several reasons:
- The read covers a translocation
- The read looks a lot like two different (but highly similar) paths
- The read is shattered (e.g. as in chromothripsis)
Default behavior: if the Alignment is path divergent, return an empty Alignment, else return aln. Inverse behavior: if the Alignment is path divergent, return aln, else return an empty Alignment.
-
Alignment
vg::Filter::
reversing_filter
(Alignment &aln)¶ Looks for alignments that change direction over their length. This may happen because of:
- Mapping artifacts
- Cycles
- Highly repetitive regions
- Inversions (if you’re lucky enough)
Default behavior: if the Alignment reverses, return an empty Alignment. Inverse behavior: if the Alignment reverses, return the Alignment.
-
void
vg::Filter::
set_min_depth
(int depth)¶
-
void
vg::Filter::
set_min_qual
(int qual)¶
-
void
vg::Filter::
set_min_percent_identity
(double pct_id)¶
-
void
vg::Filter::
set_avg_qual
(double avg_qual)¶
-
void
vg::Filter::
set_filter_matches
(bool fm)¶
-
void
vg::Filter::
set_remove_failing_edits
(bool fm)¶
-
void
vg::Filter::
set_soft_clip_limit
(int max_clip)¶
-
void
vg::Filter::
set_split_read_limit
(int split_limit)¶
-
void
vg::Filter::
set_reversing
(bool do_reversing_filter)¶
-
void
vg::Filter::
set_path_divergence
(bool do_path_divergence)¶
-
void
vg::Filter::
set_window_length
(int window_length)¶
-
void
vg::Filter::
set_my_xg_idx
(xg::XG *xg_idx)¶
-
void
vg::Filter::
set_inverse
(bool do_inv)¶
-
int
vg::Filter::
get_min_depth
()¶
-
int
vg::Filter::
get_min_qual
()¶
-
int
vg::Filter::
get_window_length
()¶
-
int
vg::Filter::
get_soft_clip_limit
()¶
-
int
vg::Filter::
get_split_read_limit
()¶
-
double
vg::Filter::
get_min_percent_identity
()¶
-
double
vg::Filter::
get_min_avg_qual
()¶
-
bool
vg::Filter::
get_inverse
()¶
-
bool
vg::Filter::
get_filter_matches
()¶
-
bool
vg::Filter::
get_remove_failing_edits
()¶
-
bool
vg::Filter::
get_do_path_divergence
()¶
-
bool
vg::Filter::
get_do_reversing
()¶
Private Members
-
xg::XG *
vg::Filter::
my_xg_idx
¶
-
unordered_map<string, unordered_map<string, int>>
vg::Filter::
pos_to_edit_to_depth
¶
-
unordered_map<int, int>
vg::Filter::
pos_to_qual
¶
-
bool
vg::Filter::
inverse
¶
-
bool
vg::Filter::
remove_failing_edits
¶
-
bool
vg::Filter::
filter_matches
¶
-
bool
vg::Filter::
do_path_divergence
¶
-
bool
vg::Filter::
do_reversing
¶
-
int
vg::Filter::
min_depth
¶
-
int
vg::Filter::
min_qual
¶
-
int
vg::Filter::
min_cov
¶
-
int
vg::Filter::
window_length
¶
-
int
vg::Filter::
qual_offset
¶
-
int
vg::Filter::
soft_clip_limit
¶
-
int
vg::Filter::
split_read_limit
¶
-
double
vg::Filter::
min_percent_identity
¶
-
double
vg::Filter::
min_avg_qual
¶
-
- class
- #include <genotypekit.hpp>
This genotype prior calculator has a fixed prior for homozygous genotypes and a fixed prior for hets.
Inherits from vg::GenotypePriorCalculator
Public Functions
-
virtual
vg::FixedGenotypePriorCalculator::
~FixedGenotypePriorCalculator
()¶
-
virtual
- struct
Describes a genotype at a particular locus.
Public Members
-
repeated<int32>
vg::Genotype::
allele
¶ These refer to the offsets of the alleles in the Locus object.
-
bool
vg::Genotype::
is_phased
¶
-
double
vg::Genotype::
likelihood
¶
-
double
vg::Genotype::
log_likelihood
¶ Likelihood natural logged.
-
double
vg::Genotype::
log_prior
¶ Prior natural logged.
-
double
vg::Genotype::
log_posterior
¶ Posterior natural logged (unnormalized).
-
repeated<int32>
- class
- #include <genotypekit.hpp>
Represents a strategy for calculating genotype likelihood for a (nested) Site. Polymorphic base class/interface.
Public Functions
-
virtual
vg::GenotypeLikelihoodCalculator::
~GenotypeLikelihoodCalculator
()¶
-
virtual double
vg::GenotypeLikelihoodCalculator::
calculate_log_likelihood
(const NestedSite &site, const vector<SiteTraversal> &traversals, const Genotype &genotype, const vector<vector<bool>> &consistencies, const vector<Support> &supports, const vector<Alignment *> &reads)¶
= 0 Return the log likelihood of the given genotype.
-
virtual
- class
- #include <genotypekit.hpp>
Represents a strategy for assigning genotype priors. Polymorphic base class/interface.
Subclassed by vg::FixedGenotypePriorCalculator
- class
- #include <genotyper.hpp>
Class to hold on to genotyping parameters and genotyping functions.
Public Functions
-
void
vg::Genotyper::
run
(VG &graph, vector<Alignment> &alignments, ostream &out, string ref_path_name = "", string contig_name = "", string sample_name = "", string augmented_file_name = "", bool use_cactus = false, bool subset_graph = false, bool show_progress = false, bool output_vcf = false, bool output_json = false, int length_override = 0, int variant_offset = 0)¶
-
int
vg::Genotyper::
alignment_qual_score
(VG &graph, const Site &site, const Alignment &alignment)¶ Given an Alignment and a Site, compute a phred score for the quality of the alignment’s bases within the site overall (not counting the start and end nodes), which is supposed to be interpretable as the probability that the call of the sequence is wrong (to the degree that it would no longer support the alleles it appears to support).
In practice we’re just going to average the quality scores for all the bases interior to the site (i.e. not counting the start and end nodes).
If the alignment doesn’t have base qualities, or no qualities are available for bases internal to the site, returns a default value.
-
vector<Genotyper::Site>
vg::Genotyper::
find_sites_with_supbub
(VG &graph)¶ Unfold and dagify a graph, find the superbubbles, and then convert them back to the space of the original graph.
Returns a collection of Sites.
-
vector<Genotyper::Site>
vg::Genotyper::
find_sites_with_cactus
(VG &graph, const string &ref_path_name = "")¶ Same as find_sites_with_supbub, but uses Cactus instead of superbubbles. This is more general and doesn’t require DAGification etc., but we keep both versions around for now for debugging and comparison.
If ref_path_name is the empty string, it is not used. Otherwise, it must be the name of a path present in the graph.
-
list<NodeTraversal>
vg::Genotyper::
get_traversal_of_site
(VG &graph, const Site &site, const Path &path)¶ Given a path (which may run either direction through a site, or not touch the ends at all), collect a list of NodeTraversals in order for the part of the path that is inside the site, in the same orientation as the path.
-
string
vg::Genotyper::
traversals_to_string
(const list<NodeTraversal> &path)¶ Make a list of NodeTraversals into the string they represent.
-
vector<list<NodeTraversal>>
vg::Genotyper::
get_paths_through_site
(VG &graph, const Site &site, const map<string, Alignment *> &reads_by_name)¶ For the given site, emit all subpaths with unique sequences that run from start to end, out of the paths in the graph. Uses the map of reads by name to determine if a path is a read or a real named path. Paths through the site supported only by reads are subject to a min recurrence count, while those supported by actual embedded named paths are not.
-
string
vg::Genotyper::
get_qualities_in_site
(VG &graph, const Site &site, const Alignment &alignment)¶ Get all the quality values in the alignment between the start and end nodes of a site. Handles alignments that enter the site from the end, and alignments that never make it through the site.
If we run out of qualities, or qualities aren’t present, returns no qualities.
If an alignment goes through the site multiple times, we get all the qualities from when it is in the site.
Does not return qualities on the start and end nodes. May return an empty string.
-
map<Alignment *, vector<Genotyper::Affinity>>
vg::Genotyper::
get_affinities
(VG &graph, const map<string, Alignment *> &reads_by_name, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths)¶ Get the affinity of all the reads relevant to the superbubble to all the paths through the superbubble.
Affinity is a double out of 1.0. Higher is better.
-
map<Alignment *, vector<Genotyper::Affinity>>
vg::Genotyper::
get_affinities_fast
(VG &graph, const map<string, Alignment *> &reads_by_name, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths)¶ Get affinities as above but using only string comparison instead of alignment. Affinities are 0 for mismatch and 1 for a perfect match.
-
Locus
vg::Genotyper::
genotype_site
(VG &graph, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths, const map<Alignment *, vector<Affinity>> &affinities)¶ Compute annotated genotype from affinities and superbubble paths. Needs access to the graph so it can chop up the alignments, which requires node sizes.
-
double
vg::Genotyper::
get_genotype_log_likelihood
(VG &graph, const Site &site, const vector<int> &genotype, const vector<pair<Alignment *, vector<Affinity>>> &alignment_consistency)¶ Compute the probability of the observed alignments given the genotype.
Takes a genotype as a vector of allele numbers, and support data as a collection of pairs of Alignments and vectors of bools marking whether each alignment is consistent with each allele.
Alignments should have had their quality values trimmed down to just the part covering the superbubble.
Returns a natural log likelihood.
-
double
vg::Genotyper::
get_genotype_log_prior
(const vector<int> &genotype)¶ Compute the prior probability of the given genotype.
Takes a genotype as a vector of allele numbers. It is not guaranteed that allele 0 corresponds to any notion of primary reference-ness.
Returns a natural log prior probability.
TODO: add in strand bias
-
vector<vcflib::Variant>
vg::Genotyper::
locus_to_variant
(VG &graph, const Site &site, const ReferenceIndex &index, vcflib::VariantCallFile &vcf, const Locus &locus, const string &sample_name = "SAMPLE")¶ Make a VCFlib variant from a called Locus. Depends on an index of the reference path we want to call against.
Returns 0 or more variants we can articulate from the superbubble. Sometimes if we can’t make a variant for the superbubble against the reference path, we’ll emit 0 variants.
-
void
vg::Genotyper::
write_vcf_header
(std::ostream &stream, const std::string &sample_name, const std::string &contig_name, size_t contig_size)¶ Make a VCF header.
-
vcflib::VariantCallFile *
vg::Genotyper::
start_vcf
(std::ostream &stream, const ReferenceIndex &index, const string &sample_name, const string &contig_name, size_t contig_size)¶ Start VCF output to a stream. Returns a VCFlib VariantCallFile that needs to be deleted.
-
pair<pair<int64_t, int64_t>, bool>
vg::Genotyper::
get_site_reference_bounds
(const Site &site, const ReferenceIndex &index)¶ Utility function for getting the reference bounds (start and past-end) of a site with relation to a given reference index. Computes bounds of the variable region, not including the fixed start and end node lengths. Also returns whether the reference path goes through the site forwards (false) or backwards (true).
-
void
vg::Genotyper::
report_site
(const Site &site, const ReferenceIndex *index = nullptr)¶ Tell the statistics tracking code that a site exists. We can do things like count up the site length in the reference and so on. Called only once per site, but may be called on multiple threads simultaneously.
-
void
vg::Genotyper::
report_site_traversal
(const Site &site, const string &read_name)¶ Tell the statistics tracking code that a read traverses a site completely. May be called multiple times for a given read and site, and may be called in parallel.
-
void
vg::Genotyper::
print_statistics
(ostream &out)¶ Print site statistics to the given stream.
-
void
vg::Genotyper::
edge_allele_labels
(const VG &graph, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths, unordered_map<pair<NodeTraversal, NodeTraversal>, unordered_set<size_t>, hash_oriented_edge> *out_edge_allele_sets)¶
-
void
vg::Genotyper::
allele_ambiguity_log_probs
(const VG &graph, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths, const unordered_map<pair<NodeTraversal, NodeTraversal>, unordered_set<size_t>, hash_oriented_edge> &edge_allele_sets, vector<unordered_map<vector<size_t>, double, hash_ambiguous_allele_set>> *out_allele_ambiguity_probs)¶
Public Members
-
size_t
vg::Genotyper::
max_path_search_steps
¶
-
int
vg::Genotyper::
unfold_max_length
¶
-
int
vg::Genotyper::
dagify_steps
¶
-
double
vg::Genotyper::
max_het_bias
¶
-
bool
vg::Genotyper::
use_mapq
¶
-
bool
vg::Genotyper::
realign_indels
¶
-
int
vg::Genotyper::
default_sequence_quality
¶
-
int
vg::Genotyper::
min_recurrence
¶
-
int
vg::Genotyper::
min_consistent_per_strand
¶
-
double
vg::Genotyper::
min_score_per_base
¶
-
double
vg::Genotyper::
het_prior_logprob
¶
-
Translator
vg::Genotyper::
translator
¶
-
map<size_t, size_t>
vg::Genotyper::
site_reference_length_histogram
¶
-
QualAdjAligner
vg::Genotyper::
quality_aligner
¶
-
void
- struct
Graphs are collections of nodes and edges. They can represent subgraphs of larger graphs or be wholly self-sufficient. Protobuf memory limits of 67108864 bytes mean we typically keep them small, generating graphs as collections of smaller subgraphs.
- template <typename A, typename B>
-
struct class
std::
hash
<pair<A, B>>¶ - #include <hash_map.hpp>
Public Functions
-
size_t
std::hash::
operator()
(const pair<A, B> &x) const¶
-
size_t
- template <>
-
struct class
std::
hash
<vg::NodeSide>¶ - #include <nodeside.hpp>
Hash functor to hash
NodeSide
s. We need to implement a hash function for these if we want to be able to use them in keys in hash maps.
- struct
- #include <genotyper.hpp>
Public Functions
-
size_t
vg::Genotyper::hash_ambiguous_allele_set::
operator()
(const vector<size_t> &ambiguous_set) const¶
-
size_t
- template <typename K, typename V>
- class
- #include <hash_map.hpp>
Inherits from google::dense_hash_map< K, V >
Public Functions
-
vg::hash_map::
hash_map
()¶
-
- template <typename K, typename V>
-
class class
vg::
hash_map
<K *, V>¶ - #include <hash_map.hpp>
Inherits from google::dense_hash_map< K *, V >
Public Functions
-
vg::hash_map::
hash_map
()¶
-
- struct
- #include <genotyper.hpp>
Public Functions
-
size_t
vg::Genotyper::hash_node_traversal::
operator()
(const NodeTraversal &node_traversal) const¶
-
size_t
- struct
- #include <genotyper.hpp>
Public Functions
-
size_t
vg::Genotyper::hash_oriented_edge::
operator()
(const pair<const NodeTraversal, const NodeTraversal> &edge) const¶
-
size_t
- class
- #include <index.hpp>
Public Functions
-
vg::Index::
Index
(void)¶
-
vg::Index::
Index
(string &name)¶
-
vg::Index::
~Index
(void)¶
-
rocksdb::Options
vg::Index::
GetOptions
(void)¶
-
void
vg::Index::
open_read_only
(string &dir)¶
-
void
vg::Index::
open_for_write
(string &dir)¶
-
void
vg::Index::
open_for_bulk_load
(string &dir)¶
-
void
vg::Index::
reset_options
(void)¶
-
void
vg::Index::
flush
(void)¶
-
void
vg::Index::
compact
(void)¶
-
void
vg::Index::
close
(void)¶
-
void
vg::Index::
for_range
(string &key_start, string &key_end, std::function<void(string&, string&)> lambda)¶
-
void
vg::Index::
put_kmer
(const string &kmer, const int64_t id, const int32_t pos)¶
-
void
vg::Index::
batch_kmer
(const string &kmer, const int64_t id, const int32_t pos, rocksdb::WriteBatch &batch)¶
-
void
vg::Index::
put_metadata
(const string &tag, const string &data)¶
-
void
vg::Index::
put_node_path
(int64_t node_id, int64_t path_id, int64_t path_pos, bool backward, const Mapping &mapping)¶
-
void
vg::Index::
put_path_position
(int64_t path_id, int64_t path_pos, bool backward, int64_t node_id, const Mapping &mapping)¶
-
rocksdb::Status
vg::Index::
get_edge
(int64_t from, bool from_start, int64_t to, bool to_end, Edge &edge)¶
-
rocksdb::Status
vg::Index::
get_metadata
(const string &key, string &data)¶
-
int
vg::Index::
get_node_path
(int64_t node_id, int64_t path_id, int64_t &path_pos, bool &backward, Mapping &mapping)¶
-
void
vg::Index::
for_alignment_in_range
(int64_t id1, int64_t id2, std::function<void(const Alignment&)> lambda)¶
-
void
vg::Index::
for_alignment_to_node
(int64_t node_id, std::function<void(const Alignment&)> lambda)¶
-
void
vg::Index::
for_alignment_to_nodes
(const vector<int64_t> &ids, std::function<void(const Alignment&)> lambda)¶
-
void
vg::Index::
for_base_alignments
(const set<int64_t> &aln_ids, std::function<void(const Alignment&)> lambda)¶
-
const string
vg::Index::
key_for_node
(int64_t id)¶
-
const string
vg::Index::
key_for_edge_on_start
(int64_t node_id, int64_t other, bool backward)¶
-
const string
vg::Index::
key_for_edge_on_end
(int64_t node_id, int64_t other, bool backward)¶
-
const string
vg::Index::
key_prefix_for_edges_on_node_start
(int64_t node)¶
-
const string
vg::Index::
key_prefix_for_edges_on_node_end
(int64_t node)¶
-
const string
vg::Index::
key_for_kmer
(const string &kmer, int64_t id)¶
-
const string
vg::Index::
key_prefix_for_kmer
(const string &kmer)¶
-
const string
vg::Index::
key_for_metadata
(const string &tag)¶
-
const string
vg::Index::
key_for_path_position
(int64_t path_id, int64_t path_pos, bool backward, int64_t node_id)¶
-
const string
vg::Index::
key_for_node_path_position
(int64_t node_id, int64_t path_id, int64_t path_pos, bool backward)¶
-
const string
vg::Index::
key_prefix_for_node_path
(int64_t node_id, int64_t path_id)¶
-
const string
vg::Index::
key_for_mapping_prefix
(int64_t node_id)¶
-
const string
vg::Index::
key_for_alignment_prefix
(int64_t node_id)¶
-
const string
vg::Index::
key_for_base
(int64_t aln_id)¶
-
const string
vg::Index::
key_prefix_for_traversal
(int64_t node_id)¶
-
void
vg::Index::
parse_edge
(const string &key, const string &value, char &type, int64_t &id1, int64_t &id2, Edge &edge)¶
-
void
vg::Index::
parse_edge
(const string &key, char &type, int64_t &node_id, int64_t &other_id, bool &backward)¶
-
void
vg::Index::
parse_kmer
(const string &key, const string &value, string &kmer, int64_t &id, int32_t &pos)¶
-
void
vg::Index::
parse_node_path
(const string &key, const string &value, int64_t &node_id, int64_t &path_id, int64_t &path_pos, bool &backward, Mapping &mapping)¶
-
void
vg::Index::
parse_path_position
(const string &key, const string &value, int64_t &path_id, int64_t &path_pos, bool &backward, int64_t &node_id, Mapping &mapping)¶
-
void
vg::Index::
parse_mapping
(const string &key, const string &value, int64_t &node_id, string &hash, Mapping &mapping)¶
-
void
vg::Index::
parse_alignment
(const string &key, const string &value, int64_t &node_id, string &hash, Alignment &alignment)¶
-
void
vg::Index::
parse_base
(const string &key, const string &value, int64_t &aln_id, Alignment &alignment)¶
-
void
vg::Index::
parse_traversal
(const string &key, const string &value, int64_t &node_id, int16_t &rank, bool &backward, int64_t &aln_id)¶
-
string
vg::Index::
entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
graph_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
kmer_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
position_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
metadata_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
node_path_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
path_position_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
mapping_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
alignment_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
base_entry_to_string
(const string &key, const string &value)¶
-
string
vg::Index::
traversal_entry_to_string
(const string &key, const string &value)¶
-
void
vg::Index::
for_graph_range
(int64_t from_id, int64_t to_id, function<void(string&, string&)> lambda)¶
-
void
vg::Index::
get_nodes_next
(int64_t node, bool backward, vector<pair<int64_t, bool>> &destinations)¶
-
void
vg::Index::
get_nodes_prev
(int64_t node, bool backward, vector<pair<int64_t, bool>> &destinations)¶
-
void
vg::Index::
node_path_position
(int64_t id, string &path_name, int64_t &position, bool &backward, int64_t &offset)¶
-
pair<list<pair<int64_t, bool>>, pair<int64_t, bool>>
vg::Index::
get_nearest_node_prev_path_member
(int64_t node_id, bool backward, int64_t path_id, int64_t &path_pos, bool &relative_orientation, int max_steps = 4)¶
-
pair<list<pair<int64_t, bool>>, pair<int64_t, bool>>
vg::Index::
get_nearest_node_next_path_member
(int64_t node_id, bool backward, int64_t path_id, int64_t &path_pos, bool &relative_orientation, int max_steps = 4)¶
-
bool
vg::Index::
get_node_path_relative_position
(int64_t node_id, bool backward, int64_t path_id, list<pair<int64_t, bool>> &path_prev, int64_t &prev_pos, bool &prev_orientation, list<pair<int64_t, bool>> &path_next, int64_t &next_pos, bool &next_orientation)¶
-
Mapping
vg::Index::
path_relative_mapping
(int64_t node_id, bool backward, int64_t path_id, list<pair<int64_t, bool>> &path_prev, int64_t &prev_pos, bool &prev_orientation, list<pair<int64_t, bool>> &path_next, int64_t &next_pos, bool &next_orientation)¶
-
bool
vg::Index::
surject_alignment
(const Alignment &source, set<string> &path_names, Alignment &surjection, string &path_name, int64_t &path_pos, bool &path_reverse, int window = 5)¶
-
void
vg::Index::
path_layout
(map<string, pair<pair<int64_t, bool>, pair<int64_t, bool>>> &layout, map<string, int64_t> &lengths)¶
-
pair<int64_t, bool>
vg::Index::
path_first_node
(int64_t path_id)¶
-
pair<int64_t, bool>
vg::Index::
path_last_node
(int64_t path_id, int64_t &path_length)¶
-
uint64_t
vg::Index::
approx_size_of_kmer_matches
(const string &kmer)¶
-
void
vg::Index::
approx_sizes_of_kmer_matches
(const vector<string> &kmers, vector<uint64_t> &sizes)¶
-
void
vg::Index::
for_kmer_range
(const string &kmer, function<void(string&, string&)> lambda)¶
-
void
vg::Index::
get_kmer_positions
(const string &kmer, map<int64_t, vector<int32_t>> &positions)¶
-
void
vg::Index::
get_kmer_positions
(const string &kmer, map<string, vector<pair<int64_t, int32_t>>> &positions)¶
-
void
vg::Index::
prune_kmers
(int max_kb_on_disk)¶
-
void
vg::Index::
remember_kmer_size
(int size)¶
-
set<int>
vg::Index::
stored_kmer_sizes
(void)¶
-
void
vg::Index::
store_batch
(map<string, string> &items)¶
-
void
vg::Index::
kmer_matches
(std::string &kmer, std::set<int64_t> &node_ids, std::set<int64_t> &edge_ids)¶
-
string
vg::Index::
first_kmer_key
(const string &kmer)¶
-
int64_t
vg::Index::
get_max_path_id
(void)¶
-
void
vg::Index::
put_max_path_id
(int64_t id)¶
-
int64_t
vg::Index::
new_path_id
(const string &name)¶
-
string
vg::Index::
path_name_prefix
(const string &name)¶
-
string
vg::Index::
path_id_prefix
(int64_t id)¶
-
void
vg::Index::
put_path_id_to_name
(int64_t id, const string &name)¶
-
void
vg::Index::
put_path_name_to_id
(int64_t id, const string &name)¶
-
string
vg::Index::
get_path_name
(int64_t id)¶
-
int64_t
vg::Index::
get_path_id
(const string &name)¶
-
map<string, int64_t>
vg::Index::
paths_by_id
(void)¶
-
char
vg::Index::
graph_key_type
(const string &key)¶
Public Members
-
string
vg::Index::
name
¶
-
char
vg::Index::
start_sep
¶
-
char
vg::Index::
end_sep
¶
-
int
vg::Index::
threads
¶
-
rocksdb::DB *
vg::Index::
db
¶
-
bool
vg::Index::
is_open
¶
-
bool
vg::Index::
use_snappy
¶
-
rocksdb::Options
vg::Index::
db_options
¶
-
rocksdb::WriteOptions
vg::Index::
write_options
¶
-
rocksdb::ColumnFamilyOptions
vg::Index::
column_family_options
¶
-
bool
vg::Index::
bulk_load
¶
-
bool
vg::Index::
mem_env
¶
-
size_t
vg::Index::
block_cache_size
¶
-
mt19937
vg::Index::
rng
¶
-
- class
- #include <index.hpp>
Inherits from exception
Public Functions
-
vg::indexOpenException::
indexOpenException
(string message = "")¶
Private Functions
-
virtual const char *
vg::indexOpenException::
what
() const¶
Private Members
-
string
vg::indexOpenException::
message
¶
-
- struct
- #include <caller.hpp>
- struct
We need to suppress overlapping variants, but interval trees are hard to write. This accomplishes the collision check with a massive bit vector.
Public Functions
-
glenn2vcf::IntervalBitfield::
IntervalBitfield
(size_t length)¶ Make a new IntervalBitfield covering a region of the specified length.
-
bool
glenn2vcf::IntervalBitfield::
collides
(size_t start, size_t pastEnd)¶ Scan for a collision (O(n) in interval length).
-
void
glenn2vcf::IntervalBitfield::
add
(size_t start, size_t pastEnd)¶ Take up an interval.
-
- class
Inherits from exception
- struct
Public Functions
-
json_autoptr::
json_autoptr
(json_t *json)¶
-
json_autoptr::
~json_autoptr
()¶
-
json_t *
json_autoptr::
release
()¶
Public Members
-
json_t *
json_autoptr::
ptr
¶
-
- template <class T>
- class
- #include <json2pb.h>
Public Functions
-
JSONStreamHelper::
~JSONStreamHelper
()¶
Private Members
-
FILE *
JSONStreamHelper::
_fp
¶
-
- class
- #include <index.hpp>
Inherits from exception
Private Functions
-
virtual const char *
vg::keyNotFoundException::
what
() const¶
-
virtual const char *
- struct
Used to serialize kmer matches.
- struct
- #include <vg.hpp>
We create a struct that represents each kmer record we want to send to gcsa2
- struct
Describes a genetic locus with multiple possible alleles, a genotype, and observational support.
Public Members
-
string
vg::Locus::
name
¶ A locus may have an identifying name.
-
repeated<Path>
vg::Locus::
allele
¶ These are all the alleles at the locus, not just the called ones. Note that a primary reference allele may or may not appear.
-
string
- class
- #include <mapper.hpp>
Public Functions
-
vg::Mapper::
Mapper
(void)¶
-
vg::Mapper::
~Mapper
(void)¶
-
void
vg::Mapper::
clear_aligners
(void)¶
-
QualAdjAligner *
vg::Mapper::
get_qual_adj_aligner
(void)¶
-
void
vg::Mapper::
init_node_cache
(void)¶
-
void
vg::Mapper::
record_fragment_length
(int length)¶
-
double
vg::Mapper::
fragment_length_stdev
(void)¶
-
double
vg::Mapper::
fragment_length_mean
(void)¶
-
double
vg::Mapper::
estimate_gc_content
()¶
-
void
vg::Mapper::
init_aligner
(int32_t match, int32_t mismatch, int32_t gap_open, int32_t gap_extend)¶
-
void
vg::Mapper::
set_alignment_scores
(int32_t match, int32_t mismatch, int32_t gap_open, int32_t gap_extend)¶
-
bool
vg::Mapper::
alignments_consistent
(const map<string, double> &pos1, const map<string, double> &pos2, int fragment_size_bound)¶
-
set<MaximalExactMatch *>
vg::Mapper::
resolve_paired_mems
(vector<MaximalExactMatch> &mems1, vector<MaximalExactMatch> &mems2)¶
-
vector<Alignment>
vg::Mapper::
mems_id_clusters_to_alignments
(const Alignment &alignment, vector<MaximalExactMatch> &mems, int additional_multimaps)¶
-
vector<Alignment>
vg::Mapper::
mems_pos_clusters_to_alignments
(const Alignment &aln, vector<MaximalExactMatch> &mems, int additional_multimaps)¶
-
Alignment
vg::Mapper::
mems_to_alignment
(const Alignment &aln, vector<MaximalExactMatch> &mems)¶
-
Alignment
vg::Mapper::
mem_to_alignment
(MaximalExactMatch &mem)¶
-
int64_t
vg::Mapper::
get_node_length
(int64_t node_id)¶
-
Alignment
vg::Mapper::
align
(const string &seq, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)¶
-
Alignment
vg::Mapper::
align
(const Alignment &read, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)¶
-
vector<Alignment>
vg::Mapper::
align_multi
(const Alignment &aln, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)¶
-
pair<vector<Alignment>, vector<Alignment>>
vg::Mapper::
align_paired_multi
(const Alignment &read1, const Alignment &read2, bool &queued_resolve_later, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000, int pair_window = 64)¶
-
pair<Alignment, Alignment>
vg::Mapper::
align_paired
(const Alignment &read1, const Alignment &read2, bool &queued_resolve_later, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000, int pair_window = 64)¶
-
Alignment
vg::Mapper::
surject_alignment
(const Alignment &source, set<string> &path_names, string &path_name, int64_t &path_pos, bool &path_reverse, int window)¶
-
vector<MaximalExactMatch>
vg::Mapper::
find_smems
(const string &seq, int max_length)¶
-
bool
vg::Mapper::
get_mem_hits_if_under_max
(MaximalExactMatch &mem)¶
-
void
vg::Mapper::
check_mems
(const vector<MaximalExactMatch> &mems)¶
-
vector<MaximalExactMatch>
vg::Mapper::
find_forward_mems
(const string &seq, size_t step = 1, int max_mem_length = 0)¶
-
vector<Alignment>
vg::Mapper::
mem_to_alignments
(MaximalExactMatch &mem)¶
Public Members
-
xg::XG *
vg::Mapper::
xindex
¶
-
vector<QualAdjAligner *>
vg::Mapper::
qual_adj_aligners
¶
-
deque<double>
vg::Mapper::
fragment_lengths
¶
-
int
vg::Mapper::
cached_fragment_length_mean
¶
-
int
vg::Mapper::
cached_fragment_length_stdev
¶
-
int
vg::Mapper::
since_last_fragment_length_estimate
¶
-
int
vg::Mapper::
fragment_length_estimate_interval
¶
-
bool
vg::Mapper::
debug
¶
-
int
vg::Mapper::
alignment_threads
¶
-
set<int>
vg::Mapper::
kmer_sizes
¶
-
int
vg::Mapper::
best_clusters
¶
-
int
vg::Mapper::
cluster_min
¶
-
int
vg::Mapper::
hit_size_threshold
¶
-
float
vg::Mapper::
min_kmer_entropy
¶
-
int
vg::Mapper::
kmer_min
¶
-
int
vg::Mapper::
max_thread_gap
¶
-
int
vg::Mapper::
kmer_sensitivity_step
¶
-
bool
vg::Mapper::
prefer_forward
¶
-
bool
vg::Mapper::
greedy_accept
¶
-
float
vg::Mapper::
accept_identity
¶
-
int
vg::Mapper::
min_mem_length
¶
-
int
vg::Mapper::
mem_threading
¶
-
int
vg::Mapper::
hit_max
¶
-
int
vg::Mapper::
context_depth
¶
-
int
vg::Mapper::
max_attempts
¶
-
int
vg::Mapper::
thread_extension
¶
-
int
vg::Mapper::
max_target_factor
¶
-
size_t
vg::Mapper::
max_query_graph_ratio
¶
-
int
vg::Mapper::
max_multimaps
¶
-
int
vg::Mapper::
softclip_threshold
¶
-
int
vg::Mapper::
max_softclip_iterations
¶
-
float
vg::Mapper::
min_identity
¶
-
int
vg::Mapper::
extra_pairing_multimaps
¶
-
bool
vg::Mapper::
adjust_alignments_for_base_quality
¶
-
MappingQualityMethod
vg::Mapper::
mapping_quality_method
¶
-
bool
vg::Mapper::
always_rescue
¶
-
int
vg::Mapper::
fragment_max
¶
-
int
vg::Mapper::
fragment_size
¶
-
double
vg::Mapper::
fragment_sigma
¶
-
int
vg::Mapper::
fragment_length_cache_size
¶
Private Functions
-
vector<Alignment>
vg::Mapper::
align_multi_internal
(bool compute_unpaired_qualities, const Alignment &aln, int kmer_size, int stride, int max_mem_length, int band_width, int additional_multimaps = 0, vector<MaximalExactMatch> *restricted_mems = nullptr)¶
-
vector<Alignment>
vg::Mapper::
score_sort_and_deduplicate_alignments
(vector<Alignment> &all_alns, const Alignment &original_alignment)¶
-
void
vg::Mapper::
filter_and_process_multimaps
(vector<Alignment> &all_alns, int additional_multimaps)¶
-
vector<Alignment>
vg::Mapper::
align_multi_kmers
(const Alignment &aln, int kmer_size = 0, int stride = 0, int band_width = 1000)¶
-
Alignment
vg::Mapper::
align_banded
(const Alignment &read, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)¶
-
vector<Alignment>
vg::Mapper::
align_mem_multi
(const Alignment &alignment, vector<MaximalExactMatch> &mems, int additional_multimaps = 0)¶
-
- struct
A Mapping defines the relationship between a node in the system and another entity. An empty edit list implies a complete match; however, it is preferred to specify the full edit structure, as it is more complex to handle special cases.
- class
- #include <mapper.hpp>
Public Functions
-
vg::MaximalExactMatch::
MaximalExactMatch
(string::const_iterator b, string::const_iterator e, gcsa::range_type r, size_t m = 0)¶
-
string
vg::MaximalExactMatch::
sequence
(void) const¶
-
void
vg::MaximalExactMatch::
fill_nodes
(gcsa::GCSA *gcsa)¶
-
void
vg::MaximalExactMatch::
fill_match_count
(gcsa::GCSA *gcsa)¶
-
int
vg::MaximalExactMatch::
length
(void) const¶
-
- struct
- #include <genotypekit.hpp>
Represents a genotypeable site, with input and output NodeTraversals, that can contain other nested sites within it.
Must be understood in relation to some vg graph.
Public Members
-
vector<NestedSite>
vg::NestedSite::
children
¶
-
map<NodeTraversal, size_t>
vg::NestedSite::
child_border_index
¶
-
NodeTraversal
vg::NestedSite::
start
¶
-
NodeTraversal
vg::NestedSite::
end
¶
-
vector<NestedSite>
- struct
Nodes store sequence data.
- struct
- #include <caller.hpp>
Public Functions
-
void
vg::NodeDivider::
add_fragment
(const Node *orig_node, int offset, Node *subnode, EntryCat cat, vector<StrandSupport> sup)¶
-
NodeDivider::Entry
vg::NodeDivider::
break_end
(const Node *orig_node, VG *graph, int offset, bool left_side)¶
-
list<Mapping>
vg::NodeDivider::
map_node
(int64_t node_id, int64_t start_offset, int64_t length, bool reverse)¶
-
void
vg::NodeDivider::
clear
()¶
-
void
- struct
Collect pileup records by node. Saves some space and hashing over storing individually, assuming not too sparse and avg. node length more than a couple of bases. The ith BasePileup in the array corresponds to the position at offset i.
- class
- #include <nodeside.hpp>
Represents one side of a Node, identified by ID, for the purposes of indexing edges. TODO: duplicates much of the functionality of NodeTraversal, and causes API duplication to accommodate both. There should only be one.
Public Functions
-
vg::NodeSide::
NodeSide
(id_t node, bool is_end = false)¶ Create a NodeSide for the given side of the given Node. We need this to be a converting constructor so we can represent the empty and deleted item keys in a pair_hash_map.
Public Members
-
bool
vg::NodeSide::
is_end
¶ Are we the end side? Or the start side?
Public Static Functions
-
static pair<NodeSide, NodeSide>
vg::NodeSide::
pair_from_edge
(Edge *e)¶ Make an edge into a canonically ordered pair of NodeSides.
-
static pair<NodeSide, NodeSide>
vg::NodeSide::
pair_from_edge
(const Edge &e)¶ Make an edge into a canonically ordered pair of NodeSides.
-
- class
- #include <nodetraversal.hpp>
Represents a node traversed in a certain orientation. The default orientation is start to end, but if
backward
is set, represents the node being traversed end to start. A list of these can serve as an edit-free version of a path, especially if supplemented with a length and an initial node offset. A path node has a left and a right side, which are the start and end of the node if it is forward, or the end and start of the node if it is backward.
Public Functions
-
vg::NodeTraversal::
NodeTraversal
(Node *node, bool backward = false)¶ Make a NodeTraversal that traverses the given Node in the given orientation. We don’t want Node*s to turn into NodeTraversals when we aren’t expecting it, so this is explicit.
-
vg::NodeTraversal::
NodeTraversal
()¶ Create a NodeTraversal of no node.
-
bool
vg::NodeTraversal::
operator==
(const NodeTraversal &other) const¶ Equality operator.
-
bool
vg::NodeTraversal::
operator!=
(const NodeTraversal &other) const¶ Inequality operator.
-
bool
vg::NodeTraversal::
operator<
(const NodeTraversal &other) const¶ Comparison operator for sorting in sets and maps. Make sure to sort by node ID and not pointer value, because people will expect that.
-
NodeTraversal
vg::NodeTraversal::
reverse
(void) const¶ Reverse complement the node traversal, returning a traversal of the same node in the opposite direction.
-
- template <typename K, typename V>
- class
- #include <hash_map.hpp>
Inherits from google::dense_hash_map< K, V, std::hash< K > >
Public Functions
-
vg::pair_hash_map::
pair_hash_map
()¶
-
- struct
Paths are walks through nodes defined by a series of
Edit
s. They can be used to represent:
- haplotypes
- mappings of reads, or alignments, by including edits
- relationships between nodes
- annotations from other data sources, such as: genes, exons, motifs, transcripts, peaks
Public Members
-
string
vg::Path::
name
¶ The name of the path. Path names starting with underscore (_) are reserved for internal VG use.
-
repeated<Mapping>
vg::Path::
mapping
¶ The
Mapping
s which describe the order and orientation in which the Path visits Node
s.
-
bool
vg::Path::
is_circular
¶ Set to true if the path is circular.
- class
- #include <path.hpp>
Public Functions
-
vg::Paths::
Paths
(void)¶
-
void
vg::Paths::
sort_by_mapping_rank
(void)¶
-
void
vg::Paths::
rebuild_mapping_aux
(void)¶
-
vector<string>
vg::Paths::
all_path_names
(void)¶
-
void
vg::Paths::
make_circular
(const string &name)¶
-
void
vg::Paths::
make_linear
(const string &name)¶
-
void
vg::Paths::
rebuild_node_mapping
(void)¶
-
list<Mapping>::iterator
vg::Paths::
insert_mapping
(list<Mapping>::iterator w, const string &path_name, const Mapping &m)¶
-
void
vg::Paths::
remove_paths
(const set<string> &names)¶
-
void
vg::Paths::
remove_path
(const string &name)¶
-
void
vg::Paths::
keep_paths
(const set<string> &name)¶
-
bool
vg::Paths::
has_path
(const string &name)¶
-
void
vg::Paths::
to_json
(ostream &out)¶
-
bool
vg::Paths::
has_mapping
(const string &name, size_t rank)¶
-
vector<string>
vg::Paths::
over_edge
(id_t id1, bool rev1, id_t id2, bool rev2, vector<string> following)¶
-
vector<string>
vg::Paths::
over_directed_edge
(id_t id1, bool rev1, id_t id2, bool rev2, vector<string> following)¶
-
size_t
vg::Paths::
size
(void) const¶
-
bool
vg::Paths::
empty
(void) const¶
-
void
vg::Paths::
clear
(void)¶
-
void
vg::Paths::
clear_mapping_ranks
(void)¶
-
void
vg::Paths::
compact_ranks
(void)¶
-
void
vg::Paths::
load
(istream &in)¶
-
void
vg::Paths::
write
(ostream &out)¶
-
void
vg::Paths::
append_mapping
(const string &name, id_t id, size_t rank = 0, bool is_reverse = false)¶
-
void
vg::Paths::
prepend_mapping
(const string &name, id_t id, size_t rank = 0, bool is_reverse = false)¶
-
size_t
vg::Paths::
get_next_rank
(const string &name)¶
-
void
vg::Paths::
for_each_name
(const function<void(const string&)> &lambda)¶
Public Members
-
set<string>
vg::Paths::
circular
¶
-
- class
- #include <pictographs.hpp>
Public Functions
-
vg::Pictographs::
Pictographs
(void)¶
-
vg::Pictographs::
Pictographs
(int seed_val)¶
-
vg::Pictographs::
~Pictographs
(void)¶
-
string
vg::Pictographs::
hashed
(const string &str)¶
-
string
vg::Pictographs::
random
(void)¶
Private Members
-
mt19937
vg::Pictographs::
rng
¶
-
- struct
Bundle up Node and Edge pileups.
Public Members
-
repeated<NodePileup>
vg::Pileup::
node_pileups
¶
-
repeated<EdgePileup>
vg::Pileup::
edge_pileups
¶
-
repeated<NodePileup>
- class
- #include <pileup.hpp>
Public Types
- typedef
- typedef
Public Functions
-
vg::Pileups::
Pileups
(VG *graph, int min_quality = 0, int max_mismatches = 1, int window_size = 0, int max_depth = 1000, bool use_mapq = false)¶
-
vg::Pileups::
~Pileups
()¶
-
void
vg::Pileups::
clear
()¶
-
void
vg::Pileups::
to_json
(ostream &out)¶
-
void
vg::Pileups::
load
(istream &in)¶
-
void
vg::Pileups::
write
(ostream &out, uint64_t buffer_size = 5)¶
-
void
vg::Pileups::
for_each_node_pileup
(const function<void(NodePileup&)> &lambda)¶
-
NodePileup *
vg::Pileups::
get_node_pileup
(int64_t node_id)¶
-
NodePileup *
vg::Pileups::
get_create_node_pileup
(const Node *node)¶
-
void
vg::Pileups::
for_each_edge_pileup
(const function<void(EdgePileup&)> &lambda)¶
-
EdgePileup *
vg::Pileups::
get_edge_pileup
(pair<NodeSide, NodeSide> sides)¶
-
EdgePileup *
vg::Pileups::
get_create_edge_pileup
(pair<NodeSide, NodeSide> sides)¶
-
bool
vg::Pileups::
insert_node_pileup
(NodePileup *pileup)¶
-
bool
vg::Pileups::
insert_edge_pileup
(EdgePileup *edge_pileup)¶
-
void
vg::Pileups::
compute_from_edit
(NodePileup &pileup, int64_t &node_offset, int64_t &read_offset, const Node &node, const Alignment &alignment, const Mapping &mapping, const Edit &edit, const Edit *next_edit, const vector<int> &mismatch_counts, pair<const Mapping *, int64_t> &last_match, pair<const Mapping *, int64_t> &last_del, pair<const Mapping *, int64_t> &open_del)¶
-
bool
vg::Pileups::
pass_filter
(const Alignment &alignment, int64_t read_offset, int64_t length, const vector<int> &mismatches) const¶
-
BasePileup &
vg::Pileups::
merge_base_pileups
(BasePileup &p1, BasePileup &p2)¶
-
NodePileup &
vg::Pileups::
merge_node_pileups
(NodePileup &p1, NodePileup &p2)¶
-
EdgePileup &
vg::Pileups::
merge_edge_pileups
(EdgePileup &p1, EdgePileup &p2)¶
-
char
vg::Pileups::
combined_quality
(char base_quality, int map_quality) const¶
Public Members
-
NodePileupHash
vg::Pileups::
_node_pileups
¶
-
EdgePileupHash
vg::Pileups::
_edge_pileups
¶
-
int
vg::Pileups::
_min_quality
¶
-
int
vg::Pileups::
_max_mismatches
¶
-
int
vg::Pileups::
_window_size
¶
-
int
vg::Pileups::
_max_depth
¶
-
bool
vg::Pileups::
_use_mapq
¶
-
uint64_t
vg::Pileups::
_min_quality_count
¶
-
uint64_t
vg::Pileups::
_max_mismatch_count
¶
-
uint64_t
vg::Pileups::
_bases_count
¶
Public Static Functions
-
void
vg::Pileups::
count_mismatches
(VG &graph, const Path &path, vector<int> &mismatches, bool skipIndels = false)¶
-
static BasePileup *
vg::Pileups::
get_base_pileup
(NodePileup &np, int64_t offset)¶
-
static const BasePileup *
vg::Pileups::
get_base_pileup
(const NodePileup &np, int64_t offset)¶
-
static BasePileup *
vg::Pileups::
get_create_base_pileup
(NodePileup &np, int64_t offset)¶
-
void
vg::Pileups::
parse_base_offsets
(const BasePileup &bp, vector<pair<int64_t, int64_t>> &offsets)¶
-
void
vg::Pileups::
casify
(string &seq, bool is_reverse)¶
-
void
vg::Pileups::
make_match
(string &seq, int64_t from_length, bool is_reverse)¶
-
void
vg::Pileups::
make_insert
(string &seq, bool is_reverse)¶
-
void
vg::Pileups::
make_delete
(string &seq, bool is_reverse, const pair<const Mapping *, int64_t> &last_match, const Mapping &mapping, int64_t node_offset)¶
-
void
vg::Pileups::
make_delete
(string &seq, bool is_reverse, int64_t from_id, int64_t from_offset, bool from_start, int64_t to_id, int64_t to_offset, bool to_end)¶
-
void
vg::Pileups::
parse_insert
(const string &tok, int64_t &len, string &seq, bool &is_reverse)¶
-
void
vg::Pileups::
parse_delete
(const string &tok, bool &is_reverse, int64_t &from_id, int64_t &from_offset, bool &from_start, int64_t &to_id, int64_t &to_offset, bool &to_end)¶
-
bool
vg::Pileups::
base_equal
(char c1, char c2, bool is_reverse)¶
-
char
vg::Pileups::
extract_match
(const BasePileup &bp, int64_t offset)¶
-
string
vg::Pileups::
extract
(const BasePileup &bp, int64_t offset)¶
- struct
- #include <vg.hpp>
Structure for managing parallel construction of a graph.
Public Functions
- struct
A position in the graph is a node, direction, and offset. The node is stored by ID, and the offset is 0-based and counts from the start of the node in the specified orientation. The direction specifies which orientation of the node we are considering, the forward (as stored) or reverse complement.
Example:
seq+        G A T T A C A
offset+  → 0 1 2 3 4 5 6 7

seq-        C T A A T G T
offset-  → 0 1 2 3 4 5 6 7
Or both at once:
offset-    7 6 5 4 3 2 1 0 ←
seq+        G A T T A C A
offset+  → 0 1 2 3 4 5 6 7
- class
- #include <progressive.hpp>
Inherit from this class to give your class create_progress(), update_progress(), and destroy_progress() methods, and a public show_progress field that can be toggled on and off.
Must not be destroyed while a progress bar is active.
Subclassed by vg::Constructor, vg::VG
Public Functions
-
void
vg::Progressive::
preload_progress
(const string &message)¶ If no progress bar is currently displayed, set the message to use for the next progress bar to be created. Does nothing if show_progress is false or when a progress bar is displayed.
Public so that users of a class can provide descriptive messages for generic progress operations (like VG's for_each_kmer_parallel).
-
void
vg::Progressive::
create_progress
(const string &message, long count)¶ Create a progress bar showing the given message, with the given number of items to process. Does nothing if show_progress is false. Replaces any existing progress bar.
-
void
vg::Progressive::
create_progress
(long count)¶ Create a progress bar with the given number of items to process, using either a default message, or the message passed to the last preload_progress call since a progress bar was destroyed. Does nothing if show_progress is false. Replaces any existing progress bar.
-
void
vg::Progressive::
update_progress
(long i)¶ Update the progress bar, noting that the given number of items have been processed. Does nothing if no progress bar is displayed.
-
void
vg::Progressive::
increment_progress
()¶ Update the progress bar, noting that one additional item has been processed. Does nothing if no progress bar is displayed.
-
void
vg::Progressive::
destroy_progress
(void)¶ Destroy the current progress bar, if it exists.
Public Members
-
bool
vg::Progressive::
show_progress
¶
-
void
- class
- #include <gssw_aligner.hpp>
Inherits from vg::Aligner
Public Functions
-
QualAdjAligner::
QualAdjAligner
(int8_t _match = default_match, int8_t _mismatch = default_mismatch, int8_t _gap_open = default_gap_open, int8_t _gap_extension = default_gap_extension, int8_t _max_scaled_score = default_max_scaled_score, uint8_t _max_qual_score = default_max_qual_score, double gc_content = default_gc_content)¶
-
QualAdjAligner::
~QualAdjAligner
(void)¶
-
void
QualAdjAligner::
align
(Alignment &alignment, Graph &g, bool print_score_matrices = false)¶
-
void
QualAdjAligner::
align_global_banded
(Alignment &alignment, Graph &g, int32_t band_padding = 0, bool permissive_banding = true)¶
-
void
vg::QualAdjAligner::
align_pinned
(Alignment &alignment, Graph &g, int64_t node_id, bool pin_left)¶
-
void
QualAdjAligner::
align_global_banded_multi
(Alignment &alignment, vector<Alignment> &alt_alignments, Graph &g, int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true)¶
-
void
QualAdjAligner::
init_mapping_quality
(double gc_content)¶
-
int32_t
QualAdjAligner::
score_exact_match
(const string &sequence, const string &base_quality)¶
Public Members
-
uint8_t
vg::QualAdjAligner::
max_qual_score
¶
-
int8_t
vg::QualAdjAligner::
scaled_gap_open
¶
-
int8_t
vg::QualAdjAligner::
scaled_gap_extension
¶
-
int8_t *
vg::QualAdjAligner::
adjusted_score_matrix
¶
Private Functions
-
void
QualAdjAligner::
init_quality_adjusted_scores
(int8_t _max_scaled_score, uint8_t _max_qual_score, double gc_content)¶
-
- class
- #include <readfilter.hpp>
Public Functions
-
int
vg::ReadFilter::
filter
(istream *alignment_stream, xg::XG *xindex = nullptr)¶ Filter the alignments available from the given stream, placing them on standard output or in the appropriate file. Returns 0 on success, exit code to use on error.
If an XG index is required, use the specified one. If one is required and not provided, the function will complain and return nonzero.
TODO: Refactor to be less CLI-aware and more modular-y.
-
bool
vg::ReadFilter::
trim_ambiguous_ends
(xg::XG *index, Alignment &alignment, int k)¶ Look at either end of the given alignment, up to k bases in from the end. See if that tail of the alignment is mapped such that another embedding in the given graph can produce the same sequence as the sequence along the embedding that the read actually has, and if so trim back the read.
In the case of softclips, the aligned portion of the read is considered, and if trimming is required, the softclips are hard-clipped off.
Returns true if the read had to be modified, and false otherwise.
MUST NOT be called with a null index.
Public Members
-
double
vg::ReadFilter::
min_secondary
¶
-
double
vg::ReadFilter::
min_primary
¶
-
bool
vg::ReadFilter::
frac_score
¶
-
bool
vg::ReadFilter::
sub_score
¶
-
int
vg::ReadFilter::
max_overhang
¶
-
int
vg::ReadFilter::
context_size
¶
-
bool
vg::ReadFilter::
verbose
¶
-
double
vg::ReadFilter::
min_mapq
¶
-
int
vg::ReadFilter::
repeat_size
¶
-
int
vg::ReadFilter::
defray_length
¶
-
int
vg::ReadFilter::
defray_count
¶
-
bool
vg::ReadFilter::
drop_split
¶
-
int
vg::ReadFilter::
threads
¶
-
string
vg::ReadFilter::
regions_file
¶
-
string
vg::ReadFilter::
outbase
¶
Private Functions
-
bool
vg::ReadFilter::
has_repeat
(Alignment &aln, int k)¶ Quick and dirty filter to see if removing reads that can slip around and still map perfectly helps vg call. Returns true if, at either end of the read sequence, at least k bases are repetitive, checking repeats of up to size 2k.
-
int
- class
- #include <realigner.hpp>
Public Functions
-
vg::Realigner::
Realigner
(vcflib::VariantCallFile &v, FastaReference &r, const string &t)¶
-
void
vg::Realigner::
construct
(void)¶
Public Members
-
FastaReference &
vg::Realigner::
ref
¶
-
vcflib::VariantCallFile
vg::Realigner::
vcf_file
¶
-
string
vg::Realigner::
target
¶
-
string
vg::Realigner::
seq_name
¶
-
int
vg::Realigner::
start_pos
¶
-
int
vg::Realigner::
end_pos
¶
-
bool
vg::Realigner::
debug
¶
-
double
vg::Realigner::
identity_trigger
¶
-
bool
vg::Realigner::
realign_unpaired
¶
-
double
vg::Realigner::
softclip_trigger
¶
-
int
vg::Realigner::
idx_kmer_size
¶
-
int
vg::Realigner::
edge_max
¶
-
bool
vg::Realigner::
idx_path_only
¶
-
int
vg::Realigner::
doubling_steps
¶
-
gcsa::GCSA *
vg::Realigner::
gcsaidx
¶
-
gcsa::LCPArray *
vg::Realigner::
lcpidx
¶
-
xg::XG *
vg::Realigner::
xgidx
¶
-
- struct
- #include <genotyper.hpp>
Holds indexes of the reference in a graph: position to node, node to position and orientation, and the full reference string.
- struct
Holds indexes of the reference: position to node, node to position and orientation, and the full reference string.
- struct
- #include <region.hpp>
- class
- #include <sampler.hpp>
Public Functions
-
vg::Sampler::
Sampler
(xg::XG *x, int seed = 0, bool forward_only = false)¶
-
string
vg::Sampler::
sequence
(size_t length)¶
-
vector<Alignment>
vg::Sampler::
alignment_pair
(size_t read_length, size_t fragment_length, double fragment_std_dev, double base_error, double indel_error)¶
-
- struct
- #include <bubbles.hpp>
- struct
- #include <genotyper.hpp>
- class
- #include <genotypekit.hpp>
Represents a strategy for finding (nested) Sites in a vg graph. Polymorphic base class/interface.
Subclassed by vg::CactusSiteFinder
Public Functions
-
virtual
vg::SiteFinder::
~SiteFinder
()¶
-
virtual void
vg::SiteFinder::
for_each_site_parallel
(const function<void(NestedSite)> &lambda)¶
= 0 Run a function on all root-level NestedSites in parallel. Site trees are passed by value so they have a clear place to live during parallel operations.
-
virtual
- struct
- #include <genotypekit.hpp>
Represents a traversal of a (possibly nested) site, going from start to end and visiting nodes, edges, and contained nested sites. Basic component of a genotype.
- class
- #include <ssw_aligner.hpp>
Public Functions
-
vg::SSWAligner::
SSWAligner
(uint8_t _match = 1, uint8_t _mismatch = 4, uint8_t _gap_open = 6, uint8_t _gap_extension = 1)¶
-
vg::SSWAligner::
~SSWAligner
(void)¶
-
Alignment
vg::SSWAligner::
ssw_to_vg
(const StripedSmithWaterman::Alignment &ssw_aln, const string &query, const string &ref)¶
-
void
vg::SSWAligner::
PrintAlignment
(const StripedSmithWaterman::Alignment &alignment)¶
-
- struct
- #include <caller.hpp>
Public Functions
-
vg::StrandSupport::
StrandSupport
(int f = 0, int r = 0, int o = 0, double ll = -1e100)¶
-
bool
vg::StrandSupport::
operator<
(const StrandSupport &other) const¶
-
bool
vg::StrandSupport::
operator>=
(const StrandSupport &other) const¶
-
bool
vg::StrandSupport::
operator==
(const StrandSupport &other) const¶
-
StrandSupport
vg::StrandSupport::
operator-
(const StrandSupport &other) const¶
-
StrandSupport &
vg::StrandSupport::
operator+=
(const StrandSupport &other)¶
-
int
vg::StrandSupport::
depth
()¶
-
int
vg::StrandSupport::
total
()¶
-
- template <typename K, typename V>
- class
- #include <hash_map.hpp>
Inherits from google::dense_hash_map< K, V >
Public Functions
-
vg::string_hash_map::
string_hash_map
()¶
-
- class
- #include <subcommand.hpp>
Represents a subcommand with a name, a description, and some functions. Registers itself on construction in a static registry, and provides static functions for enumerating through that registry.
Public Functions
-
vg::subcommand::Subcommand::
Subcommand
(std::string name, std::string description, std::function<int(int, char **)> main_function)¶ Make and register a subcommand with the given name and description, which calls the given main function when invoked.
-
const std::string &
vg::subcommand::Subcommand::
get_description
() const¶ Get the description of a subcommand.
-
const int
vg::subcommand::Subcommand::
operator()
(int argc, char **argv) const¶ Run the main function of a subcommand. Return the return code.
Public Static Functions
-
const Subcommand *
vg::subcommand::Subcommand::
get
(int argc, char **argv)¶ Get the appropriate subcommand to handle the given arguments, or nullptr if no matching subcommand is found.
-
void
vg::subcommand::Subcommand::
for_each
(const std::function<void(const Subcommand&)> &lambda)¶ Call the given lambda with each known subcommand, in order.
Private Functions
Private Members
Private Static Functions
-
std::map<std::string, Subcommand *> &
vg::subcommand::Subcommand::
get_registry
()¶ Since we can’t rely on a static member field being constructed before any static code that creates actual subcommands gets run, we rely on keeping the registry in a static variable inside a static method, so it gets constructed on first use. Note that at shutdown some of the pointers in the registry may be to already-destructed static objects.
-
- struct
- #include <deconstructor.hpp>
- struct
Aggregates information about the reads supporting an allele.
- struct
Translations map from one graph to another. A collection of these provides a covering mapping between a from and to graph. If each “from” path through the base graph corresponds to a “to” path in an updated graph, then we can use these translations to project positions, mappings, and paths in the new graph into the old one using the Translator interface.
- class
- #include <translator.hpp>
Class to map paths into a base graph found via a set of Translations
Public Functions
-
vg::Translator::
Translator
(void)¶
-
vg::Translator::
Translator
(istream &in)¶
-
vg::Translator::
Translator
(const vector<Translation> &trans)¶
-
void
vg::Translator::
load
(const vector<Translation> &trans)¶
-
void
vg::Translator::
build_position_table
(void)¶
-
Translation
vg::Translator::
get_translation
(const Position &position)¶
-
Position
vg::Translator::
translate
(const Position &position, const Translation &translation)¶
-
Translation
vg::Translator::
overlay
(const Translation &trans)¶
Public Members
-
vector<Translation>
vg::Translator::
translations
¶
-
map<pos_t, Translation *>
vg::Translator::
pos_to_trans
¶
-
- class
- #include <genotypekit.hpp>
Represents a strategy for finding traversals of (nested) sites. Polymorphic base class/interface.
Subclassed by vg::TrivialTraversalFinder
Public Functions
-
virtual
vg::TraversalFinder::
~TraversalFinder
()¶
-
virtual vector<SiteTraversal>
vg::TraversalFinder::
find_traversals
(const NestedSite &site)¶
= 0
-
virtual
- class
- #include <genotypekit.hpp>
Represents a strategy for calculating Supports for SiteTraversals. Polymorphic base class/interface.
Public Functions
-
virtual
vg::TraversalSupportCalculator::
~TraversalSupportCalculator
()¶
-
virtual vector<Support>
vg::TraversalSupportCalculator::
calculate_supports
(const NestedSite &site, const vector<SiteTraversal> &traversals, const vector<Alignment *> &reads, const vector<vector<bool>> &consistencies) const¶
= 0 Return Supports for all the SiteTraversals, given the reads and their consistency flags.
-
virtual
- template <typename T>
- struct
- #include <utility.hpp>
Public Types
- typedef
Public Functions
-
vg::Tree::
~Tree
()¶
- template <typename T>
- struct
- #include <utility.hpp>
- class
- #include <genotypekit.hpp>
This traversal finder finds one or more traversals through leaf sites with no children. It uses a depth-first search. It doesn’t work on non-leaf sites, and is not guaranteed to find all traversals.
Inherits from vg::TraversalFinder
Public Functions
-
virtual
vg::TrivialTraversalFinder::
~TrivialTraversalFinder
()¶
-
vector<SiteTraversal>
vg::TrivialTraversalFinder::
find_traversals
(const NestedSite &site)¶ Find at least one traversal of the site by depth first search, if any exist. Only works on sites with no children.
-
virtual
- class
- #include <constructor.hpp>
Provides a one-variant look-ahead buffer on a vcflib::VariantFile. Lets construction functions peek and see if they want the next variant, or lets them ignore it for the next construction function for a different contig to handle. Ought not to be copied.
Handles conversion from 1-based vcflib coordinates to 0-based vg coordinates.
Public Functions
-
vcflib::Variant *
vg::VcfBuffer::
get
()¶ Return a pointer to the buffered variant, or null if no variant is buffered. Pointer is invalidated when the buffer is handled. The variant will have a 0-based start coordinate.
-
void
vg::VcfBuffer::
handle_buffer
()¶ To be called when the buffer is filled. Marks the buffered variant as handled, discarding it, and allowing another to be read.
-
void
vg::VcfBuffer::
fill_buffer
()¶ Can be called when the buffer is filled or empty. If there is no variant in the buffer, tries to load a variant into the buffer, if one can be obtained from the file.
-
bool
vg::VcfBuffer::
has_tabix
()¶ This returns true if we have a tabix index, and false otherwise. If this is false, set_region may be called, but will do nothing and return false.
-
bool
vg::VcfBuffer::
set_region
(const string &contig, int64_t start = -1, int64_t end = -1)¶ This tries to set the region on the underlying vcflib VariantCallFile to the given contig and region, if specified. Coordinates coming in should be 0-based, and will be converted to 1-based internally.
Returns true if the region was successfully set, and false otherwise (for example, if there is no tabix index, or if the given region is not part of this VCF). Note that if there is a tabix index, and set_region returns false, the position in the VCF file is undefined until the next successful set_region call.
If either of start and end are specified, then both of start and end must be specified.
-
vcflib::Variant *
- class
- #include <genotypekit.hpp>
Represents a strategy for converting Locus objects to VCF records. Polymorphic base class/interface.
- class
- #include <genotypekit.hpp>
Represents a filter that passes or rejects VCF records according to some criteria. Polymorphic base class/interface.
- class
- #include <vectorizer.hpp>
Public Functions
-
Vectorizer::
Vectorizer
(xg::XG *x)¶
-
Vectorizer::
~Vectorizer
()¶
-
void
Vectorizer::
add_bv
(bit_vector v)¶
-
void
Vectorizer::
add_name
(string n)¶
-
void
Vectorizer::
emit
(ostream &out, bool r_format, bool annotate)¶
-
bit_vector
Vectorizer::
alignment_to_onehot
(Alignment a)¶
-
vector<int>
Vectorizer::
alignment_to_a_hot
(Alignment a)¶
-
vector<double>
Vectorizer::
alignment_to_custom_score
(Alignment a, std::function<double(Alignment)> lambda)¶
-
vector<double>
Vectorizer::
alignment_to_identity_hot
(Alignment a)¶
-
string
Vectorizer::
output_wabbit_map
()¶
- template <typename T>
-
string
Vectorizer::
format
(T v)¶
- template <typename T>
-
string
Vectorizer::
wabbitize
(string name, T v)¶
-
- class
- #include <vg.hpp>
Represents a variation graph. Graphs consist of nodes, connected by edges. Graphs are bidirected and may be cyclic. Nodes carry forward-oriented sequences. Edges are directed, with a “from” and “to” node, and are generally used to connect the end of the “from” node to the start of the “to” node. However, edges can connect to either the start or end of either node.
Inherits from vg::Progressive
Public Functions
-
void
vg::VG::
set_edge
(Edge *edge)¶ Set the edge indexes through this function. Picks up the sides being connected by the edge automatically, and silently drops the edge if they are already connected.
-
void
vg::VG::
print_edges
(void)¶
-
vector<pair<id_t, bool>> &
vg::VG::
edges_start
(Node *node)¶ Get nodes and backward flags following edges that attach to this node’s start.
-
vector<pair<id_t, bool>> &
vg::VG::
edges_start
(id_t id)¶ Get nodes and backward flags following edges that attach to this node’s start.
-
vector<pair<id_t, bool>> &
vg::VG::
edges_end
(Node *node)¶ Get nodes and backward flags following edges that attach to this node’s end.
-
vector<pair<id_t, bool>> &
vg::VG::
edges_end
(id_t id)¶ Get nodes and backward flags following edges that attach to this node’s end.
-
size_t
vg::VG::
size
(void)¶ Number of nodes.
-
size_t
vg::VG::
length
(void)¶ Total sequence length.
-
vg::VG::
VG
(void)¶ Default constructor.
-
vg::VG::
VG
(istream &in, bool showp = false)¶ Construct from protobufs.
-
vg::VG::
VG
(function<bool(Graph&)> &get_next_graph, bool showp = false)¶ Construct from an arbitrary source of Graph protobuf messages (which populates the given Graph and returns a flag for whether it’s valid).
-
vg::VG::
VG
(set<Node *> &nodes, set<Edge *> &edges)¶ Construct from sets of nodes and edges. For example, from a subgraph of another graph.
-
map<id_t, vcflib::Variant>
vg::VG::
get_node_id_to_variant
(vcflib::VariantCallFile vfile)¶ Takes in a VCF file and returns a map [node] = vcflib::variant. Unfortunately this is specific to a given graph and VCF.
It will need to throw warnings if the node or variant is not in the graph.
This is useful for VCF masking:
if map.find(node) then mask variant
It’s also useful for calling known variants
for m in alignment.mappings:
    node = m.Pos.nodeID
    if node in node_to_vcf:
        return (alignment supports variant)
It would be nice if this also supported edges (e.g. for inversions/transversions/breakpoints?).
-
void
vg::VG::
dice_nodes
(int max_node_size)¶ Chop up the nodes.
-
void
vg::VG::
unchop
(void)¶ Does the reverse: combines nodes by removing edges where doing so has no effect on the graph labels.
-
set<list<NodeTraversal>>
vg::VG::
simple_components
(int min_size = 1)¶ Get the set of components that could be merged into single nodes without changing the path space of the graph. Emits oriented traversals of nodes, in the order and orientation in which they are to be merged.
-
set<list<NodeTraversal>>
vg::VG::
simple_multinode_components
(void)¶ Get the simple components of multiple nodes.
-
set<set<id_t>>
vg::VG::
strongly_connected_components
(void)¶ Get the strongly connected components of the graph.
-
set<set<id_t>>
vg::VG::
multinode_strongly_connected_components
(void)¶ Get only multi-node strongly connected components.
-
bool
vg::VG::
is_acyclic
(void)¶ Returns true if the graph does not contain cycles.
-
void
vg::VG::
keep_multinode_strongly_connected_components
(void)¶ Remove all elements which are not in a strongly connected component.
-
set<list<NodeTraversal>>
vg::VG::
elementary_cycles
(void)¶ Get simple cycles following Johnson’s elementary cycles algorithm.
-
Node *
vg::VG::
concat_nodes
(const list<NodeTraversal> &nodes)¶ Concatenates the nodes into a new node with the same external linkage as the provided component. After calling this, paths will be invalid until Paths::compact_ranks() is called.
-
Node *
vg::VG::
merge_nodes
(const list<Node *> &nodes)¶ Merge the nodes into a single node, preserving external linkages. Use the orientation of the first node as the basis.
-
void
vg::VG::
normalize
(int max_iter = 1)¶ Use unchop and sibling merging to simplify the graph into a normalized form.
-
void
vg::VG::
bluntify
(void)¶ Remove redundant overlaps.
-
VG
vg::VG::
dagify
(uint32_t expand_scc_steps, map<id_t, pair<id_t, bool>> &node_translation, size_t target_min_walk_length = 0, size_t component_length_max = 0)¶ Turn the graph into a dag by copying strongly connected components expand_scc_steps times and translating the edges in the component to flow through the copies in one direction.
-
VG
vg::VG::
backtracking_unroll
(uint32_t max_length, uint32_t max_depth, map<id_t, pair<id_t, bool>> &node_translation)¶ Generate a new graph that unrolls the current one using backtracking. Caution: exponential in branching.
-
VG
vg::VG::
unfold
(uint32_t max_length, map<id_t, pair<id_t, bool>> &node_translation)¶ Represent the whole graph up to max_length across an inversion on the forward strand.
-
map<id_t, pair<id_t, bool>>
vg::VG::
overlay_node_translations
(const map<id_t, pair<id_t, bool>> &over, const map<id_t, pair<id_t, bool>> &under)¶ Assume two node translations, the over is based on the under; merge them.
-
vector<Edge>
vg::VG::
break_cycles
(void)¶ Use our topological sort to quickly break cycles in the graph, return the edges which are removed. Very non-optimal, but fast.
-
void
vg::VG::
remove_non_path
(void)¶ Remove pieces of the graph which are not part of any path.
-
void
vg::VG::
flip_doubly_reversed_edges
(void)¶ Convert edges that are both from_start and to_end to “regular” ones from end to start.
-
void
vg::VG::
from_gfa
(istream &in, bool showp = false)¶ Build a graph from a GFA stream.
-
void
vg::VG::
from_turtle
(string filename, string baseuri, bool showp = false)¶ Build a graph from a Turtle stream.
-
vg::VG::
~VG
(void)¶ Destructor.
-
void
vg::VG::
build_indexes
(void)¶
-
void
vg::VG::
build_node_indexes
(void)¶
-
void
vg::VG::
build_edge_indexes
(void)¶
-
void
vg::VG::
index_paths
(void)¶
-
void
vg::VG::
clear_node_indexes
(void)¶
-
void
vg::VG::
clear_node_indexes_no_resize
(void)¶
-
void
vg::VG::
clear_edge_indexes
(void)¶
-
void
vg::VG::
clear_edge_indexes_no_resize
(void)¶
-
void
vg::VG::
clear_indexes
(void)¶
-
void
vg::VG::
clear_indexes_no_resize
(void)¶
-
void
vg::VG::
resize_indexes
(void)¶
-
void
vg::VG::
rebuild_indexes
(void)¶
-
void
vg::VG::
rebuild_edge_indexes
(void)¶
-
void
vg::VG::
clear_paths
(void)¶ Clear the paths object (which indexes the graph.paths) and the graph paths themselves.
-
void
vg::VG::
sync_paths
(void)¶ Synchronize in-memory indexes and protobuf graph.
-
void
vg::VG::
merge_union
(VG &g)¶ Merge protobufs after removing overlaps. Good when there aren’t many overlaps.
-
void
vg::VG::
remove_duplicates
(void)¶ Remove duplicated nodes and edges.
-
void
vg::VG::
prune_complex_paths
(int length, int edge_max, Node *head_node, Node *tail_node)¶ Limit the local complexity of the graph, connecting pruned components to a head and tail node depending on the direction which we come into the node when the edge_max is passed.
-
void
vg::VG::
prune_short_subgraphs
(size_t min_size)¶
-
void
vg::VG::
serialize_to_ostream
(ostream &out, id_t chunk_size = 1000)¶ Write to a stream in chunked graphs.
-
void
vg::VG::
compact_ids
(void)¶ Squish the node IDs down into as small a space as possible. Fixes up paths itself.
-
void
vg::VG::
increment_node_ids
(id_t increment)¶ Add the given value to all node IDs. Preserves the paths.
-
void
vg::VG::
decrement_node_ids
(id_t decrement)¶ Subtract the given value from all the node IDs. Must not create a node with 0 or negative IDs. Invalidates the paths.
-
void
vg::VG::
swap_node_id
(id_t node_id, id_t new_id)¶ Change the ID of the node with the first id to the second, new ID not used by any node. Invalidates any paths containing the node, since they are not updated.
-
void
vg::VG::
swap_node_id
(Node *node, id_t new_id)¶ Change the ID of the given node to new_id, which must not be used by any node. Invalidates any paths containing the node, since they are not updated.
-
void
vg::VG::
extend
(VG &g, bool warn_on_duplicates = false)¶ Iteratively add when nodes and edges are novel. Good when there are very many overlaps. TODO: If you are using this with warn on duplicates on, and you know there shouldn’t be any duplicates, maybe you should use merge instead. This version sorts paths on rank after adding in the path mappings from the other graph.
-
void
vg::VG::
extend
(Graph &graph, bool warn_on_duplicates = false)¶ This version does not sort path mappings by rank. In order to preserve paths, call Paths::sort_by_mapping_rank() and Paths::rebuild_mapping_aux() after you are done adding in graphs to this graph.
-
void
vg::VG::
append
(VG &g)¶ Add another graph into this graph, attaching tails to heads. Modify ids of the second graph to ensure we don’t have conflicts. Then attach tails of this graph to the heads of the other, and extend(g).
-
void
vg::VG::
combine
(VG &g)¶ Add another graph into this graph. Don’t append or join the nodes in the graphs; just ensure that ids are unique, then apply extend.
-
vector<Translation>
vg::VG::
edit
(const vector<Path> &paths)¶ Edit the graph to include all the sequence and edges added by the given paths. Can handle paths that visit nodes in any orientation.
-
void
vg::VG::
find_breakpoints
(const Path &path, map<id_t, set<pos_t>> &breakpoints)¶ Find all the points at which a Path enters or leaves nodes in the graph. Adds them to the given map by node ID of sets of bases in the node that will need to become the starts of new nodes.
-
map<pos_t, Node *>
vg::VG::
ensure_breakpoints
(const map<id_t, set<pos_t>> &breakpoints)¶ Take a map from node ID to a set of offsets at which new nodes should start (which may include 0 and 1-past-the-end, which should be ignored), break the specified nodes at those positions. Returns a map from old node ID to a map from old node start position to new node pointer in the graph. Note that the caller will have to clear and rebuild path rank data.
-
map<id_t, set<pos_t>>
vg::VG::
forwardize_breakpoints
(const map<id_t, set<pos_t>> &breakpoints)¶ Flips the breakpoints onto the forward strand.
-
void
vg::VG::
add_nodes_and_edges
(const Path &path, const map<pos_t, Node *> &node_translation, map<pair<pos_t, string>, Node *> &added_seqs, map<Node *, Path> &added_nodes, const map<id_t, size_t> &orig_node_sizes)¶ Given a path on nodes that may or may not exist, and a map from node ID in the path’s node ID space to a table of offset and actual node, add in all the new sequence and edges required by the path. The given path must not contain adjacent perfect match edits in the same mapping (the removal of which can be accomplished with the Path::simplify() function).
-
vector<Translation>
vg::VG::
make_translation
(const map<pos_t, Node *> &node_translation, const map<Node *, Path> &added_nodes, const map<id_t, size_t> &orig_node_sizes)¶ Produce a graph Translation object from information about the editing process.
-
id_t
vg::VG::
total_length_of_nodes
(void)¶ Get the total sequence length of nodes in the graph. TODO: redundant with length().
-
int
vg::VG::
node_rank
(Node *node)¶ Get the rank of the node in the protobuf array that backs the graph.
-
int
vg::VG::
node_rank
(id_t id)¶ Get the rank of the node in the protobuf array that backs the graph.
-
int
vg::VG::
left_degree
(NodeTraversal node)¶ Get the number of edges attached to the left side of a NodeTraversal.
-
int
vg::VG::
right_degree
(NodeTraversal node)¶ Get the number of edges attached to the right side of a NodeTraversal.
-
void
vg::VG::
edges_of_node
(Node *node, vector<Edge *> &edges)¶ Get the edges of the specified node, and add them to the given vector. Guaranteed to add each edge only once per call.
-
void
vg::VG::
edges_of_nodes
(set<Node *> &nodes, set<Edge *> &edges)¶ Get the edges of the specified set of nodes, and add them to the given set of edge pointers.
-
set<NodeSide>
vg::VG::
sides_to
(NodeSide side)¶ Get the sides on the other side of edges to this side of the node.
-
set<NodeSide>
vg::VG::
sides_from
(NodeSide side)¶ Get the sides on the other side of edges from this side of the node.
-
set<pair<NodeSide, bool>>
vg::VG::
sides_context
(id_t node_id)¶ Get all sides connecting to this node.
-
bool
vg::VG::
same_context
(id_t id1, id_t id2)¶ Use sides_from and sides_to to determine if both nodes have the same context.
-
bool
vg::VG::
is_ancestor_prev
(id_t node_id, id_t candidate_id)¶ Determine if the node is a prev ancestor of this one.
-
bool
vg::VG::
is_ancestor_prev
(id_t node_id, id_t candidate_id, set<id_t> &seen, size_t steps = 64)¶ Determine if the node is a prev ancestor of this one by trying to find it in a given number of steps.
-
bool
vg::VG::
is_ancestor_next
(id_t node_id, id_t candidate_id)¶ Determine if the node is a next ancestor of this one.
-
bool
vg::VG::
is_ancestor_next
(id_t node_id, id_t candidate_id, set<id_t> &seen, size_t steps = 64)¶ Determine if the node is a next ancestor of this one by trying to find it in a given number of steps.
-
id_t
vg::VG::
common_ancestor_prev
(id_t id1, id_t id2, size_t steps = 64)¶ Try to find a common ancestor by walking back up to steps from the first node.
-
id_t
vg::VG::
common_ancestor_next
(id_t id1, id_t id2, size_t steps = 64)¶ Try to find a common ancestor by walking forward up to steps from the first node.
-
set<NodeTraversal>
vg::VG::
siblings_to
(const NodeTraversal &traversal)¶ To-siblings are nodes which also have edges to them from the same nodes as this one.
-
set<NodeTraversal>
vg::VG::
siblings_from
(const NodeTraversal &traversal)¶ From-siblings are nodes which also have edges from them to the same nodes as this one.
-
set<NodeTraversal>
vg::VG::
full_siblings_to
(const NodeTraversal &trav)¶ Full to-siblings are node traversals which share exactly the same upstream
NodeSide
s.
-
set<NodeTraversal>
vg::VG::
full_siblings_from
(const NodeTraversal &trav)¶ Full from-siblings are node traversals which share exactly the same downstream
NodeSide
s.
-
void
vg::VG::
simplify_siblings
(void)¶ Remove easily-resolvable redundancy in the graph.
-
void
vg::VG::
simplify_to_siblings
(const set<set<NodeTraversal>> &to_sibs)¶ Remove easily-resolvable redundancy in the graph for all provided to-sibling sets.
-
void
vg::VG::
simplify_from_siblings
(const set<set<NodeTraversal>> &from_sibs)¶ Remove easily-resolvable redundancy in the graph for all provided from-sibling sets.
-
set<set<NodeTraversal>>
vg::VG::
transitive_sibling_sets
(const set<set<NodeTraversal>> &sibs)¶ Remove intransitive sibling sets, such as where (A, B, C) = S1 but C ∊ S2.
-
set<set<NodeTraversal>>
vg::VG::
identically_oriented_sibling_sets
(const set<set<NodeTraversal>> &sibs)¶ Remove sibling sets which don’t have identical orientation.
-
bool
vg::VG::
adjacent
(const Position &pos1, const Position &pos2)¶ Determine if pos1 occurs directly before pos2.
-
Node *
vg::VG::
create_node
(const string &seq, id_t id = 0)¶ Create a node. Use the VG class to generate ids.
-
void
vg::VG::
nonoverlapping_node_context_without_paths
(Node *node, VG &g)¶ Get the subgraph of a node and all the edges it is responsible for (where it has the minimal ID) and add it into the given VG.
-
void
vg::VG::
destroy_node
(Node *node)¶ Destroy the node at the given pointer. This pointer must point to a Node owned by the graph.
-
Node *
vg::VG::
find_node_by_name_or_add_new
(string name)¶ Find a node with the given name, or create a new one if none is found.
-
void
vg::VG::
for_each_node_parallel
(function<void(Node *)> lambda)¶ Run the given function on every node in parallel.
-
void
vg::VG::
for_each_connected_node
(Node *node, function<void(Node *)> lambda)¶ Go through all the nodes in the same connected component as the given node. Ignores relative orientation.
-
void
vg::VG::
dfs
(const function<void(NodeTraversal)> &node_begin_fn, const function<void(NodeTraversal)> &node_end_fn, const function<bool(void)> &break_fn, const function<void(Edge *)> &edge_fn, const function<void(Edge *)> &tree_fn, const function<void(Edge *)> &edge_curr_fn, const function<void(Edge *)> &edge_cross_fn, const vector<NodeTraversal> *sources, const set<NodeTraversal> *sinks, )¶ - Parameters
node_begin_fn
: Called when node orientation is first encountered.node_end_fn
: Called when node orientation goes out of scope.break_fn
: Called to check if we should stop the DFS.edge_fn
: Called when an edge is encountered.tree_fn
: Called when an edge forms part of the DFS spanning tree.edge_curr_fn
: Called when we meet an edge in the current tree component.edge_cross_fn
: Called when we meet an edge in an already-traversed tree component.sources
: Start only at these node traversals.sinks
: When hitting a sink, don’t keep walking.
Do a DFS search of the bidirected graph. A bidirected DFS starts at some root node, and traverses first all the nodes found reading out the right of that node in their appropriate relative orientations (including the root), and then all the nodes found reading left out of that node in their appropriate orientations (including the root). If any unvisited nodes are left in other connected components, the process will repeat from one such node, until all nodes have been visited in each orientation.
-
void
vg::VG::
dfs
(const function<void(NodeTraversal)> &node_begin_fn, const function<void(NodeTraversal)> &node_end_fn, const vector<NodeTraversal> *sources = NULL, const set<NodeTraversal> *sinks = NULL, )¶ Specialization of dfs for only handling nodes.
-
void
vg::VG::
dfs
(const function<void(NodeTraversal)> &node_begin_fn, const function<void(NodeTraversal)> &node_end_fn, const function<bool(void)> &break_fn)¶ Specialization of dfs for only handling nodes + break function.
-
bool
vg::VG::
empty
(void)¶ Is the graph empty?
-
const string
vg::VG::
hash
(void)¶ Generate a digest of the serialized graph.
-
void
vg::VG::
remove_null_nodes
(void)¶ Remove nodes with no sequence. These are created in some cases during the process of graph construction.
-
void
vg::VG::
remove_node_forwarding_edges
(Node *node)¶ Remove a node but connect all of its predecessor and successor nodes with new edges.
-
void
vg::VG::
remove_null_nodes_forwarding_edges
(void)¶ Remove null nodes but connect predecessors and successors, preserving structure.
-
void
vg::VG::
remove_orphan_edges
(void)¶ Remove edges for which one of the nodes is not present.
-
void
vg::VG::
remove_inverting_edges
(void)¶ Remove edges representing an inversion and edges on the reverse complement.
-
bool
vg::VG::
has_inverting_edges
(void)¶ Determine if the graph has inversions.
-
void
vg::VG::
keep_paths
(set<string> &path_names, set<string> &kept_names)¶ Keep paths in the given set of path names. Populates kept_names with the names of the paths it actually found to keep. The paths specified may not overlap. Removes all nodes and edges not used by one of the specified paths.
-
void
vg::VG::
keep_path
(string &path_name)¶
-
int
vg::VG::
path_edge_count
(list<NodeTraversal> &path, int32_t offset, int path_length)¶ Path stats. Starting from offset in the first node, how many edges do we cross? path must be nonempty and longer than the given length. offset is interpreted as relative to the first node in its on-path orientation, and is inclusive.
-
int
vg::VG::
path_end_node_offset
(list<NodeTraversal> &path, int32_t offset, int path_length)¶ Determine the offset in its last node at which the path starting at this offset in its first node ends. path must be nonempty and longer than the given length. offset is interpreted as relative to the first node in its on-path orientation, and is inclusive. Returned offset is remaining unused length in the last node touched.
-
const vector<Alignment>
vg::VG::
paths_as_alignments
(void)¶ Convert the stored paths in this graph to alignments.
-
double
vg::VG::
path_identity
(const Path &path1, const Path &path2)¶ Return percent identity between two paths (# matches / (#matches + #mismatches)). Note: uses ssw aligner, so will only work on small paths.
-
string
vg::VG::
trav_sequence
(const NodeTraversal &trav)¶ Get the sequence of a NodeTraversal.
-
vector<pair<id_t, id_t>>
vg::VG::
get_superbubbles
(SB_Input sbi)¶ Find the superbubbles in the given input graph.
-
id_t
vg::VG::
get_node_at_nucleotide
(string pathname, int nuc)¶ Takes in a pathname and the nucleotide position (like from a vcf) and returns the node id which contains that position.
-
Edge *
vg::VG::
create_edge
(Node *from, Node *to, bool from_start = false, bool to_end = false)¶ Create an edge. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.
-
Edge *
vg::VG::
create_edge
(id_t from, id_t to, bool from_start = false, bool to_end = false)¶ Create an edge. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.
-
Edge *
vg::VG::
create_edge
(NodeTraversal left, NodeTraversal right)¶ Make a left-to-right edge from the left NodeTraversal to the right one, respecting orientations. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.
-
Edge *
vg::VG::
create_edge
(NodeSide side1, NodeSide side2)¶ Make an edge connecting the given sides of nodes. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.
-
Edge *
vg::VG::
get_edge
(const NodeSide &side1, const NodeSide &side2)¶ Get a pointer to the specified edge. This can take sides in any order.
-
Edge *
vg::VG::
get_edge
(const pair<NodeSide, NodeSide> &sides)¶ Get a pointer to the specified edge. This can take sides in any order.
-
Edge *
vg::VG::
get_edge
(const NodeTraversal &left, const NodeTraversal &right)¶ Get the edge connecting the given oriented nodes in the given order.
-
void
vg::VG::
destroy_edge
(Edge *edge)¶ Destroy the edge at the given pointer. This pointer must point to an edge owned by the graph.
-
void
vg::VG::
destroy_edge
(const NodeSide &side1, const NodeSide &side2)¶ Destroy the edge between the given sides of nodes. These can be in either order.
-
void
vg::VG::
destroy_edge
(const pair<NodeSide, NodeSide> &sides)¶ Destroy the edge between the given sides of nodes. This can take sides in any order.
-
void
vg::VG::
unindex_edge_by_node_sides
(const NodeSide &side1, const NodeSide &side2)¶ Remove an edge from the node side indexes, so it doesn’t show up when you ask for the edges connected to the side of a node. Makes the edge untraversable until the indexes are rebuilt.
-
void
vg::VG::
unindex_edge_by_node_sides
(Edge *edge)¶ Remove an edge from the node side indexes, so it doesn’t show up when you ask for the edges connected to the side of a node. Makes the edge untraversable until the indexes are rebuilt.
-
void
vg::VG::
index_edge_by_node_sides
(Edge *edge)¶ Add an edge to the node side indexes. Doesn’t touch the index of edges by node pairs or the graph; those must be updated separately.
-
bool
vg::VG::
has_edge
(const NodeSide &side1, const NodeSide &side2)¶ Determine if the graph has an edge between the given node sides, which can be in either order.
-
bool
vg::VG::
has_edge
(const pair<NodeSide, NodeSide> &sides)¶ Determine if the graph has an edge. This can take sides in any order.
-
bool
vg::VG::
has_edge
(Edge *edge)¶ Determine if the graph has an edge. This can take sides in any order.
-
bool
vg::VG::
has_edge
(const Edge &edge)¶ Determine if the graph has an edge. This can take sides in any order.
-
bool
vg::VG::
has_inverting_edge
(Node *n)¶ Determine if the graph has an inverting edge on the given node.
-
bool
vg::VG::
has_inverting_edge_from
(Node *n)¶ Determine if the graph has an inverting edge from the given node.
-
bool
vg::VG::
has_inverting_edge_to
(Node *n)¶ Determine if the graph has an inverting edge to the given node.
-
void
vg::VG::
for_each_edge_parallel
(function<void(Edge *)> lambda)¶ Run the given function for each edge, in parallel.
-
void
vg::VG::
circularize
(id_t head, id_t tail)¶ Circularize a subgraph / path using the head / tail nodes.
-
void
vg::VG::
circularize
(vector<string> pathnames)¶
-
void
vg::VG::
connect_node_to_nodes
(NodeTraversal node, vector<NodeTraversal> &nodes)¶ Connect node -> nodes. Connects from the right side of the first to the left side of the second.
-
void
vg::VG::
connect_node_to_nodes
(Node *node, vector<Node *> &nodes, bool from_start = false)¶ Connect node -> nodes. You can optionally use the start of the first node instead of the end.
-
void
vg::VG::
connect_nodes_to_node
(vector<NodeTraversal> &nodes, NodeTraversal node)¶ Connect nodes -> node. Connects from the right side of the first to the left side of the second.
-
void
vg::VG::
connect_nodes_to_node
(vector<Node *> &nodes, Node *node, bool to_end = false)¶ Connect nodes -> node.
-
void
vg::VG::
divide_node
(Node *node, int pos, Node *&left, Node *&right)¶ Divide a node at a given internal position. Inserts the new nodes in the correct paths, but can’t update the ranks, so they need to be cleared and re-calculated by the caller.
-
void
vg::VG::
divide_node
(Node *node, vector<int> positions, vector<Node *> &parts)¶ Divide a node at a given internal position. This version works on a collection of internal positions, in linear time.
-
void
vg::VG::
divide_path
(map<long, id_t> &path, long pos, Node *&left, Node *&right)¶ Divide a path at a position. Also invalidates stored rank information.
-
void
vg::VG::
to_dot
(ostream &out, vector<Alignment> alignments = {}, vector<Locus> loci = {}, bool show_paths = false, bool walk_paths = false, bool annotate_paths = false, bool show_mappings = false, bool simple_mode = false, bool invert_edge_ports = false, bool color_variants = false, bool superbubble_ranking = false, bool superbubble_labeling = false, bool ultrabubble_labeling = false, bool skip_missing_nodes = false, int random_seed = 0)¶ Convert the graph to Dot format.
-
void
vg::VG::
to_dot
(ostream &out, vector<Alignment> alignments = {}, bool show_paths = false, bool walk_paths = false, bool annotate_paths = false, bool show_mappings = false, bool invert_edge_ports = false, int random_seed = 0, bool color_variants = false)¶ Convert the graph to Dot format.
-
void
vg::VG::
to_gfa
(ostream &out)¶ Convert the graph to GFA format.
-
void
vg::VG::
to_turtle
(ostream &out, const string &rdf_base_uri, bool precompress)¶ Convert the graph to Turtle format.
-
bool
vg::VG::
is_valid
(bool check_nodes = true, bool check_edges = true, bool check_paths = true, bool check_orphans = true)¶ Determine if the graph is valid or not, according to the specified criteria.
-
void
vg::VG::
sort
(void)¶ Topologically order nodes. Makes sure that Nodes appear in the Protobuf Graph object in their topological sort order.
-
void
vg::VG::
topological_sort
(deque<NodeTraversal> &l)¶ Topological sort helper function, not really meant for external use.
Order and orient the nodes in the graph using a topological sort.
We use a bidirected adaptation of Kahn’s topological sort (1962), which can handle components with no heads or tails.
L ← Empty list that will contain the sorted and oriented elements
S ← Set of nodes which have been oriented, but which have not had their downstream edges examined
N ← Set of all nodes that have not yet been put into S

while N is nonempty do
    remove a node from N, orient it arbitrarily, and add it to S
        (In practice, we use “seeds”: the heads and any nodes we have seen that had too many incoming edges)
    while S is non-empty do
        remove an oriented node n from S
        add n to tail of L
        for each node m with an edge e from n to m do
            remove edge e from the graph
            if m has no other edges to that side then
                orient m such that the side the edge comes to is first
                remove m from N
                insert m into S
            otherwise
                put an oriented m on the list of arbitrary places to start when S is empty
                    (This helps start at natural entry points to cycles)
return L (a topologically sorted order and orientation)
-
void
vg::VG::
orient_nodes_forward
(set<id_t> &nodes_flipped)¶ Use a topological sort to order and orient the nodes, and then flip some nodes around so that they are oriented the way they are in the sort. Populates nodes_flipped with the ids of the nodes that have had their orientations changed. TODO: update the paths that touch nodes that flipped around
-
void
vg::VG::
force_path_match
(void)¶ For each path, assign edits that describe a total match of the mapping to the node.
-
void
vg::VG::
fill_empty_path_mappings
(void)¶ For each path, if a mapping has no edits then make it a perfect match against a node. This is the same as force_path_match, but only for empty mappings.
-
Alignment
vg::VG::
align
(const string &sequence, Aligner &aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)¶ Align without base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.
-
Alignment
vg::VG::
align
(const Alignment &alignment, Aligner &aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)¶ Align without base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.
-
Alignment
vg::VG::
align
(const Alignment &alignment, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)¶ Align with default Aligner. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.
-
Alignment
vg::VG::
align
(const string &sequence, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)¶ Align with default Aligner. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.
-
Alignment
vg::VG::
align_qual_adjusted
(const Alignment &alignment, QualAdjAligner &qual_adj_aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)¶ Align with base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.
-
Alignment
vg::VG::
align_qual_adjusted
(const string &sequence, QualAdjAligner &qual_adj_aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)¶ Align with base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.
-
void
vg::VG::
for_each_kpath
(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(list<NodeTraversal>::iterator, list<NodeTraversal>&)> lambda)¶ Calls a function on all node-crossing paths of length up to k across node boundaries. Considers each node in forward orientation to produce the kpaths around it.
-
void
vg::VG::
for_each_kpath_parallel
(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(list<NodeTraversal>::iterator, list<NodeTraversal>&)> lambda)¶ Calls a function on all kpaths of the given node.
-
void
vg::VG::
for_each_kpath
(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(size_t, Path&)> lambda)¶ Calls a function on all node-crossing paths of length up to k across node boundaries. Considers each node in forward orientation to produce the kpaths around it.
-
void
vg::VG::
for_each_kpath_parallel
(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(size_t, Path&)> lambda)¶ Calls a function on all kpaths of the given node.
-
void
vg::VG::
for_each_kpath_of_node
(Node *node, int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(list<NodeTraversal>::iterator, list<NodeTraversal>&)> lambda)¶ Calls a function on all kpaths of the given node.
-
void
vg::VG::
for_each_kpath_of_node
(Node *n, int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(size_t, Path&)> lambda)¶ Calls a function on all kpaths of the given node.
-
void
vg::VG::
kpaths
(set<list<NodeTraversal>> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)¶ Get kpaths. TODO: what is this for?
-
void
vg::VG::
kpaths
(vector<Path> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)¶ Get kpaths. TODO: what is this for?
-
void
vg::VG::
kpaths_of_node
(Node *node, set<list<NodeTraversal>> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)¶ Get kpaths on a particular node. TODO: what is this for?
-
void
vg::VG::
kpaths_of_node
(Node *node, vector<Path> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)¶ Get kpaths on a particular node. TODO: what is this for?
-
void
vg::VG::
kpaths_of_node
(id_t node_id, vector<Path> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)¶ Get kpaths on a particular node. TODO: what is this for?
-
void
vg::VG::
prev_kpaths_from_node
(NodeTraversal node, int length, bool path_only, int edge_max, bool edge_bounding, list<NodeTraversal> postfix, set<list<NodeTraversal>> &walked_paths, const vector<string> &followed_paths, function<void(NodeTraversal)> &maxed_nodes)¶ Given an oriented start node, a length in bp, a maximum number of edges to cross, and a stack of nodes visited so far, fill in the set of paths with all the paths starting at the oriented start node and going left off its end no longer than the specified length, calling maxed_nodes on nodes which can’t be visited due to the edge-crossing limit. Produces paths ending with the specified node. TODO: postfix should not be (potentially) copied on every call.
-
void
vg::VG::
next_kpaths_from_node
(NodeTraversal node, int length, bool path_only, int edge_max, bool edge_bounding, list<NodeTraversal> prefix, set<list<NodeTraversal>> &walked_paths, const vector<string> &followed_paths, function<void(NodeTraversal)> &maxed_nodes)¶ Do the same as prev_kpaths_from_node, except going right, producing a path starting with the specified node.
-
void
vg::VG::
likelihoods
(vector<Alignment> &alignments, vector<Path> &paths, vector<long double> &likelihoods)¶
-
void
vg::VG::
nodes_prev
(NodeTraversal n, vector<NodeTraversal> &nodes)¶ Get the nodes attached to the left side of the given NodeTraversal, in their proper orientations.
-
vector<NodeTraversal>
vg::VG::
nodes_prev
(NodeTraversal n)¶ Get the nodes attached to the left side of the given NodeTraversal, in their proper orientations.
-
set<NodeTraversal>
vg::VG::
travs_to
(NodeTraversal node)¶ Get traversals before this node on the same strand. Same as nodes_prev but using set.
-
void
vg::VG::
nodes_next
(NodeTraversal n, vector<NodeTraversal> &nodes)¶ Get the nodes attached to the right side of the given NodeTraversal, in their proper orientations.
-
vector<NodeTraversal>
vg::VG::
nodes_next
(NodeTraversal n)¶ Get the nodes attached to the right side of the given NodeTraversal, in their proper orientations.
-
set<NodeTraversal>
vg::VG::
travs_from
(NodeTraversal node)¶ Get traversals after this node on the same strand. Same as nodes_next but using set.
-
set<NodeTraversal>
vg::VG::
travs_of
(NodeTraversal node)¶ Get traversals either before or after this node on the same strand.
-
int
vg::VG::
node_count_prev
(NodeTraversal n)¶ Count the nodes attached to the left side of the given NodeTraversal.
-
int
vg::VG::
node_count_next
(NodeTraversal n)¶ Count the nodes attached to the right side of the given NodeTraversal.
-
Path
vg::VG::
create_path
(const list<NodeTraversal> &nodes)¶ Create a path.
-
Path
vg::VG::
create_path
(const vector<NodeTraversal> &nodes)¶ Create a path.
-
string
vg::VG::
path_string
(const list<NodeTraversal> &nodes)¶ Get the string sequence for all the NodeTraversals on the given path.
-
string
vg::VG::
path_string
(const Path &path)¶ Get the string sequence for traversing the given path. Assumes the path covers the entirety of any nodes visited. Handles backward nodes.
-
void
vg::VG::
expand_path
(const list<NodeTraversal> &path, vector<NodeTraversal> &expanded)¶ Expand a path. TODO: what does that mean?
-
void
vg::VG::
node_starts_in_path
(const list<NodeTraversal> &path, map<Node *, int> &node_start)¶ Fill in the node_start map with the first index along the path at which each node appears. Caller is responsible for dealing with orientations.
-
bool
vg::VG::
nodes_are_perfect_path_neighbors
(NodeTraversal left, NodeTraversal right)¶ Return true if nodes share all paths and the mappings they share in these paths are adjacent, in the specified relative order and orientation.
-
bool
vg::VG::
mapping_is_total_match
(const Mapping &m)¶ Return true if the mapping completely covers the node it maps to and is a perfect match.
-
map<string, vector<Mapping>>
vg::VG::
concat_mappings_for_node_pair
(id_t id1, id_t id2)¶ Concatenate the mappings for a pair of nodes; handles multiple mappings per path.
-
map<string, vector<Mapping>>
vg::VG::
concat_mappings_for_nodes
(const list<NodeTraversal> &nodes)¶ Concatenate mappings for a list of nodes that we want to concatenate. Returns, for each path name, a vector of merged mappings, once per path traversal of the run of nodes. Those merged mappings are in the orientation of the merged node (so mappings to nodes that are traversed in reverse will have their flags toggled). We assume that all mappings on the given nodes are full-length perfect matches, and that all the nodes are perfect path neighbors.
-
void
vg::VG::
expand_path
(list<NodeTraversal> &path, vector<list<NodeTraversal>::iterator> &expanded)¶ Expand a path. TODO: what does that mean? These versions handle paths in which nodes can be traversed multiple times. Unfortunately since we’re throwing non-const iterators around, we can’t take the input path as const.
-
void
vg::VG::
node_starts_in_path
(list<NodeTraversal> &path, map<NodeTraversal *, int> &node_start)¶ Find node starts in a path. TODO: what does that mean? To get the starts out of the map this produces, you need to dereference the iterator and then get the address of the NodeTraversal (stored in the list) that you are talking about.
-
void
vg::VG::
for_each_kmer_parallel
(int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, int stride = 1, bool allow_dups = false, bool allow_negatives = false, )¶ Call a function for each kmer in the graph, in parallel.
-
void
vg::VG::
for_each_kmer
(int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, int stride = 1, bool allow_dups = false, bool allow_negatives = false, )¶ Call a function for each kmer in the graph.
-
void
vg::VG::
for_each_kmer_of_node
(Node *node, int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, int stride = 1, bool allow_dups = false, bool allow_negatives = false, )¶ Call a function for each kmer on a node.
-
void
vg::VG::
kmer_context
(string &kmer, int kmer_size, bool path_only, int edge_max, bool forward_only, list<NodeTraversal> &path, list<NodeTraversal>::iterator start_node, int32_t start_offset, list<NodeTraversal>::iterator &end_node, int32_t &end_offset, set<tuple<char, id_t, bool, int32_t>> &prev_positions, set<tuple<char, id_t, bool, int32_t>> &next_positions)¶ For the given kmer of the given length starting at the given offset into the given Node along the given path, fill in end_node and end_offset with where the end of the kmer falls (counting from the right side of the NodeTraversal), prev_chars with the characters that precede it, next_chars with the characters that follow it, prev_ and next_positions with the ((node ID, orientation), offset) pairs of the places you can come from/go next (from the right end of the kmer). Refuses to follow more than edge_max edges. Offsets are in the path orientation. Meant for gcsa2.
-
void
vg::VG::
gcsa_handle_node_in_graph
(Node *node, int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, Node *head_node, Node *tail_node, function<void(KmerPosition&)> lambda)¶ Do the GCSA2 kmers for a node. head_node and tail_node must both be non-null, but only one of those nodes actually needs to be in the graph. They will be examined directly to get their representative characters. They also don’t need to be actually owned by the graph; they can be copies.
-
void
vg::VG::
for_each_gcsa_kmer_position_parallel
(int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, id_t &head_id, id_t &tail_id, function<void(KmerPosition&)> lambda)¶ Call a function for each GCSA2 kmer position in parallel. GCSA kmers are the kmers in the graph with each node existing in both its forward and reverse-complement orientation. Node IDs in the GCSA graph are 2 * original node ID, +1 if the GCSA node represents the reverse complement, and +0 if it does not. Non-reversing edges link the forward copy of the from node to the forward copy of the to node, and similarly for the reverse complement copies, while reversing edges link the forward copy of the from node to the reverse complement copy of the to node, and vice versa. This allows us to index both the forward and reverse strands of every node, and to deal with GCSA’s lack of support for reversing edges, with the same trick. Note that start_tail_id, if zero, will be replaced with the ID actually used for the start/end node before lambda is ever called.
-
void
vg::VG::
get_gcsa_kmers
(int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, const function<void(vector<gcsa::KMer>&, bool)> &handle_kmers, id_t &head_id, id_t &tail_id, )¶ Get the GCSA2 kmers in the graph.
-
void
vg::VG::
write_gcsa_kmers
(int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, ostream &out, id_t &head_id, id_t &tail_id)¶ Write the GCSA2 kmer file for the graph to the given stream.
-
string
vg::VG::
write_gcsa_kmers_to_tmpfile
(int kmer_size, bool paths_only, bool forward_only, id_t &head_id, id_t &tail_id, size_t doubling_steps = 2, size_t size_limit = 200, const string &base_file_name = ".vg-kmers-tmp-")¶ Write the GCSA2 kmers to a temp file with the given base. Return the name of the file.
-
void
vg::VG::
build_gcsa_lcp
(gcsa::GCSA *&gcsa, gcsa::LCPArray *&lcp, int kmer_size, bool paths_only, bool forward_only, size_t doubling_steps = 2, size_t size_limit = 200, const string &base_file_name = ".vg-kmers-tmp-")¶ Construct the GCSA2 index for this graph.
-
void
vg::VG::
prune_complex
(int path_length, int edge_max, Node *head_node, Node *tail_node)¶ Take all nodes that would introduce paths of > edge_max edge crossings, remove them, and link their neighbors to head_node or tail_node depending on which direction the path extension was stopped. For pruning graph prior to indexing with gcsa2.
-
void
vg::VG::
prune_complex_with_head_tail
(int path_length, int edge_max)¶ Wrap the graph with heads and tails before doing the prune. Utility function for preparing for indexing.
-
Alignment
vg::VG::
random_read
(size_t read_len, mt19937 &rng, id_t min_id, id_t max_id, bool either_strand)¶ Generate random reads. Note that even if either_strand is false, having backward nodes in the graph will result in some reads from the global reverse strand.
-
void
vg::VG::
head_nodes
(vector<Node *> &nodes)¶ Get the head nodes (nodes with edges only to their right sides). These are required to be oriented forward.
-
vector<Node *>
vg::VG::
head_nodes
(void)¶ Get the head nodes (nodes with edges only to their right sides). These are required to be oriented forward.
-
int32_t
vg::VG::
distance_to_head
(NodeTraversal node, int32_t limit = 1000)¶ Get the distance from head of node to beginning of graph, or -1 if limit exceeded.
-
int32_t
vg::VG::
distance_to_head
(NodeTraversal node, int32_t limit, int32_t dist, set<NodeTraversal> &seen)¶ Get the distance from head of node to beginning of graph, or -1 if limit exceeded.
-
vector<Node *>
vg::VG::
tail_nodes
(void)¶ Get the tail nodes (nodes with edges only to their left sides). These are required to be oriented forward.
-
void
vg::VG::
tail_nodes
(vector<Node *> &nodes)¶ Get the tail nodes (nodes with edges only to their left sides). These are required to be oriented forward.
-
int32_t
vg::VG::
distance_to_tail
(NodeTraversal node, int32_t limit = 1000)¶ Get the distance from tail of node to end of graph, or -1 if limit exceeded.
-
int32_t
vg::VG::
distance_to_tail
(NodeTraversal node, int32_t limit, int32_t dist, set<NodeTraversal> &seen)¶ Get the distance from tail of node to end of graph, or -1 if limit exceeded.
-
int32_t
vg::VG::
distance_to_tail
(id_t id, int32_t limit = 1000)¶ Get the distance from tail of node to end of graph, or -1 if limit exceeded.
-
void
vg::VG::
collect_subgraph
(Node *node, set<Node *> &subgraph)¶ Collect the subgraph of a Node. TODO: what does that mean?
-
Node *
vg::VG::
join_heads
(void)¶ Join head nodes of graph to common null node, creating a new single head.
-
void
vg::VG::
join_heads
(Node *node, bool from_start = false)¶ Join head nodes of graph to specified node. Optionally from the start/to the end of the new node.
-
void
vg::VG::
join_tails
(Node *node, bool to_end = false)¶ Join tail nodes of graph to specified node. Optionally from the start/to the end of the new node.
-
void
vg::VG::
wrap_with_null_nodes
(void)¶ Add singular head and tail null nodes to graph.
-
void
vg::VG::
add_start_end_markers
(int length, char start_char, char end_char, Node *&start_node, Node *&end_node, id_t start_id = 0, id_t end_id = 0)¶ Add a start node and an end node, where all existing heads in the graph are connected to the start node, and all existing tails in the graph are connected to the end node. Any connected components in the graph which do not have either are connected to the start at an arbitrary point, and the end node from nodes going to that arbitrary point. If start_node or end_node is null, a new node will be created. Otherwise, the passed node will be used. Note that this visits every node, to make sure it is attached to all connected components. Note that if a graph has, say, heads but no tails, the start node will be attached but the end node will be free-floating.
Public Members
-
string
vg::VG::
name
¶ Name of the graph.
-
pair_hash_map<pair<NodeSide, NodeSide>, Edge *>
vg::VG::
edge_by_sides
¶ Edges by sides of Nodes they connect. Since duplicate edges are not permitted, two edges cannot connect the same pair of node sides. Each edge is indexed here with the smaller NodeSide first. The actual node order is recorded in the Edge object.
-
hash_map<Node *, int>
vg::VG::
node_index
¶ nodes by position in nodes repeated field. this is critical to allow fast deletion of nodes
Private Functions
-
void
vg::VG::
_for_each_kmer
(int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, bool parallel, int stride, bool allow_dups, bool allow_negatives, Node *node = nullptr, )¶ Call the given function on each kmer. If parallel is specified, goes through nodes one per thread. If node is not null, looks only at kmers of that specific node.
-
Alignment
vg::VG::
align
(const Alignment &alignment, Aligner *aligner, QualAdjAligner *qual_adj_aligner, size_t max_query_graph_ratio, bool print_score_matrices)¶ Private method to funnel other align functions into.
-
void
vg::VG::
init
(void)¶ setup, ensures that gssw == NULL on startup
-
void
- class
- #include <vg_set.hpp>
Public Functions
-
vg::VGset::
VGset
()¶
-
vg::VGset::
VGset
(vector<string> &files)¶
-
int64_t
vg::VGset::
merge_id_space
(void)¶
-
xg::XG
vg::VGset::
to_xg
(bool store_threads = false)¶
-
xg::XG
vg::VGset::
to_xg
(bool store_threads, const regex &paths_to_take, map<string, Path> &removed_paths)¶
-
void
vg::VGset::
index_kmers
(Index &index, int kmer_size, bool path_only, int edge_max, int stride = 1, bool allow_negatives = false)¶
-
void
vg::VGset::
for_each_kmer_parallel
(const function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> &lambda, int kmer_size, bool path_only, int edge_max, int stride, bool allow_dups, bool allow_negatives = false, )¶
-
void
vg::VGset::
write_gcsa_out
(ostream &out, int kmer_size, bool path_only, bool forward_only, int64_t head_id = 0, int64_t tail_id = 0)¶
-
void
vg::VGset::
write_gcsa_kmers_binary
(ostream &out, int kmer_size, bool path_only, bool forward_only, int64_t head_id = 0, int64_t tail_id = 0)¶
-
void
vg::VGset::
get_gcsa_kmers
(int kmer_size, bool path_only, bool forward_only, const function<void(vector<gcsa::KMer>&, bool)> &handle_kmers, int64_t head_id = 0, int64_t tail_id = 0, )¶
-
vector<string>
vg::VGset::
write_gcsa_kmers_binary
(int kmer_size, bool path_only, bool forward_only, int64_t head_id = 0, int64_t tail_id = 0)¶
Private Functions
-
void
vg::VGset::
for_each_gcsa_kmer_position_parallel
(int kmer_size, bool path_only, bool forward_only, int64_t &head_id, int64_t &tail_id, function<void(KmerPosition&)> lambda)¶
-
- struct
- #include <genotypekit.hpp>
Public Functions
-
vg::SiteTraversal::Visit::
Visit
(Node *node, bool backward = false)¶ Make a Visit from a node and an orientation.
-
vg::SiteTraversal::Visit::
Visit
(NestedSite *child, bool backward = false)¶ Make a Visit from a child site and an orientation.
-
vg::SiteTraversal::Visit::
Visit
(const NodeTraversal &traversal)¶ Make a Visit from a NodeTraversal.
Public Members
-
NestedSite *
vg::SiteTraversal::Visit::
child
¶
-
bool
vg::SiteTraversal::Visit::
backward
¶
-
- namespace
- namespace
- namespace
Typedefs
- typedef
Functions
-
static long double
glenn2vcf::
gammaln
(long double x)¶
-
static long double
glenn2vcf::
factorial
(int n)¶
-
long double
glenn2vcf::
poissonp
(int observed, int expected)¶
-
Support
glenn2vcf::
operator+
(const Support &one, const Support &other)¶ Add two Support values together, accounting for strand.
- template <typename Scalar>
-
Support
glenn2vcf::
operator*
(const Support &support, const Scalar &scale)¶ Scale a support by a factor.
- template <typename Scalar>
-
Support
glenn2vcf::
operator*
(const Scalar &scale, const Support &support)¶ Scale a support by a factor, the other way
- template <typename Scalar>
-
Support
glenn2vcf::
operator/
(const Support &support, const Scalar &scale)¶ Divide a support by a factor.
-
std::ostream &
glenn2vcf::
operator<<
(std::ostream &stream, const Support &support)¶ Allow printing a support.
-
Support
glenn2vcf::
support_min
(const Support &a, const Support &b)¶ Get the minimum support of a pair of supports, by taking the min in each orientation.
-
std::string
glenn2vcf::
char_to_string
(const char &letter)¶ Make a letter into a full string because apparently that’s too fancy for the standard library.
-
void
glenn2vcf::
write_vcf_header
(std::ostream &stream, std::string &sample_name, std::string &contig_name, size_t contig_size, int min_mad_for_filter)¶ Write a minimal VCF header for a single-sample file.
-
void
glenn2vcf::
create_ref_allele
(vcflib::Variant &variant, const std::string &allele)¶ Create the reference allele for an empty vcflib Variant, since apparently there’s no method for that already. Must be called before any alt alleles are added.
-
int
glenn2vcf::
add_alt_allele
(vcflib::Variant &variant, const std::string &allele)¶ Add a new alt allele to a vcflib Variant, since apparently there’s no method for that already.
If that allele already exists in the variant, does not add it again.
Returns the allele number (0, 1, 2, etc.) corresponding to the given allele string in the given variant.
-
bool
glenn2vcf::
can_write_alleles
(vcflib::Variant &variant)¶ Return true if a variant may be output, or false if this variant is valid but the GATK might choke on it.
Mostly used to throw out variants with very long alleles, because GATK has an allele length limit. How alleles that really are 1 megabase deletions are to be specified to GATK is left as an exercise to the reader.
-
bool
glenn2vcf::
mapping_is_perfect_match
(const vg::Mapping &mapping)¶ Return true if a mapping is a perfect match, and false if it isn’t.
-
size_t
glenn2vcf::
bp_length
(const std::list<vg::NodeTraversal> &path)¶ Get the length of a path through nodes, in base pairs.
-
Support
glenn2vcf::
min_support_in_path
(vg::VG &graph, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, const std::list<vg::NodeTraversal> &path)¶ Get the minimum support of all nodes and edges in path
-
std::set<std::pair<size_t, std::list<vg::NodeTraversal>>>
glenn2vcf::
bfs_left
(vg::VG &graph, vg::NodeTraversal node, const ReferenceIndex &index, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, int64_t maxDepth = 10, bool stopIfVisited = false)¶ Do a breadth-first search left from the given node traversal, and return lengths and paths starting at the given node and ending on the indexed reference path. Refuses to visit nodes with no support.
-
vg::NodeTraversal
glenn2vcf::
flip
(vg::NodeTraversal toFlip)¶ Flip a NodeTraversal around and return the flipped copy.
-
std::set<std::pair<size_t, std::list<vg::NodeTraversal>>>
glenn2vcf::
bfs_right
(vg::VG &graph, vg::NodeTraversal node, const ReferenceIndex &index, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, int64_t maxDepth = 10, bool stopIfVisited = false)¶ Do a breadth-first search right from the given node traversal, and return lengths and paths starting at the given node and ending on the indexed reference path.
-
std::pair<Support, std::vector<vg::NodeTraversal>>
glenn2vcf::
find_bubble
(vg::VG &graph, vg::Node *node, vg::Edge *edge, const ReferenceIndex &index, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, int64_t maxDepth, size_t max_bubble_paths)¶ Given a vg graph, an edge or node in the graph, and an index for the reference path, look out from the edge or node in both directions to find a shortest bubble relative to the path, with a consistent orientation. The bubble may not visit the same node twice.
Exactly one of edge and node must be null, and one not null.
Takes a max depth for the searches producing the paths on each side.
Return the ordered and oriented nodes in the bubble, with the outer nodes being oriented forward along the named path, and with the first node coming before the last node in the reference. Also return the minimum support found on any edge or node in the bubble (including the reference node endpoints and their edges which aren’t stored in the path)
-
ReferenceIndex
glenn2vcf::
trace_reference_path
(vg::VG &vg, std::string refPathName, bool verbose)¶ Trace out the reference path in the given graph named by the given name. Returns a structure with useful indexes of the reference.
-
std::string
glenn2vcf::
get_pileup_line
(const std::map<int64_t, vg::NodePileup> &nodePileups, const std::set<std::pair<int64_t, size_t>> &refCrossreferences, const std::set<std::pair<int64_t, size_t>> &altCrossreferences)¶ Given a collection of pileups by original node ID, and a set of original node id:offset cross-references in both ref and alt categories, produce a VCF comment line giving the pileup for each of those positions on those nodes. Includes a trailing newline if nonempty.
TODO: VCF comments aren’t really a thing.
-
void
glenn2vcf::
parse_tsv
(const std::string &tsvFile, vg::VG &vg, std::map<vg::Node *, Support> &nodeReadSupport, std::map<vg::Edge *, Support> &edgeReadSupport, std::map<vg::Node *, double> &nodeLikelihood, std::map<vg::Edge *, double> &edgeLikelihood, std::set<vg::Edge *> &deletionEdges, std::map<vg::Node *, std::pair<int64_t, size_t>> &nodeSources, std::set<vg::Node *> &knownNodes, std::set<vg::Edge *> &knownEdges, bool verbose)¶ Parse tsv into an internal format, where we track status and copy number for nodes and edges.
-
int
glenn2vcf::
call2vcf
(vg::VG &vg, const std::string &glennFile, std::string refPathName, std::string contigName, std::string sampleName, int64_t variantOffset, int64_t maxDepth, int64_t lengthOverride, std::string pileupFilename, double minFractionForCall, double maxHetBias, double maxRefHetBias, double indelBiasMultiple, size_t minTotalSupportForCall, size_t refBinSize, size_t expCoverage, bool suppress_overlaps, bool useAverageSupport, bool multiallelic_support, size_t max_ref_length, size_t max_bubble_paths, size_t min_mad_for_filter, bool verbose)¶
-
int
glenn2vcf::
call2vcf
(vg::VG &vg, const string &glennfile, string refPathName, string contigName, string sampleName, int64_t variantOffset, int64_t maxDepth, int64_t lengthOverride, string pileupFilename, double minFractionForCall, double maxHetBias, double maxRefHetBias, double indelBiasMultiple, size_t minTotalSupportForCall, size_t refBinSize, size_t expCoverage, bool suppress_overlaps, bool useAverageSupport, bool multiallelic_support, size_t max_ref_length, size_t max_bubble_paths, size_t min_mad_for_filter, bool verbose)¶
- namespace
- namespace
- namespace
- namespace
- namespace
Functions
- template <typename T>
-
bool
stream::
write
(std::ostream &out, uint64_t count, const std::function<T(uint64_t)> &lambda)¶
- template <typename T>
-
bool
stream::
write_buffered
(std::ostream &out, std::vector<T> &buffer, uint64_t buffer_limit)¶
- template <typename T>
-
void
stream::
for_each
(std::istream &in, const std::function<void(T&)> &lambda, const std::function<void(uint64_t)> &handle_count)¶
- template <typename T>
-
void
stream::
for_each
(std::istream &in, const std::function<void(T&)> &lambda)¶
- namespace
- namespace
- namespace
Typedefs
- typedef
- typedef
- typedef
-
using
vg::real_t = typedef long double
- typedef
Represents a Node ID. ID type is a 64-bit signed int.
- typedef
Represents an offset along the sequence of a Node. Offsets are size_t.
Enums
-
enum type
vg::
MappingQualityMethod
¶ Values:
Functions
-
bam_hdr_t *
vg::
hts_file_header
(string &filename, string &header)¶
-
bam_hdr_t *
vg::
hts_string_header
(string &header, map<string, int64_t> &path_length, map<string, string> &rg_sample)¶
-
bool
vg::
get_next_interleaved_alignment_pair_from_fastq
(gzFile fp, char *buffer, size_t len, Alignment &mate1, Alignment &mate2)¶
-
bool
vg::
get_next_alignment_pair_from_fastqs
(gzFile fp1, gzFile fp2, char *buffer, size_t len, Alignment &mate1, Alignment &mate2)¶
-
size_t
vg::
fastq_paired_interleaved_for_each_parallel
(string &filename, function<void(Alignment&, Alignment&)> lambda)¶
-
size_t
vg::
fastq_paired_two_files_for_each_parallel
(string &file1, string &file2, function<void(Alignment&, Alignment&)> lambda)¶
-
size_t
vg::
fastq_paired_interleaved_for_each
(string &filename, function<void(Alignment&, Alignment&)> lambda)¶
-
size_t
vg::
fastq_paired_two_files_for_each
(string &file1, string &file2, function<void(Alignment&, Alignment&)> lambda)¶
-
void
vg::
gam_paired_interleaved_for_each_parallel
(ifstream &in, function<void(Alignment&, Alignment&)> lambda)¶
-
void
vg::
parse_rg_sample_map
(char *hts_header, map<string, string> &rg_sample)¶
-
short
vg::
quality_char_to_short
(char c)¶
-
char
vg::
quality_short_to_char
(short i)¶
-
string
vg::
string_quality_short_to_char
(const string &quality)¶
-
string
vg::
string_quality_char_to_short
(const string &quality)¶
-
bam1_t *
vg::
alignment_to_bam
(const string &sam_header, const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const string &cigar, const string &mateseq, const int32_t matepos, const int32_t tlen)¶
-
string
vg::
alignment_to_sam
(const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const string &cigar, const string &mateseq, const int32_t matepos, const int32_t tlen)¶
-
string
vg::
cigar_string
(vector<pair<int, char>> &cigar)¶
-
vector<Alignment>
vg::
reverse_complement_alignments
(const vector<Alignment> &alns, const function<int64_t(int64_t)> &node_length)¶
-
Alignment
vg::
reverse_complement_alignment
(const Alignment &aln, const function<int64_t(id_t)> &node_length)¶
-
void
vg::
translate_nodes
(Alignment &a, const map<id_t, pair<id_t, bool>> &ids, const std::function<size_t(int64_t)> &node_length)¶
-
void
vg::
flip_nodes
(Alignment &a, const set<int64_t> &ids, const std::function<size_t(int64_t)> &node_length)¶
-
static void
vg::
compute_side_components
(VG &graph, vector<SideSet> &components, Side2Component &side_to_component)¶
-
void *
vg::
mergeNodeObjects
(void *a, void *b)¶
-
pair<stCactusGraph *, stCactusNode *>
vg::
vg_to_cactus
(VG &graph)¶
-
static void
vg::
ultrabubble_recurse
(VG &graph, stList *chains_list, NodeSide side1, NodeSide side2, BubbleTree::Node *out_node)¶
-
BubbleTree *
vg::
ultrabubble_tree
(VG &graph)¶
-
VG
vg::
cactus_to_vg
(stCactusGraph *cactus_graph)¶
-
ostream &
vg::
operator<<
(ostream &os, const NodeDivider::NodeMap &nm)¶
-
ostream &
vg::
operator<<
(ostream &os, NodeDivider::Entry entry)¶
-
ostream &
vg::
operator<<
(ostream &os, const Caller::NodeOffSide &no)¶
-
StrandSupport
vg::
minSup
(vector<StrandSupport> &s)¶
-
StrandSupport
vg::
avgSup
(vector<StrandSupport> &s)¶
-
ostream &
vg::
operator<<
(ostream &os, const StrandSupport &sup)¶
-
real_t
vg::
gamma_ln
(real_t x)¶ Calculate the natural log of the gamma function of the given argument.
-
real_t
vg::
factorial_ln
(int n)¶ Calculate the natural log of the factorial of the given integer. TODO: replace with a cache or giant lookup table from Freebayes.
-
real_t
vg::
pow_ln
(real_t m, int n)¶ Raise a log probability to a power
-
real_t
vg::
choose_ln
(int n, int k)¶ Compute the number of ways to select k items from a collection of n distinguishable items, ignoring order. Returns the natural log of the (integer) result.
- template <typename ProbIn>
-
real_t
vg::
multinomial_sampling_prob_ln
(const vector<ProbIn> &probs, const vector<int> &obs)¶ Get the probability for sampling the counts in obs from a set of categories weighted by the probabilities in probs. Works for both double and real_t probabilities. Also works for binomials.
- template <typename ProbIn>
-
real_t
vg::
binomial_cmf_ln
(ProbIn success_logprob, size_t trials, size_t successes)¶ Compute the probability of having the given number of successes or fewer in the given number of trials, with the given success probability. Returns the resulting log probability.
-
double
vg::
entropy
(string &st)¶
-
string
vg::
allele_to_string
(VG &graph, const Path &allele)¶ Turn the given path (which must be a thread) into an allele. Drops the first and last mappings and looks up the sequences for the nodes of the others.
- template <typename T>
-
void
vg::
set_intersection
(const unordered_set<T> &set_1, const unordered_set<T> &set_2, unordered_set<T> *out_intersection)¶
-
void
vg::
create_ref_allele
(vcflib::Variant &variant, const std::string &allele)¶ Create the reference allele for an empty vcflib Variant, since apparently there’s no method for that already. Must be called before any alt alleles are added.
-
int
vg::
add_alt_allele
(vcflib::Variant &variant, const std::string &allele)¶ Add a new alt allele to a vcflib Variant, since apparently there’s no method for that already.
If that allele already exists in the variant, does not add it again.
Returns the allele number (0, 1, 2, etc.) corresponding to the given allele string in the given variant.
-
void
vg::
node_path_position
(int64_t id, string &path_name, int64_t &position, bool backward, int64_t &offset)¶
-
const string
vg::
mems_to_json
(const vector<MaximalExactMatch> &mems)¶
-
const int
vg::
balanced_stride
(int read_length, int kmer_size, int stride)¶
-
const vector<string>
vg::
balanced_kmers
(const string &seq, const int kmer_size, const int stride)¶
-
bool
vg::
operator==
(const MaximalExactMatch &m1, const MaximalExactMatch &m2)¶
-
bool
vg::
operator<
(const MaximalExactMatch &m1, const MaximalExactMatch &m2)¶
-
ostream &
vg::
operator<<
(ostream &out, const NodeTraversal &nodetraversal)¶ Print the given NodeTraversal.
-
Mapping
vg::
reverse_complement_mapping
(const Mapping &m, const function<int64_t(id_t)> &node_length)¶
-
Path
vg::
path_from_node_traversals
(const list<NodeTraversal> &traversals)¶
-
pos_t
vg::
make_pos_t
(id_t id, bool is_rev, off_t off)¶ Create a pos_t from a Node ID, an orientation flag, and an offset.
-
Position
vg::
make_position
(id_t id, bool is_rev, off_t off)¶ Create a Position from a Node ID, an orientation flag, and an offset.
-
pos_t
vg::
reverse
(const pos_t &pos, size_t node_length)¶ Reverse a pos_t and get a pos_t at the same base, going the other direction.
-
Position
vg::
reverse
(const Position &pos, size_t node_length)¶ Reverse a Position and get a Position at the same base, going the other direction.
-
size_t
vg::
xg_cached_node_length
(id_t id, xg::XG *xgidx, LRUCache<id_t, Node> &node_cache)¶ Get the length of a Node from an xg::XG index, with caching of deserialized nodes.
-
char
vg::
xg_cached_pos_char
(pos_t pos, xg::XG *xgidx, LRUCache<id_t, Node> &node_cache)¶ Get the character at a position in an xg::XG index, with caching of deserialized nodes.
-
map<pos_t, char>
vg::
xg_cached_next_pos_chars
(pos_t pos, xg::XG *xgidx, LRUCache<id_t, Node> &node_cache)¶ Get the characters at positions after the given position from an xg::XG index, with caching of deserialized nodes.
-
void
vg::
parse_region
(string &region, string &startSeq, int &startPos, int &stopPos)¶
-
bool
vg::
is_match
(const Translation &translation)¶
-
char
vg::
reverse_complement
(const char &c)¶
-
string
vg::
reverse_complement
(const string &seq)¶
-
int
vg::
get_thread_count
(void)¶
-
std::vector<std::string> &
vg::
split_delims
(const std::string &s, const std::string &delims, std::vector<std::string> &elems)¶
-
string
vg::
wrap_text
(const string &str, size_t width)¶
-
bool
vg::
allATGC
(const string &s)¶
-
string
vg::
nonATGCNtoN
(const string &s)¶
-
string
vg::
tmpfilename
(const string &base)¶
-
string
vg::
get_or_make_variant_id
(vcflib::Variant variant)¶
-
string
vg::
make_variant_id
(vcflib::Variant variant)¶
-
void
vg::
get_input_file
(int &optind, int argc, char **argv, function<void(istream&)> callback)¶
-
string
vg::
get_input_file_name
(int &optind, int argc, char **argv)¶
-
void
vg::
get_input_file
(const string &file_name, function<void(istream&)> callback)¶
-
bool
vg::
is_number
(const string &s)¶
- template <typename T>
-
double
vg::
stdev
(const T &v)¶
-
double
vg::
prob_to_logprob
(double prob)¶
-
double
vg::
logprob_to_prob
(double logprob)¶
-
double
vg::
logprob_add
(double logprob1, double logprob2)¶
-
double
vg::
logprob_invert
(double logprob)¶
-
double
vg::
phred_to_prob
(int phred)¶
-
int
vg::
prob_to_phred
(double prob)¶
-
double
vg::
phred_to_logprob
(int phred)¶
-
int
vg::
logprob_to_phred
(double logprob)¶
- template <typename T, typename V>
-
set<T>
vg::
map_keys_to_set
(const map<T, V> &m)¶
- template <typename Collection>
-
Collection::value_type
vg::
sum
(const Collection &collection)¶ Compute the sum of the values in a collection. Values must be default-constructible (like numbers are).
- template <typename Collection>
-
Collection::value_type
vg::
logprob_sum
(const Collection &collection)¶ Compute the sum of the values in a collection, where the values are log probabilities and the result is the log of the total probability. Items must be convertible to/from doubles for math.
-
static void
vg::
triple_to_vg
(void *user_data, raptor_statement *triple)¶
Variables
-
const char *const
vg::
BAM_DNA_LOOKUP
¶
-
const int8_t
vg::
default_match
¶
-
const int8_t
vg::
default_mismatch
¶
-
const int8_t
vg::
default_gap_open
¶
-
const int8_t
vg::
default_gap_extension
¶
-
const int8_t
vg::
default_max_scaled_score
¶
-
const uint8_t
vg::
default_max_qual_score
¶
-
const double
vg::
default_gc_content
¶
-
const char
vg::
complement
[256]¶
-
const char *
vg::
VG_VERSION_STRING
¶
- namespace
- namespace
Functions
-
int
vg::unittest::
run_unit_tests
(int argc, char **argv)¶ Take the original argc and argv from a
vg unittest
command-line call and run the unit tests. We keep this in its own CPP/HPP to keep our unit test library from being a dependency of main.o and other real application code. Passes the args along to the unit test system.
Returns exit code 0 on success, other codes on failure.
-
int
-
file
alignment.cpp
- #include “alignment.hpp”#include “stream.hpp”
-
file
alignment.hpp
- #include <iostream>#include <functional>#include <zlib.h>#include “utility.hpp”#include “path.hpp”#include “position.hpp”#include “vg.pb.h”#include “htslib/hfile.h”#include “htslib/hts.h”#include “htslib/sam.h”#include “htslib/vcf.h”
-
file
banded_global_aligner.cpp
- #include “banded_global_aligner.hpp”#include “json2pb.h”
-
file
banded_global_aligner.hpp
- #include <stdio.h>#include <ctype.h>#include <iostream>#include <vector>#include <unordered_set>#include <unordered_map>#include <list>#include “vg.pb.h”
-
file
bin2ascii.h
- #include <string>#include <stdexcept>
Defines
-
__BIN2ASCII_H__
¶
-
-
file
bubbles.cpp
- #include <unordered_set>#include “bubbles.hpp”#include “vg.hpp”#include “sonLib.h”#include “stCactusGraphs.h”
-
file
bubbles.hpp
- #include <vector>#include <map>#include “types.hpp”#include “utility.hpp”#include “nodeside.hpp”#include “DetectSuperBubble.hpp”
Typedefs
- typedef
- typedef
-
file
call2vcf.cpp
- #include <iostream>#include <fstream>#include <sstream>#include <regex>#include <set>#include <utility>#include <algorithm>#include <getopt.h>#include “vg.hpp”#include “index.hpp”#include “Variant.h”#include “genotypekit.hpp”
-
file
caller.cpp
- #include <cstdlib>#include <stdexcept>#include “json2pb.h”#include “caller.hpp”#include “stream.hpp”
-
file
caller.hpp
- #include <iostream>#include <algorithm>#include <functional>#include <cmath>#include <limits>#include <unordered_set>#include <tuple>#include “vg.pb.h”#include “vg.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “pileup.hpp”
-
file
colors.hpp
- #include <vector>#include <random>
-
file
constructor.cpp
- #include “vg.hpp”#include “constructor.hpp”#include <set>#include <tuple>#include <list>#include <algorithm>#include <memory>
-
file
constructor.hpp
- #include <vector>#include <set>#include <map>#include “types.hpp”#include “progressive.hpp”#include “vg.pb.h”#include “Variant.h”#include “Fasta.h”
constructor.hpp: defines a tool class used for constructing VG graphs from VCF files.
-
file
convert.hpp
- #include <sstream>
-
file
deconstructor.cpp
- #include “deconstructor.hpp”#include “bubbles.hpp”
-
file
deconstructor.hpp
- #include <vector>#include <set>#include <array>#include <list>#include <string>#include <iostream>#include <unordered_map>#include <map>#include <climits>#include <queue>#include <fstream>#include <cstdlib>#include <sstream>#include <stack>#include “Variant.h”#include “index.hpp”#include “path.hpp”#include “vg.hpp”#include “vg.pb.h”#include “Fasta.h”#include “xg.hpp”#include “position.hpp”#include “vcfheader.hpp”
Deconstruct is getting rewritten. New functionality: detect superbubbles and bubbles; fix the command line interface; harmonize on XG / raw graph (i.e. deprecate index); use unroll/DAGify if needed to avoid cycles.
Much of this is taken from Brankovic’s “Linear-Time Superbubble Identification Algorithm for Genome Assembly”.
-
file
distributions.hpp
- #include <map>#include <cmath>#include “utility.hpp”
-
file
edit.cpp
- #include “edit.hpp”#include “utility.hpp”
-
file
edit.hpp
- #include “vg.pb.h”#include <utility>#include <iostream>#include “json2pb.h”
-
file
entropy.cpp
- #include “entropy.hpp”
-
file
entropy.hpp
- #include <iostream>#include <set>#include <vector>#include <string>#include <cmath>
-
file
filter.cpp
- #include “filter.hpp”
-
file
filter.hpp
- #include <vector>#include <cstdlib>#include <iostream>#include <unordered_map>#include <sstream>#include <string>#include “vg.hpp”#include “xg.hpp”#include “vg.pb.h”
Provides a way to filter Edits contained within Alignments. This can be used to clean out sequencing errors and to find high-quality candidates for variant calling.
-
file
genotypekit.cpp
- #include “genotypekit.hpp”
-
file
genotypekit.hpp
- #include <iostream>#include <algorithm>#include <functional>#include <cmath>#include <limits>#include <unordered_set>#include <list>#include “vg.pb.h”#include “vg.hpp”#include “translator.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”
-
file
genotyper.cpp
- #include <cstdint>#include “genotyper.hpp”#include “bubbles.hpp”#include “distributions.hpp”
-
file
genotyper.hpp
- #include <iostream>#include <algorithm>#include <functional>#include <cmath>#include <limits>#include <unordered_set>#include <list>#include “vg.pb.h”#include “vg.hpp”#include “translator.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”
-
file
gssw_aligner.cpp
- #include “gssw_aligner.hpp”#include “json2pb.h”
Functions
-
double
add_log
(double log_x, double log_y)¶
-
double
-
file
gssw_aligner.hpp
- #include <cmath>#include <algorithm>#include <vector>#include <set>#include <string>#include <unordered_map>#include “gssw.h”#include “vg.pb.h”#include “Variant.h”#include “Fasta.h”#include “path.hpp”#include “banded_global_aligner.hpp”
-
file
hash_map.hpp
- #include “sparsehash/sparse_hash_map”#include “sparsehash/dense_hash_map”
-
file
index.cpp
- #include “index.hpp”
-
file
index.cpp
- #include <omp.h>#include <unistd.h>#include <getopt.h>#include <string>#include <vector>#include <regex>#include “subcommand.hpp”#include ”../vg.hpp”#include ”../index.hpp”#include ”../stream.hpp”#include ”../vg_set.hpp”#include ”../utility.hpp”#include “gcsa.h”
Variables
-
Subcommand vg_construct("index","index graphs or alignments for random access or mapping", main_index)
-
-
file
index.hpp
- #include <iostream>#include <exception>#include <sstream>#include <climits>#include “rocksdb/db.h”#include “rocksdb/env.h”#include “rocksdb/options.h”#include “rocksdb/write_batch.h”#include “rocksdb/memtablerep.h”#include “rocksdb/statistics.h”#include “rocksdb/cache.h”#include “rocksdb/slice_transform.h”#include “rocksdb/table.h”#include “rocksdb/filter_policy.h”#include “json2pb.h”#include “vg.hpp”#include “hash_map.hpp”
-
file
json2pb.cpp
- #include <errno.h>#include <jansson.h>#include <google/protobuf/message.h>#include <google/protobuf/descriptor.h>#include <json2pb.h>#include <stdexcept>#include <cstdio>#include “bin2ascii.h”
Defines
-
json_boolean
(val)¶
-
_CONVERT
(type, ctype, fmt, sfunc, afunc)¶
-
_SET_OR_ADD
(sfunc, afunc, value)¶
-
_CONVERT
(type, ctype, fmt, sfunc, afunc)
Functions
-
static json_t *
_pb2json
(const Message &msg)¶
-
static json_t *
_field2json
(const Message &msg, const FieldDescriptor *field, size_t index)¶
-
static void
_json2pb
(Message &msg, json_t *root)¶
-
static void
_json2field
(Message &msg, const FieldDescriptor *field, json_t *jf)¶
-
void
json2pb
(Message &msg, const char *buf, size_t size)¶
-
void
json2pb
(Message &msg, FILE *fp)¶
-
int
json_dump_std_string
(const char *buf, size_t size, void *data)¶
-
-
file
json2pb.h
- #include <string>#include <cstdio>#include <functional>#include <vector>#include <stream.hpp>#include <iostream>
-
file
main.cpp
- #include <iostream>#include <fstream>#include <ctime>#include <cstdio>#include <getopt.h>#include <sys/stat.h>#include “gcsa.h”#include “files.h”#include “json2pb.h”#include “vg.hpp”#include “vg.pb.h”#include “vg_set.hpp”#include “index.hpp”#include “mapper.hpp”#include “Variant.h”#include “Fasta.h”#include “stream.hpp”#include “alignment.hpp”#include “convert.hpp”#include “pileup.hpp”#include “caller.hpp”#include “deconstructor.hpp”#include “vectorizer.hpp”#include “sampler.hpp”#include “filter.hpp”#include “google/protobuf/stubs/common.h”#include “progress_bar.hpp”#include “version.hpp”#include “genotyper.hpp”#include “bubbles.hpp”#include “translator.hpp”#include “readfilter.hpp”#include “distributions.hpp”#include “unittest/driver.hpp”#include “subcommand/subcommand.hpp”
Functions
-
void
help_translate
(char **argv)¶
-
int
main_translate
(int argc, char **argv)¶
-
void
help_filter
(char **argv)¶
-
int
main_filter
(int argc, char **argv)¶
-
void
help_validate
(char **argv)¶
-
int
main_validate
(int argc, char **argv)¶
-
void
help_scrub
(char **argv)¶
-
int
main_scrub
(int argc, char **argv)¶
-
void
help_vectorize
(char **argv)¶
-
int
main_vectorize
(int argc, char **argv)¶
-
void
help_compare
(char **argv)¶
-
int
main_compare
(int argc, char **argv)¶
-
void
help_call
(char **argv)¶
-
int
main_call
(int argc, char **argv)¶
-
void
help_genotype
(char **argv)¶
-
int
main_genotype
(int argc, char **argv)¶
-
void
help_pileup
(char **argv)¶
-
int
main_pileup
(int argc, char **argv)¶
-
void
help_msga
(char **argv)¶
-
int
main_msga
(int argc, char **argv)¶
-
void
help_surject
(char **argv)¶
-
int
main_surject
(int argc, char **argv)¶
-
void
help_circularize
(char **argv)¶
-
int
main_circularize
(int argc, char **argv)¶
-
void
help_sim
(char **argv)¶
-
int
main_sim
(int argc, char **argv)¶
-
void
help_kmers
(char **argv)¶
-
int
main_kmers
(int argc, char **argv)¶
-
void
help_concat
(char **argv)¶
-
int
main_concat
(int argc, char **argv)¶
-
void
help_ids
(char **argv)¶
-
int
main_ids
(int argc, char **argv)¶
-
void
help_join
(char **argv)¶
-
int
main_join
(int argc, char **argv)¶
-
void
help_stats
(char **argv)¶
-
int
main_stats
(int argc, char **argv)¶
-
void
help_paths
(char **argv)¶
-
int
main_paths
(int argc, char **argv)¶
-
void
help_find
(char **argv)¶
-
int
main_find
(int argc, char **argv)¶
-
void
help_align
(char **argv)¶
-
int
main_align
(int argc, char **argv)¶
-
void
help_map
(char **argv)¶
-
int
main_map
(int argc, char **argv)¶
-
void
help_view
(char **argv)¶
-
int
main_view
(int argc, char **argv)¶
-
void
help_sv
(char **argv)¶
-
void
help_deconstruct
(char **argv)¶
-
void
help_locify
(char **argv)¶
-
int
main_locify
(int argc, char **argv)¶
-
int
main_deconstruct
(int argc, char **argv)¶
-
void
help_version
(char **argv)¶
-
int
main_version
(int argc, char **argv)¶
-
int
main_test
(int argc, char **argv)¶
-
void
vg_help
(char **argv)¶
-
int
main
(int argc, char *argv[])¶
-
void
-
file
mapper.cpp
- #include <unordered_set>#include “mapper.hpp”
-
file
mapper.hpp
- #include <iostream>#include <map>#include <chrono>#include <ctime>#include “vg.hpp”#include “xg.hpp”#include “index.hpp”#include “gcsa.h”#include “lcp.h”#include “alignment.hpp”#include “path.hpp”#include “position.hpp”#include “lru_cache.h”#include “json2pb.h”#include “entropy.hpp”#include “gssw_aligner.hpp”
-
file
nodeside.hpp
- #include <ostream>#include <utility>#include “vg.pb.h”#include “types.hpp”#include “hash_map.hpp”
-
file
nodetraversal.hpp
- #include “vg.pb.h”
-
file
path.cpp
- #include “path.hpp”#include “stream.hpp”
-
file
path.hpp
- #include <iostream>#include <algorithm>#include <functional>#include <set>#include <list>#include <sstream>#include “json2pb.h”#include “vg.pb.h”#include “edit.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”#include “position.hpp”#include “nodetraversal.hpp”
-
file
pictographs.hpp
- #include <vector>#include <random>#include <functional>
-
file
pileup.cpp
- #include <cstdlib>#include <stdexcept>#include <regex>#include “json2pb.h”#include “pileup.hpp”#include “stream.hpp”
-
file
pileup.hpp
- #include <iostream>#include <algorithm>#include <functional>#include “vg.pb.h”#include “vg.hpp”#include “hash_map.hpp”#include “utility.hpp”
-
file
position.cpp
- #include “position.hpp”
-
file
position.hpp
- #include “vg.pb.h”#include “types.hpp”#include “xg.hpp”#include “lru_cache.h”#include “utility.hpp”#include “json2pb.h”#include <iostream>
Functions for working with Positions and
pos_t
s.
-
file
progressive.cpp
- #include “progressive.hpp”#include <iostream>
-
file
progressive.hpp
- #include <string>#include “progress_bar.hpp”
-
file
readfilter.cpp
- #include “readfilter.hpp”#include “IntervalTree.h”#include <fstream>#include <sstream>
-
file
readfilter.hpp
- #include <vector>#include <cstdlib>#include <iostream>#include <string>#include “vg.hpp”#include “xg.hpp”#include “vg.pb.h”
Provides a way to filter and transform reads, implementing the bulk of the
vg filter
command.
-
file
realigner.cpp
- #include “realigner.hpp”
-
file
realigner.hpp
- #include <iostream>#include <map>#include “vg.hpp”#include “mapper.hpp”#include “alignment.hpp”#include “path.hpp”#include “json2pb.h”
-
file
region.cpp
- #include <iostream>#include <fstream>#include <cassert>#include “region.hpp”
-
file
region.hpp
- #include <string>#include <vector>#include <sstream>
-
file
sampler.cpp
- #include “sampler.hpp”
-
file
sampler.hpp
- #include <iostream>#include <map>#include <chrono>#include <ctime>#include “vg.hpp”#include “xg.hpp”#include “alignment.hpp”#include “path.hpp”#include “position.hpp”#include “lru_cache.h”#include “json2pb.h”
-
file
ssw_aligner.cpp
- #include “ssw_aligner.hpp”
-
file
ssw_aligner.hpp
- #include <vector>#include <set>#include <string>#include “ssw_cpp.h”#include “vg.pb.h”#include “path.hpp”
-
file
stream.hpp
- #include <cassert>#include <iostream>#include <fstream>#include <functional>#include <vector>#include <list>#include “google/protobuf/stubs/common.h”#include “google/protobuf/io/zero_copy_stream.h”#include “google/protobuf/io/zero_copy_stream_impl.h”#include “google/protobuf/io/gzip_stream.h”#include “google/protobuf/io/coded_stream.h”
-
file
construct.cpp
- #include <omp.h>#include <unistd.h>#include <getopt.h>#include <memory>#include “subcommand.hpp”#include ”../stream.hpp”#include ”../constructor.hpp”#include ”../region.hpp”
Variables
-
Subcommand vg_construct("construct","graph construction", main_construct)
-
-
file
mod.cpp
- #include <omp.h>#include <unistd.h>#include <getopt.h>#include <string>#include <vector>#include <regex>#include “subcommand.hpp”#include ”../vg.hpp”#include ”../stream.hpp”#include ”../utility.hpp”
Variables
-
Subcommand vg_construct("mod","filter, transform, and edit the graph", main_mod)
-
-
file
simplify.cpp
- #include <omp.h>#include <unistd.h>#include <getopt.h>#include <list>#include “subcommand.hpp”#include ”../vg.hpp”#include ”../genotypekit.hpp”#include ”../utility.hpp”
Variables
-
Subcommand vg_construct("simplify","graph simplification", main_simplify)
-
-
file
subcommand.cpp
- #include “subcommand.hpp”
-
file
subcommand.hpp
- #include <map>#include <functional>#include <string>
subcommand.hpp: defines a system for registering subcommands of the vg command (vg construct, vg view, etc.) at compile time. Replaces the system of defining two functions and a giant run of if statements in main.cpp.
main.cpp does not need to include any subcommand headers!
Subcommands are created as static global objects in their own compilation units, which have to be explicitly linked into the binary (they won’t be pulled out of a library if nothing references their symbols).
Subcommands are responsible for printing their own help; we can do “vg help” and print all the subcommands that exist (via a help subcommand), but we can’t do “vg help subcommand” (because the help subcommand doesn’t know how to get help info on the others).
Subcommands get passed all of argv, so they have to skip past their names when parsing arguments.
To make a subcommand, do something like this in a cpp file in this “subcommand” directory:
#include "subcommand.hpp"
using namespace vg::subcommand;
int main_frobnicate(int argc, char** argv) {
    return 0;
}
static Subcommand vg_frobnicate("frobnicate", "frobnicate nodes and edges", main_frobnicate);
-
file
swap_remove.hpp
- #include <vector>#include <algorithm>
-
file
translator.cpp
- #include “translator.hpp”#include “stream.hpp”
-
file
translator.hpp
- #include <iostream>#include <algorithm>#include <functional>#include <set>#include <vector>#include <list>#include “vg.pb.h”#include “vg.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”
-
file
types.hpp
- #include <tuple>
Contains typedefs for basic types useful for talking about graphs.
-
file
driver.cpp
- #include “driver.hpp”#include “catch.hpp”#include <sstream>#include <stdexcept>#include <algorithm>#include <ostream>#include <string>#include <memory>#include <vector>#include <stdlib.h>#include <cstddef>#include <iomanip>#include <limits>#include <stdint.h>#include <iterator>#include <cmath>#include <set>#include <iostream>#include <streambuf>#include <fstream>#include <ctime>#include <map>#include <assert.h>#include <signal.h>#include <cstdio>#include <unistd.h>#include <sys/time.h>#include <cstring>
Defines
-
CATCH_CONFIG_RUNNER
¶
-
-
file
driver.hpp
-
file
utility.cpp
- #include “utility.hpp”
-
file
utility.hpp
- #include <string>#include <vector>#include <sstream>#include <omp.h>#include <cstring>#include <algorithm>#include <numeric>#include <cmath>#include <unistd.h>#include “vg.pb.h”#include “sha1.hpp”#include “Variant.h”
-
file
vectorizer.cpp
- #include “vectorizer.hpp”
-
file
vectorizer.hpp
- #include <iostream>#include <sstream>#include “sdsl/bit_vectors.hpp”#include <vector>#include <unordered_map>#include “vg.hpp”#include “xg.hpp”#include “vg.pb.h”
-
file
version.cpp
- #include “version.hpp”#include “vg_git_version.hpp”
Defines
-
VG_GIT_VERSION
¶
-
-
file
version.hpp
-
file
vg.cpp
- #include “vg.hpp”#include “stream.hpp”#include “gssw_aligner.hpp”#include <raptor2/raptor2.h>
-
file
vg.hpp
- #include <vector>#include <set>#include <string>#include <deque>#include <list>#include <array>#include <omp.h>#include <unistd.h>#include <limits.h>#include <algorithm>#include <random>#include “gssw.h”#include “gcsa.h”#include “lcp.h”#include “gssw_aligner.hpp”#include “ssw_aligner.hpp”#include “region.hpp”#include “path.hpp”#include “utility.hpp”#include “alignment.hpp”#include “vg.pb.h”#include “hash_map.hpp”#include “progressive.hpp”#include “lru_cache.h”#include “Variant.h”#include “Fasta.h”#include “swap_remove.hpp”#include “pictographs.hpp”#include “colors.hpp”#include “types.hpp”#include “gfakluge.hpp”#include “globalDefs.hpp”#include “Graph.hpp”#include “helperDefs.hpp”#include “bubbles.hpp”#include “nodetraversal.hpp”#include “nodeside.hpp”
-
file
vg.proto
Variables
-
syntax
-
-
file
vg_set.cpp
- #include “vg_set.hpp”#include “stream.hpp”
-
file
vg_set.hpp
- #include <set>#include <regex>#include <stdlib.h>#include “gcsa.h”#include “vg.hpp”#include “index.hpp”#include “xg.hpp”
-
dir
src
-
dir
src/subcommand
-
dir
src/unittest