vg Internal API Reference

Below is an index of all classes, files, and namespaces in vg, in alphabetical order.

Useful starting points include vg::Node, vg::Edge, vg::Path, and vg::Graph, which define the Protobuf graph data model, and vg::VG, which is the main graph class with all the useful graph methods on it.

struct
#include <genotyper.hpp>

Public Functions

vg::Genotyper::Affinity::Affinity()
vg::Genotyper::Affinity::Affinity(double affinity, bool is_reverse)

Public Members

bool vg::Genotyper::Affinity::consistent
double vg::Genotyper::Affinity::affinity
bool vg::Genotyper::Affinity::is_reverse
double vg::Genotyper::Affinity::score
double vg::Genotyper::Affinity::likelihood_ln
class
#include <gssw_aligner.hpp>

Subclassed by vg::QualAdjAligner

Public Functions

Aligner::Aligner(int32_t _match = default_match, int32_t _mismatch = default_mismatch, int32_t _gap_open = default_gap_open, int32_t _gap_extension = default_gap_extension)
Aligner::~Aligner(void)
void Aligner::align(Alignment &alignment, Graph &g, bool print_score_matrices = false)
void Aligner::align_pinned(Alignment &alignment, Graph &g, int64_t pinned_node_id, bool pin_left)
void Aligner::align_pinned_multi(Alignment &alignment, vector<Alignment> &alt_alignments, Graph &g, int64_t pinned_node_id, bool pin_left, int32_t max_alt_alns)
void Aligner::align_global_banded(Alignment &alignment, Graph &g, int32_t band_padding = 0, bool permissive_banding = true)
void Aligner::align_global_banded_multi(Alignment &alignment, vector<Alignment> &alt_alignments, Graph &g, int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true)
void Aligner::init_mapping_quality(double gc_content)
bool Aligner::is_mapping_quality_initialized()
void Aligner::compute_mapping_quality(vector<Alignment> &alignments, bool fast_approximation)
void Aligner::compute_paired_mapping_quality(pair<vector<Alignment>, vector<Alignment>> &alignment_pairs, bool fast_approximation)
double Aligner::score_to_unnormalized_likelihood_ln(double score)
int32_t Aligner::score_exact_match(const string &sequence)

Public Members

int8_t *vg::Aligner::nt_table
int8_t *vg::Aligner::score_matrix
int32_t vg::Aligner::match
int32_t vg::Aligner::mismatch
int32_t vg::Aligner::gap_open
int32_t vg::Aligner::gap_extension

Protected Functions

gssw_graph *Aligner::create_gssw_graph(Graph &g, int64_t pinned_node_id, gssw_node **gssw_pinned_node_out)
void vg::Aligner::topological_sort(list<gssw_node *> &sorted_nodes)
void vg::Aligner::visit_node(gssw_node *node, list<gssw_node *> &sorted_nodes, set<gssw_node *> &unmarked_nodes, set<gssw_node *> &temporary_marks)
void Aligner::reverse_graph(Graph &g, Graph &reversed_graph_out)
void Aligner::unreverse_graph(Graph &graph)
void Aligner::unreverse_graph_mapping(gssw_graph_mapping *gm)
void Aligner::gssw_mapping_to_alignment(gssw_graph *graph, gssw_graph_mapping *gm, Alignment &alignment, bool print_score_matrices = false)
string Aligner::graph_cigar(gssw_graph_mapping *gm)
void Aligner::align_internal(Alignment &alignment, vector<Alignment> *multi_alignments, Graph &g, int64_t pinned_node_id, bool pin_left, int32_t max_alt_alns, bool print_score_matrices = false)
double Aligner::maximum_mapping_quality_exact(vector<double> &scaled_scores, size_t *max_idx_out)
double Aligner::maximum_mapping_quality_approx(vector<double> &scaled_scores, size_t *max_idx_out)

Protected Attributes

double vg::Aligner::log_base
struct

Alignments link query strings, such as other genomes or reads, to Paths.

Public Members

string vg::Alignment::sequence

The sequence that has been aligned.

Path vg::Alignment::path

The Path that the sequence follows in the graph it has been aligned to, containing the Edits that modify the graph to produce the sequence.

string vg::Alignment::name

The name of the sequence that has been aligned. Similar to read name in BAM.

bytes vg::Alignment::quality

The quality scores for the sequence, as values on a 0-255 scale.

int32 vg::Alignment::mapping_quality

The mapping quality score for the alignment, in Phreds.

int32 vg::Alignment::score

The score for the alignment, in points.

int32 vg::Alignment::query_position

The offset in the query at which this Alignment occurs.

string vg::Alignment::sample_name

The name of the sample that produced the aligned read.

string vg::Alignment::read_group

The name of the read group to which the aligned read belongs.

Alignment vg::Alignment::fragment_prev

The previous Alignment in the fragment. Contains just enough information to locate the full Alignment; e.g. contains an Alignment with only a name, or only a graph mapping position.

Alignment vg::Alignment::fragment_next

Similarly, the next Alignment in the fragment.

bool vg::Alignment::is_secondary

Flag marking the Alignment as secondary. All but one maximal-scoring alignment of a given read in a GAM file must be secondary.

double vg::Alignment::identity

Portion of aligned bases that are perfect matches, or 0 if no bases are aligned.

repeated<Path> vg::Alignment::fragment

An estimate of the length of the fragment, if this Alignment is paired.

repeated<Locus> vg::Alignment::locus

The loci that this alignment supports. TODO: get rid of this, we have annotations in our data model again.

class
#include <banded_global_aligner.hpp>

Public Functions

BandedGlobalAligner::AltTracebackStack::AltTracebackStack(int64_t max_multi_alns, vector<BAMatrix *> sink_node_matrices)
BandedGlobalAligner::AltTracebackStack::~AltTracebackStack()
void BandedGlobalAligner::AltTracebackStack::get_alignment_start(int64_t &node_id, matrix_t &matrix)
void BandedGlobalAligner::AltTracebackStack::next()
bool BandedGlobalAligner::AltTracebackStack::has_next()
void BandedGlobalAligner::AltTracebackStack::propose_deflection(const IntType score, const int64_t from_node_id, const int64_t row_idx, const int64_t col_idx, const int64_t to_node_id, const matrix_t to_matrix)
IntType BandedGlobalAligner::AltTracebackStack::current_traceback_score()
bool BandedGlobalAligner::AltTracebackStack::at_next_deflection(int64_t node_id, int64_t row_idx, int64_t col_idx)
BandedGlobalAligner<IntType>::matrix_t BandedGlobalAligner::AltTracebackStack::deflect_to_matrix()
BandedGlobalAligner<IntType>::matrix_t BandedGlobalAligner::AltTracebackStack::deflect_to_matrix(int64_t &to_node_id)

Private Functions

void BandedGlobalAligner::AltTracebackStack::insert_traceback(const vector<Deflection> &traceback_prefix, const IntType score, const int64_t from_node_id, const int64_t row_idx, const int64_t col_idx, const int64_t to_node_id, const matrix_t to_matrix)

Private Members

template<>
int64_t vg::BandedGlobalAligner<IntType>::AltTracebackStack::max_multi_alns
template<>
list<pair<vector<Deflection>, IntType>> vg::BandedGlobalAligner<IntType>::AltTracebackStack::alt_tracebacks
template<>
list<pair<vector<Deflection>, IntType>>::iterator vg::BandedGlobalAligner<IntType>::AltTracebackStack::curr_traceback
template<>
vector<Deflection>::iterator vg::BandedGlobalAligner<IntType>::AltTracebackStack::curr_deflxn
class
#include <banded_global_aligner.hpp>

Public Functions

BandedGlobalAligner::BABuilder::BABuilder(Alignment &alignment)
BandedGlobalAligner::BABuilder::~BABuilder()
void BandedGlobalAligner::BABuilder::update_state(matrix_t matrix, Node *node, int64_t read_idx, int64_t node_idx)
void BandedGlobalAligner::BABuilder::finalize_alignment()

Private Functions

void BandedGlobalAligner::BABuilder::finish_current_edit()
void BandedGlobalAligner::BABuilder::finish_current_node()

Private Members

template<>
Alignment &vg::BandedGlobalAligner<IntType>::BABuilder::alignment
template<>
list<Mapping> vg::BandedGlobalAligner<IntType>::BABuilder::node_mappings
template<>
list<Edit> vg::BandedGlobalAligner<IntType>::BABuilder::mapping_edits
template<>
matrix_t vg::BandedGlobalAligner<IntType>::BABuilder::matrix_state
template<>
bool vg::BandedGlobalAligner<IntType>::BABuilder::matching
template<>
Node *vg::BandedGlobalAligner<IntType>::BABuilder::current_node
template<>
int64_t vg::BandedGlobalAligner<IntType>::BABuilder::edit_length
template<>
int64_t vg::BandedGlobalAligner<IntType>::BABuilder::edit_read_end_idx
class
#include <banded_global_aligner.hpp>

Public Functions

BandedGlobalAligner::BAMatrix::BAMatrix(Alignment &alignment, Node *node, int64_t top_diag, int64_t bottom_diag, BAMatrix **seeds, int64_t num_seeds, int64_t cumulative_seq_len)
BandedGlobalAligner::BAMatrix::~BAMatrix()
void BandedGlobalAligner::BAMatrix::fill_matrix(int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, bool qual_adjusted, IntType min_inf)
void BandedGlobalAligner::BAMatrix::traceback(BABuilder &builder, AltTracebackStack &traceback_stack, matrix_t start_mat, int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, bool qual_adjusted, IntType min_inf)
void BandedGlobalAligner::BAMatrix::print_full_matrices()
void BandedGlobalAligner::BAMatrix::print_rectangularized_bands()

Private Functions

void BandedGlobalAligner::BAMatrix::traceback_internal(BABuilder &builder, AltTracebackStack &traceback_stack, int64_t start_row, int64_t start_col, matrix_t start_mat, bool in_lead_gap, int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, bool qual_adjusted, IntType min_inf)
void BandedGlobalAligner::BAMatrix::print_matrix(matrix_t which_mat)
void BandedGlobalAligner::BAMatrix::print_band(matrix_t which_mat)

Private Members

template<>
int64_t vg::BandedGlobalAligner<IntType>::BAMatrix::top_diag
template<>
int64_t vg::BandedGlobalAligner<IntType>::BAMatrix::bottom_diag
template<>
Node *vg::BandedGlobalAligner<IntType>::BAMatrix::node
template<>
Alignment &vg::BandedGlobalAligner<IntType>::BAMatrix::alignment
template<>
int64_t vg::BandedGlobalAligner<IntType>::BAMatrix::cumulative_seq_len
template<>
BAMatrix **vg::BandedGlobalAligner<IntType>::BAMatrix::seeds
template<>
int64_t vg::BandedGlobalAligner<IntType>::BAMatrix::num_seeds
template<>
IntType *vg::BandedGlobalAligner<IntType>::BAMatrix::match
template<>
IntType *vg::BandedGlobalAligner<IntType>::BAMatrix::insert_col
template<>
IntType *vg::BandedGlobalAligner<IntType>::BAMatrix::insert_row

Friends

friend vg::BandedGlobalAligner::BAMatrix::BABuilder
friend vg::BandedGlobalAligner::BAMatrix::AltTracebackStack
template <class IntType>
class
#include <banded_global_aligner.hpp>

Public Functions

BandedGlobalAligner::BandedGlobalAligner(Alignment &alignment, Graph &g, int64_t band_padding, bool permissive_banding = false, bool adjust_for_base_quality = false)
BandedGlobalAligner::BandedGlobalAligner(Alignment &alignment, Graph &g, vector<Alignment> &alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding = false, bool adjust_for_base_quality = false)
BandedGlobalAligner::~BandedGlobalAligner()
void BandedGlobalAligner::align(int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend)

Private Types

enum type vg::BandedGlobalAligner::matrix_t

Values:

Private Functions

BandedGlobalAligner::BandedGlobalAligner(Alignment &alignment, Graph &g, vector<Alignment> *alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding = false, bool adjust_for_base_quality = false)
void BandedGlobalAligner::traceback(int8_t *score_mat, int8_t *nt_table, int8_t gap_open, int8_t gap_extend, IntType min_inf)
void BandedGlobalAligner::graph_edge_lists(Graph &g, bool outgoing_edges, vector<vector<int64_t>> &out_edge_list)
void BandedGlobalAligner::topological_sort(Graph &g, vector<vector<int64_t>> &node_edges_out, vector<Node *> &out_topological_order)
void BandedGlobalAligner::path_lengths_to_sinks(const string &read, vector<vector<int64_t>> &node_edges_in, vector<int64_t> &shortest_path_to_sink, vector<int64_t> &longest_path_to_sink)
void BandedGlobalAligner::find_banded_paths(const string &read, bool permissive_banding, vector<vector<int64_t>> &node_edges_in, vector<vector<int64_t>> &node_edges_out, int64_t band_padding, vector<bool> &node_masked, vector<pair<int64_t, int64_t>> &band_ends)
void BandedGlobalAligner::shortest_seq_paths(vector<vector<int64_t>> &node_edges_out, vector<int64_t> &seq_lens_out, unordered_set<Node *> source_nodes)

Private Members

Alignment &vg::BandedGlobalAligner::alignment
vector<Alignment> *vg::BandedGlobalAligner::alt_alignments
int64_t vg::BandedGlobalAligner::max_multi_alns
bool vg::BandedGlobalAligner::adjust_for_base_quality
vector<BAMatrix *> vg::BandedGlobalAligner::banded_matrices
unordered_map<int64_t, int64_t> vg::BandedGlobalAligner::node_id_to_idx
vector<Node *> vg::BandedGlobalAligner::topological_order
unordered_set<Node *> vg::BandedGlobalAligner::source_nodes
unordered_set<Node *> vg::BandedGlobalAligner::sink_nodes
struct

Summarizes reads that map to a single position in the graph. This structure is pretty much identical to a line in Samtools pileup format. If qualities are set, they must have size = num_bases.

Public Members

int32 vg::BasePileup::ref_base
int32 vg::BasePileup::num_bases
string vg::BasePileup::bases
bytes vg::BasePileup::qualities
struct
#include <bubbles.hpp>

Public Members

NodeSide vg::Bubble::start
NodeSide vg::Bubble::end
vector<id_t> vg::Bubble::contents
vector<int> vg::Bubble::chain_offsets
bool vg::Bubble::acyclic
struct

Public Members

int64_t vg::CactusSide::node
bool vg::CactusSide::is_end
class
#include <genotypekit.hpp>

This site finder finds sites with Cactus.

Inherits from vg::SiteFinder

Public Functions

vg::CactusSiteFinder::CactusSiteFinder(VG &graph, const string &hint_path_name)

Make a new CactusSiteFinder to find sites in the given graph.

virtual vg::CactusSiteFinder::~CactusSiteFinder()
void vg::CactusSiteFinder::for_each_site_parallel(const function<void(NestedSite)> &lambda)

Find all the sites in parallel with Cactus, make the site tree, and call the given function on all the top-level sites.

Private Members

VG &vg::CactusSiteFinder::graph
string vg::CactusSiteFinder::hint_path_name
class
#include <caller.hpp>

Public Types

typedef
typedef
typedef
typedef
typedef

Public Functions

vg::Caller::Caller(VG *graph, double het_prior = Default_het_prior, int min_depth = Default_min_depth, int max_depth = Default_max_depth, int min_support = Default_min_support, double min_frac = Default_min_frac, double min_log_likelihood = Default_min_log_likelihood, bool leave_uncalled = false, int default_quality = Default_default_quality, double max_strand_bias = Default_max_strand_bias, ostream *text_calls = NULL, bool bridge_alts = false)
vg::Caller::~Caller()
void vg::Caller::clear()
void vg::Caller::write_call_graph(ostream &out, bool json)
void vg::Caller::call_node_pileup(const NodePileup &pileup)
void vg::Caller::call_edge_pileup(const EdgePileup &pileup)
void vg::Caller::update_call_graph()
void vg::Caller::map_paths()
void vg::Caller::verify_path(const Path &in_path, const list<Mapping> &call_path)
void vg::Caller::call_base_pileup(const NodePileup &np, int64_t offset, bool insertions)
void vg::Caller::compute_top_frequencies(const BasePileup &bp, const vector<pair<int64_t, int64_t>> &base_offsets, string &top_base, int &top_count, int &top_rev_count, string &second_base, int &second_count, int &second_rev_count, int &total_count, bool inserts)
pair<double, int> vg::Caller::base_log_likelihood(const BasePileup &pb, const vector<pair<int64_t, int64_t>> &base_offsets, const string &val, const string &first, const string &second)
void vg::Caller::create_node_calls(const NodePileup &np)
void vg::Caller::create_augmented_edge(Node *node1, int from_offset, bool left_side1, bool aug1, Node *node2, int to_offset, bool left_side2, bool aug2, char cat, StrandSupport support)
void vg::Caller::write_node_tsv(Node *node, char call, StrandSupport support, int64_t orig_id, int orig_offset)
void vg::Caller::write_edge_tsv(Edge *edge, char call, StrandSupport support)
void vg::Caller::write_nd_tsv()

Public Members

VG *vg::Caller::_graph
VG vg::Caller::_call_graph
ostream *vg::Caller::_text_calls
vector<Genotype> vg::Caller::_node_calls
vector<pair<StrandSupport, StrandSupport>> vg::Caller::_node_supports
vector<Genotype> vg::Caller::_insert_calls
vector<pair<StrandSupport, StrandSupport>> vg::Caller::_insert_supports
const Node *vg::Caller::_node
int64_t vg::Caller::_max_id
NodeDivider vg::Caller::_node_divider
unordered_set<int64_t> vg::Caller::_visited_nodes
unordered_map<pair<NodeSide, NodeSide>, StrandSupport> vg::Caller::_called_edges
EdgeHash vg::Caller::_augmented_edges
InsertionHash vg::Caller::_inserted_nodes
EdgeSupHash vg::Caller::_insertion_supports
EdgeSupHash vg::Caller::_deletion_supports
double vg::Caller::_het_log_prior
double vg::Caller::_hom_log_prior
int vg::Caller::_buffer_size
int vg::Caller::_min_depth
int vg::Caller::_max_depth
int vg::Caller::_min_support
double vg::Caller::_min_frac
double vg::Caller::_min_log_likelihood
bool vg::Caller::_leave_uncalled
char vg::Caller::_default_quality
double vg::Caller::_max_strand_bias
bool vg::Caller::_bridge_alts

Public Static Functions

static double vg::Caller::safe_log(double v)
static bool vg::Caller::missing_call(const Genotype &g)
static bool vg::Caller::ref_call(const Genotype &g)
static int vg::Caller::call_cat(const Genotype &g)

Public Static Attributes

const double vg::Caller::Log_zero
const double vg::Caller::Default_het_prior
const int vg::Caller::Default_min_depth
const int vg::Caller::Default_max_depth
const int vg::Caller::Default_min_support
const double vg::Caller::Default_min_frac
const double vg::Caller::Default_min_log_likelihood
const char vg::Caller::Default_default_quality
const double vg::Caller::Default_max_strand_bias
class
#include <colors.hpp>

Public Functions

vg::Colors::Colors(void)
vg::Colors::Colors(int seed_val)
vg::Colors::~Colors(void)
string vg::Colors::hashed(const string &str)
string vg::Colors::random(void)

Public Members

const vector<string> vg::Colors::colors

Private Members

mt19937 vg::Colors::rng
class
#include <genotypekit.hpp>

Represents a strategy for computing consistency between Alignments and SiteTraversals. Determines whether a read is consistent with a SiteTraversal or not (but has access to all the SiteTraversals). Polymorphic base class/interface.

Public Functions

virtual vg::ConsistencyCalculator::~ConsistencyCalculator()
virtual vector<bool> vg::ConsistencyCalculator::calculate_consistency(const NestedSite &site, const vector<SiteTraversal> &traversals, const Alignment &read) const
= 0

Return true or false for each traversal of the site, depending on if the read is consistent with it or not.

struct
#include <constructor.hpp>

Represents a constructed region of the graph along a single linear sequence. Contains the protobuf Graph holding all the created components (which may be too large to serialize), a set of node IDs whose left sides need to be connected to when you connect to the start of the chunk, and a set of node IDs whose right sides need to be connected to when you connect to the end of the chunk.

Node ordering is restricted: if there is a single source, it must be the very first node in the graph with ID 1, and if there is a single sink it must be the very last node in the graph with ID max_id. Additionally, single sources and single sinks must be visited by only a single path, the reference path.

The overall reference path must also always be path 0. Also, all mappings in all paths must be full-length matches on the forward strand, and they must be sorted by rank. Ranks must be filled and start with rank 1 in each path.

Public Members

Graph vg::ConstructedChunk::graph
id_t vg::ConstructedChunk::max_id
set<id_t> vg::ConstructedChunk::left_ends
set<id_t> vg::ConstructedChunk::right_ends
class
#include <constructor.hpp>

Inherits from vg::Progressive

Public Functions

void vg::Constructor::add_name_mapping(const string &vcf_name, const string &fasta_name)

Add a name mapping between a VCF contig name and a FASTA sequence name. Both must be unique.

string vg::Constructor::vcf_to_fasta(const string &vcf_name) const

Convert the given VCF contig name to a FASTA sequence name, through the rename mappings.

string vg::Constructor::fasta_to_vcf(const string &fasta_name) const

Convert the given FASTA sequence name to a VCF contig name, through the rename mappings.

ConstructedChunk vg::Constructor::construct_chunk(string reference_sequence, string reference_path_name, vector<vcflib::Variant> variants, size_t chunk_offset) const

Construct a ConstructedChunk of graph from the given piece of sequence, with the given name, applying the given variants. The variants need to be sorted by start position, and have their start positions set to be ZERO-BASED. However, they also need to have their start positions relative to the global start of the contig, so that hash-based names come out right for them. They also need to not overlap with any variants not in the vector we have (i.e. we need access to all overlapping variants for this region). The variants must not extend beyond the given sequence, though they can abut its edges.

Variants in the vector may not use symbolic alleles.

chunk_offset gives the global 0-based position at which this chunk starts in the reference contig it is part of, which is used to correctly place variants.

void vg::Constructor::construct_graph(string vcf_contig, FastaReference &reference, VcfBuffer &variant_source, function<void(Graph&)> callback)

Construct a graph for the given VCF contig name, using the given reference and the variants from the given buffered VCF file. Emits a sequence of Graph chunks, which may be too big to serialize directly.

Doesn’t handle any of the setup for VCF indexing. Just scans all the variants that can come out of the buffer, so make sure indexing is set on the file first before passing it in.

void vg::Constructor::construct_graph(const vector<FastaReference *> &references, const vector<vcflib::VariantCallFile *> &variant_files, function<void(Graph&)> callback)

Construct a graph using the given FASTA references and VCFlib VCF files. The VCF files are assumed to be grouped by contig and then sorted by position within the contig, such that each contig is present in only one file. If multiple FASTAs are used, each contig must be present in only one FASTA file. Reference and VCF vectors may not contain nulls.

Public Members

bool vg::Constructor::flat
bool vg::Constructor::alt_paths
bool vg::Constructor::greedy_pieces
bool vg::Constructor::chain_deletions
size_t vg::Constructor::max_node_size
size_t vg::Constructor::vars_per_chunk
size_t vg::Constructor::bases_per_chunk
set<string> vg::Constructor::allowed_vcf_names
map<string, pair<size_t, size_t>> vg::Constructor::allowed_vcf_regions

Protected Attributes

map<string, string> vg::Constructor::vcf_to_fasta_renames
map<string, string> vg::Constructor::fasta_to_vcf_renames
struct
#include <readfilter.hpp>

Public Functions

vg::ReadFilter::Counts::Counts()
Counts &vg::ReadFilter::Counts::operator+=(const Counts &other)

Public Members

vector<size_t> vg::ReadFilter::Counts::read
vector<size_t> vg::ReadFilter::Counts::filtered
vector<size_t> vg::ReadFilter::Counts::min_score
vector<size_t> vg::ReadFilter::Counts::max_overhang
vector<size_t> vg::ReadFilter::Counts::min_mapq
vector<size_t> vg::ReadFilter::Counts::split
vector<size_t> vg::ReadFilter::Counts::repeat
vector<size_t> vg::ReadFilter::Counts::defray
class
#include <deconstructor.hpp>

Public Functions

vg::Deconstructor::Deconstructor()
vg::Deconstructor::Deconstructor(VG *graph)
vg::Deconstructor::~Deconstructor()
void vg::Deconstructor::set_xg(xg::XG *xindex)
void vg::Deconstructor::unroll_my_vg(int steps)
void vg::Deconstructor::dagify_my_vg(int steps)
vg::VG *vg::Deconstructor::compact(int compact_steps)

For each superbubble in the graph: If a superbubble is nested and simple (contains no superbubbles), transform it into a node. Record the translation from new node in the graph -> old superbubble map<id_t, SuperBubble>

At each step, find the new superbubbles of the graph and continue with this process.

bool vg::Deconstructor::is_nested(SuperBubble sb)
bool vg::Deconstructor::contains_nested(pair<int64_t, int64_t> start_and_end)

Detect if there are superbubbles contained within the current superbubble (defined by Start and End).

This is easiest done using a simple linear search between the nodes in topological order.

SuperBubble vg::Deconstructor::report_superbubble(int64_t start, int64_t end)

BFS through a superbubble and fill out the corresponding SuperBubble struct.

map<pair<id_t, id_t>, vector<id_t>> vg::Deconstructor::get_all_superbubbles()

Uses a BFS between nodes in the graph labeled as the endpoints of superbubbles to enumerate the nodes between them. TODO: the dagify transform records the node translation

IDEALLY: return the topological order, the starts/ends of superbubbles, and an index from node -> location in topo order. This makes checking if things are nested trivial.

void vg::Deconstructor::sb2vcf(string outfile)

Private Functions

vector<int64_t> vg::Deconstructor::nt_to_ids(deque<NodeTraversal> &nt)
SuperBubble vg::Deconstructor::translate_id(id_t id)
void vg::Deconstructor::init()

Private Members

VG *vg::Deconstructor::my_vg
xg::XG *vg::Deconstructor::my_xg
map<pair<id_t, id_t>, vector<id_t>> vg::Deconstructor::my_sbs
map<id_t, pair<id_t, bool>> vg::Deconstructor::my_translation
map<id_t, pair<id_t, bool>> vg::Deconstructor::my_unroll_translation
map<id_t, pair<id_t, bool>> vg::Deconstructor::my_dagify_translation
map<id_t, SuperBubble> vg::Deconstructor::id_to_bub
vector<id_t> vg::Deconstructor::reverse_topo_order
string vg::Deconstructor::mask_file
vector<SuperBubble> vg::Deconstructor::my_superbubbles
size_t vg::Deconstructor::my_max_length
size_t vg::Deconstructor::my_max_component_length
class
#include <banded_global_aligner.hpp>

Public Functions

BandedGlobalAligner::AltTracebackStack::Deflection::Deflection(const int64_t from_node_id, const int64_t row_idx, const int64_t col_idx, const int64_t to_node_id, const matrix_t to_matrix)
BandedGlobalAligner::AltTracebackStack::Deflection::~Deflection()

Public Members

template<>
const int64_t vg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::from_node_id
template<>
const int64_t vg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::row_idx
template<>
const int64_t vg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::col_idx
template<>
const int64_t vg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::to_node_id
template<>
const matrix_t vg::BandedGlobalAligner<IntType>::AltTracebackStack::Deflection::to_matrix
struct

Edges describe linkages between nodes. They are bidirected, connecting the end (default) or start of the “from” node to the start (default) or end of the “to” node.

Public Members

int64 vg::Edge::from

ID of upstream node.

int64 vg::Edge::to

ID of downstream node.

bool vg::Edge::from_start

If the edge leaves from the 5’ (start) of a node.

bool vg::Edge::to_end

If the edge goes to the 3’ (end) of a node.

int32 vg::Edge::overlap

Length of overlap between the connected Nodes.

struct

Keep pileup-like record for reads that span edges.

Public Members

Edge vg::EdgePileup::edge
int32 vg::EdgePileup::num_reads

total reads mapped

int32 vg::EdgePileup::num_forward_reads

number of reads mapped on forward strand

bytes vg::EdgePileup::qualities
struct

Edits describe how to generate a new string from elements in the graph. To determine the new string, just walk the series of edits, stepping from_length distance in the basis node, and to_length in the novel element, replacing from_length in the basis node with the sequence.

There are several types of Edit:

  • matches: from_length == to_length; sequence is empty
  • snps: from_length == to_length; sequence = alt
  • deletions: to_length == 0 && from_length > to_length; sequence is empty
  • insertions: from_length < to_length; sequence = alt

Public Members

int32 vg::Edit::from_length

Length in the target/ref sequence that is removed.

int32 vg::Edit::to_length

Length in read/alt of the sequence it is replaced with.

string vg::Edit::sequence

The replacement sequence, if different from the original sequence.

struct
#include <caller.hpp>

Public Functions

vg::NodeDivider::Entry::Entry(Node *r = 0, vector<StrandSupport> sup_r = vector< StrandSupport >(), Node *a1 = 0, vector<StrandSupport> sup_a1 = vector< StrandSupport >(), Node *a2 = 0, vector<StrandSupport> sup_a2 = vector< StrandSupport >())
Node *&vg::NodeDivider::Entry::operator[](int i)
vector<StrandSupport> &vg::NodeDivider::Entry::sup(int i)

Public Members

Node *vg::NodeDivider::Entry::ref
Node *vg::NodeDivider::Entry::alt1
Node *vg::NodeDivider::Entry::alt2
vector<StrandSupport> vg::NodeDivider::Entry::sup_ref
vector<StrandSupport> vg::NodeDivider::Entry::sup_alt1
vector<StrandSupport> vg::NodeDivider::Entry::sup_alt2
class
#include <filter.hpp>

Public Functions

vg::Filter::Filter()
vg::Filter::~Filter()
Alignment vg::Filter::depth_filter(Alignment &aln)

Looks for Alignments that have large overhangs at the end of them.

Default behavior: if an alignment has a right- or left- clip that is longer than the maximum allowed, return an empty alignment.

Inverse Behavior: if the alignment has a clip that is larger than the maximum allowed at either end, return the alignment. CLI: vg filter -d 10 -q 40 -r -R. -r: track depth of both novel variants and those in the graph. -R: remove edits that fail the filter (otherwise toss the whole alignment).

Alignment vg::Filter::qual_filter(Alignment &aln)
Alignment vg::Filter::coverage_filter(Alignment &aln)
Alignment vg::Filter::avg_qual_filter(Alignment &aln)
Alignment vg::Filter::percent_identity_filter(Alignment &aln)

Filter reads that have less than <PCTID> identity to the reference. I.e., if a read matches the reference along 80% of its length, and your cutoff is 90% PCTID, throw it out.

Alignment vg::Filter::soft_clip_filter(Alignment &aln)
Alignment vg::Filter::split_read_filter(Alignment &aln)

Split reads map to two separate paths in the graph OR vastly separated non-consecutive nodes in a single path.

They’re super important for detecting structural variants, so we may want to filter them out or collect only split reads.

Alignment vg::Filter::path_divergence_filter(Alignment &aln)

Looks for alignments that transition from one path to another over their length. This may occur for one of several reasons:

  1. The read covers a translocation
  2. The read looks a lot like two different (but highly similar) paths
  3. The read is shattered (e.g. as in chromothripsis)

Default behavior: if the Alignment is path divergent, return an empty Alignment, else return aln. Inverse behavior: if the Alignment is path divergent, return aln, else return an empty Alignment.

Alignment vg::Filter::reversing_filter(Alignment &aln)

Looks for alignments that change direction over their length. This may happen because of:

  1. Mapping artifacts
  2. Cycles
  3. Highly repetitive regions
  4. Inversions (if you’re lucky enough)

Default behavior: if the Alignment reverses, return an empty Alignment. Inverse behavior: if the Alignment reverses, return the Alignment.

Alignment vg::Filter::kmer_filter(Alignment &aln)
void vg::Filter::set_min_depth(int depth)
void vg::Filter::set_min_qual(int qual)
void vg::Filter::set_min_percent_identity(double pct_id)
void vg::Filter::set_avg_qual(double avg_qual)
void vg::Filter::set_filter_matches(bool fm)
void vg::Filter::set_remove_failing_edits(bool fm)
void vg::Filter::set_soft_clip_limit(int max_clip)
void vg::Filter::set_split_read_limit(int split_limit)
void vg::Filter::set_reversing(bool do_reversing_filter)
void vg::Filter::set_path_divergence(bool do_path_divergence)
void vg::Filter::set_window_length(int window_length)
void vg::Filter::set_my_vg(vg::VG *vg)
void vg::Filter::set_my_xg_idx(xg::XG *xg_idx)
void vg::Filter::set_inverse(bool do_inv)
int vg::Filter::get_min_depth()
int vg::Filter::get_min_qual()
int vg::Filter::get_window_length()
int vg::Filter::get_soft_clip_limit()
int vg::Filter::get_split_read_limit()
double vg::Filter::get_min_percent_identity()
double vg::Filter::get_min_avg_qual()
bool vg::Filter::get_inverse()
bool vg::Filter::get_filter_matches()
bool vg::Filter::get_remove_failing_edits()
bool vg::Filter::get_do_path_divergence()
bool vg::Filter::get_do_reversing()

Private Members

vg::VG *vg::Filter::my_vg
xg::XG *vg::Filter::my_xg_idx
unordered_map<string, unordered_map<string, int>> vg::Filter::pos_to_edit_to_depth
unordered_map<int, int> vg::Filter::pos_to_qual
bool vg::Filter::inverse
bool vg::Filter::remove_failing_edits
bool vg::Filter::filter_matches
bool vg::Filter::do_path_divergence
bool vg::Filter::do_reversing
int vg::Filter::min_depth
int vg::Filter::min_qual
int vg::Filter::min_cov
int vg::Filter::window_length
int vg::Filter::qual_offset
int vg::Filter::soft_clip_limit
int vg::Filter::split_read_limit
double vg::Filter::min_percent_identity
double vg::Filter::min_avg_qual
class
#include <genotypekit.hpp>

This genotype prior calculator has a fixed prior for homozygous genotypes and a fixed prior for hets.

Inherits from vg::GenotypePriorCalculator

Public Functions

virtual vg::FixedGenotypePriorCalculator::~FixedGenotypePriorCalculator()
double vg::FixedGenotypePriorCalculator::calculate_log_prior(const Genotype &genotype)

Return the log prior of the given genotype.

TODO: ploidy priors on nested sites???

Public Members

double vg::FixedGenotypePriorCalculator::homozygous_prior_ln
double vg::FixedGenotypePriorCalculator::heterozygous_prior_ln
struct

Describes a genotype at a particular locus.

Public Members

repeated<int32> vg::Genotype::allele

These refer to the offsets of the alleles in the Locus object.

bool vg::Genotype::is_phased
double vg::Genotype::likelihood
double vg::Genotype::log_likelihood

Likelihood natural logged.

double vg::Genotype::log_prior

Prior natural logged.

double vg::Genotype::log_posterior

Posterior natural logged (unnormalized).

class
#include <genotypekit.hpp>

Represents a strategy for calculating genotype likelihood for a (nested) Site. Polymorphic base class/interface.

Public Functions

virtual vg::GenotypeLikelihoodCalculator::~GenotypeLikelihoodCalculator()
virtual double vg::GenotypeLikelihoodCalculator::calculate_log_likelihood(const NestedSite &site, const vector<SiteTraversal> &traversals, const Genotype &genotype, const vector<vector<bool>> &consistencies, const vector<Support> &supports, const vector<Alignment *> &reads)
= 0

Return the log likelihood of the given genotype.

class
#include <genotypekit.hpp>

Represents a strategy for assigning genotype priors. Polymorphic base class/interface.

Subclassed by vg::FixedGenotypePriorCalculator

Public Functions

virtual vg::GenotypePriorCalculator::~GenotypePriorCalculator()
virtual double vg::GenotypePriorCalculator::calculate_log_prior(const Genotype &genotype)
= 0

Return the log prior of the given genotype.

TODO: ploidy priors on nested sites???

class
#include <genotyper.hpp>

Class to hold on to genotyping parameters and genotyping functions.

Public Functions

void vg::Genotyper::run(VG &graph, vector<Alignment> &alignments, ostream &out, string ref_path_name = "", string contig_name = "", string sample_name = "", string augmented_file_name = "", bool use_cactus = false, bool subset_graph = false, bool show_progress = false, bool output_vcf = false, bool output_json = false, int length_override = 0, int variant_offset = 0)
int vg::Genotyper::alignment_qual_score(VG &graph, const Site &site, const Alignment &alignment)

Given an Alignment and a Site, compute a phred score for the quality of the alignment’s bases within the site overall (not counting the start and end nodes), which is supposed to be interpretable as the probability that the call of the sequence is wrong (to the degree that it would no longer support the alleles it appears to support).

In practice we’re just going to average the quality scores for all the bases interior to the site (i.e. not counting the start and end nodes).

If the alignment doesn’t have base qualities, or no qualities are available for bases internal to the site, returns a default value.

vector<Genotyper::Site> vg::Genotyper::find_sites_with_supbub(VG &graph)

Unfold and dagify a graph, find the superbubbles, and then convert them back to the space of the original graph.

Returns a collection of Sites.

vector<Genotyper::Site> vg::Genotyper::find_sites_with_cactus(VG &graph, const string &ref_path_name = "")

Same as find_sites_with_supbub, but uses Cactus instead of superbubbles. This is more general and doesn’t require DAGification etc., but we keep both versions around for now for debugging and comparison.

If ref_path_name is the empty string, it is not used. Otherwise, it must be the name of a path present in the graph.

list<NodeTraversal> vg::Genotyper::get_traversal_of_site(VG &graph, const Site &site, const Path &path)

Given a path (which may run either direction through a site, or not touch the ends at all), collect a list of NodeTraversals in order for the part of the path that is inside the site, in the same orientation as the path.

string vg::Genotyper::traversals_to_string(const list<NodeTraversal> &path)

Make a list of NodeTraversals into the string they represent.

vector<list<NodeTraversal>> vg::Genotyper::get_paths_through_site(VG &graph, const Site &site, const map<string, Alignment *> &reads_by_name)

For the given site, emit all subpaths with unique sequences that run from start to end, out of the paths in the graph. Uses the map of reads by name to determine if a path is a read or a real named path. Paths through the site supported only by reads are subject to a min recurrence count, while those supported by actual embedded named paths are not.

string vg::Genotyper::get_qualities_in_site(VG &graph, const Site &site, const Alignment &alignment)

Get all the quality values in the alignment between the start and end nodes of a site. Handles alignments that enter the site from the end, and alignments that never make it through the site.

If we run out of qualities, or qualities aren’t present, returns no qualities.

If an alignment goes through the site multiple times, we get all the qualities from when it is in the site.

Does not return qualities on the start and end nodes. May return an empty string.

map<Alignment *, vector<Genotyper::Affinity>> vg::Genotyper::get_affinities(VG &graph, const map<string, Alignment *> &reads_by_name, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths)

Get the affinity of all the reads relevant to the superbubble to all the paths through the superbubble.

Affinity is a double out of 1.0. Higher is better.

map<Alignment *, vector<Genotyper::Affinity>> vg::Genotyper::get_affinities_fast(VG &graph, const map<string, Alignment *> &reads_by_name, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths)

Get affinities as above but using only string comparison instead of alignment. Affinities are 0 for mismatch and 1 for a perfect match.

Locus vg::Genotyper::genotype_site(VG &graph, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths, const map<Alignment *, vector<Affinity>> &affinities)

Compute annotated genotype from affinities and superbubble paths. Needs access to the graph so it can chop up the alignments, which requires node sizes.

double vg::Genotyper::get_genotype_log_likelihood(VG &graph, const Site &site, const vector<int> &genotype, const vector<pair<Alignment *, vector<Affinity>>> &alignment_consistency)

Compute the probability of the observed alignments given the genotype.

Takes a genotype as a vector of allele numbers, and support data as a collection of pairs of Alignments and vectors of Affinities recording whether each alignment is consistent with each allele.

Alignments should have had their quality values trimmed down to just the part covering the superbubble.

Returns a natural log likelihood.

double vg::Genotyper::get_genotype_log_prior(const vector<int> &genotype)

Compute the prior probability of the given genotype.

Takes a genotype as a vector of allele numbers. It is not guaranteed that allele 0 corresponds to any notion of primary reference-ness.

Returns a natural log prior probability.

TODO: add in strand bias

vector<vcflib::Variant> vg::Genotyper::locus_to_variant(VG &graph, const Site &site, const ReferenceIndex &index, vcflib::VariantCallFile &vcf, const Locus &locus, const string &sample_name = "SAMPLE")

Make a VCFlib variant from a called Locus. Depends on an index of the reference path we want to call against.

Returns 0 or more variants we can articulate from the superbubble. Sometimes if we can’t make a variant for the superbubble against the reference path, we’ll emit 0 variants.

void vg::Genotyper::write_vcf_header(std::ostream &stream, const std::string &sample_name, const std::string &contig_name, size_t contig_size)

Make a VCF header.

vcflib::VariantCallFile *vg::Genotyper::start_vcf(std::ostream &stream, const ReferenceIndex &index, const string &sample_name, const string &contig_name, size_t contig_size)

Start VCF output to a stream. Returns a VCFlib VariantCallFile that needs to be deleted.

pair<pair<int64_t, int64_t>, bool> vg::Genotyper::get_site_reference_bounds(const Site &site, const ReferenceIndex &index)

Utility function for getting the reference bounds (start and past-end) of a site with relation to a given reference index. Computes bounds of the variable region, not including the fixed start and end node lengths. Also returns whether the reference path goes through the site forwards (false) or backwards (true).

void vg::Genotyper::report_site(const Site &site, const ReferenceIndex *index = nullptr)

Tell the statistics tracking code that a site exists. We can do things like count up the site length in the reference and so on. Called only once per site, but may be called on multiple threads simultaneously.

void vg::Genotyper::report_site_traversal(const Site &site, const string &read_name)

Tell the statistics tracking code that a read traverses a site completely. May be called multiple times for a given read and site, and may be called in parallel.

void vg::Genotyper::print_statistics(ostream &out)

Print site statistics to the given stream.

void vg::Genotyper::edge_allele_labels(const VG &graph, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths, unordered_map<pair<NodeTraversal, NodeTraversal>, unordered_set<size_t>, hash_oriented_edge> *out_edge_allele_sets)
void vg::Genotyper::allele_ambiguity_log_probs(const VG &graph, const Site &site, const vector<list<NodeTraversal>> &superbubble_paths, const unordered_map<pair<NodeTraversal, NodeTraversal>, unordered_set<size_t>, hash_oriented_edge> &edge_allele_sets, vector<unordered_map<vector<size_t>, double, hash_ambiguous_allele_set>> *out_allele_ambiguity_probs)

Public Members

size_t vg::Genotyper::max_path_search_steps
int vg::Genotyper::unfold_max_length
int vg::Genotyper::dagify_steps
double vg::Genotyper::max_het_bias
bool vg::Genotyper::use_mapq
bool vg::Genotyper::realign_indels
int vg::Genotyper::default_sequence_quality
int vg::Genotyper::min_recurrence
int vg::Genotyper::min_consistent_per_strand
double vg::Genotyper::min_score_per_base
double vg::Genotyper::het_prior_logprob
Translator vg::Genotyper::translator
map<size_t, size_t> vg::Genotyper::site_reference_length_histogram
map<const Site *, set<string>> vg::Genotyper::site_traversals
set<const Site *> vg::Genotyper::all_sites
Aligner vg::Genotyper::normal_aligner
QualAdjAligner vg::Genotyper::quality_aligner
struct

Graphs are collections of nodes and edges. They can represent subgraphs of larger graphs or be wholly self-sufficient. Protobuf memory limits of 67108864 bytes mean we typically keep them small, generating graphs as collections of smaller subgraphs.

Public Members

repeated<Node> vg::Graph::node

The Nodes that make up the graph.

repeated<Edge> vg::Graph::edge

The Edges that connect the Nodes in the graph.

repeated<Path> vg::Graph::path

A set of named Paths that visit sequences of oriented Nodes.

template <typename A, typename B>
struct std::hash<pair<A, B>>
#include <hash_map.hpp>

Public Functions

size_t std::hash::operator()(const pair<A, B> &x) const
template <>
struct std::hash<vg::NodeSide>
#include <nodeside.hpp>

Hash functor to hash NodeSides. We need to implement a hash function for these if we want to be able to use them in keys in hash maps.

Public Functions

size_t std::hash::operator()(const vg::NodeSide &item) const

Produce a hash of a NodeSide.

struct
#include <genotyper.hpp>

Public Functions

size_t vg::Genotyper::hash_ambiguous_allele_set::operator()(const vector<size_t> &ambiguous_set) const
template <typename K, typename V>
class
#include <hash_map.hpp>

Inherits from google::dense_hash_map< K, V >

Public Functions

vg::hash_map::hash_map()
template <typename K, typename V>
class vg::hash_map<K *, V>
#include <hash_map.hpp>

Inherits from google::dense_hash_map< K *, V >

Public Functions

vg::hash_map::hash_map()
struct
#include <genotyper.hpp>

Public Functions

size_t vg::Genotyper::hash_node_traversal::operator()(const NodeTraversal &node_traversal) const
struct
#include <genotyper.hpp>

Public Functions

size_t vg::Genotyper::hash_oriented_edge::operator()(const pair<const NodeTraversal, const NodeTraversal> &edge) const
class
#include <index.hpp>

Public Functions

vg::Index::Index(void)
vg::Index::Index(string &name)
vg::Index::~Index(void)
rocksdb::Options vg::Index::GetOptions(void)
void vg::Index::open(const std::string &dir, bool read_only = false)
void vg::Index::open_read_only(string &dir)
void vg::Index::open_for_write(string &dir)
void vg::Index::open_for_bulk_load(string &dir)
void vg::Index::reset_options(void)
void vg::Index::flush(void)
void vg::Index::compact(void)
void vg::Index::close(void)
void vg::Index::load_graph(VG &graph)
void vg::Index::dump(std::ostream &out)
void vg::Index::for_all(std::function<void(string&, string&)> lambda)
void vg::Index::for_range(string &key_start, string &key_end, std::function<void(string&, string&)> lambda)
void vg::Index::put_node(const Node *node)
void vg::Index::put_edge(const Edge *edge)
void vg::Index::batch_node(const Node *node, rocksdb::WriteBatch &batch)
void vg::Index::batch_edge(const Edge *edge, rocksdb::WriteBatch &batch)
void vg::Index::put_kmer(const string &kmer, const int64_t id, const int32_t pos)
void vg::Index::batch_kmer(const string &kmer, const int64_t id, const int32_t pos, rocksdb::WriteBatch &batch)
void vg::Index::put_metadata(const string &tag, const string &data)
void vg::Index::put_node_path(int64_t node_id, int64_t path_id, int64_t path_pos, bool backward, const Mapping &mapping)
void vg::Index::put_path_position(int64_t path_id, int64_t path_pos, bool backward, int64_t node_id, const Mapping &mapping)
void vg::Index::put_mapping(const Mapping &mapping)
void vg::Index::put_alignment(const Alignment &alignment)
void vg::Index::put_base(int64_t aln_id, const Alignment &alignment)
void vg::Index::put_traversal(int64_t aln_id, const Mapping &mapping)
void vg::Index::cross_alignment(int64_t aln_id, const Alignment &alignment)
rocksdb::Status vg::Index::get_node(int64_t id, Node &node)
rocksdb::Status vg::Index::get_edge(int64_t from, bool from_start, int64_t to, bool to_end, Edge &edge)
rocksdb::Status vg::Index::get_metadata(const string &key, string &data)
int vg::Index::get_node_path(int64_t node_id, int64_t path_id, int64_t &path_pos, bool &backward, Mapping &mapping)
void vg::Index::get_mappings(int64_t node_id, vector<Mapping> &mappings)
void vg::Index::get_alignments(int64_t node_id, vector<Alignment> &alignments)
void vg::Index::get_alignments(int64_t id1, int64_t id2, vector<Alignment> &alignments)
void vg::Index::for_alignment_in_range(int64_t id1, int64_t id2, std::function<void(const Alignment&)> lambda)
void vg::Index::for_alignment_to_node(int64_t node_id, std::function<void(const Alignment&)> lambda)
void vg::Index::for_alignment_to_nodes(const vector<int64_t> &ids, std::function<void(const Alignment&)> lambda)
void vg::Index::for_base_alignments(const set<int64_t> &aln_ids, std::function<void(const Alignment&)> lambda)
const string vg::Index::key_for_node(int64_t id)
const string vg::Index::key_for_edge_on_start(int64_t node_id, int64_t other, bool backward)
const string vg::Index::key_for_edge_on_end(int64_t node_id, int64_t other, bool backward)
const string vg::Index::key_prefix_for_edges_on_node_start(int64_t node)
const string vg::Index::key_prefix_for_edges_on_node_end(int64_t node)
const string vg::Index::key_for_kmer(const string &kmer, int64_t id)
const string vg::Index::key_prefix_for_kmer(const string &kmer)
const string vg::Index::key_for_metadata(const string &tag)
const string vg::Index::key_for_path_position(int64_t path_id, int64_t path_pos, bool backward, int64_t node_id)
const string vg::Index::key_for_node_path_position(int64_t node_id, int64_t path_id, int64_t path_pos, bool backward)
const string vg::Index::key_prefix_for_node_path(int64_t node_id, int64_t path_id)
const string vg::Index::key_for_mapping_prefix(int64_t node_id)
const string vg::Index::key_for_mapping(const Mapping &mapping)
const string vg::Index::key_for_alignment_prefix(int64_t node_id)
const string vg::Index::key_for_alignment(const Alignment &alignment)
const string vg::Index::key_for_base(int64_t aln_id)
const string vg::Index::key_prefix_for_traversal(int64_t node_id)
const string vg::Index::key_for_traversal(int64_t aln_id, const Mapping &mapping)
void vg::Index::parse_node(const string &key, const string &value, int64_t &id, Node &node)
void vg::Index::parse_edge(const string &key, const string &value, char &type, int64_t &id1, int64_t &id2, Edge &edge)
void vg::Index::parse_edge(const string &key, char &type, int64_t &node_id, int64_t &other_id, bool &backward)
void vg::Index::parse_kmer(const string &key, const string &value, string &kmer, int64_t &id, int32_t &pos)
void vg::Index::parse_node_path(const string &key, const string &value, int64_t &node_id, int64_t &path_id, int64_t &path_pos, bool &backward, Mapping &mapping)
void vg::Index::parse_path_position(const string &key, const string &value, int64_t &path_id, int64_t &path_pos, bool &backward, int64_t &node_id, Mapping &mapping)
void vg::Index::parse_mapping(const string &key, const string &value, int64_t &node_id, string &hash, Mapping &mapping)
void vg::Index::parse_alignment(const string &key, const string &value, int64_t &node_id, string &hash, Alignment &alignment)
void vg::Index::parse_base(const string &key, const string &value, int64_t &aln_id, Alignment &alignment)
void vg::Index::parse_traversal(const string &key, const string &value, int64_t &node_id, int16_t &rank, bool &backward, int64_t &aln_id)
string vg::Index::entry_to_string(const string &key, const string &value)
string vg::Index::graph_entry_to_string(const string &key, const string &value)
string vg::Index::kmer_entry_to_string(const string &key, const string &value)
string vg::Index::position_entry_to_string(const string &key, const string &value)
string vg::Index::metadata_entry_to_string(const string &key, const string &value)
string vg::Index::node_path_to_string(const string &key, const string &value)
string vg::Index::path_position_to_string(const string &key, const string &value)
string vg::Index::mapping_entry_to_string(const string &key, const string &value)
string vg::Index::alignment_entry_to_string(const string &key, const string &value)
string vg::Index::base_entry_to_string(const string &key, const string &value)
string vg::Index::traversal_entry_to_string(const string &key, const string &value)
void vg::Index::get_context(int64_t id, VG &graph)
void vg::Index::expand_context(VG &graph, int steps)
void vg::Index::get_range(int64_t from_id, int64_t to_id, VG &graph)
void vg::Index::for_graph_range(int64_t from_id, int64_t to_id, function<void(string&, string&)> lambda)
void vg::Index::get_connected_nodes(VG &graph)
void vg::Index::get_edges_on_end(int64_t node, vector<Edge> &edges)
void vg::Index::get_edges_on_start(int64_t node, vector<Edge> &edges)
void vg::Index::get_nodes_next(int64_t node, bool backward, vector<pair<int64_t, bool>> &destinations)
void vg::Index::get_nodes_prev(int64_t node, bool backward, vector<pair<int64_t, bool>> &destinations)
void vg::Index::get_path(VG &graph, const string &name, int64_t start, int64_t end)
void vg::Index::node_path_position(int64_t id, string &path_name, int64_t &position, bool &backward, int64_t &offset)
pair<list<pair<int64_t, bool>>, pair<int64_t, bool>> vg::Index::get_nearest_node_prev_path_member(int64_t node_id, bool backward, int64_t path_id, int64_t &path_pos, bool &relative_orientation, int max_steps = 4)
pair<list<pair<int64_t, bool>>, pair<int64_t, bool>> vg::Index::get_nearest_node_next_path_member(int64_t node_id, bool backward, int64_t path_id, int64_t &path_pos, bool &relative_orientation, int max_steps = 4)
bool vg::Index::get_node_path_relative_position(int64_t node_id, bool backward, int64_t path_id, list<pair<int64_t, bool>> &path_prev, int64_t &prev_pos, bool &prev_orientation, list<pair<int64_t, bool>> &path_next, int64_t &next_pos, bool &next_orientation)
Mapping vg::Index::path_relative_mapping(int64_t node_id, bool backward, int64_t path_id, list<pair<int64_t, bool>> &path_prev, int64_t &prev_pos, bool &prev_orientation, list<pair<int64_t, bool>> &path_next, int64_t &next_pos, bool &next_orientation)
bool vg::Index::surject_alignment(const Alignment &source, set<string> &path_names, Alignment &surjection, string &path_name, int64_t &path_pos, bool &path_reverse, int window = 5)
void vg::Index::path_layout(map<string, pair<pair<int64_t, bool>, pair<int64_t, bool>>> &layout, map<string, int64_t> &lengths)
pair<int64_t, bool> vg::Index::path_first_node(int64_t path_id)
pair<int64_t, bool> vg::Index::path_last_node(int64_t path_id, int64_t &path_length)
void vg::Index::get_kmer_subgraph(const string &kmer, VG &graph)
uint64_t vg::Index::approx_size_of_kmer_matches(const string &kmer)
void vg::Index::approx_sizes_of_kmer_matches(const vector<string> &kmers, vector<uint64_t> &sizes)
void vg::Index::for_kmer_range(const string &kmer, function<void(string&, string&)> lambda)
void vg::Index::get_kmer_positions(const string &kmer, map<int64_t, vector<int32_t>> &positions)
void vg::Index::get_kmer_positions(const string &kmer, map<string, vector<pair<int64_t, int32_t>>> &positions)
void vg::Index::prune_kmers(int max_kb_on_disk)
void vg::Index::remember_kmer_size(int size)
set<int> vg::Index::stored_kmer_sizes(void)
void vg::Index::store_batch(map<string, string> &items)
void vg::Index::kmer_matches(std::string &kmer, std::set<int64_t> &node_ids, std::set<int64_t> &edge_ids)
string vg::Index::first_kmer_key(const string &kmer)
pair<int64_t, int64_t> vg::Index::compare_kmers(Index &other)
int64_t vg::Index::get_max_path_id(void)
void vg::Index::put_max_path_id(int64_t id)
int64_t vg::Index::new_path_id(const string &name)
string vg::Index::path_name_prefix(const string &name)
string vg::Index::path_id_prefix(int64_t id)
void vg::Index::put_path_id_to_name(int64_t id, const string &name)
void vg::Index::put_path_name_to_id(int64_t id, const string &name)
string vg::Index::get_path_name(int64_t id)
int64_t vg::Index::get_path_id(const string &name)
void vg::Index::load_paths(VG &graph)
void vg::Index::store_paths(VG &graph)
void vg::Index::store_path(VG &graph, const Path &path)
map<string, int64_t> vg::Index::paths_by_id(void)
void vg::Index::for_each_mapping(function<void(const Mapping&)> lambda)
void vg::Index::for_each_alignment(function<void(const Alignment&)> lambda)
char vg::Index::graph_key_type(const string &key)

Public Members

string vg::Index::name
char vg::Index::start_sep
char vg::Index::end_sep
int vg::Index::threads
rocksdb::DB *vg::Index::db
bool vg::Index::is_open
bool vg::Index::use_snappy
rocksdb::Options vg::Index::db_options
rocksdb::WriteOptions vg::Index::write_options
rocksdb::ColumnFamilyOptions vg::Index::column_family_options
bool vg::Index::bulk_load
bool vg::Index::mem_env
size_t vg::Index::block_cache_size
mt19937 vg::Index::rng
class
#include <index.hpp>

Inherits from exception

Public Functions

vg::indexOpenException::indexOpenException(string message = "")

Private Functions

virtual const char *vg::indexOpenException::what() const

Private Members

string vg::indexOpenException::message
struct
#include <caller.hpp>

Public Members

Node *vg::Caller::InsertionRecord::node
StrandSupport vg::Caller::InsertionRecord::sup
int64_t vg::Caller::InsertionRecord::orig_id
int vg::Caller::InsertionRecord::orig_offset
struct

We need to suppress overlapping variants, but interval trees are hard to write. This accomplishes the collision check with a massive bit vector.

Public Functions

glenn2vcf::IntervalBitfield::IntervalBitfield(size_t length)

Make a new IntervalBitfield covering a region of the specified length.

bool glenn2vcf::IntervalBitfield::collides(size_t start, size_t pastEnd)

Scan for a collision (O(n) in interval length).

void glenn2vcf::IntervalBitfield::add(size_t start, size_t pastEnd)

Take up an interval.

Public Members

std::vector<bool> glenn2vcf::IntervalBitfield::used
class

Inherits from exception

Public Functions

j2pb_error::j2pb_error(const std::string &e)
j2pb_error::j2pb_error(const FieldDescriptor *field, const std::string &e)
virtual j2pb_error::~j2pb_error()
virtual const char *j2pb_error::what() const

Private Members

std::string j2pb_error::_error
struct

Public Functions

json_autoptr::json_autoptr(json_t *json)
json_autoptr::~json_autoptr()
json_t *json_autoptr::release()

Public Members

json_t *json_autoptr::ptr
template <class T>
class
#include <json2pb.h>

Public Functions

JSONStreamHelper::JSONStreamHelper(const std::string &file_name)
JSONStreamHelper::~JSONStreamHelper()
std::function<bool(T&)> JSONStreamHelper::get_read_fn()
int64_t JSONStreamHelper::write(std::ostream &out, bool json_out = false, int64_t buf_size = 1000)

Private Members

FILE *JSONStreamHelper::_fp
class
#include <index.hpp>

Inherits from exception

Private Functions

virtual const char *vg::keyNotFoundException::what() const
struct

Used to serialize kmer matches.

Public Members

string vg::KmerMatch::sequence
int64 vg::KmerMatch::node_id
sint32 vg::KmerMatch::position
bool vg::KmerMatch::backward

If true, this kmer is backwards relative to its node, and position counts from the end of the node.

struct
#include <vg.hpp>

We create a struct that represents each kmer record we want to send to gcsa2.

Public Members

string vg::KmerPosition::kmer
string vg::KmerPosition::pos
set<char> vg::KmerPosition::prev_chars
set<char> vg::KmerPosition::next_chars
set<string> vg::KmerPosition::next_positions
struct

Describes a genetic locus with multiple possible alleles, a genotype, and observational support.

Public Members

string vg::Locus::name

A locus may have an identifying name.

repeated<Path> vg::Locus::allele

These are all the alleles at the locus, not just the called ones. Note that a primary reference allele may or may not appear.

repeated<Support> vg::Locus::support

These supports are per-allele, matching the alleles above.

repeated<Genotype> vg::Locus::genotype

Sorted by likelihood or posterior; the first one is the “call”.

Support vg::Locus::overall_support

We also have a Support for the locus overall, because reads may have supported multiple alleles and we want to know how many total there were.

class
#include <mapper.hpp>

Public Functions

vg::Mapper::Mapper(Index *idex, gcsa::GCSA *g = nullptr, gcsa::LCPArray *a = nullptr)
vg::Mapper::Mapper(xg::XG *xidex, gcsa::GCSA *g, gcsa::LCPArray *a)
vg::Mapper::Mapper(void)
vg::Mapper::~Mapper(void)
void vg::Mapper::clear_aligners(void)
QualAdjAligner *vg::Mapper::get_qual_adj_aligner(void)
Aligner *vg::Mapper::get_regular_aligner(void)
LRUCache<id_t, Node> &vg::Mapper::get_node_cache(void)
void vg::Mapper::init_node_cache(void)
void vg::Mapper::record_fragment_length(int length)
double vg::Mapper::fragment_length_stdev(void)
double vg::Mapper::fragment_length_mean(void)
double vg::Mapper::estimate_gc_content()
void vg::Mapper::init_aligner(int32_t match, int32_t mismatch, int32_t gap_open, int32_t gap_extend)
void vg::Mapper::set_alignment_scores(int32_t match, int32_t mismatch, int32_t gap_open, int32_t gap_extend)
map<string, double> vg::Mapper::alignment_mean_path_positions(const Alignment &aln)
bool vg::Mapper::alignments_consistent(const map<string, double> &pos1, const map<string, double> &pos2, int fragment_size_bound)
void vg::Mapper::align_mate_in_window(const Alignment &read1, Alignment &read2, int pair_window)
vector<Alignment> vg::Mapper::resolve_banded_multi(vector<vector<Alignment>> &multi_alns)
set<MaximalExactMatch *> vg::Mapper::resolve_paired_mems(vector<MaximalExactMatch> &mems1, vector<MaximalExactMatch> &mems2)
vector<Alignment> vg::Mapper::mems_id_clusters_to_alignments(const Alignment &alignment, vector<MaximalExactMatch> &mems, int additional_multimaps)
vector<Alignment> vg::Mapper::mems_pos_clusters_to_alignments(const Alignment &aln, vector<MaximalExactMatch> &mems, int additional_multimaps)
Alignment vg::Mapper::mems_to_alignment(const Alignment &aln, vector<MaximalExactMatch> &mems)
Alignment vg::Mapper::mem_to_alignment(MaximalExactMatch &mem)
Alignment vg::Mapper::patch_alignment(const Alignment &aln)
bool vg::Mapper::adjacent_positions(const Position &pos1, const Position &pos2)
int64_t vg::Mapper::get_node_length(int64_t node_id)
bool vg::Mapper::check_alignment(const Alignment &aln)
VG vg::Mapper::alignment_subgraph(const Alignment &aln, int context_size = 1)
Alignment vg::Mapper::align(const string &seq, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)
Alignment vg::Mapper::align(const Alignment &read, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)
vector<Alignment> vg::Mapper::align_multi(const Alignment &aln, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)
pair<vector<Alignment>, vector<Alignment>> vg::Mapper::align_paired_multi(const Alignment &read1, const Alignment &read2, bool &queued_resolve_later, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000, int pair_window = 64)
pair<Alignment, Alignment> vg::Mapper::align_paired(const Alignment &read1, const Alignment &read2, bool &queued_resolve_later, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000, int pair_window = 64)
Alignment vg::Mapper::surject_alignment(const Alignment &source, set<string> &path_names, string &path_name, int64_t &path_pos, bool &path_reverse, int window)
vector<MaximalExactMatch> vg::Mapper::find_smems(const string &seq, int max_length)
bool vg::Mapper::get_mem_hits_if_under_max(MaximalExactMatch &mem)
void vg::Mapper::check_mems(const vector<MaximalExactMatch> &mems)
vector<MaximalExactMatch> vg::Mapper::find_forward_mems(const string &seq, size_t step = 1, int max_mem_length = 0)
void vg::Mapper::resolve_softclips(Alignment &aln, VG &graph)
char vg::Mapper::pos_char(pos_t pos)
map<pos_t, char> vg::Mapper::next_pos_chars(pos_t pos)
Alignment vg::Mapper::walk_match(const string &seq, pos_t pos)
vector<Alignment> vg::Mapper::walk_match(const Alignment &base, const string &seq, pos_t pos)
vector<Alignment> vg::Mapper::mem_to_alignments(MaximalExactMatch &mem)
set<pos_t> vg::Mapper::sequence_positions(const string &seq)
map<string, int> vg::Mapper::approx_pair_fragment_length(const Alignment &aln1, const Alignment &aln2)

Public Members

Index *vg::Mapper::index
xg::XG *vg::Mapper::xindex
gcsa::GCSA *vg::Mapper::gcsa
gcsa::LCPArray *vg::Mapper::lcp
vector<QualAdjAligner *> vg::Mapper::qual_adj_aligners
vector<Aligner *> vg::Mapper::regular_aligners
vector<LRUCache<id_t, Node> *> vg::Mapper::node_cache
vector<pair<Alignment, Alignment>> vg::Mapper::imperfect_pairs_to_retry
deque<double> vg::Mapper::fragment_lengths
int vg::Mapper::cached_fragment_length_mean
int vg::Mapper::cached_fragment_length_stdev
int vg::Mapper::since_last_fragment_length_estimate
int vg::Mapper::fragment_length_estimate_interval
bool vg::Mapper::debug
int vg::Mapper::alignment_threads
set<int> vg::Mapper::kmer_sizes
int vg::Mapper::best_clusters
int vg::Mapper::cluster_min
int vg::Mapper::hit_size_threshold
float vg::Mapper::min_kmer_entropy
int vg::Mapper::kmer_min
int vg::Mapper::max_thread_gap
int vg::Mapper::kmer_sensitivity_step
bool vg::Mapper::prefer_forward
bool vg::Mapper::greedy_accept
float vg::Mapper::accept_identity
int vg::Mapper::min_mem_length
int vg::Mapper::mem_threading
int vg::Mapper::hit_max
int vg::Mapper::context_depth
int vg::Mapper::max_attempts
int vg::Mapper::thread_extension
int vg::Mapper::max_target_factor
size_t vg::Mapper::max_query_graph_ratio
int vg::Mapper::max_multimaps
int vg::Mapper::softclip_threshold
int vg::Mapper::max_softclip_iterations
float vg::Mapper::min_identity
int vg::Mapper::extra_pairing_multimaps
bool vg::Mapper::adjust_alignments_for_base_quality
MappingQualityMethod vg::Mapper::mapping_quality_method
bool vg::Mapper::always_rescue
int vg::Mapper::fragment_max
int vg::Mapper::fragment_size
double vg::Mapper::fragment_sigma
int vg::Mapper::fragment_length_cache_size

Private Functions

vg::Mapper::Mapper(Index *idex, xg::XG *xidex, gcsa::GCSA *g, gcsa::LCPArray *a)
Alignment vg::Mapper::align_to_graph(const Alignment &aln, VG &vg, size_t max_query_graph_ratio)
vector<Alignment> vg::Mapper::align_multi_internal(bool compute_unpaired_qualities, const Alignment &aln, int kmer_size, int stride, int max_mem_length, int band_width, int additional_multimaps = 0, vector<MaximalExactMatch> *restricted_mems = nullptr)
void vg::Mapper::compute_mapping_qualities(vector<Alignment> &alns)
void vg::Mapper::compute_mapping_qualities(pair<vector<Alignment>, vector<Alignment>> &pair_alns)
vector<Alignment> vg::Mapper::score_sort_and_deduplicate_alignments(vector<Alignment> &all_alns, const Alignment &original_alignment)
void vg::Mapper::filter_and_process_multimaps(vector<Alignment> &all_alns, int additional_multimaps)
vector<Alignment> vg::Mapper::align_multi_kmers(const Alignment &aln, int kmer_size = 0, int stride = 0, int band_width = 1000)
Alignment vg::Mapper::align_banded(const Alignment &read, int kmer_size = 0, int stride = 0, int max_mem_length = 0, int band_width = 1000)
vector<Alignment> vg::Mapper::align_mem_multi(const Alignment &alignment, vector<MaximalExactMatch> &mems, int additional_multimaps = 0)
vector<Alignment> vg::Mapper::align_threaded(const Alignment &read, int &hit_count, int kmer_size = 0, int stride = 0, int attempt = 0)
struct

A Mapping defines the relationship between a node in the system and another entity. An empty edit list implies a complete match; however, it is preferred to specify the full edit structure, as it is more complex to handle special cases.

Public Members

Position vg::Mapping::position

The position at which the first Edit, if any, in the Mapping starts. Inclusive.

repeated<Edit> vg::Mapping::edit

The series of Edits that transform the mapped region of the graph into the corresponding region in the read/alt.

int64 vg::Mapping::rank

The 1-based rank of the mapping in its containing path.

class
#include <mapper.hpp>

Public Functions

vg::MaximalExactMatch::MaximalExactMatch(string::const_iterator b, string::const_iterator e, gcsa::range_type r, size_t m = 0)
string vg::MaximalExactMatch::sequence(void) const
void vg::MaximalExactMatch::fill_nodes(gcsa::GCSA *gcsa)
void vg::MaximalExactMatch::fill_match_count(gcsa::GCSA *gcsa)
int vg::MaximalExactMatch::length(void) const

Public Members

string::const_iterator vg::MaximalExactMatch::begin
string::const_iterator vg::MaximalExactMatch::end
gcsa::range_type vg::MaximalExactMatch::range
size_t vg::MaximalExactMatch::match_count
std::vector<gcsa::node_type> vg::MaximalExactMatch::nodes

Friends

bool operator==(const MaximalExactMatch &m1, const MaximalExactMatch &m2)
bool operator<(const MaximalExactMatch &m1, const MaximalExactMatch &m2)
struct
#include <genotypekit.hpp>

Represents a genotypeable site, with input and output NodeTraversals, that can contain other nested sites within it.

Must be understood in relation to some vg graph.

Public Members

vector<NestedSite> vg::NestedSite::children
map<NodeTraversal, size_t> vg::NestedSite::child_border_index
set<Node *> vg::NestedSite::nodes
set<Edge *> vg::NestedSite::edges
NodeTraversal vg::NestedSite::start
NodeTraversal vg::NestedSite::end
struct

Nodes store sequence data.

Public Members

string vg::Node::sequence

Sequence of DNA bases represented by the Node.

string vg::Node::name

A name provides an identifier.

int64 vg::Node::id

Each Node has a unique positive nonzero ID within its Graph.

struct
#include <caller.hpp>

Public Types

enum type vg::NodeDivider::EntryCat

Values:

= 0
typedef
typedef

Public Functions

void vg::NodeDivider::add_fragment(const Node *orig_node, int offset, Node *subnode, EntryCat cat, vector<StrandSupport> sup)
NodeDivider::Entry vg::NodeDivider::break_end(const Node *orig_node, VG *graph, int offset, bool left_side)
list<Mapping> vg::NodeDivider::map_node(int64_t node_id, int64_t start_offset, int64_t length, bool reverse)
void vg::NodeDivider::clear()

Public Members

NodeHash vg::NodeDivider::index
int64_t *vg::NodeDivider::_max_id
struct

Collect pileup records by node. Saves some space and hashing over storing individually, assuming not too sparse and avg. node length more than a couple of bases. The ith BasePileup in the array corresponds to the position at offset i.

Public Members

int64 vg::NodePileup::node_id
repeated<BasePileup> vg::NodePileup::base_pileup
class
#include <nodeside.hpp>

Represents one side of a Node, identified by ID, for the purposes of indexing edges. TODO: duplicates much of the functionality of NodeTraversal, and causes API duplication to accommodate both. There should only be one.

Public Functions

vg::NodeSide::NodeSide(id_t node, bool is_end = false)

Create a NodeSide for the given side of the given Node. We need this to be a converting constructor so we can represent the empty and deleted item keys in a pair_hash_map.

vg::NodeSide::NodeSide()

Create a NodeSide for no Node.

bool vg::NodeSide::operator==(const NodeSide &other) const

Equality operator.

bool vg::NodeSide::operator!=(const NodeSide &other) const

Inequality operator.

bool vg::NodeSide::operator<(const NodeSide &other) const

Comparison operator for sets and maps.

NodeSide vg::NodeSide::flip(void) const

Reverse complement the node side, obtaining the other side of the same Node.

Public Members

id_t vg::NodeSide::node

What Node are we a side of?

bool vg::NodeSide::is_end

Are we the end side? Or the start side?

Public Static Functions

static pair<NodeSide, NodeSide> vg::NodeSide::pair_from_edge(Edge *e)

Make an edge into a canonically ordered pair of NodeSides.

static pair<NodeSide, NodeSide> vg::NodeSide::pair_from_edge(const Edge &e)

Make an edge into a canonically ordered pair of NodeSides.

static pair<NodeSide, NodeSide> vg::NodeSide::pair_from_start_edge(id_t start_id, const pair<id_t, bool> &oriented_other)

Make a canonically ordered pair of NodeSides from an edge off of the start of a node, to another node in the given relative orientation.

static pair<NodeSide, NodeSide> vg::NodeSide::pair_from_end_edge(id_t end_id, const pair<id_t, bool> &oriented_other)

Make a canonically ordered pair of NodeSides from an edge off of the end of a node, to another node in the given relative orientation.

class
#include <nodetraversal.hpp>

Represents a node traversed in a certain orientation. The default orientation is start to end, but if backward is set, represents the node being traversed end to start. A list of these can serve as an edit-free version of a path, especially if supplemented with a length and an initial node offset. A path node has a left and a right side, which are the start and end of the node if it is forward, or the end and start of the node if it is backward.

Public Functions

vg::NodeTraversal::NodeTraversal(Node *node, bool backward = false)

Make a NodeTraversal that traverses the given Node in the given orientation. We don’t want Node*s to turn into NodeTraversals when we aren’t expecting it, so this is explicit.

vg::NodeTraversal::NodeTraversal()

Create a NodeTraversal of no node.

bool vg::NodeTraversal::operator==(const NodeTraversal &other) const

Equality operator.

bool vg::NodeTraversal::operator!=(const NodeTraversal &other) const

Inequality operator.

bool vg::NodeTraversal::operator<(const NodeTraversal &other) const

Comparison operator for sorting in sets and maps. Make sure to sort by node ID and not pointer value, because people will expect that.

NodeTraversal vg::NodeTraversal::reverse(void) const

Reverse complement the node traversal, returning a traversal of the same node in the opposite direction.

Public Members

Node *vg::NodeTraversal::node

What Node is being traversed?

bool vg::NodeTraversal::backward

In what orientation is it being traversed?

template <typename K, typename V>
class
#include <hash_map.hpp>

Inherits from google::dense_hash_map< K, V, std::hash< K > >

Public Functions

vg::pair_hash_map::pair_hash_map()
struct

Paths are walks through nodes defined by a series of Edits. They can be used to represent:

  • haplotypes
  • mappings of reads, or alignments, by including edits
  • relationships between nodes
  • annotations from other data sources, such as: genes, exons, motifs, transcripts, peaks

Public Members

string vg::Path::name

The name of the path. Path names starting with underscore (_) are reserved for internal VG use.

repeated<Mapping> vg::Path::mapping

The Mappings which describe the order and orientation in which the Path visits Nodes.

bool vg::Path::is_circular

Set to true if the path is circular.

int64 vg::Path::length

Optional length annotation for the Path.

class
#include <path.hpp>

Public Functions

vg::Paths::Paths(void)
vg::Paths::Paths(const Paths &other)
vg::Paths::Paths(Paths &&other)
Paths &vg::Paths::operator=(const Paths &other)
Paths &vg::Paths::operator=(Paths &&other)
void vg::Paths::sort_by_mapping_rank(void)
void vg::Paths::rebuild_mapping_aux(void)
bool vg::Paths::is_head_or_tail_node(id_t id)
vector<string> vg::Paths::all_path_names(void)
void vg::Paths::make_circular(const string &name)
void vg::Paths::make_linear(const string &name)
void vg::Paths::rebuild_node_mapping(void)
list<Mapping>::iterator vg::Paths::find_mapping(Mapping *m)
list<Mapping>::iterator vg::Paths::remove_mapping(Mapping *m)
list<Mapping>::iterator vg::Paths::insert_mapping(list<Mapping>::iterator w, const string &path_name, const Mapping &m)
pair<Mapping *, Mapping *> vg::Paths::divide_mapping(Mapping *m, const Position &pos)
pair<Mapping *, Mapping *> vg::Paths::divide_mapping(Mapping *m, size_t offset)
pair<Mapping *, Mapping *> vg::Paths::replace_mapping(Mapping *m, pair<Mapping, Mapping> n)
void vg::Paths::remove_paths(const set<string> &names)
void vg::Paths::remove_path(const string &name)
void vg::Paths::keep_paths(const set<string> &name)
void vg::Paths::remove_node(id_t id)
bool vg::Paths::has_path(const string &name)
void vg::Paths::to_json(ostream &out)
list<Mapping> &vg::Paths::get_path(const string &name)
list<Mapping> &vg::Paths::get_create_path(const string &name)
list<Mapping> &vg::Paths::create_path(const string &name)
bool vg::Paths::has_mapping(const string &name, size_t rank)
bool vg::Paths::has_node_mapping(id_t id)
bool vg::Paths::has_node_mapping(Node *n)
map<string, set<Mapping *>> &vg::Paths::get_node_mapping(Node *n)
map<string, set<Mapping *>> &vg::Paths::get_node_mapping(id_t id)
map<string, map<int, Mapping *>> vg::Paths::get_node_mappings_by_rank(id_t id)
map<string, map<int, Mapping>> vg::Paths::get_node_mapping_copies_by_rank(id_t id)
Mapping *vg::Paths::traverse_left(Mapping *mapping)
Mapping *vg::Paths::traverse_right(Mapping *mapping)
const string vg::Paths::mapping_path_name(Mapping *m)
set<string> vg::Paths::of_node(id_t id)
map<string, int> vg::Paths::node_path_traversal_counts(id_t id, bool rev = false)
vector<string> vg::Paths::node_path_traversals(id_t id, bool rev = false)
bool vg::Paths::are_consecutive_nodes_in_path(id_t id1, id_t id2, const string &path_name)
vector<string> vg::Paths::over_edge(id_t id1, bool rev1, id_t id2, bool rev2, vector<string> following)
vector<string> vg::Paths::over_directed_edge(id_t id1, bool rev1, id_t id2, bool rev2, vector<string> following)
size_t vg::Paths::size(void) const
bool vg::Paths::empty(void) const
void vg::Paths::clear(void)
void vg::Paths::clear_mapping_ranks(void)
void vg::Paths::compact_ranks(void)
void vg::Paths::load(istream &in)
void vg::Paths::write(ostream &out)
void vg::Paths::to_graph(Graph &g)
Path vg::Paths::path(const string &name)
void vg::Paths::append_mapping(const string &name, const Mapping &m)
void vg::Paths::append_mapping(const string &name, id_t id, size_t rank = 0, bool is_reverse = false)
void vg::Paths::prepend_mapping(const string &name, const Mapping &m)
void vg::Paths::prepend_mapping(const string &name, id_t id, size_t rank = 0, bool is_reverse = false)
size_t vg::Paths::get_next_rank(const string &name)
void vg::Paths::append(Paths &p)
void vg::Paths::append(Graph &g)
void vg::Paths::extend(Paths &p)
void vg::Paths::extend(const Path &p)
void vg::Paths::for_each(const function<void(const Path&)> &lambda)
void vg::Paths::for_each_name(const function<void(const string&)> &lambda)
void vg::Paths::for_each_stream(istream &in, const function<void(Path&)> &lambda)
void vg::Paths::increment_node_ids(id_t inc)
void vg::Paths::swap_node_ids(hash_map<id_t, id_t> &id_mapping)
void vg::Paths::reassign_node(id_t new_id, Mapping *m)
void vg::Paths::for_each_mapping(const function<void(Mapping *)> &lambda)

Public Members

map<string, list<Mapping>> vg::Paths::_paths
map<Mapping *, list<Mapping>::iterator> vg::Paths::mapping_itr
map<Mapping *, string> vg::Paths::mapping_path
map<string, map<size_t, Mapping *>> vg::Paths::mappings_by_rank
map<id_t, map<string, set<Mapping *>>> vg::Paths::node_mapping
set<id_t> vg::Paths::head_tail_nodes
set<string> vg::Paths::circular
class
#include <pictographs.hpp>

Public Functions

vg::Pictographs::Pictographs(void)
vg::Pictographs::Pictographs(int seed_val)
vg::Pictographs::~Pictographs(void)
string vg::Pictographs::hashed(const string &str)
string vg::Pictographs::random(void)

Public Members

const string vg::Pictographs::symbols
const int vg::Pictographs::count

Private Members

mt19937 vg::Pictographs::rng
struct

Bundle up Node and Edge pileups.

Public Members

repeated<NodePileup> vg::Pileup::node_pileups
repeated<EdgePileup> vg::Pileup::edge_pileups
class
#include <pileup.hpp>

Public Types

typedef
typedef

Public Functions

vg::Pileups::Pileups(VG *graph, int min_quality = 0, int max_mismatches = 1, int window_size = 0, int max_depth = 1000, bool use_mapq = false)
vg::Pileups::Pileups(const Pileups &other)
vg::Pileups::Pileups(Pileups &&other)
Pileups &vg::Pileups::operator=(const Pileups &other)
Pileups &vg::Pileups::operator=(Pileups &&other)
vg::Pileups::~Pileups()
void vg::Pileups::clear()
void vg::Pileups::to_json(ostream &out)
void vg::Pileups::load(istream &in)
void vg::Pileups::write(ostream &out, uint64_t buffer_size = 5)
void vg::Pileups::for_each_node_pileup(const function<void(NodePileup&)> &lambda)
NodePileup *vg::Pileups::get_node_pileup(int64_t node_id)
NodePileup *vg::Pileups::get_create_node_pileup(const Node *node)
void vg::Pileups::for_each_edge_pileup(const function<void(EdgePileup&)> &lambda)
EdgePileup *vg::Pileups::get_edge_pileup(pair<NodeSide, NodeSide> sides)
EdgePileup *vg::Pileups::get_create_edge_pileup(pair<NodeSide, NodeSide> sides)
void vg::Pileups::extend(Pileup &pileup)
bool vg::Pileups::insert_node_pileup(NodePileup *pileup)
bool vg::Pileups::insert_edge_pileup(EdgePileup *edge_pileup)
void vg::Pileups::compute_from_alignment(Alignment &alignment)
void vg::Pileups::compute_from_edit(NodePileup &pileup, int64_t &node_offset, int64_t &read_offset, const Node &node, const Alignment &alignment, const Mapping &mapping, const Edit &edit, const Edit *next_edit, const vector<int> &mismatch_counts, pair<const Mapping *, int64_t> &last_match, pair<const Mapping *, int64_t> &last_del, pair<const Mapping *, int64_t> &open_del)
bool vg::Pileups::pass_filter(const Alignment &alignment, int64_t read_offset, int64_t length, const vector<int> &mismatches) const
Pileups &vg::Pileups::merge(Pileups &other)
BasePileup &vg::Pileups::merge_base_pileups(BasePileup &p1, BasePileup &p2)
NodePileup &vg::Pileups::merge_node_pileups(NodePileup &p1, NodePileup &p2)
EdgePileup &vg::Pileups::merge_edge_pileups(EdgePileup &p1, EdgePileup &p2)
char vg::Pileups::combined_quality(char base_quality, int map_quality) const

Public Members

VG *vg::Pileups::_graph
NodePileupHash vg::Pileups::_node_pileups
EdgePileupHash vg::Pileups::_edge_pileups
int vg::Pileups::_min_quality
int vg::Pileups::_max_mismatches
int vg::Pileups::_window_size
int vg::Pileups::_max_depth
bool vg::Pileups::_use_mapq
uint64_t vg::Pileups::_min_quality_count
uint64_t vg::Pileups::_max_mismatch_count
uint64_t vg::Pileups::_bases_count

Public Static Functions

void vg::Pileups::count_mismatches(VG &graph, const Path &path, vector<int> &mismatches, bool skipIndels = false)
static BasePileup *vg::Pileups::get_base_pileup(NodePileup &np, int64_t offset)
static const BasePileup *vg::Pileups::get_base_pileup(const NodePileup &np, int64_t offset)
static BasePileup *vg::Pileups::get_create_base_pileup(NodePileup &np, int64_t offset)
void vg::Pileups::parse_base_offsets(const BasePileup &bp, vector<pair<int64_t, int64_t>> &offsets)
void vg::Pileups::casify(string &seq, bool is_reverse)
void vg::Pileups::make_match(string &seq, int64_t from_length, bool is_reverse)
void vg::Pileups::make_insert(string &seq, bool is_reverse)
void vg::Pileups::make_delete(string &seq, bool is_reverse, const pair<const Mapping *, int64_t> &last_match, const Mapping &mapping, int64_t node_offset)
void vg::Pileups::make_delete(string &seq, bool is_reverse, int64_t from_id, int64_t from_offset, bool from_start, int64_t to_id, int64_t to_offset, bool to_end)
void vg::Pileups::parse_insert(const string &tok, int64_t &len, string &seq, bool &is_reverse)
void vg::Pileups::parse_delete(const string &tok, bool &is_reverse, int64_t &from_id, int64_t &from_offset, bool &from_start, int64_t &to_id, int64_t &to_offset, bool &to_end)
bool vg::Pileups::base_equal(char c1, char c2, bool is_reverse)
char vg::Pileups::extract_match(const BasePileup &bp, int64_t offset)
string vg::Pileups::extract(const BasePileup &bp, int64_t offset)
struct
#include <vg.hpp>

Structure for managing parallel construction of a graph.

Public Functions

vg::VG::Plan::Plan(VG *graph, map<long, vector<vcflib::VariantAllele>> &&alleles, map<pair<long, int>, vector<bool>> &&phase_visits, map<pair<long, int>, vector<pair<string, int>>> &&variant_alts, string seq, string name)

Public Members

VG *vg::VG::Plan::graph
map<long, vector<vcflib::VariantAllele>> vg::VG::Plan::alleles
map<pair<long, int>, vector<bool>> vg::VG::Plan::phase_visits
map<pair<long, int>, vector<pair<string, int>>> vg::VG::Plan::variant_alts
string vg::VG::Plan::seq
string vg::VG::Plan::name
struct

A position in the graph is a node, direction, and offset. The node is stored by ID, and the offset is 0-based and counts from the start of the node in the specified orientation. The direction specifies which orientation of the node we are considering, the forward (as stored) or reverse complement.

Example:

seq+        G A T T A C A
offset+  → 0 1 2 3 4 5 6 7

seq-        C T A A T G T
offset-  → 0 1 2 3 4 5 6 7

Or both at once:

offset-    7 6 5 4 3 2 1 0 ←
seq+        G A T T A C A
offset+  → 0 1 2 3 4 5 6 7

Public Members

int64 vg::Position::node_id

The Node on which the Position is.

int64 vg::Position::offset

The offset into that node’s sequence at which the Position occurs.

bool vg::Position::is_reverse

True if we obtain the original sequence of the path by reverse complementing the mappings.

class
#include <progressive.hpp>

Inherit from this class to give your class create_progress(), update_progress(), and destroy_progress() methods, and a public show_progress field that can be toggled on and off.

Must not be destroyed while a progress bar is active.

Subclassed by vg::Constructor, vg::VG

Public Functions

void vg::Progressive::preload_progress(const string &message)

If no progress bar is currently displayed, set the message to use for the next progress bar to be created. Does nothing if show_progress is false or when a progress bar is displayed.

Public so that users of a class can provide descriptive messages for generic progress operations (like VG’s for_each_kmer_parallel).

void vg::Progressive::create_progress(const string &message, long count)

Create a progress bar showing the given message, with the given number of items to process. Does nothing if show_progress is false. Replaces any existing progress bar.

void vg::Progressive::create_progress(long count)

Create a progress bar with the given number of items to process, using either a default message, or the message passed to the last preload_progress call since a progress bar was destroyed. Does nothing if show_progress is false. Replaces any existing progress bar.

void vg::Progressive::update_progress(long i)

Update the progress bar, noting that the given number of items have been processed. Does nothing if no progress bar is displayed.

void vg::Progressive::increment_progress()

Update the progress bar, noting that one additional item has been processed. Does nothing if no progress bar is displayed.

void vg::Progressive::destroy_progress(void)

Destroy the current progress bar, if it exists.

Public Members

bool vg::Progressive::show_progress

Private Members

string vg::Progressive::progress_message
long vg::Progressive::progress_count
long vg::Progressive::last_progress
long vg::Progressive::progress_seen
ProgressBar *vg::Progressive::progress
class
#include <gssw_aligner.hpp>

Inherits from vg::Aligner

Public Functions

QualAdjAligner::QualAdjAligner(int8_t _match = default_match, int8_t _mismatch = default_mismatch, int8_t _gap_open = default_gap_open, int8_t _gap_extension = default_gap_extension, int8_t _max_scaled_score = default_max_scaled_score, uint8_t _max_qual_score = default_max_qual_score, double gc_content = default_gc_content)
QualAdjAligner::~QualAdjAligner(void)
void QualAdjAligner::align(Alignment &alignment, Graph &g, bool print_score_matrices = false)
void QualAdjAligner::align_global_banded(Alignment &alignment, Graph &g, int32_t band_padding = 0, bool permissive_banding = true)
void vg::QualAdjAligner::align_pinned(Alignment &alignment, Graph &g, int64_t node_id, bool pin_left)
void QualAdjAligner::align_global_banded_multi(Alignment &alignment, vector<Alignment> &alt_alignments, Graph &g, int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true)
void QualAdjAligner::init_mapping_quality(double gc_content)
int32_t QualAdjAligner::score_exact_match(const string &sequence, const string &base_quality)

Public Members

uint8_t vg::QualAdjAligner::max_qual_score
int8_t vg::QualAdjAligner::scaled_gap_open
int8_t vg::QualAdjAligner::scaled_gap_extension
int8_t *vg::QualAdjAligner::adjusted_score_matrix

Private Functions

void QualAdjAligner::init_quality_adjusted_scores(int8_t _max_scaled_score, uint8_t _max_qual_score, double gc_content)
class
#include <readfilter.hpp>

Public Functions

int vg::ReadFilter::filter(istream *alignment_stream, xg::XG *xindex = nullptr)

Filter the alignments available from the given stream, placing them on standard output or in the appropriate file. Returns 0 on success, exit code to use on error.

If an XG index is required, use the specified one. If one is required and not provided, the function will complain and return nonzero.

TODO: Refactor to be less CLI-aware and more modular-y.

bool vg::ReadFilter::trim_ambiguous_ends(xg::XG *index, Alignment &alignment, int k)

Look at either end of the given alignment, up to k bases in from the end. See if that tail of the alignment is mapped such that another embedding in the given graph can produce the same sequence as the sequence along the embedding that the read actually has, and if so trim back the read.

In the case of softclips, the aligned portion of the read is considered, and if trimming is required, the softclips are hard-clipped off.

Returns true if the read had to be modified, and false otherwise.

MUST NOT be called with a null index.

Public Members

double vg::ReadFilter::min_secondary
double vg::ReadFilter::min_primary
bool vg::ReadFilter::frac_score
bool vg::ReadFilter::sub_score
int vg::ReadFilter::max_overhang
int vg::ReadFilter::context_size
bool vg::ReadFilter::verbose
double vg::ReadFilter::min_mapq
int vg::ReadFilter::repeat_size
int vg::ReadFilter::defray_length
int vg::ReadFilter::defray_count
bool vg::ReadFilter::drop_split
int vg::ReadFilter::threads
string vg::ReadFilter::regions_file
string vg::ReadFilter::outbase

Private Functions

bool vg::ReadFilter::has_repeat(Alignment &aln, int k)

Quick and dirty filter to see if removing reads that can slip around and still map perfectly helps vg call. Returns true if, at either end of the read sequence, at least k bases are repetitive, checking repeats of up to size 2k.

bool vg::ReadFilter::trim_ambiguous_end(xg::XG *index, Alignment &alignment, int k)

Trim only the end of the given alignment, leaving the start alone. Two calls of this implement trim_ambiguous_ends above.

bool vg::ReadFilter::is_split(xg::XG *index, Alignment &alignment)

Return false if the read only follows edges in the xg index, and true if the read is split (or just incorrect) and takes edges not in the index.

Throws an error if no XG index is specified.

class
#include <realigner.hpp>

Public Functions

vg::Realigner::Realigner(vcflib::VariantCallFile &v, FastaReference &r, const string &t)
void vg::Realigner::construct(void)
Alignment vg::Realigner::realign(const Alignment &aln)

Public Members

FastaReference &vg::Realigner::ref
vcflib::VariantCallFile vg::Realigner::vcf_file
string vg::Realigner::target
string vg::Realigner::seq_name
int vg::Realigner::start_pos
int vg::Realigner::end_pos
bool vg::Realigner::debug
double vg::Realigner::identity_trigger
bool vg::Realigner::realign_unpaired
double vg::Realigner::softclip_trigger
int vg::Realigner::idx_kmer_size
int vg::Realigner::edge_max
bool vg::Realigner::idx_path_only
int vg::Realigner::doubling_steps
Mapper *vg::Realigner::mapper
gcsa::GCSA *vg::Realigner::gcsaidx
gcsa::LCPArray *vg::Realigner::lcpidx
xg::XG *vg::Realigner::xgidx
struct
#include <genotyper.hpp>

Holds indexes of the reference in a graph: position to node, node to position and orientation, and the full reference string.

Public Functions

vg::ReferenceIndex::ReferenceIndex(VG &vg, string refPathName)

Public Members

std::map<int64_t, std::pair<size_t, bool>> vg::ReferenceIndex::byId
std::map<size_t, vg::NodeTraversal> vg::ReferenceIndex::byStart
std::string vg::ReferenceIndex::sequence
struct

Holds indexes of the reference: position to node, node to position and orientation, and the full reference string.

Public Members

std::map<int64_t, std::pair<size_t, bool>> glenn2vcf::ReferenceIndex::byId
std::map<size_t, vg::NodeTraversal> glenn2vcf::ReferenceIndex::byStart
std::string glenn2vcf::ReferenceIndex::sequence
struct
#include <region.hpp>

Public Members

string vg::Region::seq
int vg::Region::start
int vg::Region::end
class
#include <sampler.hpp>

Public Functions

vg::Sampler::Sampler(xg::XG *x, int seed = 0, bool forward_only = false)
pos_t vg::Sampler::position(void)
string vg::Sampler::sequence(size_t length)
Alignment vg::Sampler::alignment(size_t length)
Alignment vg::Sampler::alignment_with_error(size_t length, double base_error, double indel_error)
vector<Alignment> vg::Sampler::alignment_pair(size_t read_length, size_t fragment_length, double fragment_std_dev, double base_error, double indel_error)
size_t vg::Sampler::node_length(id_t id)
char vg::Sampler::pos_char(pos_t pos)
map<pos_t, char> vg::Sampler::next_pos_chars(pos_t pos)
Alignment vg::Sampler::mutate(const Alignment &aln, double base_error, double indel_error)
vector<Edit> vg::Sampler::mutate_edit(const Edit &edit, const pos_t &position, double base_error, double indel_error, const string &bases, uniform_real_distribution<double> &rprob, uniform_int_distribution<int> &rbase)
string vg::Sampler::alignment_seq(const Alignment &aln)

Public Members

xg::XG *vg::Sampler::xgidx
LRUCache<id_t, Node> vg::Sampler::node_cache
mt19937 vg::Sampler::rng
int64_t vg::Sampler::nonce
bool vg::Sampler::forward_only
struct
#include <bubbles.hpp>

Public Members

int vg::SB_Input::num_vertices
vector<pair<id_t, id_t>> vg::SB_Input::edges
struct
#include <genotyper.hpp>

Public Members

NodeTraversal vg::Genotyper::Site::start
NodeTraversal vg::Genotyper::Site::end
set<id_t> vg::Genotyper::Site::contents
class
#include <genotypekit.hpp>

Represents a strategy for finding (nested) Sites in a vg graph. Polymorphic base class/interface.

Subclassed by vg::CactusSiteFinder

Public Functions

virtual vg::SiteFinder::~SiteFinder()
virtual void vg::SiteFinder::for_each_site_parallel(const function<void(NestedSite)> &lambda) = 0

Run a function on all root-level NestedSites in parallel. Site trees are passed by value so they have a clear place to live during parallel operations.

struct
#include <genotypekit.hpp>

Represents a traversal of a (possibly nested) site, going from start to end and visiting nodes, edges, and contained nested sites. Basic component of a genotype.

Public Members

list<Visit> vg::SiteTraversal::visits
class
#include <ssw_aligner.hpp>

Public Functions

vg::SSWAligner::SSWAligner(uint8_t _match = 1, uint8_t _mismatch = 4, uint8_t _gap_open = 6, uint8_t _gap_extension = 1)
vg::SSWAligner::~SSWAligner(void)
Alignment vg::SSWAligner::align(const string &query, const string &ref)
Alignment vg::SSWAligner::ssw_to_vg(const StripedSmithWaterman::Alignment &ssw_aln, const string &query, const string &ref)
void vg::SSWAligner::PrintAlignment(const StripedSmithWaterman::Alignment &alignment)

Public Members

uint8_t vg::SSWAligner::match
uint8_t vg::SSWAligner::mismatch
uint8_t vg::SSWAligner::gap_open
uint8_t vg::SSWAligner::gap_extension
struct
#include <caller.hpp>

Public Functions

vg::StrandSupport::StrandSupport(int f = 0, int r = 0, int o = 0, double ll = -1e100)
bool vg::StrandSupport::operator<(const StrandSupport &other) const
bool vg::StrandSupport::operator>=(const StrandSupport &other) const
bool vg::StrandSupport::operator==(const StrandSupport &other) const
StrandSupport vg::StrandSupport::operator-(const StrandSupport &other) const
StrandSupport &vg::StrandSupport::operator+=(const StrandSupport &other)
int vg::StrandSupport::depth()
int vg::StrandSupport::total()

Public Members

int vg::StrandSupport::fs
int vg::StrandSupport::rs
int vg::StrandSupport::os
double vg::StrandSupport::likelihood
template <typename K, typename V>
class
#include <hash_map.hpp>

Inherits from google::dense_hash_map< K, V >

Public Functions

vg::string_hash_map::string_hash_map()
class
#include <subcommand.hpp>

Represents a subcommand with a name, a description, and some functions. Registers itself on construction in a static registry, and provides static functions for enumerating through that registry.

Public Functions

vg::subcommand::Subcommand::Subcommand(std::string name, std::string description, std::function<int(int, char **)> main_function)

Make and register a subcommand with the given name and description, which calls the given main function when invoked.

const std::string &vg::subcommand::Subcommand::get_name() const

Get the name of a subcommand.

const std::string &vg::subcommand::Subcommand::get_description() const

Get the description of a subcommand.

const int vg::subcommand::Subcommand::operator()(int argc, char **argv) const

Run the main function of a subcommand. Return the return code.

Public Static Functions

const Subcommand *vg::subcommand::Subcommand::get(int argc, char **argv)

Get the appropriate subcommand to handle the given arguments, or nullptr if no matching subcommand is found.

void vg::subcommand::Subcommand::for_each(const std::function<void(const Subcommand&)> &lambda)

Call the given lambda with each known subcommand, in order.

Private Functions

const std::function<int(int, char **)> &vg::subcommand::Subcommand::get_main() const

Get the main function of a subcommand.

Private Members

std::string vg::subcommand::Subcommand::name
std::string vg::subcommand::Subcommand::description
std::function<int(int, char **)> vg::subcommand::Subcommand::main_function

Private Static Functions

std::map<std::string, Subcommand *> &vg::subcommand::Subcommand::get_registry()

Since we can’t rely on a static member field being constructed before any static code that creates actual subcommands gets run, we rely on keeping the registry in a static variable inside a static method, so it gets constructed on first use. Note that at shutdown some of the pointers in the registry may point to already-destructed static objects.

struct
#include <deconstructor.hpp>

Public Members

map<int, vector<id_t>> vg::SuperBubble::level_to_nodes
id_t vg::SuperBubble::start_node
id_t vg::SuperBubble::end_node
bool vg::SuperBubble::isNested
struct

Aggregates information about the reads supporting an allele.

Public Members

double vg::Support::quality
int32 vg::Support::forward
int32 vg::Support::reverse
int32 vg::Support::left
int32 vg::Support::right
struct

Translations map from one graph to another. A collection of these provides a covering mapping between a from and to graph. If each “from” path through the base graph corresponds to a “to” path in an updated graph, then we can use these translations to project positions, mappings, and paths in the new graph into the old one using the Translator interface.

Public Members

Path vg::Translation::from
Path vg::Translation::to
class
#include <translator.hpp>

Class to map paths into a base graph found via a set of Translations

Public Functions

vg::Translator::Translator(void)
vg::Translator::Translator(istream &in)
vg::Translator::Translator(const vector<Translation> &trans)
void vg::Translator::load(const vector<Translation> &trans)
void vg::Translator::build_position_table(void)
Translation vg::Translator::get_translation(const Position &position)
Position vg::Translator::translate(const Position &position)
Position vg::Translator::translate(const Position &position, const Translation &translation)
Mapping vg::Translator::translate(const Mapping &mapping)
Path vg::Translator::translate(const Path &path)
Alignment vg::Translator::translate(const Alignment &aln)
Locus vg::Translator::translate(const Locus &locus)
Translation vg::Translator::overlay(const Translation &trans)

Public Members

vector<Translation> vg::Translator::translations
map<pos_t, Translation *> vg::Translator::pos_to_trans
class
#include <genotypekit.hpp>

Represents a strategy for finding traversals of (nested) sites. Polymorphic base class/interface.

Subclassed by vg::TrivialTraversalFinder

Public Functions

virtual vg::TraversalFinder::~TraversalFinder()
virtual vector<SiteTraversal> vg::TraversalFinder::find_traversals(const NestedSite &site)
= 0
class
#include <genotypekit.hpp>

Represents a strategy for calculating Supports for SiteTraversals. Polymorphic base class/interface.

Public Functions

virtual vg::TraversalSupportCalculator::~TraversalSupportCalculator()
virtual vector<Support> vg::TraversalSupportCalculator::calculate_supports(const NestedSite &site, const vector<SiteTraversal> &traversals, const vector<Alignment *> &reads, const vector<vector<bool>> &consistencies) const
= 0

Return Supports for all the SiteTraversals, given the reads and their consistency flags.

template <typename T>
struct
#include <utility.hpp>

Public Types

typedef

Public Functions

vg::Tree::Tree(Node *r = 0)
vg::Tree::~Tree()
void vg::Tree::for_each_preorder(function<void(Node *)> lambda)
void vg::Tree::for_each_postorder(function<void(Node *)> lambda)

Public Members

Node *vg::Tree::root
template <typename T>
struct
#include <utility.hpp>

Public Functions

vg::TreeNode::TreeNode()
vg::TreeNode::~TreeNode()
void vg::TreeNode::for_each_preorder(function<void(TreeNode<T> *)> lambda)
void vg::TreeNode::for_each_postorder(function<void(TreeNode<T> *)> lambda)

Public Members

T vg::TreeNode::v
vector<TreeNode<T> *> vg::TreeNode::children
TreeNode<T> *vg::TreeNode::parent
class
#include <genotypekit.hpp>

This traversal finder finds one or more traversals through leaf sites with no children. It uses a depth-first search. It doesn’t work on non-leaf sites, and is not guaranteed to find all traversals.

Inherits from vg::TraversalFinder

Public Functions

vg::TrivialTraversalFinder::TrivialTraversalFinder(VG &graph)
virtual vg::TrivialTraversalFinder::~TrivialTraversalFinder()
vector<SiteTraversal> vg::TrivialTraversalFinder::find_traversals(const NestedSite &site)

Find at least one traversal of the site by depth first search, if any exist. Only works on sites with no children.

Private Members

VG &vg::TrivialTraversalFinder::graph
class
#include <constructor.hpp>

Provides a one-variant look-ahead buffer on a vcflib::VariantFile. Lets construction functions peek and see if they want the next variant, or lets them ignore it for the next construction function for a different contig to handle. Ought not to be copied.

Handles conversion from 1-based vcflib coordinates to 0-based vg coordinates.

Public Functions

vcflib::Variant *vg::VcfBuffer::get()

Return a pointer to the buffered variant, or null if no variant is buffered. Pointer is invalidated when the buffer is handled. The variant will have a 0-based start coordinate.

void vg::VcfBuffer::handle_buffer()

To be called when the buffer is filled. Marks the buffered variant as handled, discarding it, and allowing another to be read.

void vg::VcfBuffer::fill_buffer()

Can be called when the buffer is filled or empty. If there is no variant in the buffer, tries to load a variant into the buffer, if one can be obtained from the file.

bool vg::VcfBuffer::has_tabix()

This returns true if we have a tabix index, and false otherwise. If this is false, set_region may be called, but will do nothing and return false.

bool vg::VcfBuffer::set_region(const string &contig, int64_t start = -1, int64_t end = -1)

This tries to set the region on the underlying vcflib VariantCallFile to the given contig and region, if specified. Coordinates coming in should be 0-based, and will be converted to 1-based internally.

Returns true if the region was successfully set, and false otherwise (for example, if there is no tabix index, or if the given region is not part of this VCF). Note that if there is a tabix index, and set_region returns false, the position in the VCF file is undefined until the next successful set_region call.

If either of start and end are specified, then both of start and end must be specified.

vg::VcfBuffer::VcfBuffer(vcflib::VariantCallFile *file = nullptr)

Make a new VcfBuffer buffering the file at the given pointer (which must outlive the buffer, but which may be null).

Protected Attributes

bool vg::VcfBuffer::has_buffer
bool vg::VcfBuffer::safe_to_get
vcflib::Variant vg::VcfBuffer::buffer
vcflib::VariantCallFile *const vg::VcfBuffer::file

Private Functions

vg::VcfBuffer::VcfBuffer(const VcfBuffer &other)
VcfBuffer &vg::VcfBuffer::operator=(const VcfBuffer &other)
class
#include <genotypekit.hpp>

Represents a strategy for converting Locus objects to VCF records. Polymorphic base class/interface.

Public Functions

virtual vg::VcfRecordConverter::~VcfRecordConverter()
virtual vcflib::Variant vg::VcfRecordConverter::convert(const Locus &locus)
= 0
class
#include <genotypekit.hpp>

Represents a filter that passes or rejects VCF records according to some criteria. Polymorphic base class/interface.

Public Functions

virtual vg::VcfRecordFilter::~VcfRecordFilter()
virtual bool vg::VcfRecordFilter::accept_record(const vcflib::Variant &variant)
= 0

Returns true if we should keep the given VCF record, and false otherwise.

class
#include <vectorizer.hpp>

Public Functions

Vectorizer::Vectorizer(xg::XG *x)
Vectorizer::~Vectorizer()
void Vectorizer::add_bv(bit_vector v)
void Vectorizer::add_name(string n)
void Vectorizer::emit(ostream &out, bool r_format, bool annotate)
bit_vector Vectorizer::alignment_to_onehot(Alignment a)
vector<int> Vectorizer::alignment_to_a_hot(Alignment a)
vector<double> Vectorizer::alignment_to_custom_score(Alignment a, std::function<double(Alignment)> lambda)
vector<double> Vectorizer::alignment_to_identity_hot(Alignment a)
string Vectorizer::output_wabbit_map()
template <typename T>
string Vectorizer::format(T v)
template <typename T>
string Vectorizer::wabbitize(string name, T v)

Private Members

xg::XG *Vectorizer::my_xg
vector<bit_vector> Vectorizer::my_vectors
vector<string> Vectorizer::my_names
bool Vectorizer::output_tabbed
bool Vectorizer::output_names
unordered_map<string, int> Vectorizer::wabbit_map
class
#include <vg.hpp>

Represents a variation graph. Graphs consist of nodes, connected by edges. Graphs are bidirected and may be cyclic. Nodes carry forward-oriented sequences. Edges are directed, with a “from” and “to” node, and are generally used to connect the end of the “from” node to the start of the “to” node. However, edges can connect to either the start or end of either node.

Inherits from vg::Progressive

Public Functions

void vg::VG::set_edge(Edge *edge)

Set the edge indexes through this function. Picks up the sides being connected by the edge automatically, and silently drops the edge if they are already connected.

void vg::VG::print_edges(void)
vector<pair<id_t, bool>> &vg::VG::edges_start(Node *node)

Get nodes and backward flags following edges that attach to this node’s start.

vector<pair<id_t, bool>> &vg::VG::edges_start(id_t id)

Get nodes and backward flags following edges that attach to this node’s start.

vector<pair<id_t, bool>> &vg::VG::edges_end(Node *node)

Get nodes and backward flags following edges that attach to this node’s end.

vector<pair<id_t, bool>> &vg::VG::edges_end(id_t id)

Get nodes and backward flags following edges that attach to this node’s end.

size_t vg::VG::size(void)

Number of nodes.

size_t vg::VG::length(void)

Total sequence length.

vg::VG::VG(void)

Default constructor.

vg::VG::VG(istream &in, bool showp = false)

Construct from protobufs.

vg::VG::VG(function<bool(Graph&)> &get_next_graph, bool showp = false)

Construct from an arbitrary source of Graph protobuf messages (which populates the given Graph and returns a flag for whether it’s valid).

vg::VG::VG(set<Node *> &nodes, set<Edge *> &edges)

Construct from sets of nodes and edges. For example, from a subgraph of another graph.

map<id_t, vcflib::Variant> vg::VG::get_node_id_to_variant(vcflib::VariantCallFile vfile)

Takes in a VCF file and returns a map [node] = vcflib::variant. Unfortunately this is specific to a given graph and VCF.

It will need to throw warnings if the node or variant is not in the graph.

This is useful for VCF masking:

if map.find(node) then mask variant

It’s also useful for calling known variants

for m in alignment.mappings:
   node = m.Pos.nodeID
   if node in node_to_vcf:
       return (alignment supports variant)

It would be nice if this also supported edges (e.g. for inversions/transversions/breakpoints?).

void vg::VG::dice_nodes(int max_node_size)

Chop up the nodes.

void vg::VG::unchop(void)

Does the reverse of chopping: combines nodes by removing edges where doing so has no effect on the graph labels.

set<list<NodeTraversal>> vg::VG::simple_components(int min_size = 1)

Get the set of components that could be merged into single nodes without changing the path space of the graph. Emits oriented traversals of nodes, in the order and orientation in which they are to be merged.

set<list<NodeTraversal>> vg::VG::simple_multinode_components(void)

Get the simple components of multiple nodes.

set<set<id_t>> vg::VG::strongly_connected_components(void)

Get the strongly connected components of the graph.

set<set<id_t>> vg::VG::multinode_strongly_connected_components(void)

Get only multi-node strongly connected components.

bool vg::VG::is_acyclic(void)

Returns true if the graph does not contain cycles.

void vg::VG::keep_multinode_strongly_connected_components(void)

Remove all elements which are not in a strongly connected component.

bool vg::VG::is_self_looping(Node *node)

Does the specified node have any self-loops?

set<list<NodeTraversal>> vg::VG::elementary_cycles(void)

Get simple cycles following Johnson’s elementary cycles algorithm.

Node *vg::VG::concat_nodes(const list<NodeTraversal> &nodes)

Concatenates the nodes into a new node with the same external linkage as the provided component. After calling this, paths will be invalid until Paths::compact_ranks() is called.

Node *vg::VG::merge_nodes(const list<Node *> &nodes)

Merge the nodes into a single node, preserving external linkages. Use the orientation of the first node as the basis.

void vg::VG::normalize(int max_iter = 1)

Use unchop and sibling merging to simplify the graph into a normalized form.

void vg::VG::bluntify(void)

Remove redundant overlaps.

VG vg::VG::dagify(uint32_t expand_scc_steps, map<id_t, pair<id_t, bool>> &node_translation, size_t target_min_walk_length = 0, size_t component_length_max = 0)

Turn the graph into a dag by copying strongly connected components expand_scc_steps times and translating the edges in the component to flow through the copies in one direction.

VG vg::VG::backtracking_unroll(uint32_t max_length, uint32_t max_depth, map<id_t, pair<id_t, bool>> &node_translation)

Generate a new graph that unrolls the current one using backtracking. Caution: exponential in branching.

VG vg::VG::unfold(uint32_t max_length, map<id_t, pair<id_t, bool>> &node_translation)

Represent the whole graph up to max_length across an inversion on the forward strand.

map<id_t, pair<id_t, bool>> vg::VG::overlay_node_translations(const map<id_t, pair<id_t, bool>> &over, const map<id_t, pair<id_t, bool>> &under)

Assume two node translations, the over is based on the under; merge them.

vector<Edge> vg::VG::break_cycles(void)

Use our topological sort to quickly break cycles in the graph, return the edges which are removed. Very non-optimal, but fast.

void vg::VG::remove_non_path(void)

Remove pieces of the graph which are not part of any path.

void vg::VG::flip_doubly_reversed_edges(void)

Convert edges that are both from_start and to_end to “regular” ones from end to start.

void vg::VG::from_gfa(istream &in, bool showp = false)

Build a graph from a GFA stream.

void vg::VG::from_turtle(string filename, string baseuri, bool showp = false)

Build a graph from a Turtle stream.

vg::VG::~VG(void)

Destructor.

vg::VG::VG(const VG &other)

Copy constructor.

vg::VG::VG(VG &&other)

Move constructor.

VG &vg::VG::operator=(const VG &other)

Copy assignment operator.

VG &vg::VG::operator=(VG &&other)

Move assignment operator.

void vg::VG::build_indexes(void)
void vg::VG::build_node_indexes(void)
void vg::VG::build_edge_indexes(void)
void vg::VG::index_paths(void)
void vg::VG::clear_node_indexes(void)
void vg::VG::clear_node_indexes_no_resize(void)
void vg::VG::clear_edge_indexes(void)
void vg::VG::clear_edge_indexes_no_resize(void)
void vg::VG::clear_indexes(void)
void vg::VG::clear_indexes_no_resize(void)
void vg::VG::resize_indexes(void)
void vg::VG::rebuild_indexes(void)
void vg::VG::rebuild_edge_indexes(void)
void vg::VG::merge(Graph &g)

Literally merge protobufs.

void vg::VG::merge(VG &g)

Literally merge protobufs.

void vg::VG::clear_paths(void)

Clear the paths object (which indexes the graph.paths) and the graph paths themselves.

void vg::VG::sync_paths(void)

Synchronize in-memory indexes and protobuf graph.

void vg::VG::merge_union(VG &g)

Merge protobufs after removing overlaps. Good when there aren’t many overlaps.

void vg::VG::remove_duplicated_in(VG &g)

Helper to merge_union.

void vg::VG::remove_duplicates(void)

Remove duplicated nodes and edges.

void vg::VG::prune_complex_paths(int length, int edge_max, Node *head_node, Node *tail_node)

Limit the local complexity of the graph, connecting pruned components to a head and tail node depending on the direction which we come into the node when the edge_max is passed.

void vg::VG::prune_short_subgraphs(size_t min_size)
void vg::VG::serialize_to_ostream(ostream &out, id_t chunk_size = 1000)

Write to a stream in chunked graphs.

void vg::VG::serialize_to_file(const string &file_name, id_t chunk_size = 1000)
id_t vg::VG::max_node_id(void)

Get the maximum node ID in the graph.

id_t vg::VG::min_node_id(void)

Get the minimum node ID in the graph.

void vg::VG::compact_ids(void)

Squish the node IDs down into as small a space as possible. Fixes up paths itself.

void vg::VG::increment_node_ids(id_t increment)

Add the given value to all node IDs. Preserves the paths.

void vg::VG::decrement_node_ids(id_t decrement)

Subtract the given value from all the node IDs. Must not create a node with a zero or negative ID. Invalidates the paths.

void vg::VG::swap_node_id(id_t node_id, id_t new_id)

Change the ID of the node with the first id to the second, new ID not used by any node. Invalidates any paths containing the node, since they are not updated.

void vg::VG::swap_node_id(Node *node, id_t new_id)

Change the ID of the given node to the second, new ID not used by any node. Invalidates the paths. Invalidates any paths containing the node, since they are not updated.

void vg::VG::extend(VG &g, bool warn_on_duplicates = false)

Iteratively add when nodes and edges are novel. Good when there are very many overlaps. TODO: If you are using this with warn on duplicates on, and you know there shouldn’t be any duplicates, maybe you should use merge instead. This version sorts paths on rank after adding in the path mappings from the other graph.

void vg::VG::extend(Graph &graph, bool warn_on_duplicates = false)

This version does not sort path mappings by rank. In order to preserve paths, call Paths::sort_by_mapping_rank() and Paths::rebuild_mapping_aux() after you are done adding in graphs to this graph.

void vg::VG::append(VG &g)

Add another graph into this graph, attaching tails to heads. Modify ids of the second graph to ensure we don’t have conflicts. Then attach tails of this graph to the heads of the other, and extend(g).

void vg::VG::combine(VG &g)

Add another graph into this graph. Don’t append or join the nodes in the graphs; just ensure that ids are unique, then apply extend.

void vg::VG::include(const Path &path)

Edit the graph to include the path.

vector<Translation> vg::VG::edit(const vector<Path> &paths)

Edit the graph to include all the sequence and edges added by the given paths. Can handle paths that visit nodes in any orientation.

void vg::VG::find_breakpoints(const Path &path, map<id_t, set<pos_t>> &breakpoints)

Find all the points at which a Path enters or leaves nodes in the graph. Adds them to the given map by node ID of sets of bases in the node that will need to become the starts of new nodes.

map<pos_t, Node *> vg::VG::ensure_breakpoints(const map<id_t, set<pos_t>> &breakpoints)

Take a map from node ID to a set of offsets at which new nodes should start (which may include 0 and 1-past-the-end, which should be ignored), break the specified nodes at those positions. Returns a map from old node ID to a map from old node start position to new node pointer in the graph. Note that the caller will have to clear and rebuild path rank data.

map<id_t, set<pos_t>> vg::VG::forwardize_breakpoints(const map<id_t, set<pos_t>> &breakpoints)

Flips the breakpoints onto the forward strand.

void vg::VG::add_nodes_and_edges(const Path &path, const map<pos_t, Node *> &node_translation, map<pair<pos_t, string>, Node *> &added_seqs, map<Node *, Path> &added_nodes, const map<id_t, size_t> &orig_node_sizes)

Given a path on nodes that may or may not exist, and a map from node ID in the path’s node ID space to a table of offset and actual node, add in all the new sequence and edges required by the path. The given path must not contain adjacent perfect match edits in the same mapping (the removal of which can be accomplished with the Path::simplify() function).

vector<Translation> vg::VG::make_translation(const map<pos_t, Node *> &node_translation, const map<Node *, Path> &added_nodes, const map<id_t, size_t> &orig_node_sizes)

Produce a graph Translation object from information about the editing process.

void vg::VG::add_node(const Node &node)

Add in the given node, by value.

void vg::VG::add_nodes(const vector<Node> &nodes)

Add in the given nodes, by value.

void vg::VG::add_edge(const Edge &edge)

Add in the given edge, by value.

void vg::VG::add_edges(const vector<Edge> &edges)

Add in the given edges, by value.

void vg::VG::add_edges(const vector<Edge *> &edges)

Add in the given edges, by value.

void vg::VG::add_nodes(const set<Node *> &nodes)

Add in the given nodes, by value.

void vg::VG::add_edges(const set<Edge *> &edges)

Add in the given edges, by value.

id_t vg::VG::node_count(void)

Count the number of nodes in the graph.

id_t vg::VG::edge_count(void)

Count the number of edges in the graph.

id_t vg::VG::total_length_of_nodes(void)

Get the total sequence length of nodes in the graph. TODO: redundant with length().

int vg::VG::node_rank(Node *node)

Get the rank of the node in the protobuf array that backs the graph.

int vg::VG::node_rank(id_t id)

Get the rank of the node in the protobuf array that backs the graph.

int vg::VG::start_degree(Node *node)

Get the number of edges attached to the start of a node.

int vg::VG::end_degree(Node *node)

Get the number of edges attached to the end of a node.

int vg::VG::left_degree(NodeTraversal node)

Get the number of edges attached to the left side of a NodeTraversal.

int vg::VG::right_degree(NodeTraversal node)

Get the number of edges attached to the right side of a NodeTraversal.

void vg::VG::edges_of_node(Node *node, vector<Edge *> &edges)

Get the edges of the specified node, and add them to the given vector. Guaranteed to add each edge only once per call.

vector<Edge *> vg::VG::edges_of(Node *node)

Get the edges of the specified node.

vector<Edge *> vg::VG::edges_from(Node *node)

Get the edges from the specified node.

vector<Edge *> vg::VG::edges_to(Node *node)

Get the edges to the specified node.

void vg::VG::edges_of_nodes(set<Node *> &nodes, set<Edge *> &edges)

Get the edges of the specified set of nodes, and add them to the given set of edge pointers.

set<NodeSide> vg::VG::sides_to(NodeSide side)

Get the sides on the other side of edges to this side of the node.

set<NodeSide> vg::VG::sides_from(NodeSide side)

Get the sides on the other side of edges from this side of the node.

set<NodeSide> vg::VG::sides_from(id_t id)

Get the sides from both sides of the node.

set<NodeSide> vg::VG::sides_to(id_t id)

Get the sides to both sides of the node.

set<NodeSide> vg::VG::sides_of(NodeSide side)

Union of sides_to and sides_from.

set<pair<NodeSide, bool>> vg::VG::sides_context(id_t node_id)

Get all sides connecting to this node.

bool vg::VG::same_context(id_t id1, id_t id2)

Use sides_from and sides_to to determine if both nodes have the same context.

bool vg::VG::is_ancestor_prev(id_t node_id, id_t candidate_id)

Determine if the node is a prev ancestor of this one.

bool vg::VG::is_ancestor_prev(id_t node_id, id_t candidate_id, set<id_t> &seen, size_t steps = 64)

Determine if the node is a prev ancestor of this one by trying to find it in a given number of steps.

bool vg::VG::is_ancestor_next(id_t node_id, id_t candidate_id)

Determine if the node is a next ancestor of this one.

bool vg::VG::is_ancestor_next(id_t node_id, id_t candidate_id, set<id_t> &seen, size_t steps = 64)

Determine if the node is a next ancestor of this one by trying to find it in a given number of steps.

id_t vg::VG::common_ancestor_prev(id_t id1, id_t id2, size_t steps = 64)

Try to find a common ancestor by walking back up to steps from the first node.

id_t vg::VG::common_ancestor_next(id_t id1, id_t id2, size_t steps = 64)

Try to find a common ancestor by walking forward up to steps from the first node.

set<NodeTraversal> vg::VG::siblings_to(const NodeTraversal &traversal)

To-siblings are nodes which also have edges to them from the same nodes as this one.

set<NodeTraversal> vg::VG::siblings_from(const NodeTraversal &traversal)

From-siblings are nodes which also have edges from them to the same nodes as this one.

set<NodeTraversal> vg::VG::full_siblings_to(const NodeTraversal &trav)

Full to-siblings are node traversals which share exactly the same upstream NodeSides.

set<NodeTraversal> vg::VG::full_siblings_from(const NodeTraversal &trav)

Full from-siblings are node traversals which share exactly the same downstream NodeSides.

set<Node *> vg::VG::siblings_of(Node *node)

Get general siblings of a node.

void vg::VG::simplify_siblings(void)

Remove easily-resolvable redundancy in the graph.

void vg::VG::simplify_to_siblings(const set<set<NodeTraversal>> &to_sibs)

Remove easily-resolvable redundancy in the graph for all provided to-sibling sets.

void vg::VG::simplify_from_siblings(const set<set<NodeTraversal>> &from_sibs)

Remove easily-resolvable redundancy in the graph for all provided from-sibling sets.

set<set<NodeTraversal>> vg::VG::transitive_sibling_sets(const set<set<NodeTraversal>> &sibs)

Remove intransitive sibling sets, such as where (A, B, C) = S1 but C ∊ S2.

set<set<NodeTraversal>> vg::VG::identically_oriented_sibling_sets(const set<set<NodeTraversal>> &sibs)

Remove sibling sets which don’t have identical orientation.

bool vg::VG::adjacent(const Position &pos1, const Position &pos2)

Determine if pos1 occurs directly before pos2.

Node *vg::VG::create_node(const string &seq, id_t id = 0)

Create a node. Use the VG class to generate ids.

Node *vg::VG::get_node(id_t id)

Find a particular node.

void vg::VG::nonoverlapping_node_context_without_paths(Node *node, VG &g)

Get the subgraph of a node and all the edges it is responsible for (where it has the minimal ID) and add it into the given VG.

void vg::VG::expand_context(VG &g, size_t steps, bool add_paths = true)
void vg::VG::destroy_node(Node *node)

Destroy the node at the given pointer. This pointer must point to a Node owned by the graph.

void vg::VG::destroy_node(id_t id)

Destroy the node with the given ID.

bool vg::VG::has_node(id_t id)

Determine if the graph has a node with the given ID.

bool vg::VG::has_node(Node *node)

Determine if the graph contains the given node.

bool vg::VG::has_node(const Node &node)

Determine if the graph contains the given node.

Node *vg::VG::find_node_by_name_or_add_new(string name)

Find a node with the given name, or create a new one if none is found.

void vg::VG::for_each_node(function<void(Node *)> lambda)

Run the given function on every node.

void vg::VG::for_each_node_parallel(function<void(Node *)> lambda)

Run the given function on every node in parallel.

void vg::VG::for_each_connected_node(Node *node, function<void(Node *)> lambda)

Go through all the nodes in the same connected component as the given node. Ignores relative orientation.

void vg::VG::dfs(const function<void(NodeTraversal)> &node_begin_fn, const function<void(NodeTraversal)> &node_end_fn, const function<bool(void)> &break_fn, const function<void(Edge *)> &edge_fn, const function<void(Edge *)> &tree_fn, const function<void(Edge *)> &edge_curr_fn, const function<void(Edge *)> &edge_cross_fn, const vector<NodeTraversal> *sources, const set<NodeTraversal> *sinks)

Parameters
  • node_begin_fn: Called when node orientation is first encountered.
  • node_end_fn: Called when node orientation goes out of scope.
  • break_fn: Called to check if we should stop the DFS.
  • edge_fn: Called when an edge is encountered.
  • tree_fn: Called when an edge forms part of the DFS spanning tree.
  • edge_curr_fn: Called when we meet an edge in the current tree component.
  • edge_cross_fn: Called when we meet an edge in an already-traversed tree component.
  • sources: Start only at these node traversals.
  • sinks: When hitting a sink, don’t keep walking.

Do a DFS search of the bidirected graph. A bidirected DFS starts at some root node, and traverses first all the nodes found reading out the right of that node in their appropriate relative orientations (including the root), and then all the nodes found reading left out of that node in their appropriate orientations (including the root). If any unvisited nodes are left in other connected components, the process will repeat from one such node, until all nodes have been visited in each orientation.

void vg::VG::dfs(const function<void(NodeTraversal)> &node_begin_fn, const function<void(NodeTraversal)> &node_end_fn, const vector<NodeTraversal> *sources = NULL, const set<NodeTraversal> *sinks = NULL)

Specialization of dfs for only handling nodes.

void vg::VG::dfs(const function<void(NodeTraversal)> &node_begin_fn, const function<void(NodeTraversal)> &node_end_fn, const function<bool(void)> &break_fn)

Specialization of dfs for only handling nodes + break function.

bool vg::VG::empty(void)

Is the graph empty?

const string vg::VG::hash(void)

Generate a digest of the serialized graph.

void vg::VG::remove_null_nodes(void)

Remove nodes with no sequence. These are created in some cases during the process of graph construction.

void vg::VG::remove_node_forwarding_edges(Node *node)

Remove a node but connect all of its predecessor and successor nodes with new edges.

void vg::VG::remove_null_nodes_forwarding_edges(void)

Remove null nodes but connect predecessors and successors, preserving structure.

void vg::VG::remove_orphan_edges(void)

Remove edges for which one of the nodes is not present.

void vg::VG::remove_inverting_edges(void)

Remove edges representing an inversion and edges on the reverse complement.

bool vg::VG::has_inverting_edges(void)

Determine if the graph has inversions.

void vg::VG::keep_paths(set<string> &path_names, set<string> &kept_names)

Keep paths in the given set of path names. Populates kept_names with the names of the paths it actually found to keep. The paths specified may not overlap. Removes all nodes and edges not used by one of the specified paths.

void vg::VG::keep_path(string &path_name)
int vg::VG::path_edge_count(list<NodeTraversal> &path, int32_t offset, int path_length)

Path stats. Starting from offset in the first node, how many edges do we cross? path must be nonempty and longer than the given length. offset is interpreted as relative to the first node in its on-path orientation, and is inclusive.

int vg::VG::path_end_node_offset(list<NodeTraversal> &path, int32_t offset, int path_length)

Determine the offset in its last node at which the path starting at this offset in its first node ends. path must be nonempty and longer than the given length. offset is interpreted as relative to the first node in its on-path orientation, and is inclusive. Returned offset is remaining unused length in the last node touched.

const vector<Alignment> vg::VG::paths_as_alignments(void)

Convert the stored paths in this graph to alignments.

const string vg::VG::path_sequence(const Path &path)

Return sequence string of path.

double vg::VG::path_identity(const Path &path1, const Path &path2)

Return percent identity between two paths (# matches / (#matches + #mismatches)). Note: uses ssw aligner, so will only work on small paths.

string vg::VG::trav_sequence(const NodeTraversal &trav)

Get the sequence of a NodeTraversal.

SB_Input vg::VG::vg_to_sb_input()

Convert a VG graph to superbubble algorithm input format.

vector<pair<id_t, id_t>> vg::VG::get_superbubbles(SB_Input sbi)

Find the superbubbles in the given input graph.

vector<pair<id_t, id_t>> vg::VG::get_superbubbles()

Find the superbubbles in this graph.

id_t vg::VG::get_node_at_nucleotide(string pathname, int nuc)

Takes in a pathname and the nucleotide position (like from a vcf) and returns the node id which contains that position.

Edge *vg::VG::create_edge(Node *from, Node *to, bool from_start = false, bool to_end = false)

Create an edge. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.

Edge *vg::VG::create_edge(id_t from, id_t to, bool from_start = false, bool to_end = false)

Create an edge. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.

Edge *vg::VG::create_edge(NodeTraversal left, NodeTraversal right)

Make a left-to-right edge from the left NodeTraversal to the right one, respecting orientations. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.

Edge *vg::VG::create_edge(NodeSide side1, NodeSide side2)

Make an edge connecting the given sides of nodes. If the given edge cannot be created, returns null. If the given edge already exists, returns the existing edge.

Edge *vg::VG::get_edge(const NodeSide &side1, const NodeSide &side2)

Get a pointer to the specified edge. This can take sides in any order.

Edge *vg::VG::get_edge(const pair<NodeSide, NodeSide> &sides)

Get a pointer to the specified edge. This can take sides in any order.

Edge *vg::VG::get_edge(const NodeTraversal &left, const NodeTraversal &right)

Get the edge connecting the given oriented nodes in the given order.

void vg::VG::destroy_edge(Edge *edge)

Destroy the edge at the given pointer. This pointer must point to an edge owned by the graph.

void vg::VG::destroy_edge(const NodeSide &side1, const NodeSide &side2)

Destroy the edge between the given sides of nodes. These can be in either order.

void vg::VG::destroy_edge(const pair<NodeSide, NodeSide> &sides)

Destroy the edge between the given sides of nodes. This can take sides in any order.

void vg::VG::unindex_edge_by_node_sides(const NodeSide &side1, const NodeSide &side2)

Remove an edge from the node side indexes, so it doesn’t show up when you ask for the edges connected to the side of a node. Makes the edge untraversable until the indexes are rebuilt.

void vg::VG::unindex_edge_by_node_sides(Edge *edge)

Remove an edge from the node side indexes, so it doesn’t show up when you ask for the edges connected to the side of a node. Makes the edge untraversable until the indexes are rebuilt.

void vg::VG::index_edge_by_node_sides(Edge *edge)

Add an edge to the node side indexes. Doesn’t touch the index of edges by node pairs or the graph; those must be updated separately.

bool vg::VG::has_edge(const NodeSide &side1, const NodeSide &side2)

Determine if the graph has an edge between the given node sides, which can be in either order.

bool vg::VG::has_edge(const pair<NodeSide, NodeSide> &sides)

Determine if the graph has an edge. This can take sides in any order.

bool vg::VG::has_edge(Edge *edge)

Determine if the graph has an edge. This can take sides in any order.

bool vg::VG::has_edge(const Edge &edge)

Determine if the graph has an edge. This can take sides in any order.

bool vg::VG::has_inverting_edge(Node *n)

Determine if the graph has an inverting edge on the given node.

bool vg::VG::has_inverting_edge_from(Node *n)

Determine if the graph has an inverting edge from the given node.

bool vg::VG::has_inverting_edge_to(Node *n)

Determine if the graph has an inverting edge to the given node.

void vg::VG::for_each_edge(function<void(Edge *)> lambda)

Run the given function for each edge.

void vg::VG::for_each_edge_parallel(function<void(Edge *)> lambda)

Run the given function for each edge, in parallel.

void vg::VG::circularize(id_t head, id_t tail)

Circularize a subgraph / path using the head / tail nodes.

void vg::VG::circularize(vector<string> pathnames)
void vg::VG::connect_node_to_nodes(NodeTraversal node, vector<NodeTraversal> &nodes)

Connect node -> nodes. Connects from the right side of the first to the left side of the second.

void vg::VG::connect_node_to_nodes(Node *node, vector<Node *> &nodes, bool from_start = false)

Connect node -> nodes. You can optionally use the start of the first node instead of the end.

void vg::VG::connect_nodes_to_node(vector<NodeTraversal> &nodes, NodeTraversal node)

Connect nodes -> node. Connects from the right side of the first to the left side of the second.

void vg::VG::connect_nodes_to_node(vector<Node *> &nodes, Node *node, bool to_end = false)

Connect nodes -> node.

void vg::VG::divide_node(Node *node, int pos, Node *&left, Node *&right)

Divide a node at a given internal position. Inserts the new nodes in the correct paths, but can’t update the ranks, so they need to be cleared and re-calculated by the caller.

void vg::VG::divide_node(Node *node, vector<int> positions, vector<Node *> &parts)

Divide a node at a given internal position. This version works on a collection of internal positions, in linear time.

void vg::VG::divide_path(map<long, id_t> &path, long pos, Node *&left, Node *&right)

Divide a path at a position. Also invalidates stored rank information.

void vg::VG::to_dot(ostream &out, vector<Alignment> alignments = {}, vector<Locus> loci = {}, bool show_paths = false, bool walk_paths = false, bool annotate_paths = false, bool show_mappings = false, bool simple_mode = false, bool invert_edge_ports = false, bool color_variants = false, bool superbubble_ranking = false, bool superbubble_labeling = false, bool ultrabubble_labeling = false, bool skip_missing_nodes = false, int random_seed = 0)

Convert the graph to Dot format.

void vg::VG::to_dot(ostream &out, vector<Alignment> alignments = {}, bool show_paths = false, bool walk_paths = false, bool annotate_paths = false, bool show_mappings = false, bool invert_edge_ports = false, int random_seed = 0, bool color_variants = false)

Convert the graph to Dot format.

void vg::VG::to_gfa(ostream &out)

Convert the graph to GFA format.

void vg::VG::to_turtle(ostream &out, const string &rdf_base_uri, bool precompress)

Convert the graph to Turtle format.

bool vg::VG::is_valid(bool check_nodes = true, bool check_edges = true, bool check_paths = true, bool check_orphans = true)

Determine if the graph is valid or not, according to the specified criteria.

void vg::VG::sort(void)

Topologically order nodes. Makes sure that Nodes appear in the Protobuf Graph object in their topological sort order.

void vg::VG::topological_sort(deque<NodeTraversal> &l)

Topological sort helper function, not really meant for external use.

Order and orient the nodes in the graph using a topological sort.

We use a bidirected adaptation of Kahn’s topological sort (1962), which can handle components with no heads or tails.

L ← Empty list that will contain the sorted and oriented elements
S ← Set of nodes which have been oriented, but which have not had their downstream edges examined
N ← Set of all nodes that have not yet been put into S

while N is nonempty do
    remove a node from N, orient it arbitrarily, and add it to S
        (In practice, we use “seeds”: the heads and any nodes we have seen that had too many incoming edges)
    while S is non-empty do
        remove an oriented node n from S
        add n to tail of L
        for each node m with an edge e from n to m do
            remove edge e from the graph
            if m has no other edges to that side then
                orient m such that the side the edge comes to is first
                remove m from N
                insert m into S
            otherwise
                put an oriented m on the list of arbitrary places to start when S is empty
                    (This helps start at natural entry points to cycles)
return L (a topologically sorted order and orientation)

void vg::VG::swap_nodes(Node *a, Node *b)

Swap the given nodes. TODO: what does that mean?

void vg::VG::orient_nodes_forward(set<id_t> &nodes_flipped)

Use a topological sort to order and orient the nodes, and then flip some nodes around so that they are oriented the way they are in the sort. Populates nodes_flipped with the ids of the nodes that have had their orientations changed. TODO: update the paths that touch nodes that flipped around

void vg::VG::force_path_match(void)

For each path, assign edits that describe a total match of the mapping to the node.

void vg::VG::fill_empty_path_mappings(void)

For each path, if a mapping has no edits then make it a perfect match against a node. This is the same as force_path_match, but only for empty mappings.

Alignment vg::VG::align(const string &sequence, Aligner &aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)

Align without base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.

Alignment vg::VG::align(const Alignment &alignment, Aligner &aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)

Align without base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.

Alignment vg::VG::align(const Alignment &alignment, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)

Align with default Aligner. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.

Alignment vg::VG::align(const string &sequence, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)

Align with default Aligner. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.

Alignment vg::VG::align_qual_adjusted(const Alignment &alignment, QualAdjAligner &qual_adj_aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)

Align with base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.

Alignment vg::VG::align_qual_adjusted(const string &sequence, QualAdjAligner &qual_adj_aligner, size_t max_query_graph_ratio = 0, bool print_score_matrices = false)

Align with base quality adjusted scores. Align to the graph. The graph must be acyclic and contain only end-to-start edges. Will modify the graph by re-ordering the nodes. May add nodes to the graph, but cleans them up afterward.

void vg::VG::for_each_kpath(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(list<NodeTraversal>::iterator, list<NodeTraversal>&)> lambda)

Calls a function on all node-crossing paths of up to length k across node boundaries. Considers each node in forward orientation to produce the kpaths around it.

void vg::VG::for_each_kpath_parallel(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(list<NodeTraversal>::iterator, list<NodeTraversal>&)> lambda)

Calls a function on all kpaths of the given node.

void vg::VG::for_each_kpath(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(size_t, Path&)> lambda)

Calls a function on all node-crossing paths of up to length k across node boundaries. Considers each node in forward orientation to produce the kpaths around it.

void vg::VG::for_each_kpath_parallel(int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(size_t, Path&)> lambda)

Calls a function on all kpaths of the given node.

void vg::VG::for_each_kpath_of_node(Node *node, int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(list<NodeTraversal>::iterator, list<NodeTraversal>&)> lambda)

Calls a function on all kpaths of the given node.

void vg::VG::for_each_kpath_of_node(Node *n, int k, bool path_only, int edge_max, function<void(NodeTraversal)> handle_prev_maxed, function<void(NodeTraversal)> handle_next_maxed, function<void(size_t, Path&)> lambda)

Calls a function on all kpaths of the given node.

void vg::VG::kpaths(set<list<NodeTraversal>> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)

Get kpaths. TODO: what is this for?

void vg::VG::kpaths(vector<Path> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)

Get kpaths. TODO: what is this for?

void vg::VG::kpaths_of_node(Node *node, set<list<NodeTraversal>> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)

Get kpaths on a particular node. TODO: what is this for?

void vg::VG::kpaths_of_node(Node *node, vector<Path> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)

Get kpaths on a particular node. TODO: what is this for?

void vg::VG::kpaths_of_node(id_t node_id, vector<Path> &paths, int length, bool path_only, int edge_max, function<void(NodeTraversal)> prev_maxed, function<void(NodeTraversal)> next_maxed)

Get kpaths on a particular node. TODO: what is this for?

void vg::VG::prev_kpaths_from_node(NodeTraversal node, int length, bool path_only, int edge_max, bool edge_bounding, list<NodeTraversal> postfix, set<list<NodeTraversal>> &walked_paths, const vector<string> &followed_paths, function<void(NodeTraversal)> &maxed_nodes)

Given an oriented start node, a length in bp, a maximum number of edges to cross, and a stack of nodes visited so far, fill in the set of paths with all the paths starting at the oriented start node and going left off its end no longer than the specified length, calling maxed_nodes on nodes which can’t be visited due to the edge-crossing limit. Produces paths ending with the specified node. TODO: postfix should not be (potentially) copied on every call.

void vg::VG::next_kpaths_from_node(NodeTraversal node, int length, bool path_only, int edge_max, bool edge_bounding, list<NodeTraversal> prefix, set<list<NodeTraversal>> &walked_paths, const vector<string> &followed_paths, function<void(NodeTraversal)> &maxed_nodes)

Do the same as prev_kpaths_from_node, except going right, producing a path starting with the specified node.

void vg::VG::paths_between(Node *from, Node *to, vector<Path> &paths)
void vg::VG::paths_between(id_t from, id_t to, vector<Path> &paths)
void vg::VG::likelihoods(vector<Alignment> &alignments, vector<Path> &paths, vector<long double> &likelihoods)
void vg::VG::nodes_prev(NodeTraversal n, vector<NodeTraversal> &nodes)

Get the nodes attached to the left side of the given NodeTraversal, in their proper orientations.

vector<NodeTraversal> vg::VG::nodes_prev(NodeTraversal n)

Get the nodes attached to the left side of the given NodeTraversal, in their proper orientations.

set<NodeTraversal> vg::VG::travs_to(NodeTraversal node)

Get traversals before this node on the same strand. Same as nodes_prev but using set.

void vg::VG::nodes_next(NodeTraversal n, vector<NodeTraversal> &nodes)

Get the nodes attached to the right side of the given NodeTraversal, in their proper orientations.

vector<NodeTraversal> vg::VG::nodes_next(NodeTraversal n)

Get the nodes attached to the right side of the given NodeTraversal, in their proper orientations.

set<NodeTraversal> vg::VG::travs_from(NodeTraversal node)

Get traversals after this node on the same strand. Same as nodes_next but using set.

set<NodeTraversal> vg::VG::travs_of(NodeTraversal node)

Get traversals either before or after this node on the same strand.

int vg::VG::node_count_prev(NodeTraversal n)

Count the nodes attached to the left side of the given NodeTraversal.

int vg::VG::node_count_next(NodeTraversal n)

Count the nodes attached to the right side of the given NodeTraversal.

Path vg::VG::create_path(const list<NodeTraversal> &nodes)

Create a path.

Path vg::VG::create_path(const vector<NodeTraversal> &nodes)

Create a path.

string vg::VG::path_string(const list<NodeTraversal> &nodes)

Get the string sequence for all the NodeTraversals on the given path.

string vg::VG::path_string(const Path &path)

Get the string sequence for traversing the given path. Assumes the path covers the entirety of any nodes visited. Handles backward nodes.

void vg::VG::expand_path(const list<NodeTraversal> &path, vector<NodeTraversal> &expanded)

Expand a path. TODO: what does that mean?

void vg::VG::node_starts_in_path(const list<NodeTraversal> &path, map<Node *, int> &node_start)

Fill in the node_start map with the first index along the path at which each node appears. Caller is responsible for dealing with orientations.

bool vg::VG::nodes_are_perfect_path_neighbors(NodeTraversal left, NodeTraversal right)

Return true if nodes share all paths and the mappings they share in these paths are adjacent, in the specified relative order and orientation.

bool vg::VG::mapping_is_total_match(const Mapping &m)

Return true if the mapping completely covers the node it maps to and is a perfect match.

map<string, vector<Mapping>> vg::VG::concat_mappings_for_node_pair(id_t id1, id_t id2)

Concatenate the mappings for a pair of nodes; handles multiple mappings per path.

map<string, vector<Mapping>> vg::VG::concat_mappings_for_nodes(const list<NodeTraversal> &nodes)

Concatenate mappings for a list of nodes that we want to concatenate. Returns, for each path name, a vector of merged mappings, once per path traversal of the run of nodes. Those merged mappings are in the orientation of the merged node (so mappings to nodes that are traversed in reverse will have their flags toggled). We assume that all mappings on the given nodes are full-length perfect matches, and that all the nodes are perfect path neighbors.

void vg::VG::expand_path(list<NodeTraversal> &path, vector<list<NodeTraversal>::iterator> &expanded)

Expand a path. TODO: what does that mean? These versions handle paths in which nodes can be traversed multiple times. Unfortunately since we’re throwing non-const iterators around, we can’t take the input path as const.

void vg::VG::node_starts_in_path(list<NodeTraversal> &path, map<NodeTraversal *, int> &node_start)

Find node starts in a path. TODO: what does that mean? To get the starts out of the map this produces, you need to dereference the iterator and then get the address of the NodeTraversal (stored in the list) that you are talking about.

void vg::VG::for_each_kmer_parallel(int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, int stride = 1, bool allow_dups = false, bool allow_negatives = false, )

Call a function for each kmer in the graph, in parallel.

void vg::VG::for_each_kmer(int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, int stride = 1, bool allow_dups = false, bool allow_negatives = false, )

Call a function for each kmer in the graph.

void vg::VG::for_each_kmer_of_node(Node *node, int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, int stride = 1, bool allow_dups = false, bool allow_negatives = false, )

Call a function for each kmer on a node.

void vg::VG::kmer_context(string &kmer, int kmer_size, bool path_only, int edge_max, bool forward_only, list<NodeTraversal> &path, list<NodeTraversal>::iterator start_node, int32_t start_offset, list<NodeTraversal>::iterator &end_node, int32_t &end_offset, set<tuple<char, id_t, bool, int32_t>> &prev_positions, set<tuple<char, id_t, bool, int32_t>> &next_positions)

For the given kmer of the given length starting at the given offset into the given Node along the given path, fill in end_node and end_offset with where the end of the kmer falls (counting from the right side of the NodeTraversal), prev_chars with the characters that precede it, next_chars with the characters that follow it, prev_ and next_positions with the ((node ID, orientation), offset) pairs of the places you can come from/go next (from the right end of the kmer). Refuses to follow more than edge_max edges. Offsets are in the path orientation. Meant for gcsa2.

void vg::VG::gcsa_handle_node_in_graph(Node *node, int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, Node *head_node, Node *tail_node, function<void(KmerPosition&)> lambda)

Do the GCSA2 kmers for a node. head_node and tail_node must both be non-null, but only one of those nodes actually needs to be in the graph. They will be examined directly to get their representative characters. They also don’t need to be actually owned by the graph; they can be copies.

void vg::VG::for_each_gcsa_kmer_position_parallel(int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, id_t &head_id, id_t &tail_id, function<void(KmerPosition&)> lambda)

Call a function for each GCSA2 kmer position in parallel. GCSA kmers are the kmers in the graph with each node existing in both its forward and reverse-complement orientation. Node IDs in the GCSA graph are 2 * original node ID, +1 if the GCSA node represents the reverse complement, and +0 if it does not. Non-reversing edges link the forward copy of the from node to the forward copy of the to node, and similarly for the reverse complement copies, while reversing edges link the forward copy of the from node to the reverse complement copy of the to node, and vice versa. This allows us to index both the forward and reverse strands of every node, and to deal with GCSA’s lack of support for reversing edges, with the same trick. Note that start_tail_id, if zero, will be replaced with the ID actually used for the start/end node before lambda is ever called.

void vg::VG::get_gcsa_kmers(int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, const function<void(vector<gcsa::KMer>&, bool)> &handle_kmers, id_t &head_id, id_t &tail_id, )

Get the GCSA2 kmers in the graph.

void vg::VG::write_gcsa_kmers(int kmer_size, bool path_only, int edge_max, int stride, bool forward_only, ostream &out, id_t &head_id, id_t &tail_id)

Write the GCSA2 kmer file for the graph to the given stream.

string vg::VG::write_gcsa_kmers_to_tmpfile(int kmer_size, bool paths_only, bool forward_only, id_t &head_id, id_t &tail_id, size_t doubling_steps = 2, size_t size_limit = 200, const string &base_file_name = ".vg-kmers-tmp-")

Write the GCSA2 kmers to a temp file with the given base. Return the name of the file.

void vg::VG::build_gcsa_lcp(gcsa::GCSA *&gcsa, gcsa::LCPArray *&lcp, int kmer_size, bool paths_only, bool forward_only, size_t doubling_steps = 2, size_t size_limit = 200, const string &base_file_name = ".vg-kmers-tmp-")

Construct the GCSA2 index for this graph.

void vg::VG::prune_complex(int path_length, int edge_max, Node *head_node, Node *tail_node)

Take all nodes that would introduce paths of > edge_max edge crossings, remove them, and link their neighbors to head_node or tail_node depending on which direction the path extension was stopped. For pruning graph prior to indexing with gcsa2.

void vg::VG::prune_complex_with_head_tail(int path_length, int edge_max)

Wrap the graph with heads and tails before doing the prune. Utility function for preparing for indexing.

Alignment vg::VG::random_read(size_t read_len, mt19937 &rng, id_t min_id, id_t max_id, bool either_strand)

Generate random reads. Note that even if either_strand is false, having backward nodes in the graph will result in some reads from the global reverse strand.

void vg::VG::disjoint_subgraphs(list<VG> &subgraphs)

Find subgraphs.

void vg::VG::head_nodes(vector<Node *> &nodes)

Get the head nodes (nodes with edges only to their right sides). These are required to be oriented forward.

vector<Node *> vg::VG::head_nodes(void)

Get the head nodes (nodes with edges only to their right sides). These are required to be oriented forward.

bool vg::VG::is_head_node(id_t id)

Determine if a node is a head node.

bool vg::VG::is_head_node(Node *node)

Determine if a node is a head node.

int32_t vg::VG::distance_to_head(NodeTraversal node, int32_t limit = 1000)

Get the distance from head of node to beginning of graph, or -1 if limit exceeded.

int32_t vg::VG::distance_to_head(NodeTraversal node, int32_t limit, int32_t dist, set<NodeTraversal> &seen)

Get the distance from head of node to beginning of graph, or -1 if limit exceeded.

vector<Node *> vg::VG::tail_nodes(void)

Get the tail nodes (nodes with edges only to their left sides). These are required to be oriented forward.

void vg::VG::tail_nodes(vector<Node *> &nodes)

Get the tail nodes (nodes with edges only to their left sides). These are required to be oriented forward.

bool vg::VG::is_tail_node(id_t id)

Determine if a node is a tail node.

bool vg::VG::is_tail_node(Node *node)

Determine if a node is a tail node.

int32_t vg::VG::distance_to_tail(NodeTraversal node, int32_t limit = 1000)

Get the distance from tail of node to end of graph, or -1 if limit exceeded.

int32_t vg::VG::distance_to_tail(NodeTraversal node, int32_t limit, int32_t dist, set<NodeTraversal> &seen)

Get the distance from tail of node to end of graph, or -1 if limit exceeded.

int32_t vg::VG::distance_to_tail(id_t id, int32_t limit = 1000)

Get the distance from tail of node to end of graph, or -1 if limit exceeded.

void vg::VG::collect_subgraph(Node *node, set<Node *> &subgraph)

Collect the subgraph of a Node. TODO: what does that mean?

Node *vg::VG::join_heads(void)

Join head nodes of graph to common null node, creating a new single head.

void vg::VG::join_heads(Node *node, bool from_start = false)

Join head nodes of graph to specified node. Optionally from the start/to the end of the new node.

void vg::VG::join_tails(Node *node, bool to_end = false)

Join tail nodes of graph to specified node. Optionally from the start/to the end of the new node.

void vg::VG::wrap_with_null_nodes(void)

Add singular head and tail null nodes to graph.

void vg::VG::add_start_end_markers(int length, char start_char, char end_char, Node *&start_node, Node *&end_node, id_t start_id = 0, id_t end_id = 0)

Add a start node and an end node, where all existing heads in the graph are connected to the start node, and all existing tails in the graph are connected to the end node. Any connected components in the graph which do not have either are connected to the start at an arbitrary point, and the end node from nodes going to that arbitrary point. If start_node or end_node is null, a new node will be created. Otherwise, the passed node will be used. Note that this visits every node, to make sure it is attached to all connected components. Note that if a graph has, say, heads but no tails, the start node will be attached but the end node will be free-floating.

Public Members

Graph vg::VG::graph

Protobuf-based representation.

Paths vg::VG::paths

Manages paths of the graph. Initialized by setting paths._paths = graph.paths.

string vg::VG::name

Name of the graph.

id_t vg::VG::current_id

Current id for Node to be added next.

hash_map<id_t, Node *> vg::VG::node_by_id

Nodes by id.

pair_hash_map<pair<NodeSide, NodeSide>, Edge *> vg::VG::edge_by_sides

Edges by sides of Nodes they connect. Since duplicate edges are not permitted, two edges cannot connect the same pair of node sides. Each edge is indexed here with the smaller NodeSide first. The actual node order is recorded in the Edge object.

hash_map<Node *, int> vg::VG::node_index

Nodes by position in nodes repeated field. This is critical to allow fast deletion of nodes.

hash_map<Edge *, int> vg::VG::edge_index
hash_map<id_t, vector<pair<id_t, bool>>> vg::VG::edges_on_start

Stores the destinations and backward flags for edges attached to the starts of nodes (whether that node is “from” or “to”).

hash_map<id_t, vector<pair<id_t, bool>>> vg::VG::edges_on_end

Stores the destinations and backward flags for edges attached to the ends of nodes (whether that node is “from” or “to”).

Private Functions

void vg::VG::_for_each_kmer(int kmer_size, bool path_only, int edge_max, function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> lambda, bool parallel, int stride, bool allow_dups, bool allow_negatives, Node *node = nullptr, )

Call the given function on each kmer. If parallel is specified, goes through nodes one per thread. If node is not null, looks only at kmers of that specific node.

Alignment vg::VG::align(const Alignment &alignment, Aligner *aligner, QualAdjAligner *qual_adj_aligner, size_t max_query_graph_ratio, bool print_score_matrices)

Private method to funnel other align functions into.

void vg::VG::init(void)

setup, ensures that gssw == NULL on startup

Private Members

vector<id_t> vg::VG::empty_ids

Placeholder for functions that sometimes need to be passed an empty vector.

vector<pair<id_t, bool>> vg::VG::empty_edge_ends

Placeholder for functions that sometimes need to be passed an empty vector.

class
#include <vg_set.hpp>

Public Functions

vg::VGset::VGset()
vg::VGset::VGset(vector<string> &files)
void vg::VGset::transform(std::function<void(VG *)> lambda)
void vg::VGset::for_each(std::function<void(VG *)> lambda)
int64_t vg::VGset::merge_id_space(void)
xg::XG vg::VGset::to_xg(bool store_threads = false)
xg::XG vg::VGset::to_xg(bool store_threads, const regex &paths_to_take, map<string, Path> &removed_paths)
void vg::VGset::store_in_index(Index &index)
void vg::VGset::store_paths_in_index(Index &index)
void vg::VGset::index_kmers(Index &index, int kmer_size, bool path_only, int edge_max, int stride = 1, bool allow_negatives = false)
void vg::VGset::for_each_kmer_parallel(const function<void(string&, list<NodeTraversal>::iterator, int, list<NodeTraversal>&, VG&)> &lambda, int kmer_size, bool path_only, int edge_max, int stride, bool allow_dups, bool allow_negatives = false, )
void vg::VGset::write_gcsa_out(ostream &out, int kmer_size, bool path_only, bool forward_only, int64_t head_id = 0, int64_t tail_id = 0)
void vg::VGset::write_gcsa_kmers_binary(ostream &out, int kmer_size, bool path_only, bool forward_only, int64_t head_id = 0, int64_t tail_id = 0)
void vg::VGset::get_gcsa_kmers(int kmer_size, bool path_only, bool forward_only, const function<void(vector<gcsa::KMer>&, bool)> &handle_kmers, int64_t head_id = 0, int64_t tail_id = 0, )
vector<string> vg::VGset::write_gcsa_kmers_binary(int kmer_size, bool path_only, bool forward_only, int64_t head_id = 0, int64_t tail_id = 0)

Public Members

vector<string> vg::VGset::filenames
bool vg::VGset::show_progress

Private Functions

void vg::VGset::for_each_gcsa_kmer_position_parallel(int kmer_size, bool path_only, bool forward_only, int64_t &head_id, int64_t &tail_id, function<void(KmerPosition&)> lambda)
struct
#include <genotypekit.hpp>

Public Functions

vg::SiteTraversal::Visit::Visit(Node *node, bool backward = false)

Make a Visit from a node and an orientation.

vg::SiteTraversal::Visit::Visit(NestedSite *child, bool backward = false)

Make a Visit from a child site and an orientation.

vg::SiteTraversal::Visit::Visit(const NodeTraversal &traversal)

Make a Visit from a NodeTraversal.

Public Members

Node *vg::SiteTraversal::Visit::node
NestedSite *vg::SiteTraversal::Visit::child
bool vg::SiteTraversal::Visit::backward
namespace
namespace
namespace

Typedefs

typedef

Functions

template <typename T>
std::string glenn2vcf::to_string_ss(T val)
static long double glenn2vcf::gammaln(long double x)
static long double glenn2vcf::factorial(int n)
long double glenn2vcf::poissonp(int observed, int expected)
Support glenn2vcf::operator+(const Support &one, const Support &other)

Add two Support values together, accounting for strand.

Support &glenn2vcf::operator+=(Support &one, const Support &other)

Add in a Support to another.

template <typename Scalar>
Support glenn2vcf::operator*(const Support &support, const Scalar &scale)

Scale a support by a factor.

template <typename Scalar>
Support glenn2vcf::operator*(const Scalar &scale, const Support &support)

Scale a support by a factor, the other way.

template <typename Scalar>
Support glenn2vcf::operator/(const Support &support, const Scalar &scale)

Divide a support by a factor.

std::ostream &glenn2vcf::operator<<(std::ostream &stream, const Support &support)

Allow printing a support.

double glenn2vcf::total(const Support &support)

Get the total read support in a support.

double glenn2vcf::strand_bias(const Support &support)

Get the strand bias of a support.

Support glenn2vcf::support_min(const Support &a, const Support &b)

Get the minimum support of a pair of supports, by taking the min in each orientation.

std::string glenn2vcf::char_to_string(const char &letter)

Make a letter into a full string because apparently that’s too fancy for the standard library.

void glenn2vcf::write_vcf_header(std::ostream &stream, std::string &sample_name, std::string &contig_name, size_t contig_size, int min_mad_for_filter)

Write a minimal VCF header for a single-sample file.

void glenn2vcf::create_ref_allele(vcflib::Variant &variant, const std::string &allele)

Create the reference allele for an empty vcflib Variant, since apparently there’s no method for that already. Must be called before any alt alleles are added.

int glenn2vcf::add_alt_allele(vcflib::Variant &variant, const std::string &allele)

Add a new alt allele to a vcflib Variant, since apparently there’s no method for that already.

If that allele already exists in the variant, does not add it again.

Returns the allele number (0, 1, 2, etc.) corresponding to the given allele string in the given variant.

bool glenn2vcf::can_write_alleles(vcflib::Variant &variant)

Return true if a variant may be output, or false if this variant is valid but the GATK might choke on it.

Mostly used to throw out variants with very long alleles, because GATK has an allele length limit. How alleles that really are 1 megabase deletions are to be specified to GATK is left as an exercise to the reader.

bool glenn2vcf::mapping_is_perfect_match(const vg::Mapping &mapping)

Return true if a mapping is a perfect match, and false if it isn’t.

size_t glenn2vcf::bp_length(const std::list<vg::NodeTraversal> &path)

Get the length of a path through nodes, in base pairs.

Support glenn2vcf::min_support_in_path(vg::VG &graph, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, const std::list<vg::NodeTraversal> &path)

Get the minimum support of all nodes and edges in the path.

std::set<std::pair<size_t, std::list<vg::NodeTraversal>>> glenn2vcf::bfs_left(vg::VG &graph, vg::NodeTraversal node, const ReferenceIndex &index, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, int64_t maxDepth = 10, bool stopIfVisited = false)

Do a breadth-first search left from the given node traversal, and return lengths and paths starting at the given node and ending on the indexed reference path. Refuses to visit nodes with no support.

vg::NodeTraversal glenn2vcf::flip(vg::NodeTraversal toFlip)

Flip a NodeTraversal around and return the flipped copy.

std::set<std::pair<size_t, std::list<vg::NodeTraversal>>> glenn2vcf::bfs_right(vg::VG &graph, vg::NodeTraversal node, const ReferenceIndex &index, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, int64_t maxDepth = 10, bool stopIfVisited = false)

Do a breadth-first search right from the given node traversal, and return lengths and paths starting at the given node and ending on the indexed reference path.

std::pair<Support, std::vector<vg::NodeTraversal>> glenn2vcf::find_bubble(vg::VG &graph, vg::Node *node, vg::Edge *edge, const ReferenceIndex &index, const std::map<vg::Node *, Support> &nodeReadSupport, const std::map<vg::Edge *, Support> &edgeReadSupport, int64_t maxDepth, size_t max_bubble_paths)

Given a vg graph, an edge or node in the graph, and an index for the reference path, look out from the edge or node in both directions to find a shortest bubble relative to the path, with a consistent orientation. The bubble may not visit the same node twice.

Exactly one of edge and node must be null, and one not null.

Takes a max depth for the searches producing the paths on each side.

Return the ordered and oriented nodes in the bubble, with the outer nodes being oriented forward along the named path, and with the first node coming before the last node in the reference. Also return the minimum support found on any edge or node in the bubble (including the reference node endpoints and their edges which aren’t stored in the path).

ReferenceIndex glenn2vcf::trace_reference_path(vg::VG &vg, std::string refPathName, bool verbose)

Trace out the reference path in the given graph named by the given name. Returns a structure with useful indexes of the reference.

std::string glenn2vcf::get_pileup_line(const std::map<int64_t, vg::NodePileup> &nodePileups, const std::set<std::pair<int64_t, size_t>> &refCrossreferences, const std::set<std::pair<int64_t, size_t>> &altCrossreferences)

Given a collection of pileups by original node ID, and a set of original node id:offset cross-references in both ref and alt categories, produce a VCF comment line giving the pileup for each of those positions on those nodes. Includes a trailing newline if nonempty.

TODO: VCF comments aren’t really a thing.

void glenn2vcf::parse_tsv(const std::string &tsvFile, vg::VG &vg, std::map<vg::Node *, Support> &nodeReadSupport, std::map<vg::Edge *, Support> &edgeReadSupport, std::map<vg::Node *, double> &nodeLikelihood, std::map<vg::Edge *, double> &edgeLikelihood, std::set<vg::Edge *> &deletionEdges, std::map<vg::Node *, std::pair<int64_t, size_t>> &nodeSources, std::set<vg::Node *> &knownNodes, std::set<vg::Edge *> &knownEdges, bool verbose)

Parse tsv into an internal format, where we track status and copy number for nodes and edges.

int glenn2vcf::call2vcf(vg::VG &vg, const std::string &glennFile, std::string refPathName, std::string contigName, std::string sampleName, int64_t variantOffset, int64_t maxDepth, int64_t lengthOverride, std::string pileupFilename, double minFractionForCall, double maxHetBias, double maxRefHetBias, double indelBiasMultiple, size_t minTotalSupportForCall, size_t refBinSize, size_t expCoverage, bool suppress_overlaps, bool useAverageSupport, bool multiallelic_support, size_t max_ref_length, size_t max_bubble_paths, size_t min_mad_for_filter, bool verbose)
int glenn2vcf::call2vcf(vg::VG &vg, const string &glennfile, string refPathName, string contigName, string sampleName, int64_t variantOffset, int64_t maxDepth, int64_t lengthOverride, string pileupFilename, double minFractionForCall, double maxHetBias, double maxRefHetBias, double indelBiasMultiple, size_t minTotalSupportForCall, size_t refBinSize, size_t expCoverage, bool suppress_overlaps, bool useAverageSupport, bool multiallelic_support, size_t max_ref_length, size_t max_bubble_paths, size_t min_mad_for_filter, bool verbose)

Variables

const int glenn2vcf::MAX_ALLELE_LENGTH
const double glenn2vcf::LOG_ZERO
std::map<vg::Node *, Support> glenn2vcf::nodeReadSupport
std::map<vg::Edge *, Support> glenn2vcf::edgeReadSupport
std::set<vg::Edge *> glenn2vcf::deletionEdges
std::map<vg::Node *, std::pair<int64_t, size_t>> glenn2vcf::nodeSources
std::set<vg::Node *> glenn2vcf::knownNodes
std::set<vg::Edge *> glenn2vcf::knownEdges
namespace
namespace
namespace
namespace
namespace

Functions

template <typename T>
bool stream::write(std::ostream &out, uint64_t count, const std::function<T(uint64_t)> &lambda)
template <typename T>
bool stream::write_buffered(std::ostream &out, std::vector<T> &buffer, uint64_t buffer_limit)
template <typename T>
void stream::for_each(std::istream &in, const std::function<void(T&)> &lambda, const std::function<void(uint64_t)> &handle_count)
template <typename T>
void stream::for_each(std::istream &in, const std::function<void(T&)> &lambda)
template <typename T>
void stream::for_each_parallel(std::istream &in, const std::function<void(T&)> &lambda, const std::function<void(uint64_t)> &handle_count)
template <typename T>
void stream::for_each_parallel(std::istream &in, const std::function<void(T&)> &lambda)
namespace
namespace
namespace

Typedefs

typedef
typedef
typedef
using vg::real_t = long double
typedef

Represents a Node ID. ID type is a 64-bit signed int.

typedef

Represents an offset along the sequence of a Node. Offsets are size_t.

typedef

Represents an oriented position on a Node. Position type: id, direction, offset.

Enums

enum type vg::MappingQualityMethod

Values:

Functions

int vg::hts_for_each(string &filename, function<void(Alignment&)> lambda)
int vg::hts_for_each_parallel(string &filename, function<void(Alignment&)> lambda)
bam_hdr_t *vg::hts_file_header(string &filename, string &header)
bam_hdr_t *vg::hts_string_header(string &header, map<string, int64_t> &path_length, map<string, string> &rg_sample)
bool vg::get_next_alignment_from_fastq(gzFile fp, char *buffer, size_t len, Alignment &alignment)
bool vg::get_next_interleaved_alignment_pair_from_fastq(gzFile fp, char *buffer, size_t len, Alignment &mate1, Alignment &mate2)
bool vg::get_next_alignment_pair_from_fastqs(gzFile fp1, gzFile fp2, char *buffer, size_t len, Alignment &mate1, Alignment &mate2)
size_t vg::fastq_unpaired_for_each_parallel(string &filename, function<void(Alignment&)> lambda)
size_t vg::fastq_paired_interleaved_for_each_parallel(string &filename, function<void(Alignment&, Alignment&)> lambda)
size_t vg::fastq_paired_two_files_for_each_parallel(string &file1, string &file2, function<void(Alignment&, Alignment&)> lambda)
size_t vg::fastq_unpaired_for_each(string &filename, function<void(Alignment&)> lambda)
size_t vg::fastq_paired_interleaved_for_each(string &filename, function<void(Alignment&, Alignment&)> lambda)
size_t vg::fastq_paired_two_files_for_each(string &file1, string &file2, function<void(Alignment&, Alignment&)> lambda)
void vg::gam_paired_interleaved_for_each_parallel(ifstream &in, function<void(Alignment&, Alignment&)> lambda)
void vg::parse_rg_sample_map(char *hts_header, map<string, string> &rg_sample)
void vg::write_alignments(std::ostream &out, vector<Alignment> &buf)
short vg::quality_char_to_short(char c)
char vg::quality_short_to_char(short i)
void vg::alignment_quality_short_to_char(Alignment &alignment)
string vg::string_quality_short_to_char(const string &quality)
void vg::alignment_quality_char_to_short(Alignment &alignment)
string vg::string_quality_char_to_short(const string &quality)
bam1_t *vg::alignment_to_bam(const string &sam_header, const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const string &cigar, const string &mateseq, const int32_t matepos, const int32_t tlen)
string vg::alignment_to_sam(const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const string &cigar, const string &mateseq, const int32_t matepos, const int32_t tlen)
string vg::cigar_string(vector<pair<int, char>> &cigar)
string vg::mapping_string(const string &source, const Mapping &mapping)
void vg::mapping_cigar(const Mapping &mapping, vector<pair<int, char>> &cigar)
string vg::cigar_against_path(const Alignment &alignment, bool on_reverse_strand)
int32_t vg::sam_flag(const Alignment &alignment, bool on_reverse_strand)
Alignment vg::bam_to_alignment(const bam1_t *b, map<string, string> &rg_sample)
int vg::alignment_to_length(const Alignment &a)
int vg::alignment_from_length(const Alignment &a)
Alignment vg::strip_from_start(const Alignment &aln, size_t drop)
Alignment vg::strip_from_end(const Alignment &aln, size_t drop)
Alignment vg::trim_alignment(const Alignment &aln, const Position &pos1, const Position &pos2)
vector<Alignment> vg::alignment_ends(const Alignment &aln, size_t len1, size_t len2)
vector<Alignment> vg::reverse_complement_alignments(const vector<Alignment> &alns, const function<int64_t(int64_t)> &node_length)
Alignment vg::reverse_complement_alignment(const Alignment &aln, const function<int64_t(id_t)> &node_length)
Alignment vg::merge_alignments(const vector<Alignment> &alns, bool debug)
Alignment &vg::extend_alignment(Alignment &a1, const Alignment &a2, bool debug)
Alignment vg::merge_alignments(const Alignment &a1, const Alignment &a2, bool debug)
void vg::translate_nodes(Alignment &a, const map<id_t, pair<id_t, bool>> &ids, const std::function<size_t(int64_t)> &node_length)
void vg::flip_nodes(Alignment &a, const set<int64_t> &ids, const std::function<size_t(int64_t)> &node_length)
int vg::softclip_start(Alignment &alignment)
int vg::softclip_end(Alignment &alignment)
size_t vg::to_length_after_pos(const Alignment &aln, const Position &pos)
size_t vg::from_length_after_pos(const Alignment &aln, const Position &pos)
size_t vg::to_length_before_pos(const Alignment &aln, const Position &pos)
size_t vg::from_length_before_pos(const Alignment &aln, const Position &pos)
const string vg::hash_alignment(const Alignment &aln)
Alignment vg::simplify(const Alignment &a)
void vg::write_alignment_to_file(const Alignment &aln, const string &filename)
map<id_t, int> vg::alignment_quality_per_node(const Alignment &aln)
int vg::fastq_for_each(string &filename, function<void(Alignment&)> lambda)
void vg::write_alignment_to_file(const string &file, const Alignment &aln)
SB_Input vg::vg_to_sb_input(VG &graph)
vector<pair<id_t, id_t>> vg::get_superbubbles(SB_Input sbi)
vector<pair<id_t, id_t>> vg::get_superbubbles(VG &graph)
map<pair<id_t, id_t>, vector<id_t>> vg::superbubbles(VG &graph)
static void vg::compute_side_components(VG &graph, vector<SideSet> &components, Side2Component &side_to_component)
void *vg::mergeNodeObjects(void *a, void *b)
pair<stCactusGraph *, stCactusNode *> vg::vg_to_cactus(VG &graph)
static void vg::fill_ultrabubble_contents(VG &graph, Bubble &bubble)
static void vg::ultrabubble_recurse(VG &graph, stList *chains_list, NodeSide side1, NodeSide side2, BubbleTree::Node *out_node)
BubbleTree *vg::ultrabubble_tree(VG &graph)
map<pair<id_t, id_t>, vector<id_t>> vg::ultrabubbles(VG &graph)
VG vg::cactus_to_vg(stCactusGraph *cactus_graph)
VG vg::cactusify(VG &graph)
ostream &vg::operator<<(ostream &os, const NodeDivider::NodeMap &nm)
ostream &vg::operator<<(ostream &os, NodeDivider::Entry entry)
ostream &vg::operator<<(ostream &os, const Caller::NodeOffSide &no)
StrandSupport vg::minSup(vector<StrandSupport> &s)
StrandSupport vg::avgSup(vector<StrandSupport> &s)
ostream &vg::operator<<(ostream &os, const StrandSupport &sup)
real_t vg::gamma_ln(real_t x)

Calculate the natural log of the gamma function of the given argument.

real_t vg::factorial_ln(int n)

Calculate the natural log of the factorial of the given integer. TODO: replace with a cache or giant lookup table from Freebayes.

real_t vg::pow_ln(real_t m, int n)

Raise a log probability to a power.

real_t vg::choose_ln(int n, int k)

Compute the number of ways to select k items from a collection of n distinguishable items, ignoring order. Returns the natural log of the (integer) result.

template <typename ProbIn>
real_t vg::multinomial_sampling_prob_ln(const vector<ProbIn> &probs, const vector<int> &obs)

Get the probability for sampling the counts in obs from a set of categories weighted by the probabilities in probs. Works for both double and real_t probabilities. Also works for binomials.

template <typename ProbIn>
real_t vg::binomial_cmf_ln(ProbIn success_logprob, size_t trials, size_t successes)

Compute the probability of having the given number of successes or fewer in the given number of trials, with the given success probability. Returns the resulting log probability.

bool vg::edit_is_match(const Edit &e)
bool vg::edit_is_sub(const Edit &e)
bool vg::edit_is_insertion(const Edit &e)
bool vg::edit_is_deletion(const Edit &e)
pair<Edit, Edit> vg::cut_edit_at_to(const Edit &e, size_t to_off)
pair<Edit, Edit> vg::cut_edit_at_from(const Edit &e, size_t from_off)
Edit vg::reverse_complement_edit(const Edit &e)
bool vg::operator==(const Edit &e1, const Edit &e2)
double vg::entropy(string &st)
string vg::allele_to_string(VG &graph, const Path &allele)

Turn the given path (which must be a thread) into an allele. Drops the first and last mappings and looks up the sequences for the nodes of the others.

template <typename T>
void vg::set_intersection(const unordered_set<T> &set_1, const unordered_set<T> &set_2, unordered_set<T> *out_intersection)
void vg::create_ref_allele(vcflib::Variant &variant, const std::string &allele)

Create the reference allele for an empty vcflib Variant, since apparently there’s no method for that already. Must be called before any alt alleles are added.

int vg::add_alt_allele(vcflib::Variant &variant, const std::string &allele)

Add a new alt allele to a vcflib Variant, since apparently there’s no method for that already.

If that allele already exists in the variant, does not add it again.

Returns the allele number (0, 1, 2, etc.) corresponding to the given allele string in the given variant.

void vg::node_path_position(int64_t id, string &path_name, int64_t &position, bool backward, int64_t &offset)
void vg::index_positions(VG &graph, map<long, Node *> &node_path, map<long, Edge *> &edge_path)
set<pos_t> vg::gcsa_nodes_to_positions(const vector<gcsa::node_type> &nodes)
const string vg::mems_to_json(const vector<MaximalExactMatch> &mems)
const int vg::balanced_stride(int read_length, int kmer_size, int stride)
const vector<string> vg::balanced_kmers(const string &seq, const int kmer_size, const int stride)
bool vg::operator==(const MaximalExactMatch &m1, const MaximalExactMatch &m2)
bool vg::operator<(const MaximalExactMatch &m1, const MaximalExactMatch &m2)
NodeSide vg::node_start(id_t id)

Produce the start NodeSide of a Node.

NodeSide vg::node_end(id_t id)

Produce the end NodeSide of a Node.

ostream &vg::operator<<(ostream &out, const NodeSide &nodeside)

Print a NodeSide to a stream.

ostream &vg::operator<<(ostream &out, const NodeTraversal &nodetraversal)

Print the given NodeTraversal.

Path &vg::append_path(Path &a, const Path &b)
void vg::parse_region(const string &target, string &name, id_t &start, id_t &end)
int vg::path_to_length(const Path &path)
int vg::path_from_length(const Path &path)
int vg::mapping_to_length(const Mapping &m)
int vg::mapping_from_length(const Mapping &m)
int vg::softclip_start(const Mapping &mapping)
int vg::softclip_end(const Mapping &mapping)
Position vg::first_path_position(const Path &path)
Position vg::last_path_position(const Path &path)
int vg::to_length(const Mapping &m)
int vg::from_length(const Mapping &m)
Path &vg::extend_path(Path &path1, const Path &path2)
Path vg::concat_paths(const Path &path1, const Path &path2)
Path vg::simplify(const Path &p)
Mapping vg::concat_mappings(const Mapping &m, const Mapping &n)
Mapping vg::simplify(const Mapping &m)
Path vg::trim_hanging_ends(const Path &p)
bool vg::mapping_ends_in_deletion(const Mapping &m)
bool vg::mapping_starts_in_deletion(const Mapping &m)
bool vg::mapping_is_total_deletion(const Mapping &m)
bool vg::mapping_is_simple_match(const Mapping &m)
bool vg::path_is_simple_match(const Path &p)
const string vg::mapping_sequence(const Mapping &mp, const string &node_seq)
const string vg::mapping_sequence(const Mapping &mp, const Node &n)
Mapping vg::reverse_complement_mapping(const Mapping &m, const function<int64_t(id_t)> &node_length)
Path vg::reverse_complement_path(const Path &path, const function<int64_t(id_t)> &node_length)
pair<Mapping, Mapping> vg::cut_mapping(const Mapping &m, const Position &pos)
pair<Mapping, Mapping> vg::cut_mapping(const Mapping &m, size_t offset)
pair<Path, Path> vg::cut_path(const Path &path, const Position &pos)
pair<Path, Path> vg::cut_path(const Path &path, size_t offset)
bool vg::maps_to_node(const Path &p, id_t id)
Position vg::path_start(const Path &path)
string vg::path_to_string(Path p)
Position vg::path_end(const Path &path)
bool vg::adjacent_mappings(const Mapping &m1, const Mapping &m2)
bool vg::mapping_is_match(const Mapping &m)
double vg::divergence(const Mapping &m)
double vg::identity(const Path &path)
void vg::decompose(const Path &path, map<pos_t, int> &ref_positions, map<int, Edit> &edits)
double vg::overlap(const Path &p1, const Path &p2)
Path vg::path_from_node_traversals(const list<NodeTraversal> &traversals)
Path &vg::increment_node_mapping_ids(Path &p, id_t inc)
const Paths vg::paths_from_graph(Graph &g)
pos_t vg::make_pos_t(const Position &pos)

Convert a Position to a (much smaller) pos_t.

pos_t vg::make_pos_t(id_t id, bool is_rev, off_t off)

Create a pos_t from a Node ID, an orientation flag, and an offset.

Position vg::make_position(const pos_t &pos)

Convert a pos_t to a Position.

Position vg::make_position(id_t id, bool is_rev, off_t off)

Create a Position from a Node ID, an orientation flag, and an offset.

bool vg::is_empty(const pos_t &pos)

Return true if a pos_t is unset.

id_t vg::id(const pos_t &pos)

Extract the id of the node a pos_t is on.

bool vg::is_rev(const pos_t &pos)

Return true if a pos_t is on the reverse strand of its node.

off_t vg::offset(const pos_t &pos)

Get the offset from a pos_t.

id_t &vg::get_id(pos_t &pos)

Get a reference to the Node ID of a pos_t.

bool &vg::get_is_rev(pos_t &pos)

Get a reference to the reverse flag of a pos_t.

off_t &vg::get_offset(pos_t &pos)

Get a reference to the offset field of a pos_t.

pos_t vg::reverse(const pos_t &pos, size_t node_length)

Reverse a pos_t and get a pos_t at the same base, going the other direction.

Position vg::reverse(const Position &pos, size_t node_length)

Reverse a Position and get a Position at the same base, going the other direction.

ostream &vg::operator<<(ostream &out, const pos_t &pos)

Print a pos_t to a stream.

size_t vg::xg_cached_node_length(id_t id, xg::XG *xgidx, LRUCache<id_t, Node> &node_cache)

Get the length of a Node from an xg::XG index, with caching of deserialized nodes.

char vg::xg_cached_pos_char(pos_t pos, xg::XG *xgidx, LRUCache<id_t, Node> &node_cache)

Get the character at a position in an xg::XG index, with caching of deserialized nodes.

map<pos_t, char> vg::xg_cached_next_pos_chars(pos_t pos, xg::XG *xgidx, LRUCache<id_t, Node> &node_cache)

Get the characters at positions after the given position from an xg::XG index, with caching of deserialized nodes.

void vg::parse_region(string &region, string &startSeq, int &startPos, int &stopPos)
void vg::parse_bed_regions(const string &bed_path, vector<Region> &out_regions)
void vg::parse_region(string &region, Region &out_region)
bool vg::is_match(const Translation &translation)
char vg::reverse_complement(const char &c)
string vg::reverse_complement(const string &seq)
int vg::get_thread_count(void)
std::vector<std::string> &vg::split_delims(const std::string &s, const std::string &delims, std::vector<std::string> &elems)
std::vector<std::string> vg::split_delims(const std::string &s, const std::string &delims)
const std::string vg::sha1sum(const std::string &data)
const std::string vg::sha1head(const std::string &data, size_t head)
string vg::wrap_text(const string &str, size_t width)
bool vg::is_number(const std::string &s)
bool vg::allATGC(const string &s)
string vg::nonATGCNtoN(const string &s)
string vg::tmpfilename(const string &base)
string vg::get_or_make_variant_id(vcflib::Variant variant)
string vg::make_variant_id(vcflib::Variant variant)
double vg::median(std::vector<int> &v)
void vg::get_input_file(int &optind, int argc, char **argv, function<void(istream&)> callback)
string vg::get_input_file_name(int &optind, int argc, char **argv)
void vg::get_input_file(const string &file_name, function<void(istream&)> callback)
bool vg::is_number(const string &s)
double vg::stdev(const std::vector<double> &v)
template <typename T>
double vg::stdev(const T &v)
double vg::prob_to_logprob(double prob)
double vg::logprob_to_prob(double logprob)
double vg::logprob_add(double logprob1, double logprob2)
double vg::logprob_invert(double logprob)
double vg::phred_to_prob(int phred)
int vg::prob_to_phred(double prob)
double vg::phred_to_logprob(int phred)
int vg::logprob_to_phred(double logprob)
template <typename T, typename V>
set<T> vg::map_keys_to_set(const map<T, V> &m)
template <typename T>
vector<T> vg::pmax(const std::vector<T> &a, const std::vector<T> &b)
template <typename T>
vector<T> vg::vpmax(const std::vector<std::vector<T>> &vv)
template <typename Collection>
Collection::value_type vg::sum(const Collection &collection)

Compute the sum of the values in a collection. Values must be default-constructible (like numbers are).

template <typename Collection>
Collection::value_type vg::logprob_sum(const Collection &collection)

Compute the sum of the values in a collection, where the values are log probabilities and the result is the log of the total probability. Items must be convertible to/from doubles for math.

static void vg::triple_to_vg(void *user_data, raptor_statement *triple)

Variables

const char *const vg::BAM_DNA_LOOKUP
const int8_t vg::default_match
const int8_t vg::default_mismatch
const int8_t vg::default_gap_open
const int8_t vg::default_gap_extension
const int8_t vg::default_max_scaled_score
const uint8_t vg::default_max_qual_score
const double vg::default_gc_content
const char vg::complement[256]
const char *vg::VG_VERSION_STRING
namespace
namespace

Functions

int vg::unittest::run_unit_tests(int argc, char **argv)

Take the original argc and argv from a vg unittest command-line call and run the unit tests. We keep this in its own CPP/HPP to keep our unit test library from being a dependency of main.o and other real application code.

Passes the args along to the unit test system.

Returns exit code 0 on success, other codes on failure.

file alignment.cpp
#include “alignment.hpp”#include “stream.hpp”
file alignment.hpp
#include <iostream>#include <functional>#include <zlib.h>#include “utility.hpp”#include “path.hpp”#include “position.hpp”#include “vg.pb.h”#include “htslib/hfile.h”#include “htslib/hts.h”#include “htslib/sam.h”#include “htslib/vcf.h”
file banded_global_aligner.cpp
#include “banded_global_aligner.hpp”#include “json2pb.h”
file banded_global_aligner.hpp
#include <stdio.h>#include <ctype.h>#include <iostream>#include <vector>#include <unordered_set>#include <unordered_map>#include <list>#include “vg.pb.h”
file bin2ascii.h
#include <string>#include <stdexcept>

Defines

__BIN2ASCII_H__

Functions

std::string hex2bin(const std::string &s)
std::string bin2hex(const std::string &s)
std::string b64_encode(const std::string &s)
std::string b64_decode(const std::string &s)
file bubbles.cpp
#include <unordered_set>#include “bubbles.hpp”#include “vg.hpp”#include “sonLib.h”#include “stCactusGraphs.h”
file bubbles.hpp
#include <vector>#include <map>#include “types.hpp”#include “utility.hpp”#include “nodeside.hpp”#include “DetectSuperBubble.hpp”

Typedefs

typedef
typedef
file call2vcf.cpp
#include <iostream>#include <fstream>#include <sstream>#include <regex>#include <set>#include <utility>#include <algorithm>#include <getopt.h>#include “vg.hpp”#include “index.hpp”#include “Variant.h”#include “genotypekit.hpp”
file caller.cpp
#include <cstdlib>#include <stdexcept>#include “json2pb.h”#include “caller.hpp”#include “stream.hpp”
file caller.hpp
#include <iostream>#include <algorithm>#include <functional>#include <cmath>#include <limits>#include <unordered_set>#include <tuple>#include “vg.pb.h”#include “vg.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “pileup.hpp”
file colors.hpp
#include <vector>#include <random>
file constructor.cpp
#include “vg.hpp”#include “constructor.hpp”#include <set>#include <tuple>#include <list>#include <algorithm>#include <memory>
file constructor.hpp
#include <vector>#include <set>#include <map>#include “types.hpp”#include “progressive.hpp”#include “vg.pb.h”#include “Variant.h”#include “Fasta.h”

constructor.hpp: defines a tool class used for constructing VG graphs from VCF files.

file convert.hpp
#include <sstream>

Functions

template <typename T>
bool convert(const std::string &s, T &r)
template <typename T>
std::string convert(const T &r)
file deconstructor.cpp
#include “deconstructor.hpp”#include “bubbles.hpp”
file deconstructor.hpp
#include <vector>#include <set>#include <array>#include <list>#include <string>#include <iostream>#include <unordered_map>#include <map>#include <climits>#include <queue>#include <fstream>#include <cstdlib>#include <sstream>#include <stack>#include “Variant.h”#include “index.hpp”#include “path.hpp”#include “vg.hpp”#include “vg.pb.h”#include “Fasta.h”#include “xg.hpp”#include “position.hpp”#include “vcfheader.hpp”

Deconstruct is getting rewritten. New functionality: detect superbubbles and bubbles; fix the command-line interface; harmonize on XG / raw graph (i.e. deprecate index); use unroll/DAGify if needed to avoid cycles.

Much of this is taken from Brankovic’s “Linear-Time Superbubble Identification Algorithm for Genome Assembly”.

file distributions.hpp
#include <map>#include <cmath>#include “utility.hpp”
file edit.cpp
#include “edit.hpp”#include “utility.hpp”
file edit.hpp
#include “vg.pb.h”#include <utility>#include <iostream>#include “json2pb.h”
file entropy.cpp
#include “entropy.hpp”
file entropy.hpp
#include <iostream>#include <set>#include <vector>#include <string>#include <cmath>
file filter.cpp
#include “filter.hpp”
file filter.hpp
#include <vector>#include <cstdlib>#include <iostream>#include <unordered_map>#include <sstream>#include <string>#include “vg.hpp”#include “xg.hpp”#include “vg.pb.h”

Provides a way to filter Edits contained within Alignments. This can be used to clean out sequencing errors and to find high-quality candidates for variant calling.

file genotypekit.cpp
#include “genotypekit.hpp”
file genotypekit.hpp
#include <iostream>#include <algorithm>#include <functional>#include <cmath>#include <limits>#include <unordered_set>#include <list>#include “vg.pb.h”#include “vg.hpp”#include “translator.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”
file genotyper.cpp
#include <cstdint>#include “genotyper.hpp”#include “bubbles.hpp”#include “distributions.hpp”
file genotyper.hpp
#include <iostream>#include <algorithm>#include <functional>#include <cmath>#include <limits>#include <unordered_set>#include <list>#include “vg.pb.h”#include “vg.hpp”#include “translator.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”
file gssw_aligner.cpp
#include “gssw_aligner.hpp”#include “json2pb.h”

Functions

double add_log(double log_x, double log_y)

Variables

const double quality_scale_factor
const double exp_overflow_limit
file gssw_aligner.hpp
#include <cmath>#include <algorithm>#include <vector>#include <set>#include <string>#include <unordered_map>#include “gssw.h”#include “vg.pb.h”#include “Variant.h”#include “Fasta.h”#include “path.hpp”#include “banded_global_aligner.hpp”
file hash_map.hpp
#include “sparsehash/sparse_hash_map”#include “sparsehash/dense_hash_map”

Defines

OVERLOAD_PAIR_HASH
USE_DENSE_HASH
file index.cpp
#include “index.hpp”
file index.cpp
#include <omp.h>#include <unistd.h>#include <getopt.h>#include <string>#include <vector>#include <regex>#include “subcommand.hpp”#include “../vg.hpp”#include “../index.hpp”#include “../stream.hpp”#include “../vg_set.hpp”#include “../utility.hpp”#include “gcsa.h”

Functions

void help_index(char **argv)
int main_index(int argc, char **argv)

Variables

Subcommand vg_construct("index","index graphs or alignments for random access or mapping", main_index)
file index.hpp
#include <iostream>#include <exception>#include <sstream>#include <climits>#include “rocksdb/db.h”#include “rocksdb/env.h”#include “rocksdb/options.h”#include “rocksdb/write_batch.h”#include “rocksdb/memtablerep.h”#include “rocksdb/statistics.h”#include “rocksdb/cache.h”#include “rocksdb/slice_transform.h”#include “rocksdb/table.h”#include “rocksdb/filter_policy.h”#include “json2pb.h”#include “vg.hpp”#include “hash_map.hpp”
file json2pb.cpp
#include <errno.h>#include <jansson.h>#include <google/protobuf/message.h>#include <google/protobuf/descriptor.h>#include <json2pb.h>#include <stdexcept>#include <cstdio>#include “bin2ascii.h”

Defines

json_boolean(val)
_CONVERT(type, ctype, fmt, sfunc, afunc)
_SET_OR_ADD(sfunc, afunc, value)
_CONVERT(type, ctype, fmt, sfunc, afunc)

Functions

static json_t *_pb2json(const Message &msg)
static json_t *_field2json(const Message &msg, const FieldDescriptor *field, size_t index)
static void _json2pb(Message &msg, json_t *root)
static void _json2field(Message &msg, const FieldDescriptor *field, json_t *jf)
void json2pb(Message &msg, const char *buf, size_t size)
void json2pb(Message &msg, FILE *fp)
int json_dump_std_string(const char *buf, size_t size, void *data)
std::string pb2json(const Message &msg)
file json2pb.h
#include <string>#include <cstdio>#include <functional>#include <vector>#include <stream.hpp>#include <iostream>

Functions

void json2pb(google::protobuf::Message &msg, const char *buf, size_t size)
void json2pb(google::protobuf::Message &msg, FILE *fp)
std::string pb2json(const google::protobuf::Message &msg)
file main.cpp
#include <iostream>#include <fstream>#include <ctime>#include <cstdio>#include <getopt.h>#include <sys/stat.h>#include “gcsa.h”#include “files.h”#include “json2pb.h”#include “vg.hpp”#include “vg.pb.h”#include “vg_set.hpp”#include “index.hpp”#include “mapper.hpp”#include “Variant.h”#include “Fasta.h”#include “stream.hpp”#include “alignment.hpp”#include “convert.hpp”#include “pileup.hpp”#include “caller.hpp”#include “deconstructor.hpp”#include “vectorizer.hpp”#include “sampler.hpp”#include “filter.hpp”#include “google/protobuf/stubs/common.h”#include “progress_bar.hpp”#include “version.hpp”#include “genotyper.hpp”#include “bubbles.hpp”#include “translator.hpp”#include “readfilter.hpp”#include “distributions.hpp”#include “unittest/driver.hpp”#include “subcommand/subcommand.hpp”

Functions

void help_translate(char **argv)
int main_translate(int argc, char **argv)
void help_filter(char **argv)
int main_filter(int argc, char **argv)
void help_validate(char **argv)
int main_validate(int argc, char **argv)
void help_scrub(char **argv)
int main_scrub(int argc, char **argv)
void help_vectorize(char **argv)
int main_vectorize(int argc, char **argv)
void help_compare(char **argv)
int main_compare(int argc, char **argv)
void help_call(char **argv)
int main_call(int argc, char **argv)
void help_genotype(char **argv)
int main_genotype(int argc, char **argv)
void help_pileup(char **argv)
int main_pileup(int argc, char **argv)
void help_msga(char **argv)
int main_msga(int argc, char **argv)
void help_surject(char **argv)
int main_surject(int argc, char **argv)
void help_circularize(char **argv)
int main_circularize(int argc, char **argv)
void help_sim(char **argv)
int main_sim(int argc, char **argv)
void help_kmers(char **argv)
int main_kmers(int argc, char **argv)
void help_concat(char **argv)
int main_concat(int argc, char **argv)
void help_ids(char **argv)
int main_ids(int argc, char **argv)
void help_join(char **argv)
int main_join(int argc, char **argv)
void help_stats(char **argv)
int main_stats(int argc, char **argv)
void help_paths(char **argv)
int main_paths(int argc, char **argv)
void help_find(char **argv)
int main_find(int argc, char **argv)
void help_align(char **argv)
int main_align(int argc, char **argv)
void help_map(char **argv)
int main_map(int argc, char **argv)
void help_view(char **argv)
int main_view(int argc, char **argv)
void help_sv(char **argv)
void help_deconstruct(char **argv)
void help_locify(char **argv)
int main_locify(int argc, char **argv)
int main_deconstruct(int argc, char **argv)
void help_version(char **argv)
int main_version(int argc, char **argv)
int main_test(int argc, char **argv)
void vg_help(char **argv)
int main(int argc, char *argv[])
file mapper.cpp
#include <unordered_set>#include “mapper.hpp”
file mapper.hpp
#include <iostream>#include <map>#include <chrono>#include <ctime>#include “vg.hpp”#include “xg.hpp”#include “index.hpp”#include “gcsa.h”#include “lcp.h”#include “alignment.hpp”#include “path.hpp”#include “position.hpp”#include “lru_cache.h”#include “json2pb.h”#include “entropy.hpp”#include “gssw_aligner.hpp”
file nodeside.hpp
#include <ostream>#include <utility>#include “vg.pb.h”#include “types.hpp”#include “hash_map.hpp”
file nodetraversal.hpp
#include “vg.pb.h”
file path.cpp
#include “path.hpp”#include “stream.hpp”
file path.hpp
#include <iostream>#include <algorithm>#include <functional>#include <set>#include <list>#include <sstream>#include “json2pb.h”#include “vg.pb.h”#include “edit.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”#include “position.hpp”#include “nodetraversal.hpp”
file pictographs.hpp
#include <vector>#include <random>#include <functional>
file pileup.cpp
#include <cstdlib>#include <stdexcept>#include <regex>#include “json2pb.h”#include “pileup.hpp”#include “stream.hpp”
file pileup.hpp
#include <iostream>#include <algorithm>#include <functional>#include “vg.pb.h”#include “vg.hpp”#include “hash_map.hpp”#include “utility.hpp”
file position.cpp
#include “position.hpp”
file position.hpp
#include “vg.pb.h”#include “types.hpp”#include “xg.hpp”#include “lru_cache.h”#include “utility.hpp”#include “json2pb.h”#include <iostream>

Functions for working with Positions and pos_ts.

file progressive.cpp
#include “progressive.hpp”#include <iostream>
file progressive.hpp
#include <string>#include “progress_bar.hpp”
file readfilter.cpp
#include “readfilter.hpp”#include “IntervalTree.h”#include <fstream>#include <sstream>
file readfilter.hpp
#include <vector>#include <cstdlib>#include <iostream>#include <string>#include “vg.hpp”#include “xg.hpp”#include “vg.pb.h”

Provides a way to filter and transform reads, implementing the bulk of the vg filter command.

file realigner.cpp
#include “realigner.hpp”
file realigner.hpp
#include <iostream>#include <map>#include “vg.hpp”#include “mapper.hpp”#include “alignment.hpp”#include “path.hpp”#include “json2pb.h”
file region.cpp
#include <iostream>#include <fstream>#include <cassert>#include “region.hpp”
file region.hpp
#include <string>#include <vector>#include <sstream>
file sampler.cpp
#include “sampler.hpp”
file sampler.hpp
#include <iostream>#include <map>#include <chrono>#include <ctime>#include “vg.hpp”#include “xg.hpp”#include “alignment.hpp”#include “path.hpp”#include “position.hpp”#include “lru_cache.h”#include “json2pb.h”
file ssw_aligner.cpp
#include “ssw_aligner.hpp”
file ssw_aligner.hpp
#include <vector>#include <set>#include <string>#include “ssw_cpp.h”#include “vg.pb.h”#include “path.hpp”
file stream.hpp
#include <cassert>#include <iostream>#include <fstream>#include <functional>#include <vector>#include <list>#include “google/protobuf/stubs/common.h”#include “google/protobuf/io/zero_copy_stream.h”#include “google/protobuf/io/zero_copy_stream_impl.h”#include “google/protobuf/io/gzip_stream.h”#include “google/protobuf/io/coded_stream.h”
file construct.cpp
#include <omp.h>#include <unistd.h>#include <getopt.h>#include <memory>#include “subcommand.hpp”#include “../stream.hpp”#include “../constructor.hpp”#include “../region.hpp”

Functions

void help_construct(char **argv)
int main_construct(int argc, char **argv)

Variables

Subcommand vg_construct("construct","graph construction", main_construct)
file mod.cpp
#include <omp.h>#include <unistd.h>#include <getopt.h>#include <string>#include <vector>#include <regex>#include “subcommand.hpp”#include “../vg.hpp”#include “../stream.hpp”#include “../utility.hpp”

Functions

void help_mod(char **argv)
int main_mod(int argc, char **argv)

Variables

Subcommand vg_construct("mod","filter, transform, and edit the graph", main_mod)
file simplify.cpp
#include <omp.h>#include <unistd.h>#include <getopt.h>#include <list>#include “subcommand.hpp”#include “../vg.hpp”#include “../genotypekit.hpp”#include “../utility.hpp”

Functions

void help_simplify(char **argv)
int main_simplify(int argc, char **argv)

Variables

Subcommand vg_construct("simplify","graph simplification", main_simplify)
file subcommand.cpp
#include “subcommand.hpp”
file subcommand.hpp
#include <map>#include <functional>#include <string>

subcommand.hpp: defines a system for registering subcommands of the vg command (vg construct, vg view, etc.) at compile time. Replaces the system of defining two functions and a giant run of if statements in main.cpp.

main.cpp does not need to include any subcommand headers!

Subcommands are created as static global objects in their own compilation units, which have to be explicitly linked into the binary (they won’t be pulled out of a library if nothing references their symbols).

Subcommands are responsible for printing their own help; we can do “vg help” and print all the subcommands that exist (via a help subcommand), but we can’t do “vg help subcommand” (because the help subcommand doesn’t know how to get help info on the others).

Subcommands get passed all of argv, so they have to skip past their names when parsing arguments.

To make a subcommand, do something like this in a cpp file in this “subcommand” directory:

#include "subcommand.hpp"
using namespace vg::subcommand;

int main_frobnicate(int argc, char** argv) {
    return 0;
}

static Subcommand vg_frobnicate("frobnicate", "frobnicate nodes and edges",
    main_frobnicate);

file swap_remove.hpp
#include <vector>#include <algorithm>

Functions

template <typename T>
bool swap_remove(std::vector<T> &v, const T &e)
file translator.cpp
#include “translator.hpp”#include “stream.hpp”
file translator.hpp
#include <iostream>#include <algorithm>#include <functional>#include <set>#include <vector>#include <list>#include “vg.pb.h”#include “vg.hpp”#include “hash_map.hpp”#include “utility.hpp”#include “types.hpp”
file types.hpp
#include <tuple>

Contains typedefs for basic types useful for talking about graphs.

file driver.cpp
#include “driver.hpp”#include “catch.hpp”#include <sstream>#include <stdexcept>#include <algorithm>#include <ostream>#include <string>#include <memory>#include <vector>#include <stdlib.h>#include <cstddef>#include <iomanip>#include <limits>#include <stdint.h>#include <iterator>#include <cmath>#include <set>#include <iostream>#include <streambuf>#include <fstream>#include <ctime>#include <map>#include <assert.h>#include <signal.h>#include <cstdio>#include <unistd.h>#include <sys/time.h>#include <cstring>

Defines

CATCH_CONFIG_RUNNER
file driver.hpp
file utility.cpp
#include “utility.hpp”
file utility.hpp
#include <string>#include <vector>#include <sstream>#include <omp.h>#include <cstring>#include <algorithm>#include <numeric>#include <cmath>#include <unistd.h>#include “vg.pb.h”#include “sha1.hpp”#include “Variant.h”
file vectorizer.cpp
#include “vectorizer.hpp”
file vectorizer.hpp
#include <iostream>#include <sstream>#include “sdsl/bit_vectors.hpp”#include <vector>#include <unordered_map>#include “vg.hpp”#include “xg.hpp”#include “vg.pb.h”
file version.cpp
#include “version.hpp”#include “vg_git_version.hpp”

Defines

VG_GIT_VERSION
file version.hpp
file vg.cpp
#include “vg.hpp”#include “stream.hpp”#include “gssw_aligner.hpp”#include <raptor2/raptor2.h>
file vg.hpp
#include <vector>#include <set>#include <string>#include <deque>#include <list>#include <array>#include <omp.h>#include <unistd.h>#include <limits.h>#include <algorithm>#include <random>#include “gssw.h”#include “gcsa.h”#include “lcp.h”#include “gssw_aligner.hpp”#include “ssw_aligner.hpp”#include “region.hpp”#include “path.hpp”#include “utility.hpp”#include “alignment.hpp”#include “vg.pb.h”#include “hash_map.hpp”#include “progressive.hpp”#include “lru_cache.h”#include “Variant.h”#include “Fasta.h”#include “swap_remove.hpp”#include “pictographs.hpp”#include “colors.hpp”#include “types.hpp”#include “gfakluge.hpp”#include “globalDefs.hpp”#include “Graph.hpp”#include “helperDefs.hpp”#include “bubbles.hpp”#include “nodetraversal.hpp”#include “nodeside.hpp”
file vg.proto

Variables

syntax
file vg_set.cpp
#include “vg_set.hpp”#include “stream.hpp”
file vg_set.hpp
#include <set>#include <regex>#include <stdlib.h>#include “gcsa.h”#include “vg.hpp”#include “index.hpp”#include “xg.hpp”
dir src
dir src/subcommand
dir src/unittest