Bloom Filter Trie
bft.h
Go to the documentation of this file.
1 
6 #pragma once
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 
12 #include <stdarg.h>
13 #include <stdbool.h>
14 
15 #include "intersection.h"
16 #include "insertNode.h"
17 #include "branchingNode.h"
18 #include "fasta.h"
19 #include "marking.h"
20 #include "replaceAnnotation.h"
21 #include "write_to_disk.h"
22 #include "extract_kmers.h"
23 #include "printMemory.h"
24 #include "file_io.h"
25 #include "useful_macros.h"
26 
28 typedef BFT_Root BFT;
29 
33 typedef struct{
34  uint8_t* annot;
35  uint8_t* annot_ext;
36  uint8_t* annot_cplx;
37 
38  int size_annot;
39  int size_annot_cplx;
40 
41  uint8_t from_BFT;
43 
51 typedef size_t (*BFT_func_ptr) (BFT_kmer* bft_kmer, BFT* bft, va_list args);
52 
53 inline uint8_t intersection_annots(const uint8_t a, const uint8_t b);
54 inline uint8_t union_annots(const uint8_t a, const uint8_t b);
55 inline uint8_t sym_difference_annots(const uint8_t a, const uint8_t b);
56 
61 BFT* create_cdbg(int k, int treshold_compression);
63 void free_cdbg(BFT* bft);
65 
70 void insert_genomes_from_files(int nb_files, char** paths, BFT* bft, char* prefix_bft_filename);
72 void insert_kmers_new_genome(int nb_kmers, char** kmers, char* genome_name, BFT* bft);
73 void insert_kmers_last_genome(int nb_kmers, char** kmers, BFT* bft);
75 
80 BFT_kmer* create_kmer(const char* kmer, int k);
83 void free_BFT_kmer(BFT_kmer* bft_kmer, int nb_bft_kmer);
84 void free_BFT_kmer_content(BFT_kmer* bft_kmer, int nb_bft_kmer);
85 void extract_kmers_to_disk(BFT* bft, char* filename_output, bool compressed_output);
86 size_t write_kmer_ascii_to_disk(BFT_kmer* bft_kmer, BFT* bft, va_list args);
87 size_t write_kmer_comp_to_disk(BFT_kmer* bft_kmer, BFT* bft, va_list args);
89 
96 void free_BFT_annotation(BFT_annotation* bft_annot);
98 bool presence_genome(uint32_t id_genome, BFT_annotation* bft_annot, BFT* bft);
99 
100 inline uint8_t intersection_annots(const uint8_t a, const uint8_t b){ /* Combines two bytes (presumably annotation bytes, per the name) by intersection. NOTE(review): plain `inline` in a header gives only an inline definition in C99/C11 - confirm an external definition exists in a .c file. */
101  return a & b; /* bitwise AND: a bit is set in the result only if it is set in both a and b */
102 }
103 
104 inline uint8_t union_annots(const uint8_t a, const uint8_t b){ /* Combines two bytes (presumably annotation bytes, per the name) by union. NOTE(review): plain `inline` in a header gives only an inline definition in C99/C11 - confirm an external definition exists in a .c file. */
105  return a | b; /* bitwise OR: a bit is set in the result if it is set in a, in b, or in both */
106 }
107 
108 inline uint8_t sym_difference_annots(const uint8_t a, const uint8_t b){ /* Combines two bytes (presumably annotation bytes, per the name) by symmetric difference. NOTE(review): plain `inline` in a header gives only an inline definition in C99/C11 - confirm an external definition exists in a .c file. */
109  return a ^ b; /* bitwise XOR: a bit is set in the result if it is set in exactly one of a and b */
110 }
111 
112 BFT_annotation* intersection_annotations(BFT* bft, uint32_t nb_annotations, ... );
113 BFT_annotation* union_annotations(BFT* bft, uint32_t nb_annotations, ... );
114 BFT_annotation* sym_difference_annotations(BFT* bft, uint32_t nb_annotations, ... );
115 uint32_t* get_list_id_genomes(BFT_annotation* bft_annot, BFT* bft);
116 uint32_t get_count_id_genomes(BFT_annotation* bft_annot, BFT* bft);
117 uint32_t* intersection_list_id_genomes(uint32_t* list_a, uint32_t* list_b);
119 
124 BFT_kmer* get_kmer(const char* kmer, BFT* bft);
126 bool is_kmer_in_cdbg(BFT_kmer* bft_kmer);
127 uint32_t* query_sequence(BFT* bft, char* sequence, double threshold, bool canonical_search);
129 
134 bool prefix_matching(BFT* bft, char* prefix, BFT_func_ptr f, ...);
137 
142 void set_marking(BFT* bft);
144 void unset_marking(BFT* bft);
145 void set_flag_kmer(uint8_t flag, BFT_kmer* bft_kmer, BFT* bft);
146 uint8_t get_flag_kmer(BFT_kmer* bft_kmer, BFT* bft);
148 
153 void set_neighbors_traversal(BFT* bft);
155 void unset_neighbors_traversal(BFT* bft);
156 BFT_kmer* get_neighbors(BFT_kmer* bft_kmer, BFT* bft);
157 BFT_kmer* get_predecessors(BFT_kmer* bft_kmer, BFT* bft);
158 BFT_kmer* get_successors(BFT_kmer* bft_kmer, BFT* bft);
160 
165 void iterate_over_kmers(BFT* bft, BFT_func_ptr f, ... );
167 void v_iterate_over_kmers(BFT* bft, BFT_func_ptr f, va_list args);
169 
174 void write_BFT(BFT* bft, char* filename, bool compress_annotations);
176 BFT* load_BFT(char* filename);
BFT_annotation * intersection_annotations(BFT *bft, uint32_t nb_annotations,...)
Function computing the intersection of a set of annotations.
Definition: bft.c:421
void iterate_over_kmers(BFT *bft, BFT_func_ptr f,...)
Function iterating over the k-mers of a BFT.
Definition: bft.c:1049
bool presence_genome(uint32_t id_genome, BFT_annotation *bft_annot, BFT *bft)
Function testing if a k-mer occurred in a genome.
Definition: bft.c:395
BFT_Root BFT
Root vertex of a BFT.
Definition: bft.h:28
uint32_t get_count_id_genomes(BFT_annotation *bft_annot, BFT *bft)
Function counting the number of genome identifiers in an annotation.
Definition: bft.c:648
BFT_kmer * get_successors(BFT_kmer *bft_kmer, BFT *bft)
Function extracting the successors of a k-mer.
Definition: bft.c:949
void insert_genomes_from_files(int nb_files, char **paths, BFT *bft, char *prefix_bft_filename)
Function inserting genomes (k-mer file) in a BFT.
Definition: bft.c:31
void insert_kmers_last_genome(int nb_kmers, char **kmers, BFT *bft)
Function inserting k-mers of the last inserted genome in a BFT.
Definition: bft.c:86
uint8_t get_flag_kmer(BFT_kmer *bft_kmer, BFT *bft)
Function getting the flag of a k-mer of a BFT.
Definition: bft.c:746
void unset_marking(BFT *bft)
Function unlocking the graph that was locked for vertices marking.
Definition: bft.c:703
void extract_kmers_to_disk(BFT *bft, char *filename_output, bool compressed_output)
Function extracting the k-mers of a BFT in a file.
Definition: bft.c:255
void free_BFT_kmer(BFT_kmer *bft_kmer, int nb_bft_kmer)
Function freeing allocated BFT_kmers.
Definition: bft.c:179
BFT_kmer * create_empty_kmer()
Function creating an empty BFT_kmer object (all its components are NULL).
Definition: bft.c:163
void free_BFT_annotation(BFT_annotation *bft_annot)
Function freeing a BFT_annotation.
Definition: bft.c:346
void free_cdbg(BFT *bft)
Free an allocated colored de Bruijn graph stored in a BFT.
Definition: bft.c:19
BFT_kmer * get_neighbors(BFT_kmer *bft_kmer, BFT *bft)
Function extracting the neighbors of a k-mer.
Definition: bft.c:802
bool is_kmer_in_cdbg(BFT_kmer *bft_kmer)
Function testing if a k-mer is in a BFT.
Definition: bft.c:246
void insert_kmers_new_genome(int nb_kmers, char **kmers, char *genome_name, BFT *bft)
Function inserting k-mers of a new genome in a BFT.
Definition: bft.c:45
BFT_annotation * create_BFT_annotation()
Function creating an empty BFT_annotation.
Definition: bft.c:326
BFT_annotation * sym_difference_annotations(BFT *bft, uint32_t nb_annotations,...)
Function computing the symmetric difference of a set of annotations.
Definition: bft.c:565
BFT_kmer * create_kmer(const char *kmer, int k)
Function creating a BFT_kmer object from a k-mer encoded as an ASCII string (char*).
Definition: bft.c:129
BFT * load_BFT(char *filename)
Function loading a BFT from disk.
Definition: bft.c:1220
void v_iterate_over_kmers(BFT *bft, BFT_func_ptr f, va_list args)
Function iterating over the k-mers of a BFT.
Definition: bft.c:1012
void set_flag_kmer(uint8_t flag, BFT_kmer *bft_kmer, BFT *bft)
Function marking a k-mer of a BFT with a flag.
Definition: bft.c:720
BFT_annotation * get_annotation(BFT_kmer *bft_kmer)
Function extracting the annotation (set of colors) associated with a k-mer of a BFT.
Definition: bft.c:363
Annotation associated with a BFT_kmer.
Definition: bft.h:33
void set_marking(BFT *bft)
Function locking and preparing the graph for vertices marking (no insertion can happen before unlocking).
Definition: bft.c:690
void free_BFT_kmer_content(BFT_kmer *bft_kmer, int nb_bft_kmer)
Function freeing the content of allocated BFT_kmers.
Definition: bft.c:198
size_t(* BFT_func_ptr)(BFT_kmer *bft_kmer, BFT *bft, va_list args)
Pointer to a function used by iterate_over_kmers() and v_iterate_over_kmers().
Definition: bft.h:51
void set_neighbors_traversal(BFT *bft)
Function locking the graph for traversal.
Definition: bft.c:771
size_t write_kmer_comp_to_disk(BFT_kmer *bft_kmer, BFT *bft, va_list args)
Function writing a 2-bit encoded k-mer to a file.
Definition: bft.c:315
bool prefix_matching(BFT *bft, char *prefix, BFT_func_ptr f,...)
Function for prefix matching over the k-mers of a BFT.
Definition: bft.c:1094
uint32_t * get_list_id_genomes(BFT_annotation *bft_annot, BFT *bft)
Function extracting a list of genome identifiers from an annotation.
Definition: bft.c:622
K-mer stored in a BFT_Root.
Definition: Node.h:124
size_t write_kmer_ascii_to_disk(BFT_kmer *bft_kmer, BFT *bft, va_list args)
Function writing an ASCII k-mer in a file.
Definition: bft.c:298
void write_BFT(BFT *bft, char *filename, bool compress_annotations)
Function writing a BFT to disk.
Definition: bft.c:1206
uint32_t * query_sequence(BFT *bft, char *sequence, double threshold, bool canonical_search)
Function querying a BFT for a sequence.
Definition: bft.c:1239
void unset_neighbors_traversal(BFT *bft)
Function unlocking a locked graph for traversal.
Definition: bft.c:784
BFT * create_cdbg(int k, int treshold_compression)
Function creating a colored de Bruijn graph stored in a BFT.
Definition: bft.c:12
BFT_kmer * get_predecessors(BFT_kmer *bft_kmer, BFT *bft)
Function extracting the predecessors of a k-mer.
Definition: bft.c:891
BFT_annotation * union_annotations(BFT *bft, uint32_t nb_annotations,...)
Function computing the union of a set of annotations.
Definition: bft.c:488
BFT_kmer * get_kmer(const char *kmer, BFT *bft)
Function searching for a k-mer in a BFT.
Definition: bft.c:216
Root vertex of a BFT.
Definition: Node.h:91