// Sentinel reference value with every bit set. The code visible in this
// listing compares both next_node and edge values against it.
// NOTE(review): NO_EDGE is spelled twice, differing only in the literal
// suffix (i64 is the MSVC-specific 64-bit suffix, ll the standard long long
// suffix). Presumably each definition sits in one arm of a compiler-specific
// #if that is not visible in this listing -- confirm before editing either.
41 #define NO_EDGE (inT64) 0xffffffffffffffffi64
44 #define NO_EDGE (inT64) 0xffffffffffffffffll
// Edge direction values (0 = forward, 1 = backward).
// NOTE(review): presumably the values reported by direction_from_edge_rec()
// -- confirm.
84 #define FORWARD_EDGE (inT32) 0
85 #define BACKWARD_EDGE (inT32) 1
// NOTE(review): presumably the cap supplied as max_num_edges when calling
// print_node() -- confirm against the callers.
86 #define MAX_NODE_EDGES_DISPLAY (inT64) 100
// Single-bit masks occupying bits 0, 1 and 2 respectively.
// NOTE(review): presumably the per-edge flags read back by
// marker_flag_from_edge_rec(), direction_from_edge_rec() and
// end_of_word_from_edge_rec() -- confirm.
87 #define MARKER_FLAG (inT64) 1
88 #define DIRECTION_FLAG (inT64) 2
89 #define WERD_END_FLAG (inT64) 4
// NUM_FLAG_BITS equals the number of flag masks defined above.
// NOTE(review): how LETTER_START_BIT and NUM_FLAG_BITS position the fields
// inside an edge record is not visible in this listing.
90 #define LETTER_START_BIT 0
91 #define NUM_FLAG_BITS 3
// printf-style conversion for a signed long long value.
// NOTE(review): presumably used when printing edge/node references through
// tprintf() -- confirm.
92 #define REFFORMAT "%lld"
// Four-entry table of boolean flags; every entry is currently true.
// NOTE(review): presumably indexed by DawgType to say which kinds of dawg
// may begin a word -- confirm against the DawgType enum, which is not
// visible in this listing.
96 static const bool kBeginningDawgsType[] = { 1, 1, 1, 1 };
// One-character wildcard string, "*".
// NOTE(review): presumably the pattern honoured by check_for_words() when it
// is called with enable_wildcard set -- confirm.
105 static const char kWildcard[] =
"*";
144 bool enable_wildcard)
const;
155 bool word_end)
const = 0;
239 if (
edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
240 curr_word_end, curr_unichar_id))
return 0;
241 if (unichar_id > curr_unichar_id)
return 1;
242 if (unichar_id == curr_unichar_id) {
243 if (next_node > curr_next_node)
return 1;
244 if (next_node == curr_next_node) {
245 if (word_end > curr_word_end)
return 1;
259 return ((unichar_id == other_unichar_id) &&
260 (next_node == NO_EDGE || next_node == other_next_node) &&
261 (!word_end || (word_end == other_word_end)));
267 PermuterType perm,
int unicharset_size,
int debug_level);
331 const char *debug_msg) {
333 if (
data_[i] == new_info)
return false;
355 PermuterType perm,
int debug_level) {
356 read_squished_dawg(file, type, lang, perm, debug_level);
357 num_forward_edges_in_node0 = num_forward_edges(0);
360 const STRING &
lang, PermuterType perm,
int debug_level) {
361 FILE *file = fopen(filename,
"rb");
363 tprintf(
"Failed to open dawg file %s\n", filename);
366 read_squished_dawg(file, type, lang, perm, debug_level);
367 num_forward_edges_in_node0 = num_forward_edges(0);
372 int unicharset_size,
int debug_level) :
373 edges_(edges), num_edges_(num_edges) {
374 init(type, lang, perm, unicharset_size, debug_level);
375 num_forward_edges_in_node0 = num_forward_edges(0);
376 if (debug_level > 3) print_all(
"SquishedDawg:");
384 bool word_end)
const;
390 if (!edge_occupied(edge) || edge == NO_EDGE)
return;
391 assert(forward_edge(edge));
394 }
while (!last_edge(edge++));
424 FILE *file = fopen(filename,
"wb");
426 tprintf(
"Error opening %s\n", filename);
439 inline void set_empty_edge(
EDGE_REF edge_ref) {
443 inline void clear_all_edges() {
444 for (
int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge);
447 inline void clear_marker_flag(
EDGE_REF edge_ref) {
451 inline bool forward_edge(
EDGE_REF edge_ref)
const {
452 return (edge_occupied(edge_ref) &&
456 inline bool backward_edge(
EDGE_REF edge_ref)
const {
457 return (edge_occupied(edge_ref) &&
461 inline bool edge_occupied(
EDGE_REF edge_ref)
const {
465 inline bool last_edge(
EDGE_REF edge_ref)
const {
474 PermuterType perm,
int debug_level);
477 void print_edge(
EDGE_REF edge)
const;
480 void print_all(
const char* msg) {
481 tprintf(
"\n__________________________\n%s\n", msg);
482 for (
int i = 0; i < num_edges_; ++i) print_edge(i);
483 tprintf(
"__________________________\n");
492 int num_forward_edges_in_node0;
497 #endif // DICT_DAWG_H_
GenericVector< NodeChild > NodeChildVector
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
NodeChild(UNICHAR_ID id, EDGE_REF ref)
static const inT16 kDawgMagicNumber
Magic number to determine endianness when reading the Dawg from file.
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const char * > *cb) const
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
bool end_of_word(EDGE_REF edge_ref) const
void unichar_ids_of(NODE_REF node, NodeChildVector *vec) const
bool add_unique(const DawgInfo &new_info, bool debug, const char *debug_msg)
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
void print_node(NODE_REF node, int max_num_edges) const
bool word_in_dawg(const WERD_CHOICE &word) const
Returns true if the given word is in the Dawg.
int push_back(DawgInfo object)
DawgInfo(int i, EDGE_REF r)
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
PermuterType permuter() const
bool match_words(WERD_CHOICE *word, inT32 index, NODE_REF node, UNICHAR_ID wildcard) const
virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const =0
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
const STRING & lang() const
static const UNICHAR_ID kPatternUnicharID
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const
Returns the edge that corresponds to the letter out of this node.
void write_squished_dawg(FILE *file)
Writes the squished/reduced Dawg to a file.
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
void iterate_words(const UNICHARSET &unicharset, TessCallback1< const char * > *cb) const
bool operator==(const DawgInfo &other)
SquishedDawg(const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
virtual bool end_of_word(EDGE_REF edge_ref) const =0
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
DLLSYM void tprintf(const char *format,...)
virtual void print_node(NODE_REF node, int max_num_edges) const =0
unsigned long long int uinT64
void write_squished_dawg(const char *filename)
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
NODE_REF next_node(EDGE_REF edge) const
SquishedDawg(FILE *file, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
~DawgInfoVector()
Overload destructor, since clear() does not delete data_[] any more.
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec) const =0
GenericVector< int > SuccessorList
void init(DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
int check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
GenericVector< SuccessorList * > SuccessorListsVector
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.