Mercurial > hg > toys
view bk-tree.c++ @ 6:cad6633bc1fc
Preliminary final form
author | Jordi Gutiérrez Hermoso <jordigh@gmail.com> |
---|---|
date | Sat, 01 Jan 2011 03:11:04 -0600 |
parents | |
children | b2c93de84b36 |
line wrap: on
line source
#include "levenshtein.c++" #include <fstream> #include <sstream> // unordered_map seems to perform just as well, cheaper op[] but more // expensive ctor, overall balances out, might as well use plain map. #include <map> struct bk_tree { std::string word; std::map<size_t, bk_tree*> children; }; typedef std::map<size_t, bk_tree*> branches; void insert_word(bk_tree& tree, std::string& word) { if(tree.word == "") { tree.word = word; return; } size_t d = levenshtein(tree.word, word); bk_tree* child = tree.children[d]; if(not child) { bk_tree* subtree = new bk_tree; tree.children[d] = subtree; subtree->word = word; } else insert_word(*child, word); } void save_tree(const bk_tree& tree, std::ofstream& ofs) { ofs << tree.word; if(tree.children.size() > 0) { ofs << " ( "; for(branches::const_iterator i = tree.children.begin(); i != tree.children.end(); i++) { if( i-> second != 0) { ofs << i -> first << " "; save_tree( *(i->second), ofs); ofs << " "; } } ofs << ") "; } } std::string load_tree(bk_tree & tree, std::ifstream& ifs) { using namespace std; string word; ifs >> word; tree.word = word; string token; ifs >> token; if(token == "(") { size_t weight; string nexttoken; while(true) { if(nexttoken == ")") { return ""; } stringstream ss; if(nexttoken != "") { ss << nexttoken; ss >> weight; } else { ifs >> token; if(token ==")") return ""; ss << token; ss >> weight; } bk_tree* subtree = new bk_tree; tree.children[weight] = subtree; nexttoken = load_tree(*subtree, ifs); } } return token; } bool query_tree( bk_tree & tree, std::string& word, size_t m) { size_t d; if( (d = levenshtein(tree.word,word)) <= m) return true; bool found = false; for(size_t i = d - m; i <= d+m; i++) { if( tree.children[i] and query_tree(*tree.children[i], word, m)) { found = true; break; } } return found; }