10 #ifndef FASTTEXT_DICTIONARY_H
11 #define FASTTEXT_DICTIONARY_H
19 #include <unordered_map>
38 static const int32_t MAX_VOCAB_SIZE = 30000000;
39 static const int32_t MAX_LINE_SIZE = 1024;
41 int32_t find(const std::string&) const;
42 void initTableDiscard();
55 int64_t pruneidx_size_ = -1;
58 std::vector<int32_t>& line,
59 const std::vector<int32_t>& hashes,
64 static const std::string EOS;
65 static const std::string BOW;
66 static const std::string EOW;
69 int32_t nwords() const;
70 int32_t nlabels() const;
71 int64_t ntokens() const;
72 int32_t getId(const std::string&) const;
75 bool discard(int32_t, real) const;
76 std::string getWord(int32_t) const;
77 const std::vector<int32_t>& getSubwords(int32_t) const;
78 const std::vector<int32_t> getSubwords(const std::string&) const;
79 void computeSubwords(const std::string&, std::vector<int32_t>&) const;
82 std::vector<int32_t>&,
83 std::vector<std::string>&) const;
86 std::vector<int32_t>&,
87 std::vector<std::string>&) const;
88 uint32_t hash(const std::string& str) const;
89 void add(const std::string&);
90 bool readWord(std::istream&, std::string&) const;
91 void readFromFile(std::istream&);
92 std::string getLabel(int32_t) const;
93 void save(std::ostream&) const;
94 void load(std::istream&);
95 std::vector<int64_t> getCounts(entry_type) const;
96 int32_t getLine(std::istream&, std::vector<int32_t>&, std::vector<int32_t>&,
97 std::vector<int32_t>&, std::minstd_rand&) const;
98 int32_t getLine(std::istream&, std::vector<int32_t>&,
99 std::vector<int32_t>&, std::minstd_rand&) const;
100 void threshold(int64_t, int64_t);
101 void prune(std::vector<int32_t>&);
std::unordered_map< int32_t, int32_t > pruneidx_
Definition: dictionary.h:56
Definition: dictionary.h:36
std::string word
Definition: dictionary.h:30
std::vector< real > pdiscard_
Definition: dictionary.h:49
static const std::string EOW
Definition: dictionary.h:66
int32_t nwords_
Definition: dictionary.h:51
static const std::string BOW
Definition: dictionary.h:65
int32_t nlabels_
Definition: dictionary.h:52
entry_type type
Definition: dictionary.h:32
int32_t id_type
Definition: dictionary.h:26
std::shared_ptr< Args > args_
Definition: dictionary.h:45
std::vector< int32_t > word2int_
Definition: dictionary.h:46
std::vector< int32_t > subwords
Definition: dictionary.h:33
Definition: dictionary.h:29
entry_type
Definition: dictionary.h:27
int64_t count
Definition: dictionary.h:31
float real
Definition: real.h:15
int64_t ntokens_
Definition: dictionary.h:53
int32_t size_
Definition: dictionary.h:50
static const std::string EOS
Definition: dictionary.h:64
std::vector< entry > words_
Definition: dictionary.h:47