fastText  d00d36476b15
Fast text processing tool/library
fasttext::Dictionary Class Reference

#include <dictionary.h>

Public Member Functions

 Dictionary (std::shared_ptr< Args >)
 
int32_t nwords () const
 
int32_t nlabels () const
 
int64_t ntokens () const
 
int32_t getId (const std::string &) const
 
entry_type getType (int32_t) const
 
entry_type getType (const std::string &) const
 
bool discard (int32_t, real) const
 
std::string getWord (int32_t) const
 
const std::vector< int32_t > & getSubwords (int32_t) const
 
const std::vector< int32_t > getSubwords (const std::string &) const
 
void computeSubwords (const std::string &, std::vector< int32_t > &) const
 
void computeSubwords (const std::string &, std::vector< int32_t > &, std::vector< std::string > &) const
 
void getSubwords (const std::string &, std::vector< int32_t > &, std::vector< std::string > &) const
 
uint32_t hash (const std::string &str) const
 
void add (const std::string &)
 
bool readWord (std::istream &, std::string &) const
 
void readFromFile (std::istream &)
 
std::string getLabel (int32_t) const
 
void save (std::ostream &) const
 
void load (std::istream &)
 
std::vector< int64_t > getCounts (entry_type) const
 
int32_t getLine (std::istream &, std::vector< int32_t > &, std::vector< int32_t > &, std::vector< int32_t > &, std::minstd_rand &) const
 
int32_t getLine (std::istream &, std::vector< int32_t > &, std::vector< int32_t > &, std::minstd_rand &) const
 
void threshold (int64_t, int64_t)
 
void prune (std::vector< int32_t > &)
 

Static Public Attributes

static const std::string EOS = "</s>"
 
static const std::string BOW = "<"
 
static const std::string EOW = ">"
 

Private Member Functions

int32_t find (const std::string &) const
 
void initTableDiscard ()
 
void initNgrams ()
 
void addWordNgrams (std::vector< int32_t > &line, const std::vector< int32_t > &hashes, int32_t n) const
 

Private Attributes

std::shared_ptr< Args > args_
 
std::vector< int32_t > word2int_
 
std::vector< entry > words_
 
std::vector< real > pdiscard_
 
int32_t size_
 
int32_t nwords_
 
int32_t nlabels_
 
int64_t ntokens_
 
int64_t pruneidx_size_ = -1
 
std::unordered_map< int32_t, int32_t > pruneidx_
 

Static Private Attributes

static const int32_t MAX_VOCAB_SIZE = 30000000
 
static const int32_t MAX_LINE_SIZE = 1024
 

Constructor & Destructor Documentation

◆ Dictionary()

fasttext::Dictionary::Dictionary ( std::shared_ptr< Args >  args)
explicit

Member Function Documentation

◆ add()

void fasttext::Dictionary::add ( const std::string &  w)

◆ addWordNgrams()

void fasttext::Dictionary::addWordNgrams ( std::vector< int32_t > &  line,
const std::vector< int32_t > &  hashes,
int32_t  n 
) const
private

◆ computeSubwords() [1/2]

void fasttext::Dictionary::computeSubwords ( const std::string &  word,
std::vector< int32_t > &  ngrams 
) const

◆ computeSubwords() [2/2]

void fasttext::Dictionary::computeSubwords ( const std::string &  word,
std::vector< int32_t > &  ngrams,
std::vector< std::string > &  substrings 
) const

◆ discard()

bool fasttext::Dictionary::discard ( int32_t  id,
real  rand 
) const

◆ find()

int32_t fasttext::Dictionary::find ( const std::string &  w) const
private

◆ getCounts()

std::vector< int64_t > fasttext::Dictionary::getCounts ( entry_type  type) const

◆ getId()

int32_t fasttext::Dictionary::getId ( const std::string &  w) const

◆ getLabel()

std::string fasttext::Dictionary::getLabel ( int32_t  lid) const

◆ getLine() [1/2]

int32_t fasttext::Dictionary::getLine ( std::istream &  in,
std::vector< int32_t > &  words,
std::vector< int32_t > &  word_hashes,
std::vector< int32_t > &  labels,
std::minstd_rand &  rng 
) const

◆ getLine() [2/2]

int32_t fasttext::Dictionary::getLine ( std::istream &  in,
std::vector< int32_t > &  words,
std::vector< int32_t > &  labels,
std::minstd_rand &  rng 
) const

◆ getSubwords() [1/3]

const std::vector< int32_t > & fasttext::Dictionary::getSubwords ( int32_t  i) const

◆ getSubwords() [2/3]

const std::vector< int32_t > fasttext::Dictionary::getSubwords ( const std::string &  word) const

◆ getSubwords() [3/3]

void fasttext::Dictionary::getSubwords ( const std::string &  word,
std::vector< int32_t > &  ngrams,
std::vector< std::string > &  substrings 
) const

◆ getType() [1/2]

entry_type fasttext::Dictionary::getType ( int32_t  id) const

◆ getType() [2/2]

entry_type fasttext::Dictionary::getType ( const std::string &  w) const

◆ getWord()

std::string fasttext::Dictionary::getWord ( int32_t  id) const

◆ hash()

uint32_t fasttext::Dictionary::hash ( const std::string &  str) const

◆ initNgrams()

void fasttext::Dictionary::initNgrams ( )
private

◆ initTableDiscard()

void fasttext::Dictionary::initTableDiscard ( )
private

◆ load()

void fasttext::Dictionary::load ( std::istream &  in)

◆ nlabels()

int32_t fasttext::Dictionary::nlabels ( ) const

◆ ntokens()

int64_t fasttext::Dictionary::ntokens ( ) const

◆ nwords()

int32_t fasttext::Dictionary::nwords ( ) const

◆ prune()

void fasttext::Dictionary::prune ( std::vector< int32_t > &  idx)

◆ readFromFile()

void fasttext::Dictionary::readFromFile ( std::istream &  in)

◆ readWord()

bool fasttext::Dictionary::readWord ( std::istream &  in,
std::string &  word 
) const

◆ save()

void fasttext::Dictionary::save ( std::ostream &  out) const

◆ threshold()

void fasttext::Dictionary::threshold ( int64_t  t,
int64_t  tl 
)

Member Data Documentation

◆ args_

std::shared_ptr<Args> fasttext::Dictionary::args_
private

◆ BOW

const std::string fasttext::Dictionary::BOW = "<"
static

◆ EOS

const std::string fasttext::Dictionary::EOS = "</s>"
static

◆ EOW

const std::string fasttext::Dictionary::EOW = ">"
static

◆ MAX_LINE_SIZE

const int32_t fasttext::Dictionary::MAX_LINE_SIZE = 1024
static private

◆ MAX_VOCAB_SIZE

const int32_t fasttext::Dictionary::MAX_VOCAB_SIZE = 30000000
static private

◆ nlabels_

int32_t fasttext::Dictionary::nlabels_
private

◆ ntokens_

int64_t fasttext::Dictionary::ntokens_
private

◆ nwords_

int32_t fasttext::Dictionary::nwords_
private

◆ pdiscard_

std::vector<real> fasttext::Dictionary::pdiscard_
private

◆ pruneidx_

std::unordered_map<int32_t, int32_t> fasttext::Dictionary::pruneidx_
private

◆ pruneidx_size_

int64_t fasttext::Dictionary::pruneidx_size_ = -1
private

◆ size_

int32_t fasttext::Dictionary::size_
private

◆ word2int_

std::vector<int32_t> fasttext::Dictionary::word2int_
private

◆ words_

std::vector<entry> fasttext::Dictionary::words_
private

The documentation for this class was generated from the following files: