Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "NVDMFeaturizer.h"
- #include <folly/MapUtil.h>
- #include <folly/String.h>
- #include "common/strings/LineReader.h"
- namespace facebook {
- namespace nlp_tools {
- namespace nvdm {
- NVDMFeaturizer::NVDMFeaturizer(const std::string &vocabPath) {
- int id = 0;
- for (auto line : strings::byLine(vocabPath)) {
- std::vector<folly::StringPiece> wordId;
- wordId.reserve(2);
- folly::split('\t', line, wordId);
- DCHECK_EQ(2, wordId.size());
- vocab_[wordId[0].toString()] = folly::to<int>(wordId[1]);
- }
- }
- std::vector<int> NVDMFeaturizer::genFeature(const std::string &text) {
- std::vector<std::string> tokens;
- folly::split(' ', text, tokens);
- std::vector<int> result(vocab_.size()); // 0 for all entries
- std::unordered_map<std::string, int> tfs;
- for (const auto &token : tokens) {
- if (vocab_.find(token) != vocab_.end()) { // token in vocab
- if (tfs.find(token) == tfs.end()) {
- tfs[token] = 1;
- } else {
- tfs[token]++;
- }
- }
- }
- for (const auto &entry : tfs) {
- DCHECK_LT(vocab_[entry.first], result.size());
- result[vocab_[entry.first]] = entry.second;
- }
- return result;
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement