Advertisement
lskeeper

Untitled

Oct 17th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.17 KB | None | 0 0
  1. #include "NVDMFeaturizer.h"
  2.  
  3. #include <folly/MapUtil.h>
  4. #include <folly/String.h>
  5.  
  6. #include "common/strings/LineReader.h"
  7.  
  8. namespace facebook {
  9. namespace nlp_tools {
  10. namespace nvdm {
  11.  
  12. NVDMFeaturizer::NVDMFeaturizer(const std::string &vocabPath) {
  13. int id = 0;
  14. for (auto line : strings::byLine(vocabPath)) {
  15. std::vector<folly::StringPiece> wordId;
  16. wordId.reserve(2);
  17. folly::split('\t', line, wordId);
  18. DCHECK_EQ(2, wordId.size());
  19. vocab_[wordId[0].toString()] = folly::to<int>(wordId[1]);
  20. }
  21. }
  22.  
  23. std::vector<int> NVDMFeaturizer::genFeature(const std::string &text) {
  24. std::vector<std::string> tokens;
  25. folly::split(' ', text, tokens);
  26.  
  27. std::vector<int> result(vocab_.size()); // 0 for all entries
  28. std::unordered_map<std::string, int> tfs;
  29.  
  30. for (const auto &token : tokens) {
  31. if (vocab_.find(token) != vocab_.end()) { // token in vocab
  32. if (tfs.find(token) == tfs.end()) {
  33. tfs[token] = 1;
  34. } else {
  35. tfs[token]++;
  36. }
  37. }
  38. }
  39.  
  40. for (const auto &entry : tfs) {
  41. DCHECK_LT(vocab_[entry.first], result.size());
  42. result[vocab_[entry.first]] = entry.second;
  43. }
  44.  
  45. return result;
  46. }
  47. }
  48. }
  49. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement