Guest User

Untitled

a guest
Jan 16th, 2018
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.57 KB | None | 0 0
  1. <?php
  2.  
  3. namespace App\Utilities;
  4.  
  5. use NlpTools\FeatureFactories\DataAsFeatures;
  6. use NlpTools\Documents\TrainingSet;
  7.  
  8. class InverseDocumentFrequency
  9. {
  10.  
  11. const FREQUENCY_MODE = 1;
  12. const SMOOTH_MODE = 2;
  13. const PROBABILISTIC_MODE = 3;
  14.  
  15. public function __construct(TrainingSet $tset, $mode=self::FREQUENCY_MODE)
  16. {
  17. $this->mode = $mode;
  18.  
  19. $ff = new DataAsFeatures();
  20. $tset->setAsKey(TrainingSet::CLASS_AS_KEY);
  21. foreach ($tset as $class=>$doc) {
  22. $tokens = $ff->getFeatureArray($class,$doc);
  23. $tokens = array_fill_keys($tokens,1);
  24. foreach ($tokens as $token=>$v) {
  25. if (isset($this->idf[$token]))
  26. $this->idf[$token]++;
  27. else
  28. $this->idf[$token] = 1;
  29. }
  30. }
  31.  
  32. $D = count($tset);
  33.  
  34. if($this->mode === self::SMOOTH_MODE){
  35. foreach ($this->idf as $key => &$value) {
  36. $value = log(1 + ($D/$value));
  37. }
  38. }
  39. elseif($this->mode === self::FREQUENCY_MODE){
  40. foreach ($this->idf as $key => &$value) {
  41. $value = log($D/$value);
  42. }
  43. }
  44. elseif($this->mode === self::PROBABILISTIC_MODE){
  45. foreach ($this->idf as $key => &$value) {
  46. $value = log(($D-$value)/$value);
  47. }
  48. }
  49.  
  50.  
  51. $this->logD = log($D);
  52.  
  53. }
  54.  
  55.  
  56. public function getIdf($term)
  57. {
  58. if (isset($this->idf[$term])) {
  59. return $this->idf[$term];
  60. } else {
  61. return $this->logD;
  62. }
  63.  
  64. }
  65.  
  66.  
  67. }
Add Comment
Please, Sign In to add comment