Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- namespace App\Utilities;
- use NlpTools\FeatureFactories\DataAsFeatures;
- use NlpTools\Documents\TrainingSet;
/**
 * Inverse document frequency (IDF) table built from a training set.
 *
 * The constructor counts, for every token, the number of documents of the
 * training set that contain it, then converts each count into a weight using
 * one of the three formulas selected by the *_MODE constants.
 */
class InverseDocumentFrequency
{
    /** Plain IDF: log(D / df). */
    const FREQUENCY_MODE = 1;
    /** Smoothed IDF: log(1 + D / df). */
    const SMOOTH_MODE = 2;
    /** Probabilistic IDF: log((D - df) / df). */
    const PROBABILISTIC_MODE = 3;

    // The three properties below used to be created on the fly (dynamic
    // properties, hence public). They are declared explicitly, with the same
    // visibility, so the class keeps working without deprecation notices on
    // PHP 8.2+ and so that $idf is always an array.

    /** @var int One of the *_MODE constants. */
    public $mode;

    /** @var array Map of token => IDF weight. */
    public $idf = array();

    /** @var float log(D), where D is the number of documents in the training set. */
    public $logD;

    /**
     * Builds the IDF table.
     *
     * Side effect: the training set is switched to TrainingSet::CLASS_AS_KEY.
     *
     * Notes on edge cases (behaviour follows directly from the formulas):
     *  - with an empty training set no token is stored and log(0) is kept as
     *    the fallback value;
     *  - in PROBABILISTIC_MODE a token present in every document yields
     *    log(0), and a token present in more than half of the documents
     *    yields a negative weight.
     *
     * @param TrainingSet $tset Documents to analyse.
     * @param int         $mode One of FREQUENCY_MODE, SMOOTH_MODE, PROBABILISTIC_MODE.
     *
     * @throws \InvalidArgumentException When $mode is not one of the *_MODE constants.
     */
    public function __construct(TrainingSet $tset, $mode=self::FREQUENCY_MODE)
    {
        // Reject unknown modes up front: previously they silently left raw
        // document counts in the table, which getIdf() then returned as if
        // they were IDF weights.
        if (
            $mode !== self::FREQUENCY_MODE
            && $mode !== self::SMOOTH_MODE
            && $mode !== self::PROBABILISTIC_MODE
        ) {
            throw new \InvalidArgumentException(
                'Unknown IDF mode; expected one of the InverseDocumentFrequency::*_MODE constants.'
            );
        }

        $this->mode = $mode;
        $this->idf = array();

        $ff = new DataAsFeatures();
        $tset->setAsKey(TrainingSet::CLASS_AS_KEY);

        // Pass 1: document frequency (number of documents containing each token).
        foreach ($tset as $class => $doc) {
            $tokens = $ff->getFeatureArray($class, $doc);
            // Collapse repeated tokens so a document counts at most once per token.
            $distinct = array_keys(array_fill_keys($tokens, 1));
            foreach ($distinct as $token) {
                if (isset($this->idf[$token])) {
                    $this->idf[$token]++;
                } else {
                    $this->idf[$token] = 1;
                }
            }
        }

        // Pass 2: turn each document frequency into an IDF weight. Values are
        // written back by key (no by-reference foreach), so no dangling
        // reference is left behind.
        $D = count($tset);
        foreach ($this->idf as $token => $df) {
            switch ($mode) {
                case self::SMOOTH_MODE:
                    $this->idf[$token] = log(1 + ($D / $df));
                    break;
                case self::PROBABILISTIC_MODE:
                    $this->idf[$token] = log(($D - $df) / $df);
                    break;
                default: // self::FREQUENCY_MODE (validated above)
                    $this->idf[$token] = log($D / $df);
                    break;
            }
        }

        $this->logD = log($D);
    }

    /**
     * Returns the IDF weight of a term.
     *
     * @param string $term Token to look up.
     *
     * @return float The stored weight, or log(D) when the term did not occur
     *               in the training set.
     */
    public function getIdf($term)
    {
        return isset($this->idf[$term]) ? $this->idf[$term] : $this->logD;
    }
}
Add Comment
Please, Sign In to add comment