Advertisement
Guest User

Index.pm

a guest
Nov 10th, 2014
190
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 50.04 KB | None | 0 0
  1. # Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
  2. #
  3. # Copyright (C) 2009-2014 Michael Daum http://michaeldaumconsulting.com
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; either version 2
  8. # of the License, or (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13.  
  14. package Foswiki::Plugins::SolrPlugin::Index;
  15. use strict;
  16. use warnings;
  17.  
  18. use Foswiki::Plugins::SolrPlugin::Base ();
  19. our @ISA = qw( Foswiki::Plugins::SolrPlugin::Base );
  20.  
  21. use Error qw( :try );
  22. use Fcntl qw( :flock );
  23. use DBI ();
  24. use Foswiki::Func ();
  25. use Foswiki::Plugins ();
  26. use Foswiki::Plugins::SolrPlugin ();
  27. use Foswiki::Form ();
  28. use Foswiki::OopsException ();
  29. use Foswiki::Time ();
  30. use Foswiki::Contrib::Stringifier ();
  31.  
  # Debug switches; edit the values by hand.
  use constant {
    TRACE   => 0,    # guards the extra diagnostic log lines
    VERBOSE => 1,    # guards the progress log lines
    PROFILE => 0,    # guards the timing log lines
  };

  #use Time::HiRes (); # enable this too when profiling

  use constant {
    COMMIT_THRESHOLD => 200,       # commit every x topics on a bulk index job
    WAIT_SEARCHER    => "true",
    SOFTCOMMIT       => "true",
  };
  41.  
  42. ##############################################################################
  # Constructor: creates an indexer for the given session.
  #
  # The solr endpoint is taken from {SolrPlugin}{UpdateUrl}, falling back to
  # {SolrPlugin}{Url}; Error::Simple is thrown when neither is defined.
  # The constructor then connects to solr, installs a SIGINT handler that
  # only flags the interrupt, reads the timestamps database settings and
  # opens that database.
  #
  # Parameters:
  #   $class   - class name
  #   $session - session object handed to the base class constructor
  #
  # Returns the new indexer object.
  sub new {
    my ($class, $session) = @_;

    my $this = $class->SUPER::new($session);

    $this->{url} = $Foswiki::cfg{SolrPlugin}{UpdateUrl} || $Foswiki::cfg{SolrPlugin}{Url};

    $this->{_groupCache} = {};
    $this->{_webACLCache} = {};

    Error::Simple->throw("no solr url defined") unless defined $this->{url};

    # Compared to the Search constructor there's no autostarting here
    # to prevent any indexer to accidentally create a solrindex lock and further
    # java inheriting it. So we simply test for connectivity; a failure is
    # only logged here.
    $this->connect();

    unless ($this->{solr}) {
      $this->log("ERROR: can't conect solr daemon");
    }

    # trap SIGINT: the handler only sets a flag so that work in progress
    # can be finished
    $SIG{INT} = sub {
      $this->log("got interrupted ... finishing work");
      $this->{_trappedSignal} = 1;    # will be detected by loops further down
    };

    # TODO: trap SIGALARM
    # let the indexer run for a maximum timespan, then flag a signal for it
    # to bail out from work done so far

    # Each setting is its own statement; they used to be chained with
    # commas into a single expression together with the init call below.
    $this->{workArea} = Foswiki::Func::getWorkArea('SolrPlugin');
    $this->{dsn} = $Foswiki::cfg{SolrPlugin}{Database}{DSN}
      || 'dbi:SQLite:dbname=' . $this->{workArea} . '/timestamps.db';
    $this->{username} = $Foswiki::cfg{SolrPlugin}{Database}{UserName};
    $this->{password} = $Foswiki::cfg{SolrPlugin}{Database}{Password};
    $this->{tablePrefix} = $Foswiki::cfg{SolrPlugin}{Database}{TablePrefix} || 'foswiki_';

    $this->initTimestampsDB;

    return $this;
  }
  84.  
  85. ################################################################################
  # Opens the timestamps database on first use, creating the timestamps
  # table and its unique (web, topic) index when the table cannot be
  # queried. Every call then runs migrateTimestamps.
  #
  # Returns the database handle stored in $this->{dbh}.
  sub initTimestampsDB {
    my $this = shift;

    if (!defined $this->{dbh}) {
      my %connectAttrs = (
        PrintError => 0,
        RaiseError => 1,
        AutoCommit => 1
      );

      $this->{dbh} = DBI->connect($this->{dsn}, $this->{username}, $this->{password}, \%connectAttrs);

      Error::Simple->throw("Can't open database $this->{dsn}: " . $DBI::errstr)
        unless defined $this->{dbh};

      my $prefix = $this->{tablePrefix};
      my $table = $prefix . 'timestamps';
      my $index = $prefix . 'timestamps_index';

      # probe the table; with RaiseError set a failing select dies
      eval { $this->{dbh}->do("select * from $table limit 1"); };

      if ($@) {
        $this->log("creating database") if TRACE;

        $this->{dbh}->do(<<HERE);
create table $table (
web char(255),
topic char(255),
epoch int
)
HERE

        $this->{dbh}->do("create unique index $index on $table (web, topic)");
      } else {
        $this->log("found database") if TRACE;
      }
    }

    # import any leftover timestamp files
    $this->migrateTimestamps;

    return $this->{dbh};
  }
  133.  
  134. ################################################################################
  # Imports file based timestamps found below <workArea>/timestamps into
  # the timestamps database and removes the files afterwards.
  #
  # Files directly inside the base directory are web timestamps named
  # <web>.timestamp; files in sub-directories are topic timestamps named
  # <topic>.timestamp, the directory path relative to the base directory
  # being the web. Timestamps of webs or topics that do not exist anymore
  # are dropped.
  #
  # Parameters:
  #   $dir - directory to process; defaults to the base directory, in which
  #          case the sub returns silently when it does not exist. Used for
  #          the recursion into sub-directories.
  #
  # Dies when a directory cannot be opened.
  sub migrateTimestamps {
    my ($this, $dir) = @_;

    my $baseDir = $this->{workArea} . '/timestamps';
    unless ($dir) {
      $dir = $baseDir;
      return unless -d $dir;
    }

    # The web of all topic timestamps in this directory. The base directory
    # is quoted so that regex metacharacters in the work area path are
    # matched literally.
    my $dirWeb = $dir;
    $dirWeb =~ s/^\Q$baseDir\E\/?//;

    opendir(my $dh, $dir) or die "can't open directory $dir: $!\n";

    while (defined(my $file = readdir($dh))) {
      next if $file eq '.' or $file eq '..';

      my $path = "$dir/$file";
      if (-d $path) {
        $this->migrateTimestamps($path);
        next;
      }

      my $epoch = Foswiki::Func::readFile($path);

      if ($dirWeb) {

        # topic timestamp
        my $web = $dirWeb;
        my $topic = $file;
        $topic =~ s/\.timestamp$//;

        if (Foswiki::Func::topicExists($web, $topic)) {
          $this->log("importing web=$web, topic=$topic, epoch=$epoch") if TRACE;
          $this->setTimestamp($web, $topic, $epoch);
        } else {
          $this->log("found dangling topic timestamp for web=$web, topic=$topic") if TRACE;
        }
      } else {

        # web timestamp
        my $web = $file;
        $web =~ s/\.timestamp$//;

        $this->log("Migrating timestaps database for $web") if VERBOSE;

        if (Foswiki::Func::webExists($web)) {
          $this->log("importing web=$web epoch=$epoch") if TRACE;
          $this->setTimestamp($web, undef, $epoch);
        } else {
          $this->log("found dangling web timestamp for web=$web") if TRACE;
        }
      }

      # the file is removed whether or not it was imported
      unlink $path;
    }

    closedir($dh);

    rmdir $dir;
  }
  190.  
  191. ################################################################################
  # Shuts the indexer down: commits when any of the on-save, on-upload or
  # on-rename update switches is enabled, finishes the cached statement
  # handles, disconnects from the timestamps database and clears the
  # cached members.
  sub finish {
    my $this = shift;

    my $onlineUpdates = grep { $Foswiki::cfg{SolrPlugin}{$_} }
      qw(EnableOnSaveUpdates EnableOnUploadUpdates EnableOnRenameUpdates);
    $this->commit(1) if $onlineUpdates;

    foreach my $handle (qw(_insert_timestamp _select_timestamp)) {
      $this->{$handle}->finish if defined $this->{$handle};
    }
    $this->{dbh}->disconnect if defined $this->{dbh};

    # reset the members to undef; the keys themselves stay in place
    @{$this}{qw(_insert_timestamp _select_timestamp dbh dsn _knownUsers _groupCache)} = ();
    undef $this->{_webACLCache};
  }
  212.  
  213. ################################################################################
  # Entry point to either update one topic or a complete web.
  #
  # Reads these parameters from the current query:
  #   web      - web(s) to index, defaults to 'all'
  #   topic    - when given, only this topic is updated; the web then
  #              defaults to the session's web
  #   mode     - passed on to update(), defaults to 'delta'
  #   optimize - when true, the index is optimized afterwards
  #
  # Error::Simple exceptions are reported on STDERR and not propagated.
  sub index {
    my $this = shift;

    # exclusively lock the indexer to prevent a delta and a full index
    # mode to run in parallel

    try {

      # $this->lock();

      my $query = Foswiki::Func::getCgiQuery();
      my $web = $query->param('web') || 'all';
      my $topic = $query->param('topic');
      my $mode = $query->param('mode') || 'delta';

      # param() is forced into scalar context: inside an argument list it
      # would be called in list context and a repeated parameter would
      # supply isTrue's second argument.
      my $optimize = Foswiki::Func::isTrue(scalar $query->param('optimize'));

      if ($topic) {
        $web = $this->{session}->{webName} if !$web || $web eq 'all';

        $this->log("doing a topic index $web.$topic") if TRACE;
        $this->updateTopic($web, $topic);
      } else {

        $this->log("doing a web index in $mode mode") if TRACE;
        $this->update($web, $mode);
      }

      $this->commit(1) if $this->{commitCounter};
      $this->optimize() if $optimize;
    }

    catch Error::Simple with {
      my $error = shift;
      print STDERR "Error: " . $error->{-text} . "\n";
    }

    finally {

      # $this->unlock();
    };
  }
  256.  
  257. ################################################################################
  # Save hook: hands all remaining arguments to updateTopic.
  # Does nothing when there is no solr connection.
  sub afterSaveHandler {
    my ($this, @args) = @_;

    if (!$this->{solr}) {
      return;
    }

    return $this->updateTopic(@args);
  }
  265.  
  266. ################################################################################
  # Rename hook: updates both the old and the new topic location.
  # The attachment arguments are accepted but not used. Does nothing when
  # there is no solr connection.
  sub afterRenameHandler {
    my ($this, $oldWeb, $oldTopic, $oldAttachment, $newWeb, $newTopic, $newAttachment) = @_;

    if (!$this->{solr}) {
      return;
    }

    $this->updateTopic($oldWeb, $oldTopic);
    return $this->updateTopic($newWeb, $newTopic);
  }
  275.  
  276. ################################################################################
  # Upload hook: indexes the uploaded attachment together with the ACL
  # fields of the topic it belongs to. Does nothing when there is no solr
  # connection.
  #
  # Parameters:
  #   $attachment - attachment record handed on to indexAttachment
  #   $meta       - meta object of the topic the attachment belongs to
  sub afterUploadHandler {
    my ($this, $attachment, $meta) = @_;

    if (!$this->{solr}) {
      return;
    }

    my $web = $meta->web;
    my $topic = $meta->topic;

    # SMELL: make sure meta is loaded
    if (!$meta->latestIsLoaded()) {
      $meta = $meta->load();
    }

    my @aclFields = $this->getAclFields($web, $topic, $meta);

    return $this->indexAttachment($web, $topic, $attachment, \@aclFields);
  }
  292.  
  293. ################################################################################
  # Updates the documents of one or more webs, either fully or
  # incrementally.
  #
  # Before anything else, webs that are still known to the searcher but do
  # not exist anymore are deleted from the index.
  #
  # Parameters:
  #   $web  - 'all' (or undef) for all user webs, or a comma separated list
  #           of webs; each listed web is processed together with the webs
  #           returned by Foswiki::Func::getListOfWebs("user", $item)
  #   $mode - 'full' (default): the web is removed from the index and all
  #           of its topics are indexed again;
  #           anything else: delta mode, only topics changed since their
  #           last indexing are deleted and indexed again
  #
  # Processing stops as soon as a trapped signal has been flagged; the web
  # in progress does not get its timestamp updated then.
  sub update {
    my ($this, $web, $mode) = @_;

    $mode ||= 'full';

    # check if old webs still exist
    my $searcher = Foswiki::Plugins::SolrPlugin::getSearcher();
    foreach my $indexedWeb ($searcher->getListOfWebs()) {
      next if Foswiki::Func::webExists($indexedWeb);
      $this->log("$indexedWeb doesn't exist anymore ... deleting");
      $this->deleteWeb($indexedWeb);
    }

    my @webs = ();
    if (!defined($web) || $web eq 'all') {
      @webs = Foswiki::Func::getListOfWebs("user");
    } else {
      foreach my $item (split(/\s*,\s*/, $web)) {
        push @webs, $item, Foswiki::Func::getListOfWebs("user", $item);
      }
    }

    # TODO: check the list of webs we had the last time we did a full index
    # of all webs; then possibly delete them

    foreach my $thisWeb (@webs) {
      next if $this->isSkippedWeb($thisWeb);

      if ($mode eq 'full') {

        # full
        $this->deleteWeb($thisWeb);
        foreach my $topic (Foswiki::Func::getTopicList($thisWeb)) {
          next if $this->isSkippedTopic($thisWeb, $topic);
          $this->indexTopic($thisWeb, $topic);
          last if $this->{_trappedSignal};
        }
      } else {

        # delta
        my $webTime = $this->getTimestamp($thisWeb);

        foreach my $topic (Foswiki::Func::getTopicList($thisWeb)) {
          next if $this->isSkippedTopic($thisWeb, $topic);

          my $topicTime = $this->getTimestamp($thisWeb, $topic);
          next if $topicTime > $webTime;    # this topic has been indexed individually

          my $changed;
          if ($Foswiki::Plugins::SESSION->can('getApproxRevTime')) {
            $changed = $Foswiki::Plugins::SESSION->getApproxRevTime($thisWeb, $topic);
          } else {

            # This is here for old engines
            $changed = $Foswiki::Plugins::SESSION->{store}->getTopicLatestRevTime($thisWeb, $topic);
          }
          next if $topicTime > $changed;

          $this->deleteTopic($thisWeb, $topic);
          $this->indexTopic($thisWeb, $topic);

          $this->setTimestamp($thisWeb, $topic);
          last if $this->{_trappedSignal};
        }
      }

      # an interrupted web is not stamped
      last if $this->{_trappedSignal};
      $this->setTimestamp($thisWeb);
    }
  }
  383.  
  384. ################################################################################
  # Updates one specific topic: the topic is deleted from the index and,
  # when it still exists, indexed again and stamped. Skipped webs and
  # topics are left alone. Ends with a commit() call.
  #
  # Parameters:
  #   $web, $topic - topic to update; normalized first
  #   $meta, $text - optional, passed on to deleteTopic / indexTopic
  sub updateTopic {
    my ($this, $web, $topic, $meta, $text) = @_;

    ($web, $topic) = $this->normalizeWebTopicName($web, $topic);

    return if $this->isSkippedWeb($web) || $this->isSkippedTopic($web, $topic);

    $this->deleteTopic($web, $topic, $meta);

    my $stillThere = Foswiki::Func::topicExists($web, $topic);
    if ($stillThere) {
      $this->indexTopic($web, $topic, $meta, $text);
      $this->setTimestamp($web, $topic);
    }

    return $this->commit();
  }
  402.  
  403. ################################################################################
  # Work horse: builds the solr document for one topic, indexes each of its
  # attachments and adds the topic document to the index.
  #
  # Parameters:
  #   $web, $topic - topic to index; slashes in $web are turned into dots
  #   $meta, $text - optional; unless both are defined the topic is read
  #                  via Foswiki::Func::readTopic
  #
  # The document gets the common fields, the parent, the content language,
  # the form name and form field values, outgoing links, preferences,
  # whatever the registered index topic handlers add, ACL fields, the
  # attachment names and a thumbnail.
  #
  # Foswiki::OopsException raised while reading the form definition or by
  # an index topic handler, and Error::Simple raised while parsing a date
  # field or adding the document, are logged and do not abort indexing.
  sub indexTopic {
    my ($this, $web, $topic, $meta, $text) = @_;

    my %outgoingLinks = ();

    # Declared unconditionally: a "my" with a statement modifier has
    # undefined behaviour.
    my $t0;
    $t0 = [Time::HiRes::gettimeofday()] if PROFILE;

    # normalize web name
    $web =~ s/\//\./g;

    $this->log("Indexing topic $web.$topic") if VERBOSE;

    # new solr document for the current topic
    my $doc = $this->newDocument();

    unless (defined $meta && defined $text) {
      ($meta, $text) = Foswiki::Func::readTopic($web, $topic);
    }

    $text = $this->entityDecode($text);

    # Eliminate Topic Makup Language elements and newlines.
    my $origText = $text;
    $text = $this->plainify($text, $web, $topic);

    # parent data
    my $parent = $meta->getParent();
    my ($parentWeb, $parentTopic);
    if ($parent) {
      ($parentWeb, $parentTopic) = $this->normalizeWebTopicName($web, $parent);
      $this->_addLink(\%outgoingLinks, $web, $topic, $parentWeb, $parentTopic);
    }

    # get all outgoing links from topic text
    $this->extractOutgoingLinks($web, $topic, $origText, \%outgoingLinks);

    # get date
    my ($date, undef, $rev) = $this->getRevisionInfo($web, $topic);
    $date ||= 0;    # prevent formatTime to crap out
    $date = Foswiki::Func::formatTime($date, 'iso', 'gmtime');

    # get create date
    my ($createDate) = $this->getRevisionInfo($web, $topic, 1);
    $createDate ||= 0;    # prevent formatTime to crap out
    $createDate = Foswiki::Func::formatTime($createDate, 'iso', 'gmtime');

    # get contributor and most recent author
    my @contributors = $this->getContributors($web, $topic);
    my %contributors = map { ($_ => 1) } @contributors;
    $doc->add_fields(contributor => [keys %contributors]);

    # the first contributor is used as the author, the last one as the creator
    my $author = $contributors[0];
    my $createAuthor = $contributors[-1];

    # get TopicTitle
    my $topicTitle = $this->getTopicTitle($web, $topic, $meta);

    # get summary
    my $summary = $this->getTopicSummary($web, $topic, $meta, $text);

    # url to topic
    my $url = $this->getScriptUrlPath($web, $topic, "view");

    my $collection = $Foswiki::cfg{SolrPlugin}{DefaultCollection} || "wiki";

    # fall back to the web name when the home topic has no title of its own
    my $containerTitle = $this->getTopicTitle($web, $Foswiki::cfg{HomeTopicName});
    $containerTitle = $web if $containerTitle eq $Foswiki::cfg{HomeTopicName};

    # gather all webs and parent webs
    my @webCats = ();
    my @prefix = ();
    foreach my $component (split(/\./, $web)) {
      push @prefix, $component;
      push @webCats, join(".", @prefix);
    }

    $doc->add_fields(

      # common fields
      id => "$web.$topic",
      collection => $collection,
      url => $url,
      topic => $topic,
      web => $web,
      webcat => [@webCats],
      webtopic => "$web.$topic",
      title => $topicTitle,
      text => $text,
      summary => $summary,
      author => $author,
      date => $date,
      version => $rev,
      createauthor => $createAuthor,
      createdate => $createDate,
      type => 'topic',
      container_id => $web,
      container_url => $this->getScriptUrlPath($web, $Foswiki::cfg{HomeTopicName}, "view"),
      container_title => $containerTitle,
      icon => $this->mapToIconFileName('topic'),
    );

    $doc->add_fields(parent => "$parentWeb.$parentTopic") if $parent;

    # tag and analyze language
    my $contentLanguage = $this->getContentLanguage($web, $topic);
    if (defined $contentLanguage && $contentLanguage ne 'detect') {
      $doc->add_fields(
        language => $contentLanguage,
        'text_' . $contentLanguage => $text,
      );
    }

    # process form
    my $formName = $meta->getFormName();
    if ($formName) {

      # read form definition to add field type hints
      my $formDef;
      try {
        $formDef = Foswiki::Form->new($this->{session}, $web, $formName);
      }
      catch Foswiki::OopsException with {

        # Form definition not found, ignore
        $this->log("ERROR: can't read form definition for $formName");
      };

      $formName =~ s/\//\./g;
      $doc->add_fields(form => $formName);

      if ($formDef) {    # form definition found, if not the formfields aren't indexed

        my %seenFields = ();
        my $formFields = $formDef->getFields();
        if ($formFields) {
          foreach my $fieldDef (@{$formFields}) {
            my $name = $fieldDef->{name};
            my $type = $fieldDef->{type};
            my $isMultiValued = $fieldDef->isMultiValued;
            my $isValueMapped = $fieldDef->can("isValueMapped") && $fieldDef->isValueMapped;
            my $field = $meta->get('FIELD', $name);
            next unless $field;

            # prevent from mall-formed formDefinitions
            if ($seenFields{$name}) {
              $this->log("WARNING: malformed form definition for $web.$formName - field $name appear twice must be unique");
              next;
            }
            $seenFields{$name} = 1;

            # an undefined value is treated as empty so that the string
            # operations below don't warn
            my $value = $field->{value};
            $value = '' unless defined $value;

            if ($isValueMapped) {
              $fieldDef->getOptions();    # load value map
              # SMELL: there's no api to get the mapped display value
              my $valueMap = $fieldDef->{valueMap};
              $value = $valueMap->{$value} if defined $valueMap && defined $valueMap->{$value};
            }

            # extract outgoing links for formfield values
            $this->extractOutgoingLinks($web, $topic, $value, \%outgoingLinks);

            # bit of cleanup
            $value =~ s/<!--.*?-->//gs;

            # create a dynamic field indicating the field type to solr

            # date
            if ($type eq 'date') {
              try {
                my $epoch = $value;
                $epoch = Foswiki::Time::parseTime($value) unless $epoch =~ /^\d+$/;
                $epoch ||= 0;    # prevent formatTime to crap out
                $value = Foswiki::Time::formatTime($epoch, 'iso', 'gmtime');
                $doc->add_fields('field_' . $name . '_dt' => $value,);
              }
              catch Error::Simple with {
                $this->log("WARNING: malformed date value '$value'");
              };
            }

            # multi-valued types
            elsif ($isMultiValued || $name =~ /TopicType/ || $type eq 'radio') {    # TODO: make this configurable
              my $fieldName = 'field_' . $name;
              $fieldName =~ s/(_(?:i|s|l|t|b|f|dt|lst))$//;

              $doc->add_fields($fieldName . '_lst' => [split(/\s*,\s*/, $value)]);
            }

            # finally make it a non-list field as well
            {
              my $fieldName = 'field_' . $name;
              my $fieldType = '_s';

              # is there an explicit type info part of the formfield name?
              if ($fieldName =~ s/(_(?:i|s|l|t|b|f|dt|lst))$//) {
                $fieldType = $1;
              }

              # add an extra check for floats
              if ($fieldType eq '_f') {
                if ($value =~ /^\s*([\-\+]?\d+(\.\d+)?)\s*$/) {
                  $doc->add_fields($fieldName . '_f' => $1,);
                } else {
                  $this->log("WARNING: malformed float value '$value'");
                }
              }

              # for explicit _s fields apply a full plainify
              elsif ($fieldType eq '_s') {

                # note this might alter the content too much in some cases.
                # so we try to remove only those characters that break the json parser
                #$value = $this->plainify($value, $web, $topic);
                $value =~ s/<!--.*?-->//gs;    # remove all HTML comments
                $value =~ s/<[^>]*>/ /g;       # remove all HTML tags
                $value = $this->discardIllegalChars($value);    # remove illegal characters

                $doc->add_fields(
                  $fieldName . '_s' => $value,
                  $fieldName . '_search' => $value,
                ) if defined $value && $value ne '';
              } else {
                $doc->add_fields($fieldName . $fieldType => $value,) if defined $value && $value ne '';
              }
            }
          }
        }
      }
    }

    # store all outgoing links collected so far
    foreach my $link (keys %outgoingLinks) {
      next if $link eq "$web.$topic";    # self link is not an outgoing link
      $doc->add_fields(outgoing => $link);
    }

    # all prefs are of type _t
    # TODO it may pay off to detect floats and ints
    foreach my $pref ($meta->find('PREFERENCE')) {
      my $name = $pref->{name};
      my $value = $pref->{value};
      $doc->add_fields(
        'preference_' . $name . '_t' => $value,
        'preference' => $name,
      );
    }

    # call index topic handlers; each handler runs at most once
    my %seen;
    foreach my $sub (@Foswiki::Plugins::SolrPlugin::knownIndexTopicHandler) {
      next if $seen{$sub};
      try {
        $sub->($this, $doc, $web, $topic, $meta, $text);
        $seen{$sub} = 1;
      }
      catch Foswiki::OopsException with {
        my $e = shift;
        $this->log("ERROR: while calling indexTopicHandler: " . $e->stringify());
      };
    }

    # get extra fields like acls and other properties

    my $t1;
    $t1 = [Time::HiRes::gettimeofday()] if PROFILE;
    my @aclFields = $this->getAclFields($web, $topic, $meta);
    $doc->add_fields(@aclFields) if @aclFields;

    if (PROFILE) {
      my $elapsed = int(Time::HiRes::tv_interval($t1) * 1000);
      $this->log("took $elapsed ms to get the extra fields from $web.$topic");
      $t1 = [Time::HiRes::gettimeofday()];
    }

    # attachments
    my @attachments = $meta->find('FILEATTACHMENT');
    if (@attachments) {
      my ($thumbnail, $firstImage);

      # attachments are processed ordered by comment, or by name when
      # there is no comment
      my %sortKeyOf = map { ($_ => lc($_->{comment} || $_->{name} || '')) } @attachments;
      foreach my $attachment (sort { $sortKeyOf{$a} cmp $sortKeyOf{$b} } @attachments) {

        # is the attachment is the skip list?
        my $name = $attachment->{'name'} || '';
        if ($this->isSkippedAttachment($web, $topic, $name)) {
          $this->log("Skipping attachment $web.$topic.$name");
          next;
        }

        # add attachment names to the topic doc
        $doc->add_fields('attachment' => $name);

        # decide on thumbnail
        if (!defined $thumbnail && $attachment->{attr} && $attachment->{attr} =~ /t/) {
          $thumbnail = $name;
        }
        if (!defined $firstImage && $name =~ /\.(png|jpe?g|gif|bmp|svg)$/i) {
          $firstImage = $name;
        }

        # then index each of them
        $this->indexAttachment($web, $topic, $attachment, \@aclFields);
      }

      # take the first image attachment when no thumbnail was specified explicitly
      $thumbnail = $firstImage if !defined($thumbnail) && defined($firstImage);
      $doc->add_fields('thumbnail' => $thumbnail) if defined $thumbnail;
    }

    if (PROFILE) {
      my $elapsed = int(Time::HiRes::tv_interval($t1) * 1000);
      $this->log("took $elapsed ms to index all attachments at $web.$topic");
      $t1 = [Time::HiRes::gettimeofday()];
    }

    # add the document to the index
    try {
      $this->add($doc);
    }
    catch Error::Simple with {
      my $e = shift;
      $this->log("ERROR: " . $e->{-text});
    };

    $this->commit();

    if (PROFILE) {
      my $elapsed = int(Time::HiRes::tv_interval($t0) * 1000);
      $this->log("took $elapsed ms to index topic $web.$topic");
      $t0 = [Time::HiRes::gettimeofday()];
    }

  }
  750.  
  751. ################################################################################
  # Looks up the CONTENT_LANGUAGE preference in the context of the given
  # topic and maps it through {SolrPlugin}{SupportedLanguages}.
  #
  # Returns the mapped language, 'detect' when the preference is unset or
  # has no mapping, or nothing at all (after writing a warning) when
  # {SolrPlugin}{SupportedLanguages} is not configured.
  sub getContentLanguage {
    my ($this, $web, $topic) = @_;

    if (!defined $Foswiki::cfg{SolrPlugin}{SupportedLanguages}) {
      Foswiki::Func::writeWarning("{SolrPlugin}{SupportedLanguages} not defined. Please run configure.");
      return;
    }

    # switch the topic context only when we are not already in it
    my $isForeignTopic = 0;
    if ($web ne $this->{session}{webName} || $topic ne $this->{session}{topicName}) {
      $isForeignTopic = 1;
      Foswiki::Func::pushTopicContext($web, $topic);
    }

    my $preferred = Foswiki::Func::getPreferencesValue('CONTENT_LANGUAGE') || '';
    my $language = $Foswiki::cfg{SolrPlugin}{SupportedLanguages}{$preferred} || 'detect';

    #$this->log("contentLanguage=$language") if TRACE;

    Foswiki::Func::popTopicContext() if $isForeignTopic;

    return $language;
  }
  776.  
  777. ################################################################################
  # Collects the links found in $text into the hash referenced by
  # $outgoingLinks by handing every candidate to _addLink.
  #
  # Candidates are wiki words and abbreviations outside of noautolink
  # blocks, optionally prefixed with a web name, and the targets of
  # [[target]] and [[target][label]] links. Only the local copy of $text
  # is modified.
  sub extractOutgoingLinks {
    my ($this, $web, $topic, $text, $outgoingLinks) = @_;

    # registers one candidate; the result replaces the match in the local copy
    my $register = sub {
      my ($linkWeb, $linkTopic) = @_;
      return $this->_addLink($outgoingLinks, $web, $topic, $linkWeb, $linkTopic);
    };

    # normal wikiwords
    my $stash = {};
    $text = $this->takeOutBlocks($text, 'noautolink', $stash);
    $text =~ s#(?:($Foswiki::regex{webNameRegex})\.)?($Foswiki::regex{wikiWordRegex}|$Foswiki::regex{abbrevRegex})#$register->($1, $2)#gexom;
    $this->putBackBlocks(\$text, $stash, 'noautolink');

    # square brackets
    $text =~ s#\[\[([^\]\[\n]+)\]\]#$register->(undef, $1)#ge;
    $text =~ s#\[\[([^\]\[\n]+)\]\[([^\]\n]+)\]\]#$register->(undef, $1)#ge;

  }
  793.  
  # Registers a link from $baseWeb.$baseTopic to $web.$topic in the hash
  # referenced by $links.
  #
  # Parameters:
  #   $links                - hash reference collecting the links as keys
  #   $baseWeb, $baseTopic  - the topic the link was found in
  #   $web, $topic          - link target; $web defaults to $baseWeb
  #
  # Returns the registered "Web.Topic" string, or '' when the link starts
  # with http or ftp or when the target topic does not exist.
  sub _addLink {
    my ($this, $links, $baseWeb, $baseTopic, $web, $topic) = @_;

    $web ||= $baseWeb;
    ($web, $topic) = $this->normalizeWebTopicName($web, $topic);

    my $link = $web . "." . $topic;

    # don't index external links; the alternation is grouped so that the
    # anchor applies to both schemes and "ftp" inside a topic name does
    # not count
    return '' if $link =~ /^(?:http|ftp)/;
    return '' unless Foswiki::Func::topicExists($web, $topic);

    # literal braces are escaped, unescaped ones are rejected or warned
    # about by newer perls
    $link =~ s/\%SCRIPTURL(?:PATH)?\{.*?\}\%\///g;
    $link =~ s/%WEB%/$baseWeb/g;
    $link =~ s/%TOPIC%/$baseTopic/g;

    #print STDERR "link=$link\n" unless defined $links->{$link};

    $links->{$link} = 1;

    return $link;
  }
  814.  
  815. ################################################################################
  # Adds the given attachment to the index as a document of its own.
  #
  # Parameters:
  #   $web, $topic  - topic the attachment belongs to; slashes in $web are
  #                   turned into dots
  #   $attachment   - hash reference; the keys name, comment, size, date,
  #                   user and version are read here
  #   $commonFields - optional array reference of extra fields, i.e. ACLs,
  #                   added to the document as they are
  #
  # The attachment content is only stringified when its extension is
  # listed by indexExtensions(). Foswiki::OopsException raised by an index
  # attachment handler and Error::Simple raised while adding the document
  # are logged and do not abort indexing.
  sub indexAttachment {
    my ($this, $web, $topic, $attachment, $commonFields) = @_;

    #my $t0 = [Time::HiRes::gettimeofday] if PROFILE;

    my $name = $attachment->{'name'} || '';

    # SMELL: while the below test weeds out attachments that somehow where gone physically it is too expensive for the
    # average case to open all attachments
    #unless (defined(Foswiki::Func::readAttachment($web, $topic, $name))) {
    # $this->log("... attachment $web.$topic.$name not found") if TRACE;
    # return;
    #}

    # split the name into a title and a lower-cased extension
    my $extension = '';
    my $title = $name;
    if ($name =~ /^(.+)\.(\w+?)$/) {
      $title = $1;
      $extension = lc($2);
    }
    $title =~ s/_+/ /g;

    # the extension is lower-cased already; the match is anchored so that
    # only jpg and jpeg are normalized
    $extension = 'jpg' if $extension =~ /^jpe?g$/;

    # only attachments with a listed extension are read
    my $indexextensions = $this->indexExtensions();
    my $attText = '';
    if ($indexextensions->{$extension}) {
      $attText = $this->getStringifiedVersion($web, $topic, $name);
      $attText = $this->plainify($attText, $web, $topic);
    }

    my $doc = $this->newDocument();

    my $comment = $attachment->{'comment'} || '';
    my $size = $attachment->{'size'} || 0;
    my $date = $attachment->{'date'} || 0;
    $date = Foswiki::Func::formatTime($date, 'iso', 'gmtime');

    # NOTE(review): getWikiName is called as a plain function, so it has to
    # be defined in this package; the definition is not part of this
    # section - confirm
    my $author = getWikiName($attachment->{user});
    my $rev = $attachment->{'version'} || 1;

    # get summary
    my $summary = $this->substr($attText, 0, 300);

    # get contributor and most recent author
    my @contributors = $this->getContributors($web, $topic, $attachment);
    my %contributors = map { ($_ => 1) } @contributors;
    $doc->add_fields(contributor => [keys %contributors]);

    # normalize web name
    $web =~ s/\//\./g;
    my $id = "$web.$topic.$name";

    # view url
    #my $url = $this->getScriptUrlPath($web, $topic, 'viewfile', filename => $name);
    my $webDir = $web;
    $webDir =~ s/\./\//g;
    my $url = $Foswiki::cfg{PubUrlPath} . '/' . $webDir . '/' . $topic . '/' . $name;

    my $collection = $Foswiki::cfg{SolrPlugin}{DefaultCollection} || "wiki";
    my $icon = $this->mapToIconFileName($extension);

    # gather all webs and parent webs
    my @webCats = ();
    my @prefix = ();
    foreach my $component (split(/\./, $web)) {
      push @prefix, $component;
      push @webCats, join(".", @prefix);
    }

    # TODO: what about createdate and createauthor for attachments
    $doc->add_fields(

      # common fields
      id => $id,
      collection => $collection,
      url => $url,
      web => $web,
      webcat => [@webCats],
      topic => $topic,
      webtopic => "$web.$topic",
      title => $title,
      type => $extension,
      text => $attText,
      summary => $summary,
      author => $author,
      date => $date,
      version => $rev,

      # attachment fields
      name => $name,
      comment => $comment,
      size => $size,
      icon => $icon,
      container_id => $web . '.' . $topic,
      container_url => $this->getScriptUrlPath($web, $topic, "view"),
      container_title => $this->getTopicTitle($web, $topic),
    );

    # tag and analyze language
    # SMELL: silently assumes all attachments to a topic are the same langauge
    my $contentLanguage = $this->getContentLanguage($web, $topic);
    if (defined $contentLanguage && $contentLanguage ne 'detect') {
      $doc->add_fields(
        language => $contentLanguage,
        'text_' . $contentLanguage => $attText,
      );
    }

    # add extra fields, i.e. ACLs
    $doc->add_fields(@$commonFields) if $commonFields;

    # call index attachment handlers; each handler runs at most once and a
    # failing handler is logged the same way indexTopic does it
    my %seen;
    foreach my $sub (@Foswiki::Plugins::SolrPlugin::knownIndexAttachmentHandler) {
      next if $seen{$sub};
      try {
        $sub->($this, $doc, $web, $topic, $attachment);
        $seen{$sub} = 1;
      }
      catch Foswiki::OopsException with {
        my $e = shift;
        $this->log("ERROR: while calling indexAttachmentHandler: " . $e->stringify());
      };
    }

    # add the document to the index
    try {
      $this->add($doc);
    }
    catch Error::Simple with {
      my $e = shift;
      $this->log("ERROR: " . $e->{-text});
    };

    $this->commit();

    #if (PROFILE) {
    # my $elapsed = int(Time::HiRes::tv_interval($t0) * 1000);
    # $this->log("took $elapsed ms to index attachment $web.$topic.$name");
    #}
  }
  968.  
  969. ################################################################################
  970. # add a document to the index
  971. sub add {
  972. my ($this, $doc) = @_;
  973.  
  974. #my ($package, $file, $line) = caller;
  975. #print STDERR "called add from $package:$line\n";
  976.  
  977. return unless $this->{solr};
  978. return $this->{solr}->add($doc);
  979. }
  980.  
  981. ################################################################################
  982. # optimize index
  983. sub optimize {
  984. my $this = shift;
  985.  
  986. return unless $this->{solr};
  987.  
  988. # temporarily set a different timeout for this operation
  989. my $agent = $this->{solr}->agent();
  990. my $oldTimeout = $agent->timeout();
  991.  
  992. $agent->timeout($this->{optimizeTimeout});
  993.  
  994. $this->{solr}->commit();
  995. $this->log("Optimizing index");
  996. $this->{solr}->optimize({
  997. waitSearcher => WAIT_SEARCHER,
  998. softCommit => SOFTCOMMIT,
  999. });
  1000.  
  1001. $agent->timeout($oldTimeout);
  1002. }
  1003.  
  1004. ################################################################################
  1005. # commit every COMMIT_THRESHOLD times
  1006. sub commit {
  1007. my ($this, $force) = @_;
  1008.  
  1009. return unless $this->{solr};
  1010.  
  1011. $this->{commitCounter}++;
  1012.  
  1013. if ($this->{commitCounter} > 1 && ($this->{commitCounter} >= COMMIT_THRESHOLD || $force)) {
  1014. $this->log("Committing index") if VERBOSE;
  1015. $this->{solr}->commit({
  1016. waitSearcher => WAIT_SEARCHER,
  1017. softCommit => SOFTCOMMIT,
  1018. });
  1019. $this->{commitCounter} = 0;
  1020.  
  1021. # invalidate page cache for all search interfaces
  1022. if ($Foswiki::cfg{Cache}{Enabled} && $this->{session}{cache}) {
  1023. my @webs = Foswiki::Func::getListOfWebs("user, public");
  1024. foreach my $web (@webs) {
  1025. next if $web eq $Foswiki::cfg{TrashWebName};
  1026.  
  1027. #$this->log("firing dependencies in $web");
  1028. $this->{session}->{cache}->fireDependency($web, "WebSearch");
  1029.  
  1030. # SMELL: should record all topics a SOLRSEARCH is on, outside of a dirtyarea
  1031. }
  1032. }
  1033. }
  1034. }
  1035.  
  1036. ################################################################################
  1037. sub newDocument {
  1038.  
  1039. #my $this = shift;
  1040.  
  1041. return WebService::Solr::Document->new;
  1042. }
  1043.  
  1044. ################################################################################
  1045. sub deleteTopic {
  1046. my ($this, $web, $topic, $meta) = @_;
  1047.  
  1048. $this->deleteDocument($web, $topic);
  1049.  
  1050. if ($meta) {
  1051. my @attachments = $meta->find('FILEATTACHMENT');
  1052. if (@attachments) {
  1053. foreach my $attachment (@attachments) {
  1054. $this->deleteDocument($web, $topic, $attachment);
  1055. }
  1056. }
  1057. } else {
  1058. $this->deleteByQuery("web:\"$web\" topic:\"$topic\"");
  1059. }
  1060. }
  1061.  
  1062. ################################################################################
  1063. sub deleteWeb {
  1064. my ($this, $web) = @_;
  1065.  
  1066. $web =~ s/\//./g;
  1067. $this->deleteByQuery("web:\"$web\"");
  1068. }
  1069.  
  1070. ################################################################################
  1071. sub deleteByQuery {
  1072. my ($this, $query) = @_;
  1073.  
  1074. return unless $query;
  1075.  
  1076. #$this->log("Deleting documents by query $query") if VERBOSE;
  1077.  
  1078. my $success;
  1079. try {
  1080. $success = $this->{solr}->delete_by_query($query);
  1081. $this->commit();
  1082. }
  1083. catch Error::Simple with {
  1084. my $e = shift;
  1085. $this->log("ERROR: " . $e->{-text});
  1086. };
  1087.  
  1088. return $success;
  1089. }
  1090.  
  1091. ################################################################################
  1092. sub deleteDocument {
  1093. my ($this, $web, $topic, $attachment) = @_;
  1094.  
  1095. $web =~ s/\//\./g;
  1096. my $id = "$web.$topic";
  1097. $id .= ".$attachment" if $attachment;
  1098.  
  1099. #$this->log("Deleting document $id");
  1100.  
  1101. try {
  1102. $this->{solr}->delete_by_id($id);
  1103. $this->commit();
  1104. }
  1105. catch Error::Simple with {
  1106. my $e = shift;
  1107. $this->log("ERROR: " . $e->{-text});
  1108. };
  1109.  
  1110. }
  1111.  
  1112. ################################################################################
  1113. sub lock {
  1114. my $this = shift;
  1115.  
  1116. my $lockfile = $this->{workArea} . "/indexer.lock";
  1117. open($this->{lock}, ">$lockfile")
  1118. or die "can't create lockfile $lockfile";
  1119.  
  1120. flock($this->{lock}, LOCK_EX)
  1121. or die "can't lock indexer: $!";
  1122. }
  1123.  
  1124. ################################################################################
  1125. sub unlock {
  1126. my $this = shift;
  1127.  
  1128. flock($this->{lock}, LOCK_UN)
  1129. or die "unable to unlock: $!";
  1130. }
  1131.  
  1132. ################################################################################
  1133. sub getStringifiedVersion {
  1134. my ($this, $web, $topic, $attachment) = @_;
  1135.  
  1136. my $pubpath = Foswiki::Func::getPubDir();
  1137. my $dirWeb = $web;
  1138. $dirWeb =~ s/\./\//g;
  1139. $web =~ s/\//\./g;
  1140.  
  1141. my $filename = "$pubpath/$dirWeb/$topic/$attachment";
  1142.  
  1143. # untaint..
  1144. $filename =~ /(.*)/;
  1145. $filename = $1;
  1146.  
  1147. my $mime = $this->mmagic->checktype_filename($filename);
  1148. my $skipCaching = ($mime =~ /^(text\/plain)$/)?1:0;
  1149.  
  1150. #print STDERR "filename=$filename, mime=$mime\n";
  1151.  
  1152. my $workArea = $this->{workArea};
  1153. my $cachedFilename = "$workArea/$web/$topic/$attachment.txt";
  1154.  
  1155. # untaint..
  1156. $cachedFilename =~ /(.*)/;
  1157. $cachedFilename = $1;
  1158.  
  1159. my $attText = '';
  1160.  
  1161. if ($skipCaching) {
  1162. #print STDERR "skipping caching attachment $filename as it is a $mime\n";
  1163. $attText = Foswiki::Contrib::Stringifier->stringFor($filename) || '';
  1164. } else {
  1165.  
  1166. mkdir "$workArea/$web" unless -d "$workArea/$web";
  1167. mkdir "$workArea/$web/$topic" unless -d "$workArea/$web/$topic";
  1168.  
  1169. my $origModified = modificationTime($filename);
  1170. my $cachedModified = modificationTime($cachedFilename);
  1171.  
  1172. if ($origModified > $cachedModified) {
  1173.  
  1174. #$this->log("caching stringified version of $attachment in $cachedFilename");
  1175. $attText = Foswiki::Contrib::Stringifier->stringFor($filename) || '';
  1176. Foswiki::Func::saveFile($cachedFilename, $attText);
  1177. } else {
  1178.  
  1179. #$this->log("found stringified version of $attachment in cache");
  1180. $attText = Foswiki::Func::readFile($cachedFilename);
  1181. }
  1182. }
  1183.  
  1184. return $attText;
  1185. }
  1186.  
  1187. ################################################################################
  1188. sub mmagic {
  1189. my $this = shift;
  1190.  
  1191. unless (defined $this->{mmagic}) {
  1192. require File::MMagic;
  1193. $this->{mmagic} = File::MMagic->new();
  1194. }
  1195.  
  1196. return $this->{mmagic};
  1197. }
  1198.  
  1199. ################################################################################
  1200. sub modificationTime {
  1201. my $filename = shift;
  1202.  
  1203. my @stat = stat($filename);
  1204. return $stat[9] || $stat[10] || 0;
  1205. }
  1206.  
  1207. ################################################################################
  1208. sub nrKnownUsers {
  1209. my ($this, $id) = @_;
  1210.  
  1211. $this->getListOfUsers();
  1212. return $this->{_nrKnownUsers};
  1213. }
  1214.  
  1215. ################################################################################
  1216. sub isKnownUser {
  1217. my ($this, $id) = @_;
  1218.  
  1219. $this->getListOfUsers();
  1220. return (exists $this->{_knownUsers}{$id}?1:0);
  1221. }
  1222.  
  1223. ################################################################################
  1224. # Get a list of all registered users
  1225. sub getListOfUsers {
  1226. my $this = shift;
  1227.  
  1228. unless (defined $this->{_knownUsers}) {
  1229.  
  1230. my $it = Foswiki::Func::eachUser();
  1231. while ($it->hasNext()) {
  1232. my $user = $it->next();
  1233. next if $user eq 'UnknownUser';
  1234. $this->{_knownUsers}{$user} = 1;# if Foswiki::Func::topicExists($Foswiki::cfg{UsersWebName}, $user);
  1235. }
  1236.  
  1237. #$this->log("known users=".join(", ", sort keys %{$this->{_knownUsers}})) if TRACE;
  1238. $this->{_nrKnownUsers} = scalar(keys %{ $this->{_knownUsers} });
  1239.  
  1240. #$this->log("found ".$this->{_nrKnownUsers}." users");
  1241. }
  1242.  
  1243. return $this->{_knownUsers};
  1244. }
  1245.  
  1246. ################################################################################
  1247. sub getContributors {
  1248. my ($this, $web, $topic, $attachment) = @_;
  1249.  
  1250. #my $t0 = [Time::HiRes::gettimeofday] if PROFILE;
  1251.  
  1252. my $maxRev;
  1253. try {
  1254. (undef, undef, $maxRev) = $this->getRevisionInfo($web, $topic, undef, $attachment);
  1255. }
  1256. catch Error::Simple with {
  1257. my $e = shift;
  1258. $this->log("ERROR: " . $e->{-text});
  1259. };
  1260. return () unless defined $maxRev;
  1261.  
  1262. $maxRev =~ s/r?1\.//go; # cut 'r' and major
  1263.  
  1264. my %contributors = ();
  1265.  
  1266. # get most recent
  1267. my (undef, $user, $rev) = $this->getRevisionInfo($web, $topic, $maxRev, $attachment, $maxRev);
  1268. my $mostRecent = getWikiName($user);
  1269. $contributors{$mostRecent} = 1;
  1270.  
  1271. # get creator
  1272. (undef, $user, $rev) = $this->getRevisionInfo($web, $topic, 1, $attachment, $maxRev);
  1273. my $creator = getWikiName($user);
  1274. $contributors{$creator} = 1;
  1275.  
  1276. # only take the top 10; extracting revinfo takes too long otherwise :(
  1277. $maxRev = 10 if $maxRev > 10;
  1278.  
  1279. for (my $i = $maxRev; $i > 0; $i--) {
  1280. my (undef, $user, $rev) = $this->getRevisionInfo($web, $topic, $i, $attachment, $maxRev);
  1281. my $wikiName = getWikiName($user);
  1282. $contributors{$wikiName} = 1;
  1283. }
  1284.  
  1285. #if (PROFILE) {
  1286. # my $elapsed = int(Time::HiRes::tv_interval($t0) * 1000);
  1287. # $this->log("took $elapsed ms to get contributors of $web.$topic".($attachment?'.'.$attachment->{name}:''));
  1288. #}
  1289. delete $contributors{$mostRecent};
  1290. delete $contributors{$creator};
  1291.  
  1292. my @contributors = ($mostRecent, keys %contributors, $creator);
  1293. return @contributors;
  1294. }
  1295.  
  1296. ################################################################################
  1297. sub getWikiName {
  1298. my $user = shift;
  1299.  
  1300. my $wikiName = Foswiki::Func::getWikiName($user) || 'UnknownUser';
  1301. $wikiName = 'UnknownUser' unless Foswiki::Func::isValidWikiWord($wikiName); # weed out some strangers
  1302.  
  1303. return $wikiName;
  1304. }
  1305.  
  1306. ################################################################################
  1307. # wrapper around original getRevisionInfo which
  1308. # can't deal with dots in the webname
  1309. sub getRevisionInfo {
  1310. my ($this, $web, $topic, $rev, $attachment, $maxRev) = @_;
  1311.  
  1312. ($web, $topic) = $this->normalizeWebTopicName($web, $topic);
  1313.  
  1314. if ($attachment && (!defined($rev) || $rev == $maxRev)) {
  1315.  
  1316. # short cut for attachments
  1317. my $info = {};
  1318. $info->{version} = $attachment->{version} || $maxRev;
  1319. $info->{date} = $attachment->{date};
  1320. $info->{author} = $attachment->{author} || $attachment->{user};
  1321.  
  1322. #$info->{date} = $this->getTimestamp() unless defined $info->{date};
  1323. #$info->{author} = $Foswiki::Users::BaseUserMapping::DEFAULT_USER_CUID unless defined $info->{author};
  1324. return $info;
  1325. } else {
  1326. return Foswiki::Func::getRevisionInfo($web, $topic, $rev, $attachment);
  1327. }
  1328. }
  1329.  
  1330. ################################################################################
  1331. # returns the list of users granted view access, or "all" if all users have got view access
  1332. sub getGrantedUsers {
  1333. my ($this, $web, $topic, $meta, $text) = @_;
  1334.  
  1335. my %grantedUsers;
  1336. my $forbiddenUsers;
  1337.  
  1338. my $allow = $this->getACL($meta, 'ALLOWTOPICVIEW');
  1339. my $deny = $this->getACL($meta, 'DENYTOPICVIEW');
  1340.  
  1341. if (TRACE) {
  1342. $this->log("topicAllow=@$allow") if defined $allow;
  1343. $this->log("topicDeny=@$deny") if defined $deny;
  1344. }
  1345.  
  1346. my $isDeprecatedEmptyDeny =
  1347. !defined($Foswiki::cfg{AccessControlACL}{EnableDeprecatedEmptyDeny}) || $Foswiki::cfg{AccessControlACL}{EnableDeprecatedEmptyDeny};
  1348.  
  1349. # Check DENYTOPIC
  1350. if (defined $deny) {
  1351. if (scalar(@$deny)) {
  1352. $forbiddenUsers = $this->expandUserList(@$deny);
  1353. } else {
  1354.  
  1355. if ($isDeprecatedEmptyDeny) {
  1356. $this->log("empty deny -> grant all access") if TRACE;
  1357.  
  1358. # Empty deny
  1359. return ['all'];
  1360. } else {
  1361. $deny = undef;
  1362. }
  1363. }
  1364. }
  1365. $this->log("(1) forbiddenUsers=@$forbiddenUsers") if TRACE && defined $forbiddenUsers;
  1366.  
  1367. # Check ALLOWTOPIC
  1368. if (defined($allow)) {
  1369. if (scalar(@$allow)) {
  1370.  
  1371. if (!$isDeprecatedEmptyDeny && grep {/^\*$/} @$allow) {
  1372. $this->log("access * -> grant all access") if TRACE;
  1373.  
  1374. # Empty deny
  1375. return ['all'];
  1376. } else {
  1377.  
  1378. $grantedUsers{$_} = 1 foreach grep {!/^UnknownUser/} @{$this->expandUserList(@$allow)};
  1379.  
  1380. if (defined $forbiddenUsers) {
  1381. delete $grantedUsers{$_} foreach @$forbiddenUsers;
  1382. }
  1383. my @grantedUsers = keys %grantedUsers;
  1384.  
  1385. $this->log("(1) granting access for @grantedUsers") if TRACE;
  1386.  
  1387. # A non-empty ALLOW is final
  1388. return \@grantedUsers;
  1389. }
  1390. }
  1391. }
  1392.  
  1393. # use cache if possible (no topic-level perms set)
  1394. if (!defined($deny) && exists $this->{_webACLCache}{$web}) {
  1395. #$this->log("found in acl cache ".join(", ", sort @{$this->{_webACLCache}{$web}})) if TRACE;
  1396. return $this->{_webACLCache}{$web};
  1397. }
  1398.  
  1399. my $webMeta = $meta->getContainer;
  1400. my $webAllow = $this->getACL($webMeta, 'ALLOWWEBVIEW');
  1401. my $webDeny = $this->getACL($webMeta, 'DENYWEBVIEW');
  1402.  
  1403. if (TRACE) {
  1404. $this->log("webAllow=@$webAllow") if defined $webAllow;
  1405. $this->log("webDeny=@$webDeny") if defined $webDeny;
  1406. }
  1407.  
  1408. # Check DENYWEB, but only if DENYTOPIC is not set
  1409. if (!defined($deny) && defined($webDeny) && scalar(@$webDeny)) {
  1410. push @{$forbiddenUsers}, @{$this->expandUserList(@$webDeny)};
  1411. }
  1412. $this->log("(2) forbiddenUsers=@$forbiddenUsers") if TRACE && defined $forbiddenUsers;
  1413.  
  1414. if (defined($webAllow) && scalar(@$webAllow)) {
  1415. $grantedUsers{$_} = 1 foreach grep {!/^UnknownUser/} @{$this->expandUserList(@$webAllow)};
  1416. } elsif (!defined($deny) && !defined($webDeny)) {
  1417.  
  1418. #$this->log("no denies, no allows -> grant all access") if TRACE;
  1419.  
  1420. # No denies, no allows -> open door policy
  1421. $this->{_webACLCache}{$web} = ['all'];
  1422. return ['all'];
  1423.  
  1424. } else {
  1425. %grantedUsers = %{$this->getListOfUsers()};
  1426. }
  1427.  
  1428. if (defined $forbiddenUsers) {
  1429. delete $grantedUsers{$_} foreach @$forbiddenUsers;
  1430. }
  1431.  
  1432. # get list of users granted access that actually still exist
  1433. foreach my $user (keys %grantedUsers) {
  1434. $grantedUsers{$user}++ if defined $this->isKnownUser($user);
  1435. }
  1436.  
  1437. my @grantedUsers = ();
  1438. foreach my $user (keys %grantedUsers) {
  1439. push @grantedUsers, $user if $grantedUsers{$user} > 1;
  1440. }
  1441.  
  1442. #$this->log("grantedUsers=@grantedUsers");
  1443.  
  1444. $this->log("nr granted users=".scalar(@grantedUsers).", nr known users=".$this->nrKnownUsers) if TRACE;
  1445. @grantedUsers = ('all') if scalar(@grantedUsers) == $this->nrKnownUsers;
  1446.  
  1447. # can't cache when there are topic-level perms
  1448. $this->{_webACLCache}{$web} = \@grantedUsers unless defined($deny);
  1449.  
  1450. $this->log("(2) granting access for ".scalar(@grantedUsers)." users") if TRACE;
  1451.  
  1452. return \@grantedUsers;
  1453. }
  1454.  
  1455. ################################################################################
  1456. # SMELL: coppied from core; only works with topic-based ACLs
  1457. sub getACL {
  1458. my ($this, $meta, $mode) = @_;
  1459.  
  1460. if (defined $meta->{_topic} && !defined $meta->{_loadedRev}) {
  1461. # Lazy load the latest version.
  1462. $meta->loadVersion();
  1463. }
  1464.  
  1465. my $text = $meta->getPreference($mode);
  1466. return unless defined $text;
  1467.  
  1468. # Remove HTML tags (compatibility, inherited from Users.pm
  1469. $text =~ s/(<[^>]*>)//g;
  1470.  
  1471. # Dump the users web specifier if userweb
  1472. my @list = grep { /\S/ } map {
  1473. s/^($Foswiki::cfg{UsersWebName}|%USERSWEB%|%MAINWEB%)\.//;
  1474. $_
  1475. } split(/[,\s]+/, $text);
  1476.  
  1477. #print STDERR "getACL($mode): ".join(', ', @list)."\n";
  1478.  
  1479. return \@list;
  1480. }
  1481.  
  1482. ################################################################################
  1483. sub expandUserList {
  1484. my ($this, @users) = @_;
  1485.  
  1486. my %result = ();
  1487.  
  1488. foreach my $id (@users) {
  1489. $id =~ s/(<[^>]*>)//go;
  1490. $id =~ s/^($Foswiki::cfg{UsersWebName}|%USERSWEB%|%MAINWEB%)\.//;
  1491. next unless $id;
  1492.  
  1493. if (Foswiki::Func::isGroup($id)) {
  1494. $result{$_} = 1 foreach @{$this->_expandGroup($id)};
  1495. } else {
  1496. $result{getWikiName($id)} = 1;
  1497. }
  1498. }
  1499.  
  1500. return [keys %result];
  1501. }
  1502.  
  1503. sub _expandGroup {
  1504. my ($this, $group) = @_;
  1505.  
  1506. return $this->{_groupCache}{$group} if exists $this->{_groupCache}{$group};
  1507.  
  1508. my %result = ();
  1509.  
  1510. my $it = Foswiki::Func::eachGroupMember($group);
  1511.  
  1512. while ($it->hasNext) {
  1513. my $id = $it->next;
  1514.  
  1515. if (Foswiki::Func::isGroup($id)) {
  1516. $result{$_} = 1 foreach @{$this->_expandGroup($id)};
  1517. } else {
  1518. $result{getWikiName($id)} = 1;
  1519. }
  1520. }
  1521.  
  1522. $this->{_groupCache}{$group} = [keys %result];
  1523.  
  1524. return [keys %result];
  1525. }
  1526.  
  1527.  
  1528. ################################################################################
  1529. sub getAclFields {
  1530. my $this = shift;
  1531.  
  1532. my $grantedUsers = $this->getGrantedUsers(@_);
  1533. return () unless $grantedUsers;
  1534. return ('access_granted' => $grantedUsers);
  1535. }
  1536.  
  1537. ################################################################################
  1538. sub setTimestamp {
  1539. my ($this, $web, $topic, $time) = @_;
  1540.  
  1541. return unless $web;
  1542. return unless Foswiki::Func::webExists($web);
  1543.  
  1544. $time = time() unless defined $time;
  1545.  
  1546. unless (defined $this->{_insert_timestamp}) {
  1547. my $timestampsTable = $this->{tablePrefix}.'timestamps';
  1548. $this->{_insert_timestamp} = $this->{dbh}->prepare(<<HERE);
  1549. replace into $timestampsTable
  1550. (web, topic, epoch) values
  1551. (?, ?, ?)
  1552. HERE
  1553. }
  1554.  
  1555. $this->{_insert_timestamp}->execute($web, ($topic||'undef'), $time) or die("Can't execute statement: " . $this->{_insert_timestamp}->errstr);
  1556.  
  1557. return $time;
  1558. }
  1559.  
  1560. ################################################################################
  1561. sub getTimestamp {
  1562. my ($this, $web, $topic) = @_;
  1563.  
  1564. #print STDERR "called getTimestamp($web, ".($topic||'').")\n";
  1565.  
  1566. unless (defined $this->{_select_timestamp}) {
  1567. my $timestampsTable = $this->{tablePrefix}.'timestamps';
  1568. $this->{_select_timestamp} = $this->{dbh}->prepare(<<HERE);
  1569. select epoch from $timestampsTable where web = ? and topic = ?
  1570. HERE
  1571. }
  1572.  
  1573. my ($epoch) = $this->{dbh}->selectrow_array($this->{_select_timestamp}, undef, $web, ($topic||'undef'));
  1574. $epoch ||= 0;
  1575.  
  1576. return $epoch;
  1577. }
  1578.  
  1579. 1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement