Guest User

Untitled

a guest
Jan 28th, 2015
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 33.51 KB | None | 0 0
  1. ###############################################################################
  2. #
  3. # Copyright (c) 2007, Andrew Scott
  4. # All rights reserved.
  5. #
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions are met:
  8. #
  9. # * Redistributions of source code must retain the above copyright notice,
  10. # this list of conditions and the following disclaimer.
  11. # * Redistributions in binary form must reproduce the above copyright
  12. # notice, this list of conditions and the following disclaimer in the
  13. # documentation and/or other materials provided with the distribution.
  14. # * Neither the name of the author nor the names of its contributors
  15. # may be used to endorse or promote products derived from this software
  16. # without specific prior written permission.
  17. #
  18. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. # POSSIBILITY OF SUCH DAMAGE.
  29.  
  30. #
  31. # Eggdrop RSS Syndication
  32. # -----------------------
  33. # Date: 2007-02-08
  34. # Version: v0.4
  35. # Author(s): Andrew Scott <andrew.scott@wizzer-it.com>
  36. # Website: http://labs.wizzer-it.com/
  37. #
  38.  
  39. #
  40. # Please read the README file for help and the HISTORY file for a list of
  41. # what has been changed.
  42. #
  43.  
  44. #
  45. # Start of Settings
  46. #
  47.  
  48. namespace eval ::rss-synd {
  49. variable rss
  50. variable default
  51.  
  52. # This is an example of a basic feed, If you dont understand why all
  53. # the \'s are in the examples below use this one as a template.
  54. set rss(blacklistednews) {
  55. "url" "http://blacklistednews.com/rss.php"
  56. "channels" "##information"
  57. "database" "./scripts/bln.db"
  58. "output" "[\002Black Listed news feed\002] @@item!title@@ - @@item!guid@@"
  59. "trigger" "!blacklistednews"
  60. }
  61.  
  62. set rss(nesaranews) {
  63. "url" "http://feeds.feedburner.com/NesaraForumsAndNewsSources"
  64. "channels" "##information"
  65. "database" "./scripts/nn.db"
  66. "output" "[\002Nesara News RSS feed\002] @@item!title@@ - @@item!guid@@"
  67. "trigger" "!nesaranews"
  68. }
  69.  
  70. set rss(globalresearch) {
  71. "url" "http://www.globalresearch.ca/feed"
  72. "channels" "##information"
  73. "database" "./scripts/gr.db"
  74. "output" "[\002Global Research news feed\002] @@item!title@@ - @@item!guid@@"
  75. "trigger" "!globalresearch"
  76. }
  77. set rss(gizmag) {
  78. "url" "http://feeds.feedburner.com/GizmagEmergingTechnologyMagazine"
  79. "channels" "##information"
  80. "database" "./scripts/gizmag.db"
  81. "output" "[\002Gizmag news feed\002] @@item!title@@ - @@item!guid@@"
  82. "trigger" "!gizmag"
  83. }
  84. set rss(redice) {
  85. "url" "http://www.redicecreations.com/rss-news.php"
  86. "channels" "##information"
  87. "database" "./scripts/redice.db"
  88. "output" "[\002Red Ice news feed\002] @@item!title@@ - @@item!guid@@"
  89. "trigger" "!redice"
  90. }
  91. set rss(gas2) {
  92. "url" "http://gas2.org/feed/"
  93. "channels" "##tesla"
  94. "database" "./scripts/gas2.db"
  95. "output" "[\002Gas2.org news feed\002] @@item!title@@ - @@item!guid@@"
  96. "trigger" "!gas2"
  97. }
  98. set rss(fenews) {
  99. "url" "http://feeds.feedburner.com/freeenergynews/"
  100. "channels" "##tesla"
  101. "database" "./scripts/fenews.db"
  102. "output" "[\002Free Energy news feed\002] @@item!title@@ - @@item!guid@@"
  103. "trigger" "!fenews"
  104. }
  105. set rss(renews) {
  106. "url" "# http://feeds.feedburner.com/RenewableEnergyHeadlines?format=xml"
  107. "channels" "##tesla"
  108. "database" "./scripts/renews.db"
  109. "output" "[\002Renewable Energy news feed\002] @@item!title@@ - @@item!guid@@"
  110. "trigger" "!renews"
  111. }
  112. # http://feeds.feedburner.com/RenewableEnergyHeadlines?format=xml
  113.  
  114. # http://feeds.feedburner.com/freeenergynews/nqih?format=xml
  115. # supybot.plugins.RSS.feeds.dprogram: http://dprogram.net/feed/
  116. # supybot.plugins.RSS.feeds.newsworldwide:
  117. # supybot.plugins.RSS.feeds.davidicke: http://davidicke.com/feed/
  118. # supybot.plugins.RSS.feeds.rumormillnews: http://rss.groups.yahoo.com/group/RUMORMILLNEWS/rss
  119. # supybot.plugins.RSS.feeds.signofthetimes:
  120. # supybot.plugins.RSS.feeds.naturalnews: http://www.naturalnews.com/rss.xml
  121. # supybot.plugins.RSS.feeds.freeenergynews: http://feeds.feedburner.com/freeenergynews/nqih?format=xml
  122. # supybot.plugins.RSS.feeds.fourwinds10: http://feedity.com/fourwinds10-com/V1FQUlpb.rss
  123. # supybot.plugins.RSS.feeds.fromthetrenches: http://feeds.feedburner.com/fromthetrenchesworldreport/WQjf?format=xml
  124. # supybot.plugins.RSS.feeds.renewableenergy: http://www.renewableenergyworld.com/rss/renews.rss
  125. # supybot.plugins.RSS.feeds.globalresearch: http://www.globalresearch.ca/feed
  126. # supybot.plugins.RSS.feeds.rediceradio: http://www.redicecreations.com/rss.xml
  127. # supybot.plugins.RSS.feeds.infowars: http://www.infowars.com/feed.rss
  128.  
  129. # http://gas2.org/feed/
  130.  
  131. # set rss(mozillazine) {
  132. # "url" "http://www.mozillazine.org/atom.xml"
  133. # "channels" "#channel1"
  134. # "database" "./scripts/mozillazine.db"
  135. # "output" "[\002@@title@@\002] @@entry!title@@ - @@entry!link!=href@@"
  136. # "trigger" "!mozine"
  137. # }
  138.  
  139.  
  140. # The default settings, If any setting isnt set for an individual feed
  141. # it'll use the default listed here
  142. #
  143. # WARNING: You can change the options here, but DO NOT REMOVE THEM, doing
  144. # so will cause errors.
  145. set default {
  146. "announce-output" 5
  147. "trigger-output" 5
  148. "remove-empty" 1
  149. "trigger-type" 0:2
  150. "announce-type" 0
  151. "max-depth" 5
  152. "evaluate-tcl" 0
  153. "update-interval" 720
  154. "output-order" 0
  155. "timeout" 60000
  156. "channels" "#channel1"
  157. "trigger" "!rss @@feedid@@"
  158. "output" "\[\002@@channel!title@@@@title@@\002\] @@item!title@@@@entry!title@@ - @@item!link@@@@entry!link!=href@@"
  159. "user-agent" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"
  160. }
  161. }
  162.  
  163. #
  164. # End of Settings
  165. #
  166. ###############################################################################
  167.  
  168. proc ::rss-synd::init {args} {
  169. variable rss
  170. variable default
  171. variable version
  172. variable packages
  173.  
  174. set version(number) "0.4"
  175. set version(date) "2007-02-08"
  176.  
  177. package require http
  178. set packages(base64) [catch {package require base64}]; # http auth
  179. set packages(tls) [catch {package require tls}]; # https
  180. set packages(trf) [catch {package require Trf}]; # gzip compression
  181.  
  182. foreach feed [array names rss] {
  183. array set tmp $default
  184. array set tmp $rss($feed)
  185.  
  186. set required [list "announce-output" "trigger-output" "max-depth" "update-interval" "timeout" "channels" "output" "user-agent" "url" "database" "trigger-type" "announce-type"]
  187. foreach {key value} [array get tmp] {
  188. if {[set ptr [lsearch -exact $required $key]] >= 0} {
  189. set required [lreplace $required $ptr $ptr]
  190. }
  191. }
  192.  
  193. if {[llength $required] == 0} {
  194. regsub -nocase -all -- {@@feedid@@} $tmp(trigger) $feed tmp(trigger)
  195.  
  196. set ulist [regexp -nocase -inline -- {(http(?:s?))://(?:(.[^:]+:.[^@]+)?)(?:@?)(.*)} $tmp(url)]
  197.  
  198. if {[llength $ulist] == 0} {
  199. putlog "\002RSS Error\002: Unable to parse URL, Invalid format for feed \"$feed\"."
  200. unset rss($feed)
  201. continue
  202. }
  203.  
  204. set tmp(url) "[lindex $ulist 1]://[lindex $ulist 3]"
  205.  
  206. if {[string compare [lindex $ulist 1] "https"] == 0} {
  207. if {$packages(tls) != 0} {
  208. putlog "\002RSS Error\002: Unable to find tls package required for https, unloaded feed \"$feed\"."
  209. unset rss($feed)
  210. continue
  211. }
  212.  
  213. ::http::register https 443 ::tls::socket
  214. }
  215.  
  216. if {(![info exists tmp(url-auth)]) || ([string compare $tmp(url-auth) ""] == 0)} {
  217. set tmp(url-auth) ""
  218.  
  219. if {[string compare [lindex $ulist 2] ""] != 0} {
  220. if {$packages(base64) != 0} {
  221. putlog "\002RSS Error\002: Unable to find base64 package required for http authentication, unloaded feed \"$feed\"."
  222. unset rss($feed)
  223. continue
  224. }
  225.  
  226. set tmp(url-auth) [::base64::encode [lindex $ulist 2]]
  227. }
  228. }
  229.  
  230. if {[regexp {^[0123]{1}:[0123]{1}$} $tmp(trigger-type)] != 1} {
  231. putlog "\002RSS Error\002: Invalid 'trigger-type' syntax for feed \"$feed\"."
  232. unset rss($feed)
  233. continue
  234. }
  235.  
  236. set tmp(trigger-type) [split $tmp(trigger-type) ":"]
  237.  
  238. if {([info exists tmp(charset)]) && ([lsearch -exact [encoding names] [string tolower $tmp(charset)]] < 0)} {
  239. putlog "\002RSS Error\002: Unable to load feed \"$feed\", unknown encoding \"$tmp(encoding)\"."
  240. unset rss($feed)
  241. continue
  242. }
  243.  
  244. set tmp(updated) 0
  245. if {([file exists $tmp(database)]) && ([set mtime [file mtime $tmp(database)]] < [unixtime])} {
  246. set tmp(updated) [file mtime $tmp(database)]
  247. }
  248.  
  249. set rss($feed) [array get tmp]
  250. } else {
  251. putlog "\002RSS Error\002: Unable to load feed \"$feed\", missing one or more required settings. \"[join $required ", "]\""
  252. unset rss($feed)
  253. }
  254.  
  255. unset tmp
  256. }
  257.  
  258. bind evnt -|- prerehash [namespace current]::deinit
  259. bind time -|- {* * * * *} [namespace current]::feed_get
  260. bind pubm -|- {* *} [namespace current]::pub_trigger
  261. bind msgm -|- {*} [namespace current]::msg_trigger
  262.  
  263. putlog "\002RSS Syndication Script v$version(number)\002 ($version(date)): Loaded."
  264. }
  265.  
  266. proc ::rss-synd::deinit {args} {
  267. catch {unbind evnt -|- prerehash [namespace current]::deinit}
  268. catch {unbind time -|- {* * * * *} [namespace current]::feed_get}
  269. catch {unbind pubm -|- {* *} [namespace current]::pub_trigger}
  270. catch {unbind msgm -|- {*} [namespace current]::msg_trigger}
  271.  
  272. foreach child [namespace children] {
  273. catch {[set child]::deinit}
  274. }
  275.  
  276. namespace delete [namespace current]
  277. }
  278.  
  279. #
  280. # Trigger Functions
  281. ##
  282.  
  283. proc ::rss-synd::msg_trigger {nick user handle text} {
  284. [namespace current]::handle_triggers $text $nick
  285. }
  286.  
  287. proc ::rss-synd::pub_trigger {nick user handle chan text} {
  288. [namespace current]::handle_triggers $text $nick $chan
  289. }
  290.  
  291. proc ::rss-synd::handle_triggers {text nick {chan ""}} {
  292. variable rss
  293. variable default
  294.  
  295. array set tmp $default
  296.  
  297. if {[info exists tmp(trigger)]} {
  298. regsub -all -- {@@(.*?)@@} $tmp(trigger) "" tmp_trigger
  299. set tmp_trigger [string trimright $tmp_trigger]
  300.  
  301. if {[string compare -nocase $text $tmp_trigger] == 0} {
  302. set list_feeds [list]
  303. }
  304. }
  305.  
  306. catch {unset tmp tmp_trigger}
  307.  
  308. foreach name [array names rss] {
  309. array set feed $rss($name)
  310.  
  311. if {(![info exists list_feeds]) && \
  312. ([string compare -nocase $text $feed(trigger)] == 0)} {
  313. if {(![[namespace current]::check_channel $feed(channels) $chan]) && \
  314. ([string length $chan] != 0)} {
  315. continue
  316. }
  317.  
  318. set feed(nick) $nick
  319.  
  320. if {$chan != ""} {
  321. set feed(type) [lindex $feed(trigger-type) 0]
  322. set feed(channels) $chan
  323. } else {
  324. set feed(type) [lindex $feed(trigger-type) 1]
  325. set feed(channels) ""
  326. }
  327.  
  328. if {[catch {set data [[namespace current]::feed_read [array get feed]]} error] == 0} {
  329. if {[set feedlist [[namespace current]::feed_info [array get feed] $data]] == ""} {
  330. putlog "\002RSS Error\002: Invalid feed database file format ($feed(database))!"
  331. return
  332. }
  333.  
  334. array set feed $feedlist
  335.  
  336. if {$feed(trigger-output) >= 0} {
  337. set feed(announce-output) $feed(trigger-output)
  338.  
  339. [namespace current]::feed_output [array get feed] $data
  340. }
  341. } else {
  342. putlog "\002RSS Warning\002: $error."
  343. }
  344. } elseif {[info exists list_feeds]} {
  345. if {$chan != ""} {
  346. # triggered from a channel
  347. if {[[namespace current]::check_channel $feed(channels) $chan]} {
  348. lappend list_feeds $feed(trigger)
  349. }
  350. } else {
  351. # triggered from a privmsg
  352. foreach tmp_chan $feed(channels) {
  353. if {([catch {botonchan $tmp_chan}] == 0) && \
  354. ([onchan $nick $tmp_chan])} {
  355. lappend list_feeds $feed(trigger)
  356. continue
  357. }
  358. }
  359. }
  360. }
  361. }
  362.  
  363. if {[info exists list_feeds]} {
  364. if {[llength $list_feeds] == 0} {
  365. lappend list_feeds "None"
  366. }
  367.  
  368. lappend list_msgs "Available feeds: [join $list_feeds ", "]."
  369.  
  370. if {$chan != ""} {
  371. set list_type [lindex $feed(trigger-type) 0]
  372. set list_targets $chan
  373. } else {
  374. set list_type [lindex $feed(trigger-type) 1]
  375. set list_targets ""
  376. }
  377.  
  378. [namespace current]::feed_msg $list_type $list_msgs list_targets $nick
  379. }
  380. }
  381.  
  382. #
  383. # Feed Retrieving Functions
  384. ##
  385.  
  386. proc ::rss-synd::feed_get {args} {
  387. variable rss
  388.  
  389. set i 0
  390. foreach name [array names rss] {
  391. if {$i == 3} { break }
  392.  
  393. array set feed $rss($name)
  394.  
  395. if {$feed(updated) <= [expr { [unixtime] - ($feed(update-interval) * 60) }]} {
  396. ::http::config -useragent $feed(user-agent)
  397.  
  398. set feed(type) $feed(announce-type)
  399. set feed(headers) [list]
  400.  
  401. if {[string compare $feed(url-auth) ""] != 0} {
  402. lappend feed(headers) "Authorization" "Basic $feed(url-auth)"
  403. }
  404.  
  405. if {([info exists feed(enable-gzip)]) && ($feed(enable-gzip) == 1)} {
  406. lappend feed(headers) "Accept-Encoding" "gzip"
  407. }
  408.  
  409. catch {::http::geturl "$feed(url)" -command "[namespace current]::feed_callback {[array get feed] depth 0}" -timeout $feed(timeout) -headers $feed(headers)} debug
  410.  
  411. set feed(updated) [unixtime]
  412. set rss($name) [array get feed]
  413. incr i
  414. }
  415.  
  416. unset feed
  417. }
  418. }
  419.  
  420. proc ::rss-synd::feed_callback {feedlist args} {
  421. set token [lindex $args end]
  422. array set feed $feedlist
  423.  
  424. upvar 0 $token state
  425.  
  426. if {[string compare -nocase $state(status) "ok"] != 0} {
  427. putlog "\002RSS HTTP Error\002: $state(url) (State: $state(status))"
  428. return 1
  429. }
  430.  
  431. array set meta $state(meta)
  432.  
  433. if {([::http::ncode $token] == 302) || ([::http::ncode $token] == 301)} {
  434. set feed(depth) [expr {$feed(depth) + 1 }]
  435.  
  436. if {$feed(depth) < $feed(max-depth)} {
  437. catch {::http::geturl "$meta(Location)" -command "[namespace current]::feed_callback {$feedlist}" -timeout $feed(timeout) -headers $feed(headers)}
  438. } else {
  439. putlog "\002RSS HTTP Error\002: $state(url) (State: timeout, max refer limit reached)"
  440. }
  441.  
  442. return 1
  443. } elseif {[::http::ncode $token] != 200} {
  444. putlog "\002RSS HTTP Error\002: $state(url) ($state(http))"
  445. return 1
  446. }
  447.  
  448. set data [::http::data $token]
  449.  
  450. if {([info exists meta(Content-Encoding)]) && \
  451. ([string compare $meta(Content-Encoding) "gzip"] == 0)} {
  452. if {[catch {[namespace current]::feed_gzip $data} data] != 0} {
  453. putlog "\002RSS Error\002: Unable to decompress \"$state(url)\": $data"
  454. return 1
  455. }
  456. }
  457.  
  458. if {[catch {[namespace current]::xml_list_create $data} data] != 0} {
  459. putlog "\002RSS Error\002: Unable to parse feed properly, parser returned error. \"$state(url)\""
  460. return 1
  461. }
  462.  
  463. if {[string length $data] == 0} {
  464. putlog "\002RSS Error\002: Unable to parse feed properly, no data returned. \"$state(url)\""
  465. return 1
  466. }
  467.  
  468. set odata ""
  469. if {[catch {set odata [[namespace current]::feed_read $feedlist]} error] != 0} {
  470. putlog "\002RSS Warning\002: $error."
  471. }
  472.  
  473. if {[set feedlist [[namespace current]::feed_info $feedlist $data]] == ""} {
  474. putlog "\002RSS Error\002: Invalid feed format ($state(url))!"
  475. return 1
  476. }
  477.  
  478. array set feed $feedlist
  479.  
  480. ::http::cleanup $token
  481.  
  482. if {[catch {[namespace current]::feed_write $feedlist $data} error] != 0} {
  483. putlog "\002RSS Database Error\002: $error."
  484. return 1
  485. }
  486.  
  487. if {$feed(announce-output) > 0} {
  488. [namespace current]::feed_output $feedlist $data $odata
  489. }
  490. }
  491.  
  492. proc ::rss-synd::feed_info {feedlist data} {
  493. array set feed $feedlist
  494. set length [[namespace current]::xml_get_info $data [list -1 "*"]]
  495.  
  496. for {set i 0} {$i < $length} {incr i} {
  497. set type [[namespace current]::xml_get_info $data [list $i "*"] "name"]
  498.  
  499. # tag-name: the name of the element that contains each article and its data.
  500. # tag-list: the position in the xml structure where all 'tag-name' reside.
  501. switch [string tolower $type] {
  502. rss {
  503. # RSS v0.9x & x2.0
  504. set feed(tag-list) [list 0 "channel"]
  505. set feed(tag-name) "item"
  506. break
  507. }
  508. rdf:rdf {
  509. # RSS v1.0
  510. set feed(tag-list) [list]
  511. set feed(tag-name) "item"
  512. break
  513. }
  514. feed {
  515. # ATOM
  516. set feed(tag-list) [list]
  517. set feed(tag-name) "entry"
  518. break
  519. }
  520. }
  521. }
  522.  
  523. if {![info exists feed(tag-list)]} {
  524. return
  525. }
  526.  
  527. set feed(tag-feed) [list 0 $type]
  528.  
  529. return [array get feed]
  530. }
  531.  
  532. # decompress gzip formatted data
  533. proc ::rss-synd::feed_gzip {cdata} {
  534. variable packages
  535.  
  536. if {(![info exists packages(trf)]) || \
  537. ($packages(trf) != 0)} {
  538. error "Trf package not found."
  539. }
  540.  
  541. # remove the 10 byte gzip header and 8 byte footer.
  542. set cdata [string range $cdata 10 [expr { [string length $cdata] - 9 } ]]
  543.  
  544. # decompress the raw data
  545. if {[catch {zip -mode decompress -nowrap 1 $cdata} data] != 0} {
  546. error $data
  547. }
  548.  
  549. return $data
  550. }
  551.  
  552. proc ::rss-synd::feed_read {feedlist} {
  553. array set feed $feedlist
  554.  
  555. if {[catch {open $feed(database) "r"} fp] != 0} {
  556. error $fp
  557. }
  558.  
  559. if {[info exists feed(charset)]} {
  560. fconfigure $fp -encoding [string tolower $feed(charset)]
  561. }
  562.  
  563. set data [read -nonewline $fp]
  564.  
  565. close $fp
  566.  
  567. return $data
  568. }
  569.  
  570. proc ::rss-synd::feed_write {feedlist data} {
  571. array set feed $feedlist
  572.  
  573. if {[catch {open $feed(database) "w+"} fp] != 0} {
  574. error $fp
  575. }
  576.  
  577. if {[info exists feed(charset)]} {
  578. fconfigure $fp -encoding [string tolower $feed(charset)]
  579. }
  580.  
  581. set data [string map { "\n" "" "\r" "" } $data]
  582.  
  583. puts -nonewline $fp $data
  584.  
  585. close $fp
  586. }
  587.  
  588. #
  589. # XML Functions
  590. ##
  591.  
  592. proc ::rss-synd::xml_list_create {xml_data} {
  593. set xml_list [list]
  594.  
  595. set ptr 0
  596. while {[string compare [set tag_start [[namespace current]::xml_get_position $xml_data $ptr]] ""]} {
  597. array set tag [list]
  598.  
  599. set tag_start_first [lindex $tag_start 0]
  600. set tag_start_last [lindex $tag_start 1]
  601.  
  602. set tag_string [string range $xml_data $tag_start_first $tag_start_last]
  603.  
  604. # move the pointer to the next character after the current tag
  605. set last_ptr $ptr
  606. set ptr [expr { $tag_start_last + 2 }]
  607.  
  608. # match 'special' tags that dont close
  609. if {[regexp -nocase -- {^!(\[CDATA|--|DOCTYPE)} $tag_string]} {
  610. set tag_data $tag_string
  611.  
  612. regexp -nocase -- {^!\[CDATA\[(.*?)\]\]$} $tag_string -> tag_data
  613. regexp -nocase -- {^!--(.*?)--$} $tag_string -> tag_data
  614.  
  615. if {[info exists tag_data]} {
  616. set tag(data) [[namespace current]::xml_escape $tag_data]
  617. }
  618. } else {
  619. # we should only ever encounter opening tags, if we hit a closing one somethings wrong.
  620. if {[string match {[/]*} $tag_string]} {
  621. putlog "\002Malformed Feed\002: Tag not open: \"<$tag_string>\" ($tag_start_first => $tag_start_last)"
  622. continue
  623. }
  624.  
  625. # NOTE: should this be a continue ?
  626. if {![regexp -- {(.[^ \/\n\r]*)(?: |\n|\r\n|\r|)(.*?)$} $tag_string -> tag_name tag_args]} {
  627. putlog "parse error!!!?!?!?!"
  628. continue
  629. }
  630. set tag(name) [[namespace current]::xml_escape $tag_name]
  631.  
  632. # get all of the tags attributes
  633. set tag(attrib) [list]
  634. if {[string length $tag_args] > 0} {
  635. set values [regexp -inline -all -- {(?:\s*|)(.[^=]*)=["'](.[^"']*)["']} $tag_args]
  636.  
  637. foreach {r_match r_tag r_value} $values {
  638. lappend tag(attrib) [[namespace current]::xml_escape $r_tag] [[namespace current]::xml_escape $r_value]
  639. }
  640. }
  641.  
  642. # find the end tag of non-self-closing tags
  643. if {(![regexp {(\?|!|/)(\s*)$} $tag_args]) || \
  644. (![string match "\?*" $tag_string])} {
  645. set tmp_num 1
  646. set tag_end_last $ptr
  647.  
  648. # find the correct closing tag if there are nested elements
  649. # with the same name
  650. while {$tmp_num > 0} {
  651. # search for a possible closing tag
  652. regexp -indices -start $tag_end_last -- "</$tag_name>" $xml_data tag_end
  653.  
  654. set last_tag_end_last $tag_end_last
  655.  
  656. set tag_end_first [lindex $tag_end 0]
  657. set tag_end_last [lindex $tag_end 1]
  658.  
  659. # check to see if there are any NEW opening tags within the
  660. # previous closing tag and the new closing one
  661. incr tmp_num [regexp -all -- "<$tag_name\(|.\[^>\]+\)>" [string range $xml_data $last_tag_end_last $tag_end_last]]
  662.  
  663. incr tmp_num -1
  664. }
  665.  
  666. # set the pointer to after the last closing tag
  667. set ptr [expr { $tag_end_last + 1 }]
  668.  
  669. catch {unset tmp_num xml_sub_data}
  670.  
  671. # remember tag_start*'s character index doesnt include the tag start and end characters
  672. set xml_sub_data [string range $xml_data [expr { $tag_start_last + 2 }] [expr { $tag_end_first - 1 }]]
  673.  
  674. # recurse the data within the currently open tag
  675. set result [[namespace current]::xml_list_create $xml_sub_data]
  676.  
  677. # set the list data returned from the recursion we just performed
  678. if {[llength $result] > 0} {
  679. set tag(children) $result
  680.  
  681. # set the current data we have because were already at the end of a branch
  682. # (ie: the recursion didnt return any data)
  683. } else {
  684. set tag(data) [[namespace current]::xml_escape $xml_sub_data]
  685. }
  686. }
  687. }
  688.  
  689. # insert any plain data that appears before the current element
  690. if {$last_ptr != [expr { $tag_start_first - 1 }]} {
  691. lappend xml_list [list "data" [[namespace current]::xml_escape [string range $xml_data $last_ptr [expr { $tag_start_first - 2 }]]]]
  692. }
  693. lappend xml_list [array get tag]
  694.  
  695. array unset tag "*"
  696. }
  697.  
  698. # if there is still plain data left add it
  699. if {$ptr < [string length $xml_data]} {
  700. lappend xml_list [list "data" [[namespace current]::xml_escape [string range $xml_data $ptr end]]]
  701. }
  702.  
  703. return $xml_list
  704. }
  705.  
  706. # simple escape function
  707. proc ::rss-synd::xml_escape {string} {
  708. regsub -all -- {([\{\}])} $string {\\\1} string
  709.  
  710. return $string
  711. }
  712.  
  713. # this function is to replace:
  714. # regexp -indices -start $ptr {<(!\[CDATA\[.+?\]\]|!--.+?--|!DOCTYPE.+?|.+?)>} $xml_data -> tag_start
  715. # which doesnt work correctly with tcl's re_syntax.
  716. proc ::rss-synd::xml_get_position {xml_data ptr} {
  717. set tag_start [list -1 -1]
  718.  
  719. regexp -indices -start $ptr {<(.+?)>} $xml_data -> tmp(tag)
  720. regexp -indices -start $ptr {<(!--.*?--)>} $xml_data -> tmp(comment)
  721. regexp -indices -start $ptr {<(!DOCTYPE.+?)>} $xml_data -> tmp(doctype)
  722. regexp -indices -start $ptr {<(!\[CDATA\[.+?\]\])>} $xml_data -> tmp(cdata)
  723.  
  724. # 'tag' regexp should be compared last
  725. foreach name [lsort [array names tmp]] {
  726. set tmp_s [split $tmp($name)]
  727. if {( ([lindex $tmp_s 0] < [lindex $tag_start 0]) && \
  728. ([lindex $tmp_s 0] > -1) ) || \
  729. ([lindex $tag_start 0] == -1)} {
  730. set tag_start $tmp($name)
  731. }
  732. }
  733.  
  734. if {([lindex $tag_start 0] == -1) || \
  735. ([lindex $tag_start 1] == -1)} {
  736. set tag_start ""
  737. }
  738.  
  739. return $tag_start
  740. }
  741.  
  742. # recursivly flatten all data without tags or attributes
  743. proc ::rss-synd::xml_list_flatten {xml_list {level 0}} {
  744. set xml_string ""
  745.  
  746. foreach e_list $xml_list {
  747. if {[catch {array set e_array $e_list}] != 0} {
  748. return $xml_list
  749. }
  750.  
  751. if {[info exists e_array(children)]} {
  752. append xml_string [[namespace current]::xml_list_flatten $e_array(children) [expr { $level + 1 }]]
  753. } elseif {[info exists e_array(data)]} {
  754. append xml_string $e_array(data)
  755. }
  756.  
  757. array unset e_array "*"
  758. }
  759.  
  760. return $xml_string
  761. }
  762.  
  763. # returns information on a data structure when given a path.
  764. # paths can be specified using: [struct number] [struct name] <...>
  765. proc ::rss-synd::xml_get_info {xml_list path {element "data"}} {
  766. set i 0
  767.  
  768. foreach {t_data} $xml_list {
  769. array set t_array $t_data
  770.  
  771. # if the name doesnt exist set it so we can still reference the data
  772. # using the 'stuct name' *
  773. if {![info exists t_array(name)]} {
  774. set t_array(name) ""
  775. }
  776.  
  777. if {[string match -nocase [lindex $path 1] $t_array(name)]} {
  778.  
  779. if {$i == [lindex $path 0]} {
  780. set result ""
  781.  
  782. if {([llength $path] == 2) && \
  783. ([info exists t_array($element)])} {
  784. set result $t_array($element)
  785. } elseif {[info exists t_array(children)]} {
  786. # shift the first path reference of the front of the path and recurse
  787. set result [[namespace current]::xml_get_info $t_array(children) [lreplace $path 0 1] $element]
  788. }
  789.  
  790. return $result
  791. }
  792.  
  793. incr i
  794. }
  795.  
  796. array unset t_array
  797. }
  798.  
  799. if {[lindex $path 0] == -1} {
  800. return $i
  801. }
  802. }
  803.  
  804. # converts 'args' into a list in the same order
  805. proc ::rss-synd::xml_join_tags {args} {
  806. set list [list]
  807.  
  808. foreach tag $args {
  809. foreach item $tag {
  810. if {[string length $item] > 0} {
  811. lappend list $item
  812. }
  813. }
  814. }
  815.  
  816. return $list
  817. }
  818.  
  819. #
  820. # Output Feed Functions
  821. ##
  822.  
  823. proc ::rss-synd::feed_output {feedlist data {odata ""}} {
  824. array set feed $feedlist
  825. set msgs [list]
  826.  
  827. set path [[namespace current]::xml_join_tags $feed(tag-feed) $feed(tag-list) -1 $feed(tag-name)]
  828. set count [[namespace current]::xml_get_info $data $path]
  829.  
  830. for {set i 0} {($i < $count) && ($i < $feed(announce-output))} {incr i} {
  831. set tmpp [[namespace current]::xml_join_tags $feed(tag-feed) $feed(tag-list) $i $feed(tag-name)]
  832. set tmpd [[namespace current]::xml_get_info $data $tmpp "children"]
  833.  
  834. if {[[namespace current]::feed_compare $feedlist $odata $tmpd]} {
  835. break
  836. }
  837.  
  838. set tmp_msg [[namespace current]::cookie_parse $feedlist $data $i]
  839. if {(![info exists feed(output-order)]) || \
  840. ($feed(output-order) == 0)} {
  841. set msgs [linsert $msgs 0 $tmp_msg]
  842. } else {
  843. lappend msgs $tmp_msg
  844. }
  845. }
  846.  
  847. set nick ""
  848. if {[info exists feed(nick)]} {
  849. set nick $feed(nick)
  850. }
  851.  
  852. [namespace current]::feed_msg $feed(type) $msgs $feed(channels) $nick
  853. }
  854.  
  855. proc ::rss-synd::feed_msg {type msgs targets {nick ""}} {
  856. # check if our target is a nick
  857. if {(($nick != "") && \
  858. ($targets == "")) || \
  859. ([regexp -- {[23]} $type])} {
  860. set targets $nick
  861. }
  862.  
  863. foreach msg $msgs {
  864. foreach chan $targets {
  865. if {([catch {botonchan $chan}] == 0) || \
  866. ([regexp -- {^[#&]} $chan] == 0)} {
  867. foreach line [split $msg "\n"] {
  868. if {($type == 1) || ($type == 3)} {
  869. putserv "NOTICE $chan :$line"
  870. } else {
  871. putserv "PRIVMSG $chan :$line"
  872. }
  873. }
  874. }
  875. }
  876. }
  877. }
  878.  
  879. proc ::rss-synd::feed_compare {feedlist odata data} {
  880. if {[string compare $odata ""] == 0} {
  881. return 0
  882. }
  883.  
  884. array set feed $feedlist
  885. array set ofeed [[namespace current]::feed_info [list] $odata]
  886.  
  887. if {[array size ofeed] == 0} {
  888. putlog "\002RSS Error\002: Invalid feed format ($feed(database))!"
  889. return 0
  890. }
  891.  
  892. if {[string compare -nocase [lindex $feed(tag-feed) 1] "feed"] == 0} {
  893. set cmp_items [list {0 "id"} "children" "" 2 {0 "link"} "attrib" "href" 1 {0 "title"} "children" "" 1]
  894. } else {
  895. set cmp_items [list {0 "guid"} "children" "" 2 {0 "link"} "children" "" 1 {0 "title"} "children" "" 1]
  896. }
  897.  
  898. set path [[namespace current]::xml_join_tags $ofeed(tag-feed) $ofeed(tag-list) -1 $ofeed(tag-name)]
  899. set count [[namespace current]::xml_get_info $odata $path]
  900.  
  901. for {set i 0} {$i < $count} {incr i} {
  902. # extract the current article from the database
  903. set tmpp [[namespace current]::xml_join_tags $ofeed(tag-feed) $ofeed(tag-list) $i $ofeed(tag-name)]
  904. set tmpd [[namespace current]::xml_get_info $odata $tmpp "children"]
  905.  
  906. set e 0; # compare items that existed in the feed
  907. set m 0; # total matches
  908. foreach {cmp_path cmp_element cmp_attrib cmp_weight} $cmp_items {
  909. # try and extract the tag info from the database
  910. set oresult [[namespace current]::xml_get_info $tmpd $cmp_path $cmp_element]
  911. if {[string compare -nocase $cmp_element "attrib"] == 0} {
  912. array set tmp $oresult
  913. catch {set oresult $tmp($cmp_attrib)}
  914. unset tmp
  915. }
  916.  
  917. # the tag doesnt exist in this feed so we'll ignore it
  918. if {[string compare $oresult ""] == 0} {
  919. continue
  920. }
  921.  
  922. incr e
  923.  
  924. # extract the tag info from the current article
  925. set result [[namespace current]::xml_get_info $data $cmp_path $cmp_element]
  926. if {[string compare -nocase $cmp_element "attrib"] == 0} {
  927. array set tmp $result
  928. catch {set result $tmp($cmp_attrib)}
  929. unset tmp
  930. }
  931.  
  932. if {[string compare -nocase $oresult $result] == 0} {
  933. set m [expr { $m + $cmp_weight} ]
  934. }
  935. }
  936.  
  937. # announce if we have over 66% certainty that this is new
  938. if {[expr { round(double($m) / double($e) * 100) }] >= 66} {
  939. return 1
  940. }
  941. }
  942.  
  943. return 0
  944. }
  945.  
  946. #
  947. # Cookie Parsing Functions
  948. ##
  949.  
  950. proc ::rss-synd::cookie_parse {feedlist data current} {
  951. array set feed $feedlist
  952. set output $feed(output)
  953.  
  954. set eval 0
  955. if {([info exists feed(evaluate-tcl)]) && ($feed(evaluate-tcl) == 1)} { set eval 1 }
  956.  
  957. set matches [regexp -inline -nocase -all -- {@@(.*?)@@} $output]
  958. foreach {match tmpc} $matches {
  959. set tmpc [split $tmpc "!"]
  960. set index 0
  961.  
  962. set cookie [list]
  963. foreach piece $tmpc {
  964. set tmpp [regexp -nocase -inline -all -- {^(.*?)\((.*?)\)|(.*?)$} $piece]
  965.  
  966. if {[lindex $tmpp 3] == ""} {
  967. lappend cookie [lindex $tmpp 2] [lindex $tmpp 1]
  968. } else {
  969. lappend cookie 0 [lindex $tmpp 3]
  970. }
  971. }
  972.  
  973. # replace tag-item's index with the current article
  974. if {[string compare -nocase $feed(tag-name) [lindex $cookie 1]] == 0} {
  975. set cookie [[namespace current]::xml_join_tags $feed(tag-list) [lreplace $cookie $index $index $current]]
  976. }
  977.  
  978. set cookie [[namespace current]::xml_join_tags $feed(tag-feed) $cookie]
  979.  
  980. if {[set tmp [[namespace current]::charset_encode $feedlist [[namespace current]::cookie_replace $cookie $data]]] != ""} {
  981. set tmp [[namespace current]::xml_list_flatten $tmp]
  982.  
  983. regsub -all -- {([\"\$\[\]\{\}\(\)\\])} $match {\\\1} match
  984. regsub -- $match $output "[string map { "&" "\\\x26" } [[namespace current]::html_decode $eval $tmp]]" output
  985. }
  986. }
  987.  
  988. # remove empty cookies
  989. if {(![info exists feed(remove-empty)]) || ($feed(remove-empty) == 1)} {
  990. regsub -nocase -all -- "@@.*?@@" $output "" output
  991. }
  992.  
  993. # evaluate tcl code
  994. if {$eval == 1} {
  995. if {[catch {set output [subst $output]} error] != 0} {
  996. putlog "\002RSS Eval Error\002: $error"
  997. }
  998. }
  999.  
  1000. return $output
  1001. }
  1002.  
  1003. proc ::rss-synd::cookie_replace {cookie data} {
  1004. set element "children"
  1005.  
  1006. set tags [list]
  1007. foreach {num section} $cookie {
  1008. if {[string compare "=" [string range $section 0 0]] == 0} {
  1009. set attrib [string range $section 1 end]
  1010. set element "attrib"
  1011. break
  1012. } else {
  1013. lappend tags $num $section
  1014. }
  1015. }
  1016.  
  1017. set return [[namespace current]::xml_get_info $data $tags $element]
  1018.  
  1019. if {[string compare -nocase "attrib" $element] == 0} {
  1020. array set tmp $return
  1021.  
  1022. if {[catch {set return $tmp($attrib)}] != 0} {
  1023. return
  1024. }
  1025. }
  1026.  
  1027. return $return
  1028. }
  1029.  
  1030. #
  1031. # Misc Functions
  1032. ##
  1033.  
  1034. proc ::rss-synd::html_decode {eval data {loop 0}} {
  1035. array set chars {
  1036. nbsp \x20 amp \x26 quot \x22 lt \x3C
  1037. gt \x3E iexcl \xA1 cent \xA2 pound \xA3
  1038. curren \xA4 yen \xA5 brvbar \xA6 brkbar \xA6
  1039. sect \xA7 uml \xA8 die \xA8 copy \xA9
  1040. ordf \xAA laquo \xAB not \xAC shy \xAD
  1041. reg \xAE hibar \xAF macr \xAF deg \xB0
  1042. plusmn \xB1 sup2 \xB2 sup3 \xB3 acute \xB4
  1043. micro \xB5 para \xB6 middot \xB7 cedil \xB8
  1044. sup1 \xB9 ordm \xBA raquo \xBB frac14 \xBC
  1045. frac12 \xBD frac34 \xBE iquest \xBF Agrave \xC0
  1046. Aacute \xC1 Acirc \xC2 Atilde \xC3 Auml \xC4
  1047. Aring \xC5 AElig \xC6 Ccedil \xC7 Egrave \xC8
  1048. Eacute \xC9 Ecirc \xCA Euml \xCB Igrave \xCC
  1049. Iacute \xCD Icirc \xCE Iuml \xCF ETH \xD0
  1050. Dstrok \xD0 Ntilde \xD1 Ograve \xD2 Oacute \xD3
  1051. Ocirc \xD4 Otilde \xD5 Ouml \xD6 times \xD7
  1052. Oslash \xD8 Ugrave \xD9 Uacute \xDA Ucirc \xDB
  1053. Uuml \xDC Yacute \xDD THORN \xDE szlig \xDF
  1054. agrave \xE0 aacute \xE1 acirc \xE2 atilde \xE3
  1055. auml \xE4 aring \xE5 aelig \xE6 ccedil \xE7
  1056. egrave \xE8 eacute \xE9 ecirc \xEA euml \xEB
  1057. igrave \xEC iacute \xED icirc \xEE iuml \xEF
  1058. eth \xF0 ntilde \xF1 ograve \xF2 oacute \xF3
  1059. ocirc \xF4 otilde \xF5 ouml \xF6 divide \xF7
  1060. oslash \xF8 ugrave \xF9 uacute \xFA ucirc \xFB
  1061. uuml \xFC yacute \xFD thorn \xFE yuml \xFF
  1062. ensp \x20 emsp \x20 thinsp \x20 zwnj \x20
  1063. zwj \x20 lrm \x20 rlm \x20 euro \x80
  1064. sbquo \x82 bdquo \x84 hellip \x85 dagger \x86
  1065. Dagger \x87 circ \x88 permil \x89 Scaron \x8A
  1066. lsaquo \x8B OElig \x8C oelig \x8D lsquo \x91
  1067. rsquo \x92 ldquo \x93 rdquo \x94 ndash \x96
  1068. mdash \x97 tilde \x98 scaron \x9A rsaquo \x9B
  1069. Yuml \x9F apos \x27
  1070. }
  1071.  
  1072. regsub -all -- {<(.[^>]*)>} $data " " data
  1073.  
  1074. if {$eval != 1} {
  1075. regsub -all -- {([\"\$\[\]\{\}\(\)\\])} $data {\\\1} data
  1076. } else {
  1077. regsub -all -- {([\"\$\[\]\{\}\(\)\\])} $data {\\\\\\\1} data
  1078. }
  1079.  
  1080. regsub -all -- {&#([0-9]+);} $data {[format %c [scan \1 %d]]} data
  1081. regsub -all -- {&#x([0-9a-zA-Z]+);} $data {[format %c [scan \1 %x]]} data
  1082. regsub -all -- {&([0-9a-zA-Z#]*);} $data {[if {[catch {set tmp $chars(\1)} char] == 0} { set tmp }]} data
  1083. regsub -all -- {&([0-9a-zA-Z#]*);} $data {[if {[catch {set tmp [string tolower $chars(\1)]} char] == 0} { set tmp }]} data
  1084.  
  1085. regsub -nocase -all -- "\\s{2,}" $data " " data
  1086.  
  1087. set data [subst $data]
  1088. if {[incr loop] == 1} {
  1089. set data [[namespace current]::html_decode 0 $data $loop]
  1090. }
  1091.  
  1092. return $data
  1093. }
  1094.  
  1095. proc ::rss-synd::charset_encode {feedlist string} {
  1096. array set feed $feedlist
  1097.  
  1098. if {[info exists feed(charset)]} {
  1099. set string [encoding convertto [string tolower $feed(charset)] $string]
  1100. }
  1101.  
  1102. return $string
  1103. }
  1104.  
  1105. proc ::rss-synd::check_channel {chanlist chan} {
  1106. foreach match [split $chanlist] {
  1107. if {[string compare -nocase $match $chan] == 0} {
  1108. return 1
  1109. }
  1110. }
  1111.  
  1112. return 0
  1113. }
  1114.  
  1115. proc ::rss-synd::urldecode {str} {
  1116. regsub -all -- {([\"\$\[\]\{\}\(\)\\])} $str {\\\1} str
  1117.  
  1118. regsub -all -- {%([aAbBcCdDeEfF0-9][aAbBcCdDeEfF0-9]);?} $str {[format %c [scan \1 %x]]} str
  1119.  
  1120. return [subst $str]
  1121. }
  1122.  
  1123. ::rss-synd::init
Add Comment
Please, Sign In to add comment