Advertisement
Guest User

urltitle

a guest
Feb 8th, 2016
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.69 KB | None | 0 0
  1. # Script to grab titles from webpages
  2. # Updated version by teel @ IRCnet
  3. #
  4. # https://github.com/teeli/urltitle
  5. #
  6. # Detects URL from IRC channels and prints out the title
  7. #
  8. # Version Log:
  9. # 0.04 HTML parsing for titles added
  10. # 0.03c HTTPS support is now optional and will be automatically dropped if the Tcl TLS package does not exist
  11. # 0.03b Some formatting
  12. # 0.03 HTTPS support
  13. # 0.02 Updated version by teel. Added support for redirects, trimmed titles (remove extra whitespaces),
  14. # some optimization
  15. # 0.01a Original version by rosc
  16. #
  17. ################################################################################################################
  18. #
  19. # Original script:
  20. # Copyright C.Leonhardt (rosc2112 at yahoo com) Aug.11.2007
  21. # http://members.dandy.net/~fbn/urltitle.tcl.txt
  22. # Loosely based on the tinyurl script by Jer and other bits and pieces of my own..
  23. #
  24. ################################################################################################################
  25. #
  26. # Usage:
  27. #
  28. # 1) Set the configs below
  29. # 2) .chanset #channelname +urltitle ;# enable script
  30. # 3) .chanset #channelname +logurltitle ;# enable logging
  31. # Then just input a url in channel and the script will retrieve the title from the corresponding page.
  32. #
  33. ################################################################################################################
  34.  
  # HTTPS support is optional: only configure TLS when the tls package can be
  # loaded, so the script still loads (HTTP-only) on hosts without it.
  # SSLv2 and SSLv3 are explicitly disabled.
  if {![catch {package require tls}]} {
    ::tls::init -ssl2 false -ssl3 false -tls1 true
  }
  37.  
  38.  
  # UrlTitle: watches channel messages for URLs, fetches the page and prints
  # its <title> to the channel.
  namespace eval UrlTitle {
    # CONFIG
    # NOTE: `variable` (not `set`) is used on purpose. Inside `namespace eval`,
    # Tcl 8.x `set` writes to an existing global of the same name instead of
    # creating a namespace variable, which would break the procs below.
    variable ignore "bdkqr|dkqr" ;# user flags whose messages are ignored
    variable length 5            ;# minimum length of a word to be treated as a URL
    variable delay 1             ;# minimum seconds to wait between two lookups
    variable timeout 5000        ;# ::http::geturl timeout in milliseconds
    variable maxRedirects 5      ;# maximum number of redirects followed per URL

    # BINDS
    bind pubm "-|-" {*://*} UrlTitle::handler
    setudef flag urltitle    ;# channel flag to enable the script
    setudef flag logurltitle ;# channel flag to enable logging of announced titles

    # INTERNAL
    variable last 1             ;# time of the last lookup, don't change
    variable scriptVersion 0.04 ;# matches the newest entry in the version log

    # PACKAGES
    package require http
    # Optional packages: tls enables https:// URLs, htmlparse decodes HTML
    # entities in titles. Missing packages simply disable the feature.
    variable httpsSupport [expr {![catch {package require tls}]}]
    variable htmlSupport [expr {![catch {package require htmlparse}]}]

    # handler -- pubm bind callback.
    #
    # Scans the message for the first word that looks like an http(s)/ftp URL
    # and announces the title of that page in the channel.
    #
    # Arguments:
    #   nick  nickname of the sender
    #   host  user@host of the sender
    #   user  handle of the sender (checked against the ignore flags)
    #   chan  channel the message was sent to
    #   text  full message text
    #
    # Returns 1 (change to 0 if the pubm trigger should be logged additionally).
    proc handler {nick host user chan text} {
      variable httpsSupport
      variable htmlSupport
      variable delay
      variable last
      variable ignore
      variable length

      set now [clock seconds]
      if {[channel get $chan urltitle]
          && ($now - $delay) > $last
          && ![matchattr $user $ignore]} {
        foreach word [split $text] {
          if {[string length $word] < $length} { continue }
          if {![regexp {^(f|ht)tp(s|)://} $word]} { continue }
          # Skip URLs carrying credentials, an explicit port or a dot-path.
          if {[regexp {://([^/:]*:([^/]*@|\d+(/|$))|.*/\.)} $word]} { continue }

          set last $now
          set urtitle ""

          # https is registered only for the duration of the lookup; the catch
          # guarantees it is unregistered again even if the lookup throws.
          if {$httpsSupport} {
            ::http::register https 443 ::tls::socket
          }
          set failed [catch {
            set urtitle [::UrlTitle::parse $word]
            if {$htmlSupport} {
              set urtitle [::htmlparse::mapEscapes $urtitle]
            }
          } err]
          if {$httpsSupport} {
            ::http::unregister https
          }
          if {$failed} {
            putlog "UrlTitle: error while processing $word: $err"
            break
          }

          # Entity decoding may reintroduce line breaks or other control
          # characters; never pass those on to the server.
          set urtitle [string trim [regsub -all {[[:cntrl:][:space:]]+} $urtitle " "]]

          if {[string length $urtitle]} {
            putserv "PRIVMSG $chan :\[ $urtitle \]"
            if {[channel get $chan logurltitle]} {
              putlog "UrlTitle: <$nick:$chan> $word -> $urtitle"
            }
          }
          # Only the first URL of a message is handled.
          break
        }
      }
      # change to return 0 if you want the pubm trigger logged additionally..
      return 1
    }

    # parse -- fetch a URL and extract the page title.
    #
    # Arguments:
    #   url        URL to fetch
    #   redirects  number of redirects already followed (internal, default 0)
    #
    # Returns the title with runs of whitespace collapsed to single spaces, or
    # an empty string when the page cannot be fetched or has no title.
    proc parse {url {redirects 0}} {
      variable timeout
      variable maxRedirects

      set title ""
      if {![string length $url]} {
        return $title
      }
      if {[catch {set http [::http::geturl $url -timeout $timeout]} results]} {
        putlog "Connection to $url failed"
        return $title
      }

      set location ""
      if {[::http::status $http] eq "ok"} {
        set code [::http::ncode $http]
        if {$code == 200} {
          # The first quantifier is non-greedy on purpose: in Tcl AREs it sets
          # the preference of the whole expression, so the match stops at the
          # first </title> instead of the last one in the document.
          if {[regexp -nocase {<title[^>]*?>(.*?)</title>} [::http::data $http] -> title]} {
            set title [string trim [regsub -all {\s+} $title " "]]
          }
        } elseif {$code >= 300 && $code < 400} {
          # Header names are case-insensitive; find the redirect target.
          foreach {key value} [::http::meta $http] {
            if {[string equal -nocase $key "location"]} {
              set location [string trim $value]
              break
            }
          }
        }
      } else {
        putlog "Connection to $url failed"
      }
      # Release the token before following a redirect so tokens do not pile up.
      ::http::cleanup $http

      # Follow absolute http(s) redirects only, up to maxRedirects hops, so a
      # redirect loop cannot recurse forever.
      if {$redirects < $maxRedirects && [regexp -nocase {^https?://} $location]} {
        set title [::UrlTitle::parse $location [expr {$redirects + 1}]]
      }
      return $title
    }

    putlog "Initialized Url Title Grabber v$scriptVersion"
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement