Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# get_title --
#
#   Fetch a URL over HTTP and return the text of its <title> element,
#   following HTTP redirects. For twitter.com URLs, text scraped from the
#   page body is appended to the title when the expected markup is present.
#
# Arguments:
#   url   Absolute URL to fetch.
#
# Results:
#   The title with whitespace collapsed and square brackets removed.
#   Returns "" when the connection fails, the response is neither a 2xx
#   nor a redirect carrying a Location header, or no <title> is present.
#   Returns a fixed message string when more than 10 redirects are chained.
#
# Side effects:
#   Uses the global counter "redirections" to bound redirect chains; it is
#   reset to 0 on every terminal path so that one failed lookup cannot
#   poison later ones. Logs through putlog.
proc get_title {url} {
    global redirections

    # Tolerate callers that never initialised the counter.
    if {![info exists redirections]} {
        set redirections 0
    }

    if {$redirections > 10} {
        set redirections 0
        return "FUCK YOU!@%$&^!*#"
    }

    if {[catch {set token [::http::geturl $url]}]} {
        putlog "Couldn't connect to $url"
        set redirections 0
        return ""
    }

    # Copy everything needed out of the transaction and release the token
    # straight away, so no later return or error path can leak it.
    upvar #0 $token state
    set status [::http::ncode $token]
    set meta $state(meta)
    set body $state(body)
    set wanted ""
    if {[info exists state(charset)]} {
        set wanted $state(charset)
    }
    ::http::cleanup $token

    # Header names are case-insensitive; servers may send "location".
    set location ""
    foreach {name value} $meta {
        if {[string equal -nocase $name "Location"]} {
            set location $value
        }
    }

    if {![string match 2?? $status]} {
        if {$location ne "" && [string match 3?? $status]} {
            incr redirections
            if {[string match "//*" $location]
                    && [regexp {^(https?:)//} $url -> scheme]} {
                # Protocol-relative target: reuse the scheme of the current URL.
                set newurl "$scheme$location"
            } elseif {[string match "/*" $location]} {
                # Host-relative target: keep scheme and host of the current URL.
                regsub {(http[s]?://[^/]+)(.*)} $url {\1} newurl
                append newurl $location
            } else {
                set newurl $location
            }
            putlog "Redirecting $url to $newurl"
            return [get_title $newurl]
        }
        set redirections 0
        return ""
    }
    set redirections 0

    # Pick the Tcl encoding whose name equals the reported charset, ignoring
    # case, and fall back to utf-8. The charset is server-controlled, so it is
    # compared as a plain string rather than used as a regexp pattern.
    set charset utf-8
    foreach encname [encoding names] {
        if {[string equal -nocase $encname $wanted]} {
            set charset $encname
            break
        }
    }
    # NOTE(review): this encodes the body into $charset bytes, as the original
    # did; presumably the caller expects an encoded byte string - confirm.
    set body [encoding convertto $charset $body]

    # First <title> element only. Every quantifier is non-greedy so that Tcl
    # prefers the shortest match and stops at the first closing tag.
    if {![regexp -nocase -- {<title[^>]*?>(.*?)</title>} $body -> title]} {
        return ""
    }

    if {[regexp {http[s]?:\/\/(www\.)?twitter\.com} $url]} {
        set tweetregexp {<div class=["]tweet-text js-tweet-text["]>(.+?)</div>}
        if {[regexp -nocase $tweetregexp $body]} {
            # Replace the page title with "name (screen name): tweet text".
            set username [regexp -inline -- {<a class='tweet-screen-name.+?title='(.+?)'>(.+?)</a>} $body]
            set title "[lindex $username 1] ([lindex $username 2])"
            set tweet [lindex [regexp -all -nocase -inline -- $tweetregexp $body] 1]
            set title [regsub {:.+?\.\.\.:} [format "%s: %s" $title [encoding convertto utf-8 $tweet]] {:}]
            # Keep only the visible text of any anchors.
            regsub -nocase -all {<a [^>]+?[^>]*>([^<>]+?)</a>} $title {\1} title
        }
        if {[regexp -nocase {.*<span class="entry-content">(.+?)</span>.*} $body]} {
            set entry [lindex [regexp -all -nocase -inline -- {<span class="entry-content">(.+?)</span>} $body] 1]
            set title [regsub {:.+?\.\.\.:} [format "%s: %s" $title [encoding convertto utf-8 $entry]] {:}]
            regsub -nocase -all {<a [^>]+?[^>]*>([^<>]+?)</a>} $title {\1} title
        }
    }

    # Strip square brackets (presumably to keep the text inert if it is later
    # substituted - confirm), then turn every whitespace run, including
    # newlines and tabs, into a single space so adjacent words stay separated.
    regsub -all {[\[\]]} $title {} title
    regsub -all {\s+} $title { } title
    return [string trim $title]
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement