Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'rubygems'
- require 'nokogiri'
- require 'open-uri'
- require 'mysql'
- # Load the config file
- require_relative 'lib/config.rb'
# Scrapes a single file page and returns its details.
#
# fUrl - URL of the file page to fetch.
#
# Returns a Hash with String keys, in this order: "name", "changelog",
# "date" (the raw data-epoch attribute), "url", "filename", "size",
# "status", "type", "md5", "downloads" (Integer) and "builds" (Array of
# String). "changelog" is "" when the page has no changelog paragraph.
#
# Raises NoMethodError when an expected element is missing from the page,
# ArgumentError when the download counter contains no digits, and whatever
# open-uri raises when the request itself fails.
def fileUpdate(fUrl)
  # URI#open (mixed in by open-uri) replaces Kernel#open: Kernel#open no
  # longer fetches URLs on Ruby 3.0+ and would run a subprocess for a string
  # starting with "|". The block form closes the response IO once parsed.
  doc = URI.parse(fUrl).open { |io| Nokogiri::HTML(io) }

  title = doc.at_css('.main-body').at_css('h1').inner_text.strip
  changelog = doc.at_css('.content-box').at_css('p')

  # The fact box is located relative to the date element: two levels up.
  date_node = doc.at_css('.standard-date')
  fact_box = date_node.parent.parent
  download_link = fact_box.at_css('a')
  # Size, MD5 and download count are picked out of the <dd> list by position.
  facts = fact_box.css('dd')

  {
    "name" => title,
    "changelog" => changelog ? changelog.text : "",
    "date" => date_node['data-epoch'],
    "url" => download_link['href'],
    "filename" => download_link.text,
    "size" => facts[3].text,
    "status" => fact_box.at_css('.file-status').text,
    "type" => fact_box.at_css('.file-type').text,
    "md5" => facts[4].text,
    # Explicit base 10 so a digit string with a leading zero is not read as
    # octal by Kernel#Integer.
    "downloads" => Integer(facts[7].text.gsub(/[^\d]/, ''), 10),
    # List of builds
    "builds" => fact_box.at_css('.comma-separated-list').css('li').map(&:text)
  }
end
# Scrapes every file listed for one plugin, following pagination.
#
# slug - the plugin's URL slug; the listing URL is CONFIG[:site] + slug +
#        "/files/".
#
# Returns an Array with one fileUpdate Hash per listed file, or an empty
# Array when the listing reports that no files were found. The collected
# Array is also written to stdout before returning.
#
# Sleeps CONFIG[:sleep] seconds after each file page request.
def pluginUpdate(slug)
  pUrl = CONFIG[:site] + slug + "/files/"
  file_path = /\/files\/(.*)/
  files = []

  # URI#open (mixed in by open-uri) replaces Kernel#open, which no longer
  # fetches URLs on Ruby 3.0+; the block form closes the response IO.
  doc = URI.parse(pUrl).open { |io| Nokogiri::HTML(io) }

  # Check if there are no files, return empty array if so
  return files if doc.at_css('.listing-none-found')

  # Figure out if we're paginated. A listing without any pagination element
  # is treated as a single page instead of raising on nil.
  last_page = 1
  pagination = doc.at_css('.listing-pagination')
  if pagination && pagination.at_css('a')
    # The node just before the "next" entry holds the highest page number.
    last_page = Integer(pagination.at_css('.listing-pagination-pages-next').previous_sibling.text)
  end

  # Loop through the pages/files, fetch file info
  1.upto(last_page) do |page|
    # Page 1 is already loaded above; only later pages need a request.
    if page > 1
      doc = URI.parse("#{pUrl}?page=#{page}").open { |io| Nokogiri::HTML(io) }
    end

    doc.css('td.col-file').each do |cell|
      # Keep only the part of the link after "/files/" and append it to the
      # listing URL to get the file page URL.
      fUrl = pUrl + file_path.match(cell.at_css('a')['href'])[1]
      files.push(fileUpdate(fUrl))
      sleep(CONFIG[:sleep])
    end
  end

  # Kept from the original: nothing else in the visible code stores or
  # prints the scraped file data.
  puts files
  files
end
# Builds the plugin Hash for one row of the project listing.
#
# row     - the '.row-joined-to-next' node of the plugin.
# updated - the data-epoch value already read from that row.
#
# Returns a Hash with String keys "updated", "name", "slug", "stage",
# "summary", "downloads" (starts at 0), "banner", "authors" (Array of
# String) and "categories" (Array of Hashes with "name", "slug" and
# "description").
def devBukkitParseRow(row, updated)
  slugRegEx = /\/server-mods\/(.*?)\//
  categoryRegEx = /.*?category=(.*)/

  project = row.at_css('.col-project')

  # The banner is optional: fall back to "" when the icon cell has no link
  # or the link has no image.
  icon = row.at_css('.col-icon').at_css('a')
  image = icon && icon.at_css('img')

  # Fetch categories
  categories = row.at_css('.col-category').css('a').map do |category|
    {
      "name" => category.text,
      "slug" => categoryRegEx.match(category['href'])[1],
      "description" => category['title']
    }
  end

  {
    "updated" => updated,
    "name" => project.text,
    "slug" => slugRegEx.match(project.at_css('a')['href'])[1],
    "stage" => row.at_css('.col-status').text,
    # The summary lives in the row that follows. next_element skips any
    # whitespace text node between the two rows, which next_sibling can
    # return instead of the row.
    "summary" => row.next_element.at_css('.summary').inner_html,
    "downloads" => 0,
    "banner" => image ? image['data-full-src'] : "",
    # Fetch authors
    "authors" => row.at_css('.col-user').css('a').map(&:text),
    "categories" => categories
  }
end

# Scrapes the project listing from startPage to the last page, then fetches
# the file list of every plugin found and totals its downloads.
#
# startPage - listing page number to start from.
# quick     - truthy to request the listing without "&sort=name"; otherwise
#             the listing is requested sorted by name.
#
# Returns true. The scraped plugins are not stored or returned yet (see the
# TODO at the end).
#
# Prints each listing URL before requesting it and sleeps CONFIG[:sleep]
# seconds after each listing page.
def devBukkitUpdate(startPage, quick)
  page = startPage
  lastPage = nil
  # TODO: Get last update time if quick
  # NOTE(review): "updated" below is the raw data-epoch attribute, a String;
  # whoever sets lastUpdate must compare like with like.
  lastUpdate = nil
  plugins = []

  while lastPage.nil? || page <= lastPage
    # Sort by name, so as to not miss plugins updated while we're running
    # We only scrape the list first, to hopefully not miss any plugins
    if quick
      pageUrl = "#{CONFIG[:site]}?page=#{page}"
    else
      pageUrl = "#{CONFIG[:site]}?page=#{page}&sort=name"
    end
    puts "Scraping #{pageUrl}"

    # URI#open (mixed in by open-uri) replaces Kernel#open, which no longer
    # fetches URLs on Ruby 3.0+; the block form closes the response IO.
    doc = URI.parse(pageUrl).open { |io| Nokogiri::HTML(io) }

    # Get the maximum page count. When the pager or its "next" entry is
    # absent, the current page is taken as the last one instead of raising
    # on nil.
    if lastPage.nil?
      pager = doc.at_css('.listing-pagination-pages')
      nextEntry = pager && pager.at_css('.listing-pagination-pages-next')
      lastPage = nextEntry ? Integer(nextEntry.previous_sibling.text) : page
    end

    doc.css('.row-joined-to-next').each do |row|
      updated = row.at_css('.col-date').at_css('.standard-date')['data-epoch']
      # Quick mode will stop scraping once we hit the last checked plugin
      if lastUpdate && updated < lastUpdate
        page = lastPage
        break
      end
      plugins.push(devBukkitParseRow(row, updated))
    end

    # Throttle once per listing request. Parsing a row makes no request, so
    # sleeping per row only slowed the listing scan down.
    sleep(CONFIG[:sleep])
    page += 1
  end

  # Now we update the plugin files
  plugins.each do |plugin|
    plugin["files"] = pluginUpdate(plugin["slug"])
    plugin["downloads"] = plugin["files"].inject(plugin["downloads"]) do |total, file|
      total + file["downloads"]
    end
  end

  # TODO: Store the plugins in the database
  true
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement