Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
-- To save the output of this script to a file, run it like this: "lua mageloscrape.lua > outputfile.txt"
-- Diagnostics (load/fetch/parse failures, HTTP warnings) are written to stderr so
-- that they never end up inside the redirected output file.

--- Load a library, or report the failure on stderr and stop with a non-zero status.
-- `require` raises an error when a module is missing, so it is wrapped in pcall
-- in order to print a readable message instead of a stack trace.
-- @param moduleName name passed to require
-- @param label      short name used in the error message
-- @return the loaded module
local function requireOrExit(moduleName, label)
    local ok, lib = pcall(require, moduleName);
    if ( not ok or not lib ) then
        io.stderr:write("failed to load "..label.." lib: "..tostring(lib).."\n");
        os.exit(1);
    end
    return lib;
end

--local http = require("socket.http");
local https = requireOrExit("ssl.https", "https");
local ltn12 = requireOrExit("ltn12", "ltn12");
-- NOTE: DEFLATE was only referenced by a gunzip code path that is disabled; the
-- library is still loaded (and still mandatory) to keep the script's requirements unchanged.
local DEFLATE = requireOrExit("compress.deflatelua", "deflate");

-- Optional whitelist. When ITEMS is a table, all items are ignored except those
-- named in it (MAX_ITEM_ID is not applied in that case). When ITEMS is nil, every
-- item with an ID below MAX_ITEM_ID is listed.
-- To enable the whitelist, change the "--[[" below into "---[[".
local ITEMS;
--[[
ITEMS = {
    ["Black Marble"] = true,
    ["Chipped Onyx Sapphire"] = true,
    ["Crushed Black Marble"] = true,
    ["Crushed Coral"] = true,
    ["Crushed Flame Emerald"] = true,
    ["Crushed Flame Opal"] = true,
    ["Crushed Jaundice Gem"] = true,
    ["Crushed Lava Ruby"] = true,
    ["Crushed Onyx Sapphire"] = true,
    ["Crushed Opal"] = true,
    ["Crushed Topaz"] = true,
    ["Flawed Emerald"] = true,
    ["Flawed Sea Sapphire"] = true,
    ["Flawed Topaz"] = true,
    ["Flawless Diamond"] = true,
    ["Jaundice Gem"] = true,
    ["Nephrite"] = true,
    ["Pristine Emerald"] = true,
    ["Chipped Black Marble"] = true,
    ["Coral Crescent"] = true,
    ["Flawed Chrysolite"] = true,
    ["Flawed Flame Emerald"] = true,
    ["Flawed Flame Opal"] = true,
    ["Flawed Opal"] = true,
    ["Flawed Lava Ruby"] = true,
    ["Imperfect Diamond"] = true,
    ["Crushed Chrysolite"] = true,
    ["Crushed Diamond Dust"] = true,
    ["Crushed Emerald"] = true,
    ["Crushed Nephrite"] = true,
    ["Crushed Sea Sapphire"] = true,
};
--]]

-- Only items whose ID is strictly below this number are listed (when ITEMS is nil).
-- Useful if you want to filter out items from newer eras.
local MAX_ITEM_ID = 33000;

-- the script will scrape all the NPC URLs in this table
local urls = {
    "https://eq.magelo.com/npc/92944/a-fetid-fiend",
    "https://eq.magelo.com/npc/92952",
    "https://eq.magelo.com/npc/95180",
};

-- Literal text that precedes the item array in the page's inline script.
local DATA_MARKER = "var data=(function(){return[";
-- The marker is only searched from this byte offset onwards (as the script has
-- always done). NOTE(review): presumably a speed-up; a page shorter than this is
-- reported as having no items -- confirm the marker never appears earlier.
local DATA_SEARCH_START = 20000;

--- Download one page.
-- @param url page address
-- @return page body as a string plus the HTTP status code, or nil plus the error
local function fetchPage(url)
    local chunks = {};
    local success, code = https.request {
        url = url,
        headers = {
            --["Accept"] = "text/*, text/html",
            --["Accept-Encoding"] = "gzip;q=0,deflate;q=0",
            --["Accept-Encoding"] = "",
        },
        sink = ltn12.sink.table(chunks),
    };
    if ( not success ) then
        return nil, code;
    end
    return table.concat(chunks), code;
end

--- Turn the item array embedded in the page into a Lua table.
-- The array literal is rewritten by swapping square brackets for braces and is
-- then evaluated as a Lua expression.
-- NOTE(review): this evaluates text downloaded from a web site. The chunk is run
-- with an empty environment so it cannot reach os/io or any other global, but a
-- real parser would be safer. Brackets inside item names are rewritten as well.
-- @param page      full page text
-- @param markerPos index at which DATA_MARKER starts in page
-- @return table of items, or nil plus an error message
local function parseItems(page, markerPos)
    -- The marker ends with the opening "[" of the array; start on that character.
    local firstPos = markerPos + #DATA_MARKER - 1;
    local closePos = page:find("]]]", firstPos, true);
    if ( not closePos ) then
        return nil, "item data is not terminated by ]]]";
    end

    local literal = page:sub(firstPos, closePos + 2);
    literal = literal:gsub("%[", "{");
    literal = literal:gsub("%]", "}");

    local chunk, err;
    if ( setfenv ) then
        -- Lua 5.1 / LuaJIT
        chunk, err = loadstring("return "..literal, "=items");
        if ( chunk ) then
            setfenv(chunk, {});
        end
    else
        -- Lua 5.2+: loadstring is gone, load takes the environment directly
        chunk, err = load("return "..literal, "=items", "t", {});
    end
    if ( not chunk ) then
        return nil, err;
    end

    local ok, result = pcall(chunk);
    if ( not ok ) then
        return nil, result;
    end
    if ( type(result) ~= "table" ) then
        return nil, "item data did not evaluate to a table";
    end
    return result;
end

--- Print the header and the item lines for one NPC.
-- Each listed item is printed as: name, item ID, drops, drops/kills.
-- @param npc        NPC name
-- @param level      NPC level text
-- @param itemsTable item array as returned by parseItems
local function printItems(npc, level, itemsTable)
    table.sort(itemsTable, function(a, b) return a[1] < b[1]; end);

    -- The header carries the kill count stored with the first (lowest ID) item.
    local firstItem = itemsTable[1];
    if ( firstItem ) then
        print(npc.." "..level, firstItem[8][4]);
    end

    for _, item in ipairs(itemsTable) do
        local itemId = item[1];
        --local itemUrl = item[2];
        local itemName = item[3][1];
        local drops = item[8][3];
        local kills = item[8][4];
        if ( (not ITEMS and itemId < MAX_ITEM_ID) or (ITEMS and ITEMS[itemName]) ) then
            print(itemName, itemId, drops, drops/kills);
        end
    end
end

for _, url in ipairs(urls) do
    local page, code = fetchPage(url);
    if ( not page ) then
        io.stderr:write("failed to get web page; code == "..tostring(code).."\n");
        os.exit(1);
    end
    if ( code ~= 200 ) then
        -- Not fatal: the body is still examined exactly as before.
        io.stderr:write("warning: HTTP status "..tostring(code).." for "..url.."\n");
    end

    -- Fall back to placeholders so that a page with an unexpected layout does not
    -- abort the run with a nil concatenation error.
    local npc = page:match("<title>NPC : (.*) %- EQ %- Magelo</title>") or url;
    local level = page:match("Level : </span>(%S+)") or "?";

    local pos = page:find(DATA_MARKER, DATA_SEARCH_START, true);
    if ( not pos ) then
        print("no items for NPC "..npc.." "..level);
    else
        local itemsTable, err = parseItems(page, pos);
        if ( not itemsTable ) then
            io.stderr:write("failed to parse items for "..url..": "..tostring(err).."\n");
            os.exit(1);
        end
        printItems(npc, level, itemsTable);
    end
    print("");
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement