Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.util.Date;
- import java.text.DateFormat;
- import java.text.SimpleDateFormat;
- import java.text.ParseException;
- import java.io.*;
- /*
- * - add image.
- * - add link.
- */
- class Post implements Comparable<Post>
- {
- public final Integer THREAD_ID;
- public final Integer POST_ID;
- public final Date DATE;
- public final String USER_NAME;
- public final String USER_ID;
- public final String TEXT;
- private static DateFormat dateFt =
- new SimpleDateFormat("d/MMM/yyyy HH:mm:ss");
- public Post(int t_id,int p_id,Date d,String uname,String u_id,String txt)
- {
- THREAD_ID = t_id;
- POST_ID = p_id;
- DATE = d;
- USER_NAME = uname;
- USER_ID = u_id;
- TEXT = txt;
- }
- // ------------------
- boolean isThread() {
- return THREAD_ID.equals(POST_ID);
- }
- public static Post fromCSV(String line)
- {
- String[] f = line.split(",",6);
- Date date;
- try {
- date = dateFt.parse(f[2]);
- }
- catch (ParseException e) {
- date = null;
- System.err.println(e.getMessage());
- }
- return new Post(
- Integer.valueOf(f[0]),
- Integer.valueOf(f[1]),
- date,
- f[3],
- f[4],
- f[5]
- );
- }
- public String toCSV()
- {
- return THREAD_ID+","+POST_ID+","+dateFt.format(DATE)+","
- +USER_NAME+","+USER_ID+"," +TEXT+"\n";
- }
- /* Date, and then post number. */
- public int compareTo(Post o)
- {
- int dcomp = (o.DATE).compareTo(DATE);
- if (dcomp != 0)
- return dcomp;
- return (o.POST_ID).compareTo(POST_ID);
- }
- public boolean equals(Object o)
- {
- if (o instanceof Post) {
- return (this.compareTo(((Post)o)) == 0);
- //return POST_ID.equals(((Post)o).POST_ID);
- } else
- return false;
- }
- public String toString()
- {
- return THREAD_ID + "/" + POST_ID + ", " + dateFt.format(DATE) +
- ", " + USER_NAME + " " + USER_ID + ": " + "\n" + TEXT;
- }
- }
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.SortedSet;
- import java.util.Date;
- import java.util.TimeZone;
- import java.util.List;
- import java.util.ArrayList;
- import java.io.*;
- import java.nio.file.Path;
- import java.text.DateFormat;
- import java.text.SimpleDateFormat;
- import java.text.ParseException;
- //import org.w3c.dom.Document;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- class Board
- {
- String BOARD_ID;
- String BOARD_URL;
- long VISIT_WAIT = 800; // milli
- private Post previousNew; // sort of 'sliding window'
- private Post actualNew;
- private Post lastRead;
- private String saveFile = null;
- private int NUM_PAGES;
- private SortedSet<Post> boardPosts = new java.util.TreeSet<>();
- Board(String _url,String _id)
- {
- this(_url);
- BOARD_ID = _id;
- saveFile = _id + ".csv";
- Path path = java.nio.file.Paths.get(saveFile);
- if (java.nio.file.Files.exists(path))
- {
- BufferedReader in = null;
- try {
- in = java.nio.file.Files.newBufferedReader(path);
- String line;
- while ((line = in.readLine()) != null) {
- Post post = Post.fromCSV(line);
- addPost(post);
- }
- } catch (IOException e) {
- System.err.println(e.getMessage());
- } finally {
- try {
- if (in != null)
- in.close();
- }
- catch (IOException e) {
- System.err.println("Error closing file");
- }
- }
- }
- }
- Board(String _url)
- {
- // TODO: throw exception here
- BOARD_URL = _url;
- numPages = numberOfPages();
- }
- // ---------------
- /* Visit board page. Get new posts. */
- boolean update() throws IOException
- {
- //System.out.println("Updating " + BOARD_URL + "...");
- boolean changed = false;
- Writer out = null;
- try {
- Document doc = Jsoup.connect(BOARD_URL).get();
- Elements threads =
- doc.getElementsByAttributeValueMatching("id",
- "^thread\\d+[a-z]+$");
- if (saveFile != null)
- out = new FileWriter(saveFile,true);
- for (Element elem : threads)
- {
- List<Post> posts = visit(Integer.valueOf(
- elem
- .id()
- .replaceAll("[a-z]+",""))
- );
- try { Thread.sleep(VISIT_WAIT); }
- catch (InterruptedException e) { }
- for (Post p : posts)
- {
- addPost(p);
- String pStr = p + "\n";
- if (!BOARD_ID.isEmpty())
- pStr = BOARD_ID + "/" + pStr;
- if (p.isThread())
- pStr = "-> " + pStr;
- System.out.println(pStr);
- if (out != null)
- out.write(p.toCSV());
- changed = true;
- }
- }
- }
- catch (IOException e) {
- System.err.println("Error in " + BOARD_URL);
- throw e;
- }
- finally {
- if (out != null)
- out.close();
- }
- return changed;
- }
- /* Visit thread page. Get new posts. */
- private List<Post> visit(int threadId) throws IOException
- {
- String url = BOARD_URL + "/res/" + threadId + ".html";
- //System.out.println("Visiting " + url);
- List<Post> posts;
- try {
- Document doc = Jsoup.connect(url).get();
- Elements threadPosts =
- doc.getElementsByAttributeValueMatching("id",
- "^(thread\\d+[a-z]+)|(reply\\d+)$");
- posts = new ArrayList<Post>();
- // iterate backwards
- for (int i=threadPosts.size()-1; i >= 0; i--)
- {
- Element elem = threadPosts.get(i);
- Post p = parsePost(elem,threadId);
- if (exists(p))
- return posts;
- posts.add(p);
- }
- }
- catch (IOException e) {
- System.err.println("Error in " + e);
- throw e;
- }
- return posts;
- }
- /* Extract from DOM node. */
- private Post parsePost(Element elem,int threadId)
- {
- String numberText = elem
- .getElementsByAttributeValueMatching("name","\\d+").first()
- .attr("name");
- //.getElementsByTag("a").first()
- int postId = Integer.valueOf(numberText);
- //System.out.println(postId);
- String postText = elem
- .getElementsByTag("blockquote")
- .first()
- .child(0)
- .text(); // not ownText because of links
- String userId = elem.ownText();;
- if (userId.matches("^ID: [a-f0-9]{6}"))
- userId = userId.replaceAll("^ID: ","");
- else
- userId = "";
- String date = elem
- .getElementsByTag("label")
- .first()
- .ownText()
- .replaceAll("^[A-Z][a-z]{2} ","");
- Date postDate;
- try {
- DateFormat df = new SimpleDateFormat("d/MMM/yyyy HH:mm:ss");
- df.setTimeZone(TimeZone.getTimeZone("Europe/Lisbon"));
- postDate = df.parse(date);
- }
- catch (ParseException e) {
- System.err.println(e.getMessage());
- postDate = null;
- }
- String userName = elem
- .getElementsByClass("postername")
- .first()
- .text(); // not ownText because of links
- return new Post(
- threadId,
- postId,
- postDate,
- userName,
- userId,
- postText
- );
- }
- private int numberOfPages()
- {
- int lastPage = 0;
- try {
- Document doc = Jsoup.connect(BOARD_URL).get();
- lastPage = Integer.valueOf(
- doc
- .select("td a")
- .select("[href~=^/[a-z]+/\\d+.html$]")
- .select(":matchesOwn(\\d+)")
- .last()
- .ownText());
- } catch (IOException e ) {
- e.getMessage();
- }
- return lastPage + 1;
- }
- private void addPost(Post post)
- {
- boardPosts.add(post);
- }
- private boolean exists(Post post)
- {
- return boardPosts.contains(post);
- }
- public void print()
- {
- for (Post p : boardPosts)
- System.out.println(p);
- }
- public void print(int n)
- {
- for (Post p : latest(n))
- System.out.println(p);
- }
- public List<Post> latest(int n)
- {
- List<Post> l = new ArrayList<>();
- int i = 0;
- for (Post p : boardPosts) {
- l.add(p);
- if (i++ == n)
- break;
- }
- return l;
- }
- public boolean equals(Object o)
- {
- if (o instanceof Board) {
- return BOARD_URL.equals(((Board)o).BOARD_URL);
- } else
- return false;
- }
- public void statistics()
- {
- System.out.println("No. posts: " + boardPosts.size());
- }
- public static void main(String[] args)
- {
- Board board = new Board("http://ptchan.net/b");
- try {
- board.update();
- board.print(5);
- }
- catch (IOException e) {
- System.err.println(e.getMessage());
- }
- }
- }
- import java.util.Set;
- import java.util.List;
- import java.util.ArrayList;
- import java.util.Queue;
- import java.util.PriorityQueue;
- import java.io.IOException;
- public class Imageboard
- {
- private Set<Board> boards = new java.util.HashSet<>();
- private long BOARD_WAIT = 800;
- public static void main(String[] args)
- {
- Imageboard img = new Imageboard();
- img.register("http://ptchan.net/a", "a" );
- img.register("http://ptchan.net/b", "b" );
- img.register("http://ptchan.net/con", "con" );
- img.register("http://ptchan.net/c", "c" );
- img.register("http://ptchan.net/cu", "cu" );
- img.register("http://ptchan.net/des", "des" );
- img.register("http://ptchan.net/dis", "dis" );
- img.register("http://ptchan.net/fit", "fit" );
- img.register("http://ptchan.net/o", "o" );
- img.register("http://ptchan.net/t", "t" );
- img.register("http://ptchan.net/u", "u" );
- img.register("http://ptchan.net/xxx", "xxx" );
- img.register("http://ptchan.net/int", "int" );
- img.register("http://ptchan.net/pt", "pt" );
- img.register("http://ptchan.net/meta", "meta" );
- while (true)
- {
- img.update();
- }
- }
- public void update()
- {
- for (Board b : boards) {
- try {
- boolean changed = b.update();
- } catch (IOException e) {
- System.out.println(e.getMessage());
- }
- try { Thread.sleep(BOARD_WAIT); }
- catch (InterruptedException e) { }
- }
- }
- public void register(String url,String name)
- {
- boards.add(new Board(url,name));
- }
- public List<Post> latest(int n)
- {
- Queue<Post> queue = new PriorityQueue<>();
- for (Board b : boards)
- queue.addAll(b.latest(n));
- List<Post> list = new ArrayList<>();
- while (n-- > 0)
- list.add(queue.poll());
- return list;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement