Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import de.bezier.data.sql.*;
- import http.requests.*;
- import java.util.regex.*;
- SQLite db;
- int profilePicIndex, tweetIndex, followingIndex, followersIndex, likesIndex, descriptionIndex;
- String imgContent, ppUrl, user, tweets, following, followers, likes, description, notExisting;
- Pattern urlPattern;
- Matcher m;
- PrintWriter output;
- int count = 1;
- void setup() {
- output = createWriter("userData.txt");
- notExisting = "";
- //En regex til at matche url'er
- urlPattern = Pattern.compile(
- "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)"
- + "(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*"
- + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)",
- Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
- db = new SQLite( this, "../medtek2017kode/data/tweets1.db" );
- if ( db.connect() ) {
- String Q = "select distinct user from user";
- db.query(Q);
- String urlMatcher;
- while (db.next ()) {
- count++;
- user = db.getString("user");
- print(user, " | ");
- //request til twitter.com/<brugernavn>
- GetRequest get = new GetRequest("https://twitter.com/"+user);
- get.send();
- //PROFILE PICTURE
- profilePicIndex = get.getContent().indexOf("<a class=\"ProfileAvatar-container u-block js-tooltip profile-picture\"");
- if (profilePicIndex < 0) {
- notExisting += user+", ";
- println();
- continue;
- }
- imgContent = get.getContent().substring(profilePicIndex, profilePicIndex+1000);
- //Java's pattern og matcher klasser bruges til at klippe billedets url ud af html indholdet.
- m = urlPattern.matcher(imgContent);
- while (m.find ()) {
- int matchStart = m.start(1);
- int matchEnd = m.end();
- ppUrl = imgContent.substring(matchStart, matchEnd);
- }
- //TWEETS
- tweetIndex = get.getContent().indexOf("<li class=\"ProfileNav-item ProfileNav-item--tweets is-active\">");
- tweets = (tweetIndex > -1) ? get.getContent().substring(tweetIndex+178, tweetIndex+211).replaceAll("[^0-9.]", "") : "0";
- //FOLLOWING
- followingIndex = get.getContent().indexOf("<li class=\"ProfileNav-item ProfileNav-item--following\">");
- following = (followingIndex > -1) ? get.getContent().substring(followingIndex+214, followingIndex+224).replaceAll("[^0-9.]", "") : "0";
- //FOLLOWERS
- followersIndex = get.getContent().indexOf("<li class=\"ProfileNav-item ProfileNav-item--followers\">");
- followers = (followersIndex > -1) ? get.getContent().substring(followersIndex+214, followersIndex+224).replaceAll("[^0-9.]", "") : "0";
- //LIKES
- likesIndex = get.getContent().indexOf("<li class=\"ProfileNav-item ProfileNav-item--favorites\" data-more-item=\".ProfileNav-dropdownItem--favorites\">");
- likes = (likesIndex > -1) ? get.getContent().substring(likesIndex+267, likesIndex+275).replaceAll("[^0-9.]", "") : "0";
- //DESCRIPTION
- descriptionIndex = get.getContent().indexOf("<p class=\"ProfileHeaderCard-bio");
- description = (descriptionIndex > -1) ? get.getContent().substring(descriptionIndex+62, descriptionIndex+2000) : null;
- description = description.substring(0, description.indexOf("</p>"));
- //<a href="/hashtag/dkpol?src=hash" data-query-source="hashtag_click" class="twitter-hashtag pretty-link js-nav" dir="ltr" ><s>#</s><b>dkpol</b></a>
- //FJERN STARTEN PĆ HASHTAG HVIS != NULL
- description = (description.length() < 2) ? null : "'" + description.replaceAll("<a href=\"/hashtag/", "#").replaceAll("<a href=\"/", "@").replaceAll("<a href=\"", "") + "'";
- String temp = "";
- if (description != null) {
- StringBuffer s = new StringBuffer(description);
- //while (s.indexOf ("?src=hash") > 0) {
- // s.replace(s.indexOf("?src=hash"), s.indexOf("?src=hash")+125, " ");
- //}
- //OMKSKRIV LINKS
- while (s.indexOf ("rel=\"nofollow noopener\" dir=\"ltr") > 0) {
- s.replace(s.indexOf("rel=\"nofollow noopener\" dir=\"ltr")-2, s.indexOf("rel=\"nofollow noopener\" dir=\"ltr")+331, " ");
- }
- //OMSKRIV LINKS TIL BRUGERE
- if (s.indexOf ("\" class=\"tweet-url twitter-atreply pretty-link\"") > 0) {
- String[][] matches = matchAll(s.toString(), "<s>@</s><b>(.*?)</b></a>");
- println();
- for (int i = 0; i < matches.length; i++) {
- println("Match: "+matches[i][0]);
- description = (s.substring(0, s.indexOf("\" class=\"tweet-url twitter-atreply pretty-link\""))+s.substring(s.indexOf(matches[i][0])+matches[i][0].length()));
- s = new StringBuffer(description);
- }
- }
- //FJERN EMOJI
- while (s.indexOf ("<img class=\"Emoji Emoji--forText\"") > 0) {
- description = (s.substring(0, s.indexOf("<img class=\"Emoji Emoji--forText\""))+s.substring(s.indexOf("Emoji:")+7));
- s = new StringBuffer(description.replace("\">", ""));
- }
- //OMKSKRIV RESTEN AF HASHTAGS
- while (s.indexOf ("?src=hash") > 0) {
- String[][] matches = matchAll(s.toString(), "<s>#</s><b>(.*?)</b></a>");
- println();
- for (int i = 0; i < matches.length; i++) {
- println("Match: "+matches[i][0]);
- description = (s.substring(0, s.indexOf("?src=hash"))+s.substring(s.indexOf(matches[i][0])+matches[i][0].length()));
- s = new StringBuffer(description);
- }
- }
- description = s.toString().replace(" ", " ");
- }
- println("User "+count+": "+user+"\n Profile img: "+ ppUrl + "\n Tweets: " +tweets + "\n Following: " +following + "\n Followers: " +followers + "\n Likes: " +likes + "\n Description: " +description);
- println("Not existing: "+ notExisting +"\n");
- output.println("INSERT INTO user1 VALUES ('"+ user +"', "+tweets+", "+following+", "+followers+", "+ likes+", "+description+", '"+ppUrl+"');");
- }
- output.flush(); // Writes the remaining data to the file
- output.close();
- exit();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement