Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.trungnt.linkedin;
- import java.io.BufferedReader;
- import java.io.DataInputStream;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.net.URLEncoder;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.HashSet;
- import java.util.LinkedHashSet;
- import java.util.List;
- import java.util.regex.Matcher;
- import net.sf.json.JSONArray;
- import net.sf.json.JSONObject;
- import net.sf.json.JSONSerializer;
- import org.apache.http.client.HttpClient;
- import org.apache.log4j.Logger;
- import com.trungnt.linkedin.dto.BaseExtractor;
- import com.trungnt.linkedin.dto.Contact;
- import com.trungnt.linkedin.dto.Experience;
- import com.trungnt.util.CSVHelper;
- import com.trungnt.util.LinkedInHelper;
- import com.trungnt.util.Util;
- public class PeopleSearchExtractor extends BaseExtractor{
- static Logger logger = Logger.getLogger(PeopleSearchExtractor.class);
- String threadID = "";
- public static boolean separatedRowForMutualConnection = !Messages.getString("ContactCollector.separatedRowForMutualConnection").startsWith("!");
- public static boolean newFile = !Messages.getString("ContactCollector.newFile").startsWith("!");
- public static boolean sameFile = !Messages.getString("ContactCollector.sameFile").startsWith("!");
- public static boolean excludeLinkedInMembers = !Messages.getString("ContactCollector.excludeLinkedInMembers").startsWith("!");
- public static boolean additionalEmails = !Messages.getString("ContactCollector.additionalEmails").startsWith("!");
- public static boolean debugMode = !Messages.getString("ContactCollector.debugMode").startsWith("!");
- public static boolean fromFile = !Messages.getString("ContactCollector.fromFile").startsWith("!");
- public static int toofrAcceptedConfidence = Messages.getString("ContactCollector.toofrAcceptedConfidence").startsWith("!")?0:Integer.parseInt(Messages.getString("ContactCollector.toofrAcceptedConfidence"));
- public boolean validToofr = false;
- public static boolean multipleSearches = !Messages.getString("ContactCollector.multipleSearches").startsWith("!");
- public static void main(String[] args){
- Util.updateProgram("https://www.dropbox.com/s/ec7618px99sul14/version.txt?raw=1", "https://www.dropbox.com/s/llsnf5gec2dwi7s/peoplesearch.jar?raw=1", "peoplesearch.jar");
- if (fromFile){
- new PeopleSearchExtractor().processFromFile(args);
- } else {
- new PeopleSearchExtractor().process(args);
- }
- }
- public void process(String[] args) {
- String username = "";
- String password = "";
- if (args.length >=2){
- username = args[0];
- password = args[1];
- threadID = args[2];
- } else {
- username = Messages.getString("ContactCollector.username"); //$NON-NLS-1$
- password = Messages.getString("ContactCollector.password"); //$NON-NLS-1$
- }
- HttpClient client = Util.getHTTPClient();
- int planId = skipLicense?Util.PLAN_PLUS:Util.hasValidLicense(username, password, Util.PRODUCT_SEARCH_CONTACT_EXPORT);
- if (planId == -1) return;
- try {
- LinkedInHelper.login(client, username, password);
- try{
- validToofr = (Util.hasToofrLicense(username)!=-1);
- FileInputStream fstream = new FileInputStream("input" + threadID + ".txt");
- // Get the object of DataInputStream
- DataInputStream in = new DataInputStream(fstream);
- BufferedReader br = new BufferedReader(new InputStreamReader(in));
- String searchURL;
- int count = 1;
- // Read File Line By Line
- while ((searchURL = br.readLine()) != null) {
- if (searchURL.startsWith("#")) continue;
- searchURL = searchURL.replaceAll(" ", "%20");
- String name = "";
- if (searchURL.indexOf("@@@") > -1){
- name = searchURL.split("@@@")[0];
- searchURL = searchURL.split("@@@")[1];
- }
- boolean hasNext = true;
- int page = 1;
- Matcher pageNumMatch = Util.parse(searchURL, "&page[^=]*=(\\d+)");
- if (pageNumMatch.find()){
- page = Integer.parseInt(pageNumMatch.group(1));
- }
- String originalSearchURL = searchURL.replaceAll("&page[^=]*=(\\d+)", "");
- HashSet<String> ids = new LinkedHashSet<String>();
- int fileCount = 1;
- if (!sameFile){
- if (!newFile){
- fileCount = count;
- } else {
- while (new File("output" + threadID + "-" + fileCount + ".csv").exists()){
- fileCount++;
- }
- }
- }
- while (hasNext){
- if (ids.size() > numberToDownload) break;
- System.out.println("Open search page:" + searchURL);
- String output = Util.getContent(client, searchURL);
- if (debugMode) logger.error(output);
- JSONArray arr = new JSONArray();
- String content = Util.getDataletContent(output, "/voyager/api/search/cluster");
- if (content == null){//v2
- content = Util.getDataletContent(output, "/voyager/api/search/blended");
- JSONObject outter = ((JSONObject) JSONSerializer.toJSON(content));
- JSONArray data = outter.getJSONObject("data").getJSONArray("elements");
- JSONObject searchCluster = Util.findJSONObjectWithAttribute(data, "type", "SEARCH_HITS");
- JSONArray results = Util.getJsonArray(searchCluster, "elements");
- for (int i=0; i<results.size(); i++){
- try{
- JSONObject obj = new JSONObject();
- JSONObject personObj = new JSONObject();
- personObj.put("link_nprofile_view_1", "https://www.linkedin.com/in/" + URLEncoder.encode(Util.getString(results.getJSONObject(i), "publicIdentifier"), "UTF-8"));
- personObj.put("fmt_name", Util.getString(results.getJSONObject(i).getJSONObject("title"), "text"));
- personObj.put("fmt_headline", Util.getString(results.getJSONObject(i).getJSONObject("headline"), "text"));
- obj.put("person", personObj);
- arr.add(obj);
- } catch (Exception e){
- logger.error(e.getMessage());
- }
- }
- } else {
- JSONObject outter = ((JSONObject) JSONSerializer.toJSON(content));
- JSONArray includes = outter.getJSONArray("included");
- JSONObject searchCluster = Util.findJSONObjectWithAttribute(includes, "$type", "com.linkedin.voyager.search.SearchCluster");
- JSONArray results = Util.getJsonArray(searchCluster, "elements");
- for (int i=0; i<results.size(); i++){
- JSONObject hitInfo = Util.findJSONObjectWithAttribute(includes, "$id", results.getString(i)).getJSONObject("hitInfo");
- if (hitInfo != null){
- JSONObject profileObj = Util.findJSONObjectWithAttribute(includes, "$id", Util.getString(hitInfo, "com.linkedin.voyager.search.SearchProfile"));
- String miniProfile = Util.getString(profileObj, "miniProfile");
- JSONObject profileObj2 = Util.findJSONObjectWithAttribute(includes, "entityUrn", miniProfile);
- if (profileObj2 != null){
- JSONObject obj = new JSONObject();
- JSONObject personObj = new JSONObject();
- personObj.put("link_nprofile_view_1", "https://www.linkedin.com/in/" + URLEncoder.encode(Util.getString(profileObj2, "publicIdentifier"), "UTF-8"));
- personObj.put("fmt_name", Util.getString(profileObj2, "firstName") + " " + Util.getString(profileObj2, "lastName"));
- personObj.put("fmt_headline", Util.getString(profileObj2, "occupation"));
- obj.put("person", personObj);
- arr.add(obj);
- }
- }
- }
- }
- //result in Json
- hasNext = false;
- searchURL = originalSearchURL + "&page=" + ++page; //http://www.linkedin.com" + search.getJSONObject("baseData").getJSONObject("resultPagination").getJSONObject("nextPage").getString("pageURL");
- for (int i=0; i<arr.size(); i++){
- try{
- if (ids.size() > numberToDownload) break;
- if (!arr.getJSONObject(i).has("person")) continue;
- hasNext = true;
- String url = Util.createURL("https://www.linkedin.com", Util.getProfileURL(arr.getJSONObject(i).getJSONObject("person")));
- String id = URLEncoder.encode(url.substring(url.lastIndexOf("/") + 1).split("&")[0]);
- if (ids.contains(id)){
- continue;
- }
- if (multipleSearches){
- if (new File("done/" + id).exists()){
- System.out.println("Downloaded contact with ID=" + id);
- continue;
- }
- }
- ids.add(id);
- Contact contact = createContact(planId);
- contact.hasToofr = validToofr;
- contact.toofrAcceptedConfidence = toofrAcceptedConfidence;
- contact.hasCompanyURL = contact.hasCompanyURL || contact.hasToofr;
- contact.inputURL = originalSearchURL;
- contact.fullName = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_name");
- if ("".equals(contact.fullName)){
- contact.fullName = "LinkedIn Member";
- }
- if (excludeLinkedInMembers)
- if ("LinkedIn Member".equals(contact.fullName)) continue;
- if (contact.fullName.indexOf(' ') > -1){
- contact.firstName = contact.fullName.substring(0, contact.fullName.indexOf(' '));
- contact.lastName = contact.fullName.substring(contact.fullName.indexOf(' ') + 1).trim();
- } else {
- contact.firstName = contact.fullName;
- }
- if (planId == Util.PLAN_BASIC){
- System.out.println("Extract info for:" + contact.fullName);
- }
- contact.headline = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_headline");
- contact.industry = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_industry");
- contact.location = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_location");
- contact.distance = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "distance");
- contact.linkedInURL = url.split("&")[0];
- if (planId >= Util.PLAN_PLUS){
- Util.parseContactNew(contact, url , client);
- Thread.sleep(delayMin + rand.nextInt(delay - delayMin));
- if (multipleSearches){
- CSVHelper.writeLine("done/" + id, new String[]{contact.fullName, contact.linkedInURL});
- }
- }
- if (! new File("output" + threadID + "-" + fileCount + ".csv").exists()){
- String[] out = contact.toCSVHeader();
- if (!"".equals(name)){
- List<String> wordList = new ArrayList<String>(Arrays.asList(out));
- wordList.add(0, "Person");
- out = wordList.toArray(new String[0]);
- }
- CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", out);
- }
- if (separatedRowForMutualConnection){
- String[] mutualConnections = contact.mutualText.split("\n");
- for (String mutualConnection:mutualConnections){
- contact.mutualText = mutualConnection;
- CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVValues());
- }
- } else {
- String[] out = contact.toCSVValues();
- if (!"".equals(name)){
- List<String> wordList = new ArrayList<String>(Arrays.asList(out));
- wordList.add(0, name);
- out = wordList.toArray(new String[0]);
- }
- CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", out);
- }
- if (additionalEmails){
- for (Experience experience:contact.experienceArr){
- try{
- if ("".equals(experience.company.website)) continue;
- String domain = experience.company.website.replaceAll("http(s)*://www\\.", "").split("/")[0];
- String firstName = contact.firstName.split(",")[0];
- String lastName = contact.lastName.split(",")[0];
- if (lastName.lastIndexOf(' ') > -1){
- lastName = lastName.substring(lastName.lastIndexOf(' ') + 1);
- }
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, contact.email, contact.phone});
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName + "." + lastName + "@" + domain, contact.phone});
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, lastName + "." + firstName + "@" + domain, contact.phone});
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName.substring(0, 1) + lastName + "@" + domain, contact.phone});
- break;
- } catch (Exception e){
- }
- }
- }
- } catch (Exception e){
- logger.error(e.getMessage());
- }
- }
- }
- count++;
- }
- in.close();
- } catch (Exception e){
- logger.error(e.getMessage());
- }
- } catch (IOException e) {
- logger.error(e.getMessage());
- }
- }
- public void processFromFile(String[] args) {
- String username = "";
- String password = "";
- if (args.length >=2){
- username = args[0];
- password = args[1];
- threadID = args[2];
- } else {
- username = Messages.getString("ContactCollector.username"); //$NON-NLS-1$
- password = Messages.getString("ContactCollector.password"); //$NON-NLS-1$
- }
- HttpClient client = Util.getHTTPClient();
- int planId = Util.hasValidLicense(username, password, Util.PRODUCT_SEARCH_CONTACT_EXPORT);
- if ((!skipLicense) && (planId == -1)) return;
- try {
- LinkedInHelper.login(client, username, password);
- try{
- FileInputStream fstream = new FileInputStream("input" + threadID + ".txt");
- // Get the object of DataInputStream
- DataInputStream in = new DataInputStream(fstream);
- BufferedReader br = new BufferedReader(new InputStreamReader(in));
- String searchURL;
- int count = 1;
- // Read File Line By Line
- while ((searchURL = br.readLine()) != null) {
- if (searchURL.startsWith("#")) continue;
- int fileCount = 1;
- searchURL = searchURL.replaceAll(" ", "%20");
- Contact contact = createContact(planId);
- Util.parseContactNew(contact, searchURL , client);
- Thread.sleep(delayMin + rand.nextInt(delay - delayMin));
- if (! new File("output" + threadID + "-" + fileCount + ".csv").exists()){
- CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVHeader());
- }
- if (separatedRowForMutualConnection){
- String[] mutualConnections = contact.mutualText.split("\n");
- for (String mutualConnection:mutualConnections){
- contact.mutualText = mutualConnection;
- CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVValues());
- }
- } else {
- CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVValues());
- }
- if (additionalEmails){
- for (Experience experience:contact.experienceArr){
- try{
- if ("".equals(experience.company.website)) continue;
- String domain = experience.company.website.replaceAll("http(s)*://www\\.", "").split("/")[0];
- String firstName = contact.firstName.split(",")[0];
- String lastName = contact.lastName.split(",")[0];
- if (lastName.lastIndexOf(' ') > -1){
- lastName = lastName.substring(lastName.lastIndexOf(' ') + 1);
- }
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, contact.email, contact.phone});
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName + "." + lastName + "@" + domain, contact.phone});
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, lastName + "." + firstName + "@" + domain, contact.phone});
- CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName.substring(0, 1) + lastName + "@" + domain, contact.phone});
- break;
- } catch (Exception e){
- }
- }
- }
- count++;
- }
- in.close();
- } catch (Exception e){
- logger.error(e.getMessage());
- }
- } catch (IOException e) {
- logger.error(e.getMessage());
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement