Advertisement
Guest User

Untitled

a guest
Sep 10th, 2018
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.67 KB | None | 0 0
  1. package com.trungnt.linkedin;
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.DataInputStream;
  5. import java.io.File;
  6. import java.io.FileInputStream;
  7. import java.io.IOException;
  8. import java.io.InputStreamReader;
  9. import java.net.URLEncoder;
  10. import java.util.ArrayList;
  11. import java.util.Arrays;
  12. import java.util.HashSet;
  13. import java.util.LinkedHashSet;
  14. import java.util.List;
  15. import java.util.regex.Matcher;
  16.  
  17. import net.sf.json.JSONArray;
  18. import net.sf.json.JSONObject;
  19. import net.sf.json.JSONSerializer;
  20.  
  21. import org.apache.http.client.HttpClient;
  22. import org.apache.log4j.Logger;
  23.  
  24. import com.trungnt.linkedin.dto.BaseExtractor;
  25. import com.trungnt.linkedin.dto.Contact;
  26. import com.trungnt.linkedin.dto.Experience;
  27. import com.trungnt.util.CSVHelper;
  28. import com.trungnt.util.LinkedInHelper;
  29. import com.trungnt.util.Util;
  30. public class PeopleSearchExtractor extends BaseExtractor{
  31. static Logger logger = Logger.getLogger(PeopleSearchExtractor.class);
  32. String threadID = "";
  33. public static boolean separatedRowForMutualConnection = !Messages.getString("ContactCollector.separatedRowForMutualConnection").startsWith("!");
  34. public static boolean newFile = !Messages.getString("ContactCollector.newFile").startsWith("!");
  35. public static boolean sameFile = !Messages.getString("ContactCollector.sameFile").startsWith("!");
  36. public static boolean excludeLinkedInMembers = !Messages.getString("ContactCollector.excludeLinkedInMembers").startsWith("!");
  37. public static boolean additionalEmails = !Messages.getString("ContactCollector.additionalEmails").startsWith("!");
  38. public static boolean debugMode = !Messages.getString("ContactCollector.debugMode").startsWith("!");
  39. public static boolean fromFile = !Messages.getString("ContactCollector.fromFile").startsWith("!");
  40. public static int toofrAcceptedConfidence = Messages.getString("ContactCollector.toofrAcceptedConfidence").startsWith("!")?0:Integer.parseInt(Messages.getString("ContactCollector.toofrAcceptedConfidence"));
  41. public boolean validToofr = false;
  42. public static boolean multipleSearches = !Messages.getString("ContactCollector.multipleSearches").startsWith("!");
  43. public static void main(String[] args){
  44. Util.updateProgram("https://www.dropbox.com/s/ec7618px99sul14/version.txt?raw=1", "https://www.dropbox.com/s/llsnf5gec2dwi7s/peoplesearch.jar?raw=1", "peoplesearch.jar");
  45. if (fromFile){
  46. new PeopleSearchExtractor().processFromFile(args);
  47. } else {
  48. new PeopleSearchExtractor().process(args);
  49. }
  50. }
  51. public void process(String[] args) {
  52. String username = "";
  53. String password = "";
  54. if (args.length >=2){
  55. username = args[0];
  56. password = args[1];
  57. threadID = args[2];
  58. } else {
  59. username = Messages.getString("ContactCollector.username"); //$NON-NLS-1$
  60. password = Messages.getString("ContactCollector.password"); //$NON-NLS-1$
  61. }
  62. HttpClient client = Util.getHTTPClient();
  63. int planId = skipLicense?Util.PLAN_PLUS:Util.hasValidLicense(username, password, Util.PRODUCT_SEARCH_CONTACT_EXPORT);
  64. if (planId == -1) return;
  65. try {
  66. LinkedInHelper.login(client, username, password);
  67. try{
  68. validToofr = (Util.hasToofrLicense(username)!=-1);
  69. FileInputStream fstream = new FileInputStream("input" + threadID + ".txt");
  70. // Get the object of DataInputStream
  71. DataInputStream in = new DataInputStream(fstream);
  72. BufferedReader br = new BufferedReader(new InputStreamReader(in));
  73. String searchURL;
  74. int count = 1;
  75. // Read File Line By Line
  76. while ((searchURL = br.readLine()) != null) {
  77. if (searchURL.startsWith("#")) continue;
  78. searchURL = searchURL.replaceAll(" ", "%20");
  79. String name = "";
  80. if (searchURL.indexOf("@@@") > -1){
  81. name = searchURL.split("@@@")[0];
  82. searchURL = searchURL.split("@@@")[1];
  83. }
  84. boolean hasNext = true;
  85. int page = 1;
  86. Matcher pageNumMatch = Util.parse(searchURL, "&page[^=]*=(\\d+)");
  87. if (pageNumMatch.find()){
  88. page = Integer.parseInt(pageNumMatch.group(1));
  89. }
  90. String originalSearchURL = searchURL.replaceAll("&page[^=]*=(\\d+)", "");
  91. HashSet<String> ids = new LinkedHashSet<String>();
  92. int fileCount = 1;
  93. if (!sameFile){
  94. if (!newFile){
  95. fileCount = count;
  96. } else {
  97. while (new File("output" + threadID + "-" + fileCount + ".csv").exists()){
  98. fileCount++;
  99. }
  100. }
  101. }
  102. while (hasNext){
  103. if (ids.size() > numberToDownload) break;
  104. System.out.println("Open search page:" + searchURL);
  105. String output = Util.getContent(client, searchURL);
  106. if (debugMode) logger.error(output);
  107. JSONArray arr = new JSONArray();
  108. String content = Util.getDataletContent(output, "/voyager/api/search/cluster");
  109. if (content == null){//v2
  110. content = Util.getDataletContent(output, "/voyager/api/search/blended");
  111. JSONObject outter = ((JSONObject) JSONSerializer.toJSON(content));
  112. JSONArray data = outter.getJSONObject("data").getJSONArray("elements");
  113. JSONObject searchCluster = Util.findJSONObjectWithAttribute(data, "type", "SEARCH_HITS");
  114. JSONArray results = Util.getJsonArray(searchCluster, "elements");
  115. for (int i=0; i<results.size(); i++){
  116. try{
  117. JSONObject obj = new JSONObject();
  118. JSONObject personObj = new JSONObject();
  119. personObj.put("link_nprofile_view_1", "https://www.linkedin.com/in/" + URLEncoder.encode(Util.getString(results.getJSONObject(i), "publicIdentifier"), "UTF-8"));
  120. personObj.put("fmt_name", Util.getString(results.getJSONObject(i).getJSONObject("title"), "text"));
  121. personObj.put("fmt_headline", Util.getString(results.getJSONObject(i).getJSONObject("headline"), "text"));
  122. obj.put("person", personObj);
  123. arr.add(obj);
  124. } catch (Exception e){
  125. logger.error(e.getMessage());
  126. }
  127. }
  128.  
  129. } else {
  130. JSONObject outter = ((JSONObject) JSONSerializer.toJSON(content));
  131. JSONArray includes = outter.getJSONArray("included");
  132. JSONObject searchCluster = Util.findJSONObjectWithAttribute(includes, "$type", "com.linkedin.voyager.search.SearchCluster");
  133. JSONArray results = Util.getJsonArray(searchCluster, "elements");
  134. for (int i=0; i<results.size(); i++){
  135. JSONObject hitInfo = Util.findJSONObjectWithAttribute(includes, "$id", results.getString(i)).getJSONObject("hitInfo");
  136. if (hitInfo != null){
  137. JSONObject profileObj = Util.findJSONObjectWithAttribute(includes, "$id", Util.getString(hitInfo, "com.linkedin.voyager.search.SearchProfile"));
  138. String miniProfile = Util.getString(profileObj, "miniProfile");
  139. JSONObject profileObj2 = Util.findJSONObjectWithAttribute(includes, "entityUrn", miniProfile);
  140. if (profileObj2 != null){
  141. JSONObject obj = new JSONObject();
  142. JSONObject personObj = new JSONObject();
  143. personObj.put("link_nprofile_view_1", "https://www.linkedin.com/in/" + URLEncoder.encode(Util.getString(profileObj2, "publicIdentifier"), "UTF-8"));
  144. personObj.put("fmt_name", Util.getString(profileObj2, "firstName") + " " + Util.getString(profileObj2, "lastName"));
  145. personObj.put("fmt_headline", Util.getString(profileObj2, "occupation"));
  146. obj.put("person", personObj);
  147. arr.add(obj);
  148. }
  149. }
  150. }
  151. }
  152. //result in Json
  153. hasNext = false;
  154. searchURL = originalSearchURL + "&page=" + ++page; //http://www.linkedin.com" + search.getJSONObject("baseData").getJSONObject("resultPagination").getJSONObject("nextPage").getString("pageURL");
  155. for (int i=0; i<arr.size(); i++){
  156. try{
  157. if (ids.size() > numberToDownload) break;
  158. if (!arr.getJSONObject(i).has("person")) continue;
  159. hasNext = true;
  160. String url = Util.createURL("https://www.linkedin.com", Util.getProfileURL(arr.getJSONObject(i).getJSONObject("person")));
  161. String id = URLEncoder.encode(url.substring(url.lastIndexOf("/") + 1).split("&")[0]);
  162. if (ids.contains(id)){
  163. continue;
  164. }
  165. if (multipleSearches){
  166. if (new File("done/" + id).exists()){
  167. System.out.println("Downloaded contact with ID=" + id);
  168. continue;
  169. }
  170. }
  171.  
  172. ids.add(id);
  173. Contact contact = createContact(planId);
  174. contact.hasToofr = validToofr;
  175. contact.toofrAcceptedConfidence = toofrAcceptedConfidence;
  176. contact.hasCompanyURL = contact.hasCompanyURL || contact.hasToofr;
  177. contact.inputURL = originalSearchURL;
  178. contact.fullName = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_name");
  179. if ("".equals(contact.fullName)){
  180. contact.fullName = "LinkedIn Member";
  181. }
  182. if (excludeLinkedInMembers)
  183. if ("LinkedIn Member".equals(contact.fullName)) continue;
  184. if (contact.fullName.indexOf(' ') > -1){
  185. contact.firstName = contact.fullName.substring(0, contact.fullName.indexOf(' '));
  186. contact.lastName = contact.fullName.substring(contact.fullName.indexOf(' ') + 1).trim();
  187. } else {
  188. contact.firstName = contact.fullName;
  189. }
  190. if (planId == Util.PLAN_BASIC){
  191. System.out.println("Extract info for:" + contact.fullName);
  192. }
  193. contact.headline = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_headline");
  194. contact.industry = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_industry");
  195. contact.location = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "fmt_location");
  196. contact.distance = Util.getString(arr.getJSONObject(i).getJSONObject("person"), "distance");
  197. contact.linkedInURL = url.split("&")[0];
  198. if (planId >= Util.PLAN_PLUS){
  199. Util.parseContactNew(contact, url , client);
  200. Thread.sleep(delayMin + rand.nextInt(delay - delayMin));
  201. if (multipleSearches){
  202. CSVHelper.writeLine("done/" + id, new String[]{contact.fullName, contact.linkedInURL});
  203. }
  204.  
  205. }
  206. if (! new File("output" + threadID + "-" + fileCount + ".csv").exists()){
  207. String[] out = contact.toCSVHeader();
  208. if (!"".equals(name)){
  209. List<String> wordList = new ArrayList<String>(Arrays.asList(out));
  210. wordList.add(0, "Person");
  211. out = wordList.toArray(new String[0]);
  212.  
  213. }
  214. CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", out);
  215. }
  216. if (separatedRowForMutualConnection){
  217. String[] mutualConnections = contact.mutualText.split("\n");
  218. for (String mutualConnection:mutualConnections){
  219. contact.mutualText = mutualConnection;
  220. CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVValues());
  221. }
  222. } else {
  223. String[] out = contact.toCSVValues();
  224. if (!"".equals(name)){
  225. List<String> wordList = new ArrayList<String>(Arrays.asList(out));
  226. wordList.add(0, name);
  227. out = wordList.toArray(new String[0]);
  228.  
  229. }
  230. CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", out);
  231. }
  232. if (additionalEmails){
  233. for (Experience experience:contact.experienceArr){
  234. try{
  235. if ("".equals(experience.company.website)) continue;
  236. String domain = experience.company.website.replaceAll("http(s)*://www\\.", "").split("/")[0];
  237. String firstName = contact.firstName.split(",")[0];
  238. String lastName = contact.lastName.split(",")[0];
  239. if (lastName.lastIndexOf(' ') > -1){
  240. lastName = lastName.substring(lastName.lastIndexOf(' ') + 1);
  241. }
  242. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, contact.email, contact.phone});
  243. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName + "." + lastName + "@" + domain, contact.phone});
  244. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, lastName + "." + firstName + "@" + domain, contact.phone});
  245. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName.substring(0, 1) + lastName + "@" + domain, contact.phone});
  246. break;
  247. } catch (Exception e){
  248.  
  249. }
  250. }
  251. }
  252.  
  253. } catch (Exception e){
  254. logger.error(e.getMessage());
  255. }
  256. }
  257. }
  258. count++;
  259. }
  260. in.close();
  261. } catch (Exception e){
  262. logger.error(e.getMessage());
  263. }
  264. } catch (IOException e) {
  265. logger.error(e.getMessage());
  266.  
  267. }
  268.  
  269. }
  270. public void processFromFile(String[] args) {
  271. String username = "";
  272. String password = "";
  273. if (args.length >=2){
  274. username = args[0];
  275. password = args[1];
  276. threadID = args[2];
  277. } else {
  278. username = Messages.getString("ContactCollector.username"); //$NON-NLS-1$
  279. password = Messages.getString("ContactCollector.password"); //$NON-NLS-1$
  280. }
  281. HttpClient client = Util.getHTTPClient();
  282. int planId = Util.hasValidLicense(username, password, Util.PRODUCT_SEARCH_CONTACT_EXPORT);
  283. if ((!skipLicense) && (planId == -1)) return;
  284. try {
  285. LinkedInHelper.login(client, username, password);
  286. try{
  287. FileInputStream fstream = new FileInputStream("input" + threadID + ".txt");
  288. // Get the object of DataInputStream
  289. DataInputStream in = new DataInputStream(fstream);
  290. BufferedReader br = new BufferedReader(new InputStreamReader(in));
  291. String searchURL;
  292. int count = 1;
  293. // Read File Line By Line
  294. while ((searchURL = br.readLine()) != null) {
  295. if (searchURL.startsWith("#")) continue;
  296. int fileCount = 1;
  297. searchURL = searchURL.replaceAll(" ", "%20");
  298. Contact contact = createContact(planId);
  299. Util.parseContactNew(contact, searchURL , client);
  300. Thread.sleep(delayMin + rand.nextInt(delay - delayMin));
  301. if (! new File("output" + threadID + "-" + fileCount + ".csv").exists()){
  302. CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVHeader());
  303. }
  304. if (separatedRowForMutualConnection){
  305. String[] mutualConnections = contact.mutualText.split("\n");
  306. for (String mutualConnection:mutualConnections){
  307. contact.mutualText = mutualConnection;
  308. CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVValues());
  309. }
  310. } else {
  311. CSVHelper.writeLine("output" + threadID + "-" + fileCount + ".csv", contact.toCSVValues());
  312. }
  313. if (additionalEmails){
  314. for (Experience experience:contact.experienceArr){
  315. try{
  316. if ("".equals(experience.company.website)) continue;
  317. String domain = experience.company.website.replaceAll("http(s)*://www\\.", "").split("/")[0];
  318. String firstName = contact.firstName.split(",")[0];
  319. String lastName = contact.lastName.split(",")[0];
  320. if (lastName.lastIndexOf(' ') > -1){
  321. lastName = lastName.substring(lastName.lastIndexOf(' ') + 1);
  322. }
  323. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, contact.email, contact.phone});
  324. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName + "." + lastName + "@" + domain, contact.phone});
  325. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, lastName + "." + firstName + "@" + domain, contact.phone});
  326. CSVHelper.writeLine("outputAdditional.csv", new String[]{firstName, lastName, contact.headline, experience.companyName, contact.location, contact.industry, contact.numberOfConnections, firstName.substring(0, 1) + lastName + "@" + domain, contact.phone});
  327. break;
  328. } catch (Exception e){
  329.  
  330. }
  331. }
  332. }
  333. count++;
  334. }
  335. in.close();
  336. } catch (Exception e){
  337. logger.error(e.getMessage());
  338. }
  339. } catch (IOException e) {
  340. logger.error(e.getMessage());
  341.  
  342. }
  343.  
  344. }
  345. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement