Advertisement
Guest User

Untitled

a guest
Oct 7th, 2017
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 18.39 KB | None | 0 0
  1. // Below is my code and the error I am getting:
  2.  
  3. public class ReaderLinkedin extends ActiveBatchBusinessService {
  4.  
  5. private static Logger log = Logger.getLogger(ReaderLinkedin.class);
  6. String temp;
  7. Pattern jsonCntP = null;
  8. Matcher jsonCntPMatch = null;
  9. Matcher rsidMatch = null;
  10. HttpsURLConnection loginRequest = null;
  11. HttpURLConnection homeRequest, searchRequest, loginRedirectRequest = null;
  12. Vector<String> LinkedInCookies = new Vector<String>();
  13.  
  14. /*
  15. * Replace the username and password here
  16. */
  17. String userName = "Username";
  18. String password = "password";
  19. HashMap<Integer, String> industryCodeMap = new HashMap<Integer, String>();
  20.  
  21. public static void main(String... args) {
  22. PropertyConfigurator.configure("log4j/log4j.properties");
  23. for (SocialDataLI obj : new ReaderLinkedin().runSearch("Aayush", "People")) {
  24. log.info("##########################################");
  25. log.info(obj.toString());
  26. log.info("##########################################");
  27. }
  28. }
  29.  
  30. @Override
  31. public List<? extends SocialDataLI> runSearch(String query, String type) {
  32. List<SocialDataLI> dataList = new ArrayList();
  33. log.debug("query: " + query);
  34.  
  35. String temp;
  36.  
  37. // GET_LOGIN_PARAMETER_FROM_HOME
  38. HttpURLConnection homeRequest, searchRequest, loginRedirectRequest = null;
  39. industryCodes();
  40. try {
  41. homeRequest = getPageRequest("https://www.linkedin.com/uas/login", "");
  42.  
  43. if (homeRequest.getResponseCode() == 200) {
  44.  
  45. StringBuffer response = getResponse(homeRequest);
  46. Pattern sourceAliasInputPattern = Pattern.compile("<input [^>]*sourceAlias[^>]*value=["|']([^("|')]+)["|'][^>]*>");
  47. Matcher sourceAliasInputMatch = sourceAliasInputPattern.matcher(response);
  48. sourceAliasInputMatch.find();
  49. String sourceAliasLoginParam = sourceAliasInputMatch.group(1);
  50.  
  51. Pattern csrfTokenInputPattern = Pattern.compile("<input [^>]*csrfToken[^>]*value=["|']([^("|')]+)["|'][^>]*>");
  52. Matcher csrfTokenInputMatch = csrfTokenInputPattern.matcher(response);
  53. csrfTokenInputMatch.find();
  54. String csrfTokenLoginParam = csrfTokenInputMatch.group(1);
  55.  
  56. // LOGIN_PART
  57. String loginParam = "session_key=" + URLEncoder.encode(userName, "UTF-8") + "&session_password="
  58. + URLEncoder.encode(password, "UTF-8") + "&csrfToken=" + URLEncoder.encode(csrfTokenLoginParam, "UTF-8") + "&sourceAlias="
  59. + URLEncoder.encode(sourceAliasLoginParam, "UTF-8") + "&signin=" + URLEncoder.encode("Sign In", "UTF-8");
  60. log.debug("loginParam --->" + loginParam);
  61.  
  62. HttpsURLConnection loginRequest = (HttpsURLConnection) new URL("https://www.linkedin.com/uas/login-submit").openConnection();
  63. loginRequest.setRequestMethod("POST");
  64. loginRequest.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
  65. loginRequest.setRequestProperty("Content-Length", "" + Integer.toString(loginParam.getBytes().length));
  66. loginRequest.setRequestProperty("Host", "www.linkedin.com");
  67. loginRequest.setDoOutput(true);
  68. loginRequest.setInstanceFollowRedirects(false);
  69.  
  70. DataOutputStream wr = new DataOutputStream(loginRequest.getOutputStream());
  71. wr.writeBytes(loginParam);
  72. wr.flush();
  73. wr.close();
  74.  
  75. Thread.sleep(3000);
  76. log.debug("loginRequest status code--->" + loginRequest.getResponseCode());
  77. if ((loginRequest.getResponseCode() == 302) || (loginRequest.getResponseCode() == 301)) {
  78.  
  79. log.debug("New location after login--->" + loginRequest.getHeaderField("Location"));
  80. // CHECK_LOGIN_AND_PROCESS_COOKIE
  81. if (loginRequest.getHeaderField("Location") == null) {
  82. log.info("Unable to login");
  83. System.exit(0);
  84. }
  85.  
  86. List<String> cookies = loginRequest.getHeaderFields().get("Set-Cookie");
  87. LinkedInCookies.clear();
  88. for (String cookie : cookies) {
  89. LinkedInCookies.add(cookie);
  90. }
  91. log.debug(cookies);
  92. loginRedirectRequest = getPageRequest(loginRequest.getHeaderField("Location"), "");
  93.  
  94. Thread.sleep(3000);
  95. log.debug("New location after redirect--->" + loginRedirectRequest.getResponseCode());
  96. if (loginRedirectRequest.getResponseCode() == 200) {
  97. if (type.equalsIgnoreCase("People"))
  98. dataList = searchPeople(query);
  99. /* else if (type.equalsIgnoreCase("Company"))
  100. dataList = searchCompany(query);*/
  101. } else {
  102. log.debug("New location after redirect is not 200--->" + loginRedirectRequest.getResponseCode());
  103. }
  104. }
  105. }
  106.  
  107. } catch (Exception e) {
  108. // TODO Auto-generated catch block
  109. e.printStackTrace();
  110. }
  111.  
  112. // TODO search implementation
  113. return dataList;
  114. }
  115.  
  116. */
  117. /**
  118. * This method used for people search
  119. *
  120. * @param query
  121. * @return List<SocialDataLI>
  122. */
  123. private List<SocialDataLI> searchPeople(String query) {
  124. List<SocialDataLI> dataList = new ArrayList();
  125. Integer noOfPages = 0;
  126. int page = 1;
  127.  
  128. try {
  129. String cook = getCookie();
  130.  
  131. searchRequest = getPageRequest("http://www.linkedin.com/vsearch/p?adv=true&trk=advsrch", cook);
  132. Thread.sleep(3000);
  133. log.debug("searchRequest status code--->" + searchRequest.getResponseCode());
  134.  
  135. if (searchRequest.getResponseCode() == 200) {
  136.  
  137. StringBuffer sResponse = getResponse(searchRequest);
  138.  
  139. Pattern jsonCntP = Pattern.compile("<code id="voltron_people_search_json-content" style="display:none;"><!--(.*?)--></code>");
  140. Matcher jsonCntPMatch = jsonCntP.matcher(sResponse);
  141.  
  142. if (jsonCntPMatch.find()) {
  143. String jsonCntPParam = jsonCntPMatch.group(1);
  144. log.debug("json Content --->" + jsonCntPParam);
  145.  
  146. Pattern rsidP = Pattern.compile("rsid=([0-9]{0,})&trk=");
  147. Matcher rsidMatch = rsidP.matcher(jsonCntPParam);
  148.  
  149. if (rsidMatch.find()) {
  150. String advUrl = "http://www.linkedin.com/vsearch/p?keywords=" + query.replaceAll(" ", "%20")
  151. + "&openAdvancedForm=true&locationType=Y&rsid=" + rsidMatch.group(1) + "&orig=FCTD";
  152. log.debug(advUrl);
  153.  
  154. do {
  155.  
  156. HttpURLConnection advSearchRequest = getPageRequest(advUrl, cook);
  157. log.debug(advUrl);
  158. log.debug("advSearchRequest status code--->" + advSearchRequest.getResponseCode());
  159.  
  160. if (advSearchRequest.getResponseCode() == 200) {
  161.  
  162. StringBuffer aResponse = getResponse(advSearchRequest);
  163.  
  164. if (page == 1)
  165. noOfPages = getNoOfPage(aResponse.toString());
  166.  
  167. jsonCntPMatch = jsonCntP.matcher(aResponse);
  168. if (jsonCntPMatch.find()) {
  169. log.debug("adv search json Content --->" + jsonCntPMatch.group(1));
  170.  
  171. ArrayList al = new ArrayList();
  172. // add elements to al, including duplicates
  173. HashSet hs = new HashSet();
  174.  
  175. Pattern pattern = Pattern.compile("pid=([0-9]{0,})");
  176. java.util.regex.Matcher m = pattern.matcher(jsonCntPMatch.group(1));
  177. while (m.find()) {
  178. log.debug("Pid's found--->" + m.group(1));
  179. al.add(m.group(1));
  180. }
  181. hs.addAll(al);
  182. al.clear();
  183. al.addAll(hs);
  184. for (int p = 0; p < al.size(); p++) {
  185.  
  186. String linkedinId = al.get(p).toString().trim();
  187. SocialDataLI sData = new SocialDataLI();
  188. System.out.println("linkedinId----" + linkedinId);
  189. sData.setLinkedinId(linkedinId);
  190. sData.setType("People");
  191. Thread.sleep(3000);
  192. HttpURLConnection profilePageRequest = getPageRequest("http://www.linkedin.com/profile/view?id=" + linkedinId, cook);
  193.  
  194. log.debug("profilePageRequest status code--->" + profilePageRequest.getResponseCode());
  195. if (profilePageRequest.getResponseCode() == 200) {
  196.  
  197. StringBuffer pageResponse = getResponse(profilePageRequest);
  198. System.out.println("firstName----" + getPatternValue(pageResponse, "firstName":"(.*?)""));
  199. sData.setFirstName(getPatternValue(pageResponse, "firstName":"(.*?)""));
  200.  
  201. sData.setLastName(getPatternValue(pageResponse, "lastName":"(.*?)""));
  202.  
  203. sData.setHeadline(getPatternValue(pageResponse, "memberHeadline":"(.*?)""));
  204.  
  205. String numConnections = getPatternValue(pageResponse, "numberOfConnections":([0-9]{0,})");
  206. if (!numConnections.equalsIgnoreCase(""))
  207. sData.setNumConnections(Integer.parseInt(numConnections));
  208.  
  209. Pattern summaryp = Pattern.compile("showSummarySection":(true|false),[^>]*"summary_lb":"(.*?)"");
  210. java.util.regex.Matcher summarym = summaryp.matcher(pageResponse.toString());
  211. if (summarym.find()) {
  212. sData.setSummary(summarym.group(2));
  213. }
  214.  
  215. sData.setPublicProfileUrl(getPatternValue(pageResponse, ""canonicalUrlToShow":"(.*?)""));
  216.  
  217. sData.setPictureUrl(getPatternValue(pageResponse, "mem_pic":"(.*?)""));
  218.  
  219. Integer endrosementCount = 0;
  220. Pattern eCountp = Pattern.compile("endorsementCount":([0-9]{0,})");
  221. java.util.regex.Matcher eCountm = eCountp.matcher(pageResponse.toString());
  222. while (eCountm.find()) {
  223. endrosementCount += Integer.parseInt(eCountm.group(1));
  224. }
  225.  
  226. // log.debug("Total Endrosement Count--->" + endrosementCount);
  227. sData.setNumRecommenders(endrosementCount);
  228. Pattern specialp = Pattern.compile("],"name":"(.*?)","endorsementCount":");
  229. java.util.regex.Matcher specialm = specialp.matcher(pageResponse.toString());
  230. while (specialm.find()) {
  231. sData.setSpecialties(specialm.group(1));
  232.  
  233. }
  234. Pattern twitterp = Pattern.compile("twitterHandle":"(.*)"");
  235. java.util.regex.Matcher twitterm = twitterp.matcher(pageResponse.toString());
  236. while (twitterm.find()) {
  237. sData.setTwitterId(twitterm.group(1));
  238. }
  239.  
  240. String industryCode = getPatternValue(pageResponse, "industryID":([0-9]{0,})");
  241. if (!industryCode.equalsIgnoreCase(""))
  242. sData.setIndustries(industryCodeMap.get(Integer.parseInt(industryCode)));
  243.  
  244. Pattern connectionsCappedp = Pattern.compile("i18n_numconnectionsformattable":"(.*?)"");
  245. java.util.regex.Matcher connectionsCappedm = connectionsCappedp.matcher(pageResponse.toString());
  246. if (connectionsCappedm.find()) {
  247. sData.setNumConnectionsCapped(true);
  248. }
  249.  
  250. Pattern memUrlResP = Pattern.compile(""websites":(.*?)}],");
  251. java.util.regex.Matcher memUrlResM = memUrlResP.matcher(pageResponse.toString());
  252. if (memUrlResM.find()) {
  253. JSONParser parser = new JSONParser();
  254. Object obj = parser.parse(memUrlResM.group(1) + "}]");
  255. JSONArray a = (JSONArray) obj;
  256.  
  257. Vector<Url> memberUrlResources = null;
  258.  
  259. Url urlV = sData.new Url();
  260. memberUrlResources = new Vector<Url>(a.size());
  261. for (Object o : a) {
  262. JSONObject url = (JSONObject) o;
  263. urlV.setUrl(url.get("URL").toString());
  264. log.trace("url--------------" + url.get("URL").toString());
  265. memberUrlResources.add(urlV);
  266. }
  267.  
  268. sData.setMemberUrlResources(memberUrlResources);
  269.  
  270. }
  271.  
  272. dataList.add(sData);
  273.  
  274. }
  275. }
  276. } else {
  277. log.debug("adv search json content not found");
  278. }
  279. } else {
  280. log.debug("advSearchRequest status code is not 200--->" + advSearchRequest.getResponseCode());
  281. }
  282. page++;
  283. advUrl = "http://www.linkedin.com/vsearch/p?keywords=" + query.replaceAll(" ", "%20")
  284. + "&openAdvancedForm=true&locationType=Y&rsid=" + rsidMatch.group(1) + "&orig=FCTD&page_num=" + page;
  285.  
  286. } while (page <= noOfPages);
  287. } else {
  288. log.debug("rsid's not found");
  289. }
  290. } else {
  291. log.debug("First json Content not found");
  292. }
  293.  
  294. } else {
  295. log.debug("searchRequest status code is not 200--->" + searchRequest.getResponseCode());
  296. }
  297. } catch (Exception e) {
  298. e.printStackTrace();
  299. }
  300. return dataList;
  301. }
  302.  
  303. /**
  304. * This method used to get Number of pages
  305. *
  306. * @param param
  307. * @return noOfPages
  308. */
  309. private Integer getNoOfPage(String param) {
  310. Integer resultCount = null;
  311. Integer noOfPages = null;
  312. Pattern rc = Pattern.compile("resultCount":([0-9]{0,})");
  313. java.util.regex.Matcher rsc = rc.matcher(param.toString());
  314. if (rsc.find()) {
  315. resultCount = Integer.parseInt(rsc.group(1));
  316. }
  317. if (resultCount > 100)
  318. noOfPages = 10;
  319. else if (resultCount % 10 > 0) {
  320. noOfPages = (resultCount / 10) + 1;
  321. }
  322. return noOfPages;
  323. }
  324.  
  325. /**
  326. * This method used to get page response
  327. *
  328. * @param request
  329. * @return response
  330. */
  331. private StringBuffer getResponse(HttpURLConnection request) {
  332. StringBuffer response = null;
  333. try {
  334. BufferedReader ld = new BufferedReader(new InputStreamReader(request.getInputStream()));
  335. response = new StringBuffer();
  336. while ((temp = ld.readLine()) != null) {
  337. response.append(temp);
  338. }
  339. ld.close();
  340. } catch (Exception e) {
  341. e.printStackTrace();
  342. }
  343. return response;
  344.  
  345. }
  346.  
  347. /**
  348. * This method used to get cookie value
  349. *
  350. * @return cook
  351. */
  352. private String getCookie() {
  353. String cook = "";
  354.  
  355. for (String cookie : LinkedInCookies) {
  356. cook += cookie.split(";", 2)[0] + "; ";
  357. }
  358. return cook;
  359. }
  360.  
  361. /**
  362. * Method used to get page
  363. *
  364. * @param url
  365. * @param cook
  366. * @return page
  367. */
  368. private HttpURLConnection getPageRequest(String url, String cook) {
  369. HttpURLConnection page = null;
  370. try {
  371. Thread.sleep(3000);
  372. page = (HttpURLConnection) new URL(url).openConnection();
  373. page.setRequestProperty("Host", "www.linkedin.com");
  374. page.setRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; rv:16.0) Gecko/20100101 Firefox/16.0");
  375. page.setDoInput(true);
  376. if (!cook.equalsIgnoreCase(""))
  377. page.addRequestProperty("Cookie", cook);
  378. } catch (Exception e) {
  379. e.printStackTrace();
  380. }
  381. return page;
  382. }
  383.  
  384. /**
  385. * Method used to get pattern value
  386. *
  387. * @param pageResponse
  388. * @param patternRule
  389. * @return value
  390. */
  391. private String getPatternValue(StringBuffer pageResponse, String patternRule) {
  392. String value = "";
  393. Pattern pattern = Pattern.compile(patternRule);
  394. java.util.regex.Matcher matcher = pattern.matcher(pageResponse);
  395. if (matcher.find())
  396. value = matcher.group(1).toString().trim();
  397. log.debug("Parsed value----> " + value);
  398. return value;
  399. }
  400.  
  401. /*
  402. * Reference: https://developer.linkedin.com/documents/industry-codes
  403. */
  404. private void industryCodes() {
  405.  
  406. industryCodeMap.put(47, "Accounting");
  407. industryCodeMap.put(94, "Airlines/Aviation");
  408. industryCodeMap.put(120, "Alternative Dispute Resolution");
  409. industryCodeMap.put(125, "Alternative Medicine");
  410. industryCodeMap.put(127, "Animation");
  411. industryCodeMap.put(19, "Apparel & Fashion");
  412. industryCodeMap.put(50, "Architecture & Planning");
  413. industryCodeMap.put(111, "Arts and Crafts");
  414. industryCodeMap.put(53, "Automotive");
  415. industryCodeMap.put(52, "Aviation & Aerospace");
  416. industryCodeMap.put(41, "Banking");
  417. industryCodeMap.put(12, "Biotechnology");
  418. industryCodeMap.put(36, "Broadcast Media");
  419. industryCodeMap.put(49, "Building Materials");
  420. industryCodeMap.put(138, "Business Supplies and Equipment");
  421. industryCodeMap.put(129, "Capital Markets");
  422. industryCodeMap.put(54, "Chemicals");
  423. industryCodeMap.put(90, "Civic & Social Organization");
  424. //Whole lot of other industry codes
  425. }
  426. }
  427.  
  428.  
  429. The error I am getting is:
  430. DEBUG [main] (ReaderLinkedin.java:56) - query: Aayush
  431. DEBUG [main] (ReaderLinkedin.java:83) - loginParam --->session_key=username&session_password=password&csrfToken=ajax%3A8320583353563693529&`enter code here`sourceAlias=0_7r5yezRXCiA_H0CRD8sf6DhOjTKUNps5xGTqeX8EEoi&signin=Sign+In
  432. DEBUG [main] (ReaderLinkedin.java:99) - loginRequest status code--->200
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement