Advertisement
Guest User

Untitled

a guest
Jan 16th, 2017
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.51 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [],
  10. "source": [
  11. "import urllib\n",
  12. "from bs4 import BeautifulSoup\n",
  13. "\n",
  14. "\n",
  15. "class GetVacancyInfo():\n",
  16. " \n",
  17. " # This class retrieves information about a specific HeadHunter vacancy, identified either by its URL or by its ID.\n",
  18. " \n",
  19. " def __init__(self, get_by_id=False, vacancy_url=None, vacancy_id=None):\n",
  20. " \n",
  21. " if get_by_id == True:\n",
  22. " \n",
  23. " assert type(vacancy_id) == str or type(vacancy_id) == int, \"Incorrect ID's format\"\n",
  24. " self.vacancy_id = vacancy_id\n",
  25. " self.URL = \"https://spb.hh.ru/vacancy/\"+str(vacancy_id)\n",
  26. " \n",
  27. " else:\n",
  28. " self.URL = self.clear_url(vacancy_url)\n",
  29. " \n",
  30. " \n",
  31. " def clear_url(self,given_url):\n",
  32. " \n",
  33. " assert (type(given_url) == str), \"Incorrect URL's format\"\n",
  34. " \n",
  35. " if given_url.find('?') != -1:\n",
  36. " valid_url = given_url[:given_url.find('?')]\n",
  37. " else: \n",
  38. " valid_url = given_url\n",
  39. " \n",
  40. " return valid_url\n",
  41. " \n",
  42. " \n",
  43. " def get_html(self,url):\n",
  44. " response = urllib.urlopen(url)\n",
  45. " return response.read()\n",
  46. " \n",
  47. " \n",
  48. " def parse(self,html):\n",
  49. " soup = BeautifulSoup(html,'lxml')\n",
  50. " \n",
  51. " description = soup.find('div', class_='b-vacancy-desc-wrapper').get_text()\n",
  52. " key_skills = soup.find('div', class_ = 'l-paddings').get_text()\n",
  53. "\n",
  54. " return (description,key_skills)\n",
  55. "\n",
  56. " \n",
  57. " def get_info(self):\n",
  58. " html = self.get_html(self.URL)\n",
  59. " \n",
  60. " try: \n",
  61. " info = self.parse(html)\n",
  62. " \n",
  63. " except AttributeError:\n",
  64. " print 'Unavalible vacancy'\n",
  65. " return \n",
  66. " \n",
  67. " \n",
  68. " return info\n",
  69. " \n",
  70. "# TODO: fix key skills parsing (parse() currently takes the text of the first 'l-paddings' div as the key skills)"
  71. ]
  72. },
  73. {
  74. "cell_type": "code",
  75. "execution_count": 2,
  76. "metadata": {
  77. "collapsed": false
  78. },
  79. "outputs": [
  80. {
  81. "name": "stdout",
  82. "output_type": "stream",
  83. "text": [
  84. "Крупная западная фармацевтическая компания производит набор медицинских представителей на конкурсной основе\n",
  85. "\n",
  86. "\n",
  87. "Обязанности: \n",
  88. "\n",
  89. "- заниматься продвижением препаратов компании на территории Москвы и московской области;\n",
  90. "\n",
  91. "- 10-12 визитов в день, ведение коммерческих переговоров с лидерами ключевых мнений;\n",
  92. "\n",
  93. "- ведение презентаций.\n",
  94. "\n",
  95. "\n",
  96. "Требования:\n",
  97. "\n",
  98. "- высшее медицинское/фармацевтическое образование;\n",
  99. "\n",
  100. "- опыт работы приветствуется;\n",
  101. "\n",
  102. "- хорошие презентационные навыки;\n",
  103. "\n",
  104. "- опыт вождения автомобиля;\n",
  105. "\n",
  106. "- опыт работы на п/к.\n",
  107. "\n",
  108. "\n",
  109. "З/п 550-750$ + социальный пакет (мед.страховка, представительские расходы, компьютер, телефон, машина - иномарка)\n"
  110. ]
  111. }
  112. ],
  113. "source": [
  114. "VI = GetVacancyInfo(get_by_id=True, vacancy_id=18123)\n",
  115. "print VI.get_info()[0]"
  116. ]
  117. }
  118. ],
  119. "metadata": {
  120. "kernelspec": {
  121. "display_name": "Python 2",
  122. "language": "python",
  123. "name": "python2"
  124. },
  125. "language_info": {
  126. "codemirror_mode": {
  127. "name": "ipython",
  128. "version": 2
  129. },
  130. "file_extension": ".py",
  131. "mimetype": "text/x-python",
  132. "name": "python",
  133. "nbconvert_exporter": "python",
  134. "pygments_lexer": "ipython2",
  135. "version": "2.7.12"
  136. }
  137. },
  138. "nbformat": 4,
  139. "nbformat_minor": 1
  140. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement