Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import urllib\n",
- "from bs4 import BeautifulSoup\n",
- "\n",
- "\n",
- "class GetVacancyInfo(object):\n",
- "    \"\"\"Get information about a HeadHunter (hh.ru) vacancy by its URL or by its ID.\n",
- "\n",
- "    The page address is resolved once in the constructor and kept in\n",
- "    ``self.URL``; ``get_info()`` downloads that page and extracts the text of\n",
- "    the description block and of the key-skills block.\n",
- "    \"\"\"\n",
- "\n",
- "    # Prefix that a vacancy ID is appended to when building the page URL.\n",
- "    BASE_URL = \"https://spb.hh.ru/vacancy/\"\n",
- "\n",
- "    # Python 2 keeps text in both str and unicode (and large integers in long);\n",
- "    # an exact ``type(x) == str`` comparison wrongly rejects those values.\n",
- "    try:\n",
- "        _TEXT_TYPES = (basestring,)\n",
- "        _ID_TYPES = (basestring, int, long)\n",
- "    except NameError:  # Python 3 has neither basestring nor long\n",
- "        _TEXT_TYPES = (str,)\n",
- "        _ID_TYPES = (str, int)\n",
- "\n",
- "    def __init__(self, get_by_id=False, vacancy_url=None, vacancy_id=None):\n",
- "        \"\"\"Work out the vacancy page URL and store it in ``self.URL``.\n",
- "\n",
- "        Args:\n",
- "            get_by_id: if true, build the URL from ``vacancy_id``; otherwise\n",
- "                use ``vacancy_url`` with its query string removed.\n",
- "            vacancy_url: address of the vacancy page (text).\n",
- "            vacancy_id: vacancy identifier, an integer or text.\n",
- "\n",
- "        Raises:\n",
- "            AssertionError: if the value for the chosen mode has the wrong type.\n",
- "        \"\"\"\n",
- "        if get_by_id:\n",
- "            # bool is a subclass of int, so it has to be excluded explicitly.\n",
- "            assert (isinstance(vacancy_id, self._ID_TYPES)\n",
- "                    and not isinstance(vacancy_id, bool)), \"Incorrect ID's format\"\n",
- "            self.vacancy_id = vacancy_id\n",
- "            self.URL = self.BASE_URL + str(vacancy_id)\n",
- "        else:\n",
- "            # Set the attribute in both modes so it can always be read.\n",
- "            self.vacancy_id = None\n",
- "            self.URL = self.clear_url(vacancy_url)\n",
- "\n",
- "    def clear_url(self, given_url):\n",
- "        \"\"\"Return ``given_url`` with everything from the first '?' onwards removed.\n",
- "\n",
- "        Raises:\n",
- "            AssertionError: if ``given_url`` is not text.\n",
- "        \"\"\"\n",
- "        assert isinstance(given_url, self._TEXT_TYPES), \"Incorrect URL's format\"\n",
- "\n",
- "        # split() leaves the URL untouched when it contains no '?'.\n",
- "        return given_url.split('?', 1)[0]\n",
- "\n",
- "    def get_html(self, url):\n",
- "        \"\"\"Download ``url`` and return the response body as read from the socket.\"\"\"\n",
- "        from contextlib import closing\n",
- "\n",
- "        try:\n",
- "            from urllib.request import urlopen  # Python 3\n",
- "        except ImportError:\n",
- "            from urllib import urlopen  # Python 2\n",
- "\n",
- "        # closing() releases the connection even when read() raises; the\n",
- "        # Python 2 response object is not a context manager by itself.\n",
- "        with closing(urlopen(url)) as response:\n",
- "            return response.read()\n",
- "\n",
- "    def parse(self, html):\n",
- "        \"\"\"Extract ``(description, key_skills)`` text from a vacancy page.\n",
- "\n",
- "        Raises:\n",
- "            AttributeError: if either <div> is absent, because ``find()`` then\n",
- "                returns ``None``; ``get_info()`` relies on this.\n",
- "        \"\"\"\n",
- "        soup = BeautifulSoup(html, 'lxml')\n",
- "\n",
- "        description = soup.find('div', class_='b-vacancy-desc-wrapper').get_text()\n",
- "        # NOTE(review): the notebook's TODO says key-skills parsing still needs\n",
- "        # fixing, so treat this selector as provisional.\n",
- "        key_skills = soup.find('div', class_='l-paddings').get_text()\n",
- "\n",
- "        return (description, key_skills)\n",
- "\n",
- "    def get_info(self):\n",
- "        \"\"\"Download and parse the vacancy page at ``self.URL``.\n",
- "\n",
- "        Returns:\n",
- "            The ``(description, key_skills)`` tuple from ``parse()``, or\n",
- "            ``None`` (after printing a notice) when parsing raises\n",
- "            ``AttributeError``.\n",
- "        \"\"\"\n",
- "        html = self.get_html(self.URL)\n",
- "\n",
- "        try:\n",
- "            return self.parse(html)\n",
- "        except AttributeError:\n",
- "            # Parenthesised form prints the same text on Python 2 and 3.\n",
- "            print('Unavalible vacancy')\n",
- "            return None\n",
- " \n",
- "# TODO: fix key skills parsing "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Крупная западная фармацевтическая компания производит набор медицинских представителей на конкурсной основе\n",
- "\n",
- "\n",
- "Обязанности: \n",
- "\n",
- "- заниматься продвижением препаратов компании на территории Москвы и московской области;\n",
- "\n",
- "- 10-12 визитов в день, ведение коммерческих переговоров с лидерами ключевых мнений;\n",
- "\n",
- "- ведение презентаций.\n",
- "\n",
- "\n",
- "Требования:\n",
- "\n",
- "- высшее медицинское/фармацевтическое образование;\n",
- "\n",
- "- опыт работы приветствуется;\n",
- "\n",
- "- хорошие презентационные навыки;\n",
- "\n",
- "- опыт вождения автомобиля;\n",
- "\n",
- "- опыт работы на п/к.\n",
- "\n",
- "\n",
- "З/п 550-750$ + социальный пакет (мед.страховка, представительские расходы, компьютер, телефон, машина - иномарка)\n"
- ]
- }
- ],
- "source": [
- "VI = GetVacancyInfo(get_by_id=True, vacancy_id=18123)\n",
- "vacancy_info = VI.get_info()\n",
- "\n",
- "# get_info() prints a notice and returns None when the page cannot be parsed,\n",
- "# so only index into the result when there is one.\n",
- "if vacancy_info is not None:\n",
- "    print(vacancy_info[0])"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement