pymen

OdeskForYong

Sep 17th, 2013
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.61 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. #Office Hours, Medical Affiliation(s), Medical education, Internship, Residency, Additional information, Spoken languages, Community Activities, Personal Interests, Physician Web Link
  4.  
  5. content = """"
  6. "<DIV class=ps_content><DIV id=ps_doctor_top itemtype=""http://schema.org/Physician"" itemscope>
  7. <DIV id=ps_doctor_cont>
  8. <DIV class=doc_photo><!--image--><IMG alt=""Jill Ackerman"" src=""http://www.pamf.org/providersearch/photos.html?masterid=18860"" width=153 height=191> </DIV><!-- end photo -->
  9. <DIV class=socialBar><!--Google Plus Start-->
  10. <DIV class=""socialContainer googlePlusContainer"">
  11. <DIV style=""POSITION: absolute; WIDTH: 450px; LEFT: -10000px"" id=___plusone_0><IFRAME style=""POSITION: absolute; BORDER-BOTTOM-STYLE: none; BORDER-RIGHT-STYLE: none; MARGIN: 0px; WIDTH: 450px; BORDER-TOP-STYLE: none; BORDER-LEFT-STYLE: none; TOP: -10000px"" id=I0_1379057126633 tabIndex=0 vspace=0 marginHeight=0 src=""https://apis.google.com/_/+1/fastbutton?bsv=o&usegapi=1&size=medium&hl=en-US&origin=http%3A%2F%2Fwww.pamf.org&url=http%3A%2F%2Fwww.pamf.org%2Fdr-jill-ackerman.html&gsrc=3p&ic=1&jsh=m%3B%2F_%2Fscs%2Fapps-static%2F_%2Fjs%2Fk%3Doz.gapi.da.-BIef0n1yWk.O%2Fm%3D__features__%2Fam%3DAQ%2Frt%3Dj%2Fd%3D1%2Frs%3DAItRSTOED_BfdMX6az94EQOTepaUwL-JDA#_methods=onPlusOne%2C_ready%2C_close%2C_open%2C_resizeMe%2C_renderstart%2Concircled%2Cdrefresh%2Cerefresh&id=I0_1379057126633&parent=http%3A%2F%2Fwww.pamf.org&pfname=&rpctoken=99552656"" frameBorder=0 width=""100%"" allowTransparency name=I0_1379057126633 marginWidth=0 scrolling=no hspace=0 data-gapiattached=""true""></IFRAME></DIV><G:PLUSONE size=""medium"" data-gapiscan=""true"" data-onload=""true"" data-gapistub=""true""></G:PLUSONE>
  12. <SCRIPT type=text/javascript>
  13.    (function() {
  14.     var po = document.createElement('script'); po.type = 'text/javascript'; po.async = true;
  15.     po.src = 'https://apis.google.com/js/plusone.js';
  16.     var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(po, s);
  17.    })();
  18.    </SCRIPT>
  19. </DIV><!--Google Plus End--><!--Facebook Start-->
  20. <DIV class=""socialContainer facebookContainer"">
  21. <DIV id=fb-root class="" fb_reset"">
  22. <DIV style=""POSITION: absolute; WIDTH: 0px; HEIGHT: 0px; TOP: -10000px"">
  23. <DIV></DIV></DIV>
  24. <DIV style=""POSITION: absolute; WIDTH: 0px; HEIGHT: 0px; TOP: -10000px"">
  25. <DIV><IFRAME aria-hidden=true style=""BORDER-BOTTOM: medium none; BORDER-LEFT: medium none; BORDER-TOP: medium none; BORDER-RIGHT: medium none"" id=fb_xdm_frame_http title=""Facebook Cross Domain Communication Frame"" src=""http://static.ak.facebook.com/connect/xd_arbiter.php?version=27#channel=f17fbe08d2733a9&channel_path=%2Fdr-jill-ackerman.html%3Ffb_xd_fragment%23xd_sig%3Df286d04dfaf5e3c%26&origin=http%3A%2F%2Fwww.pamf.org"" frameBorder=0 allowTransparency name=fb_xdm_frame_http scrolling=no tab-index=""-1""></IFRAME><IFRAME aria-hidden=true style=""BORDER-BOTTOM: medium none; BORDER-LEFT: medium none; BORDER-TOP: medium none; BORDER-RIGHT: medium none"" id=fb_xdm_frame_https title=""Facebook Cross Domain Communication Frame"" src=""https://s-static.ak.facebook.com/connect/xd_arbiter.php?version=27#channel=f17fbe08d2733a9&channel_path=%2Fdr-jill-ackerman.html%3Ffb_xd_fragment%23xd_sig%3Df286d04dfaf5e3c%26&origin=http%3A%2F%2Fwww.pamf.org"" frameBorder=0 allowTransparency name=fb_xdm_frame_https scrolling=no tab-index=""-1""></IFRAME></DIV></DIV></DIV>
  26. <SCRIPT>
  27.  
  28.    window.fbAsyncInit = function() {
  29.     FB.init({
  30.      status : true, // check login status
  31.      cookie : true, // enable cookies to allow the server to access the session
  32.      xfbml : true // parse XFBML
  33.     });
  34.     if (typeof _ga != 'undefined') _ga.trackFacebook(); //Google Analytics tracking
  35.    };
  36.    // Load the Facebook SDK Asynchronously
  37.    (function(d){
  38.     var js, id = 'facebook-jssdk', ref = d.getElementsByTagName('script')[0];
  39.     if (d.getElementById(id)) {return;}
  40.     js = d.createElement('script'); js.id = id; js.async = true;
  41.     js.src = ""//connect.facebook.net/en_US/all.js"";
  42.     ref.parentNode.insertBefore(js, ref);
  43.    }(document));
  44.  
  45.    </SCRIPT>
  46.  
  47. <DIV class=fb-like data-show-faces=""false"" data-width=""85"" data-layout=""button_count"" data-send=""false""></DIV></DIV><!--Facebook End--></DIV>
  48. <DIV class=""certs ps-no-top-margin"">
  49. <H1 class="header_no_pad dr_heading" itemprop="name">Jill Ackerman, M.D. </H1><!-- Hiding additional titles --><BR>
  50. <SCRIPT>
  51.      var count=0;
  52.      var specArray;
  53.     </SCRIPT>
  54. <!-- specialties -->
  55. <SCRIPT>
  56.       specArray=new Array(1);
  57.      </SCRIPT>
  58.  
  59. <H2 class=""no_line_br header_no_pad"">Ophthalmology</H2>
  60. <SCRIPT>
  61.        specArray[count]=""Ophthalmology"";
  62.        count++;
  63.       </SCRIPT>
  64. <BR>(Board Certified) <BR><!-- adding status for spec --><SPAN class=ps_green>Accepting</SPAN> new patients<BR><BR>
  65. <DIV class=clear></DIV><!-- MHO/request appt -->
  66. <DIV><BR><A class=computer href=""http://www.sutterhealth.org/myhealthonline/index.html"" rel=nofollow>Offers <STRONG>My Health Online</STRONG><SPAN class=hideOffScreen>Opens in new window</SPAN> </A></DIV></DIV>
  67. <DIV class=clear></DIV><BR><!--<hr class=""ps_hr clear"">--><!--<div class=""clear""></div>-->
  68. <DIV class=ps-mho-appt-container><!-- Health Plans --><A title=""Accepted Health Plans. Opens in new window"" onclick=""trackExternalGA('Health plan pop-up - from Profile page');"" href=""javascript:popup('/providersearch/healthplan.html?sitecfg=41&recId=ps156407sp15640718325667','HealthPlans','450','550')"" rel=nofollow><IMG alt=""Accepted Health Plans"" src=""http://common.sutterhealth.org/appDesign/providersearch/images/btn-icon-color-health-plans.png""><SPAN class=hideOffScreen>Opens in new window</SPAN></A> <!-- Check for Open Scheduling --></DIV>
  69. <DIV class=clear></DIV><!--<br/>-->
  70. <DIV class=ps-margin><!-- addresses -->
  71. <DIV class=ps_dr_address><SPAN itemtype=""http://schema.org/Organization"" itemscope><SPAN class=ps_add_title itemprop="name">Vision Care Center</SPAN> <BR><SPAN itemtype=""http://schema.org/PostalAddress"" itemscope itemprop="address"><SPAN itemprop="streetAddress">323 N. Mathilda Avenue</SPAN> <BR><SPAN itemprop="addressLocality">Sunnyvale</SPAN>, <SPAN itemprop="addressRegion">California</SPAN> <SPAN itemprop="postalCode">94087</SPAN> <BR><SPAN><A href=""javascript:popup('/map/Map?country=USA&name=Vision"" Care Center&address=323 N. Mathilda Avenue&city=Sunnyvale&stateProvince=California&postalCode=94087','LocationMap','850','550')"">View Map<SPAN class=hideOffScreen>of 323 N. Mathilda Avenue, Sunnyvale. Opens in new window</SPAN></A></SPAN> <BR></SPAN></SPAN><STRONG>(408) 524-5900</STRONG>  <!--<br />--></DIV></DIV>
  72. <DIV class=clear></DIV></DIV><!-- end doc cont --></DIV><!-- end doc top --><!-- New video field -->
  73. <DIV class=border_bottom>
  74. <SCRIPT>
  75.   count=0;
  76.   var medgrpArray;
  77.  </SCRIPT>
  78. <!-- Med groups -->
  79. <DIV class=ps_detail_left>
  80. <H3 class=detailsubheading>Medical Affiliation(s)</H3></DIV>
  81. <DIV class=ps_detail_right><A href=""http://www.pamf.org"">Palo Alto Medical Foundation</A><BR>  <A href=""http://www.pamf.org"">Palo Alto Foundation Medical Group</A><BR></DIV>
  82. <SCRIPT>
  83.   var cap = 0;
  84.   if(specArray){
  85.    cap = specArray.length;
  86.   }
  87.   if(medgrpArray){
  88.    if(medgrpArray.length >cap)
  89.     cap=medgrpArray.length;
  90.   }
  91.   for(var i=0; i<cap; i++) {
  92.    if(specArray && specArray[i]){
  93.     try {_gaq.push(['_setCustomVar',3,'Specialty',specArray[i],3]);} catch(err) {};
  94.    }
  95.    if(medgrpArray && medgrpArray[i]){
  96.     try {_gaq.push(['_setCustomVar',4,'Medical Group',medgrpArray[i],3]);} catch(err) {};
  97.    }
  98.    try {_gaq.push(['_trackEvent','set-cv','set specialty and medical group',undefined,undefined,true]);} catch(err) {};
  99.   }
  100.  </SCRIPT>
  101. <!-- Hospital affiliation -->
  102. <DIV class=clear></DIV><BR>
  103. <DIV class=ps_detail_left>
  104. <H3 class=detailsubheading>Hospital Affiliation(s) </H3></DIV>
  105. <DIV class=ps_detail_right><!-- external affiliations -->El Camino Hospital <BR>SurgeCenter, Palo Alto <BR>SurgiCenter, Mountain View <BR></DIV>
  106. <DIV class=clear></DIV><BR>
  107. <DIV class=ps_detail_left>
  108. <H3 class=detailsubheading>Education & Training</H3></DIV>
  109. <DIV class=clear></DIV><!-- physician record -->
  110. <DIV class=ps_detail_left>Medical education: </DIV>
  111. <DIV class=ps_detail_right>Stanford University School of Medicine, CA, 1992 </DIV>
  112. <DIV class=clear></DIV>
  113. <DIV class=ps_detail_left>Internship: </DIV>
  114. <DIV class=ps_detail_right>Cedars-Sinai Medical Center, CA, 1993 </DIV>
  115. <DIV class=clear></DIV>
  116. <DIV class=ps_detail_left>Residency: </DIV>
  117. <DIV class=ps_detail_right>Stanford University, Department of Ophthalmology, CA, 1996 </DIV>
  118. <DIV class=clear></DIV>
  119. <DIV class=ps_detail_left>Fellowship(s): </DIV>
  120. <DIV class=ps_detail_right>Preceptorship in Oculoplastic & Plastic Surgery, 1997 </DIV>
  121. <DIV class=clear></DIV><!-- phys/nonphys --><!-- education, residency, internship .... --></DIV><!-- end border bottom -->
  122. <DIV class=border_bottom><!--depts -->
  123. <DIV class=ps_detail_left>
  124. <H3 class=detailsubheading>Department</H3></DIV>
  125. <DIV class=ps_detail_right>Ophthalmology </DIV>
  126. <DIV class=clear></DIV><BR><!-- specialties  -->
  127. <DIV class=ps_detail_left>
  128. <H3 class=detailsubheading>Professional Interests</H3></DIV>
  129. <DIV class=ps_detail_right><STRONG>Ophthalmology</STRONG><BR>Facial Nerve Disorders, Oculoplastics, Tear Duct Probings Surgery, Thyroid Eye Disease<BR><BR></DIV>
  130. <DIV class=clear></DIV>
  131. <DIV class=ps_detail_left>
  132. <H3 class=detailsubheading>Additional Information</H3></DIV>
  133. <DIV class=ps_detail_right>Dry eye disease, cosmetic eyelid surgery </DIV>
  134. <DIV class=clear></DIV><BR>
  135. <DIV class=ps_detail_left>
  136. <H3 class=detailsubheading>Spoken Languages</H3></DIV>
  137. <DIV class=clear></DIV>
  138. <DIV class=ps_detail_left>Primary: </DIV>
  139. <DIV class=ps_detail_right>English </DIV>
  140. <DIV class=clear></DIV>
  141. <DIV class=ps_detail_left>Additional: </DIV>
  142. <DIV class=ps_detail_right>Spanish (Good)<BR></DIV>
  143. <DIV class=clear></DIV><BR>
  144. <DIV class=ps_detail_left>
  145. <H3 class=detailsubheading>Personal Interests</H3></DIV>
  146. <DIV class=ps_detail_right>Dr. Ackerman enjoys hiking with her two dogs, travel, nature and animal rescue. </DIV>
  147. <DIV class=clear></DIV><BR>
  148. <DIV class=ps_detail_left>
  149. <H3 class=detailsubheading>Physician Web Link</H3></DIV>
  150. <DIV class=ps_detail_right><A href=""http://www.pamf.org/eye"">http://www.pamf.org/eye<SPAN class=hideOffScreen>Opens in new window</SPAN></A> </DIV>
  151. <DIV class=clear></DIV><BR>
  152. <DIV class=ps_back_to_top><A class=back_to_top href=""#"">Back to top</A></DIV></DIV><!-- end border bottom --><!-- disclaimer -->
  153. <DIV class=clear></DIV><BR><BR>
  154. <DIV class=ps_custom>
  155. <DIV class=""ps_detail_middle gray"">This physician directory is provided as a convenience to you. It is not intended as a recommendation, referral, or endorsement of any particular provider. Physicians are not employees or agents of Sutter Health or its affiliates. The information is submitted by each doctor and we make no guarantee or warrantee as to the accuracy of the information. Sutter Health is not responsible for any loss or damage caused by your reliance on this information. You should verify the accuracy of the information directly with the physician's office. <BR><BR></DIV></DIV>
  156. <DIV class=clear></DIV><BR></DIV>"
  157.  
  158. """
  159.  
  160. import lxml.html
  161.  
  162. doc = lxml.html.document_fromstring(content)
  163.  
  164. term_xpath = doc.xpath('//h1[@class="header_no_pad dr_heading"]')
  165. for term in term_xpath:
  166.     print term.text
  167.  
  168. term_xpath = doc.xpath('//div[@class="ps_dr_address"]')
  169. for term in term_xpath:
  170.     print term.text_content()
  171.  
  172.  
  173. # Expected output:
  174. #Jill Ackerman, M.D.
  175. #Vision Care Center 323 N. Mathilda Avenue Sunnyvale, California 94087 View Mapof 323 N.
  176. #Mathilda Avenue, Sunnyvale. Opens in new window (408) 524-5900
Advertisement
Add Comment
Please, Sign In to add comment