Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package step4;
- import java.io.File;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- public class Task {
- //通过filePath文件路径获取Docment对象
- public Document getDoc(String filePath) throws IOException{
- /********** Begin **********/
- File file=new File("backups/hotels.ctrip.com_domestic-city-hotel.txt");
- Document doc=Jsoup.parse(file,"UTF-8","http://hotels.ctrip.com/");
- return doc;
- /********** End **********/
- }
- /**
- * 获取所有城市返回城市信息集合
- * @param doc
- * @return
- */
- public List<HotelCity> getAllCitys(Document doc){
- /********** Begin **********/
- List<HotelCity> cities = new ArrayList<HotelCity>();
- Elements aa= doc.getElementsByClass("pinyin_filter_detail layoutfix");
- Element pp = aa.first();
- Elements hh= pp.getElementsByTag("dd");
- Elements hts=pp.getElementsByTag("dt");
- for (int i = 0; i < hh.size(); i++) {
- Element bb = hts.get(i);
- Element head_hotelsLink = hh.get(i);
- Elements links = head_hotelsLink.children();
- for (Element link : links) {
- String pinyin_cityId = link.attr("href").replace("/hotel/", "");
- String pinyin = pinyin_cityId.replace(StringUtil.getNumbers(link.attr("href")), "");//截取拼音
- HotelCity city = new HotelCity();
- city.setCityId(StringUtil.getNumbers(link.attr("href"))); //截取cityId
- city.setCityName(link.text());
- city.setHeadPinyin(bb.text());
- city.setPinyin(pinyin);
- cities.add(city);
- }
- }
- return cities;
- /********** End **********/
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement