Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package ps.myhome2.clean;
- import java.text.NumberFormat;
- import java.text.ParseException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * Parse a myhome.ie description string. If the parse succeeds, there are accessors
- * for square footage, sale type (sale or auction), house type (house, apartment, etc.),
- * extended house type (detached, terrace, etc.), number of beds.
- *
- * If the parse did not succeed, getHouseType() will return null.
- *
- * Not thread-safe due to static NumberFormat.
- *
- * LICENSE: Do whatever you like with this. Absolutely no warranty as to fitness
- * for any purpose is expressed or implied.
- *
- * v1.0 02-Aug-2014
- * See http://www.thepropertypin.com/viewtopic.php?f=1&t=62995
- *
- */
- public class BedsType {
- private String bedsType;
- private int squareFt;
- private SaleType saleType;
- private HouseType houseType;
- private boolean specialTaxTreatment;
- private String houseTypeStr;
- private int numBeds;
- static public enum HouseType{House, Apartment, Site, Commercial, Unspecified};
- static public enum SaleType{Sale, Auction, RentToBuy};
- /**
- * Constructor.
- * @param strBedsType the myhome.ie description
- */
- public BedsType(String strBedsType) {
- bedsType= strBedsType;
- // Temporary working strings
- String desc = bedsType;
- String s;
- // Remove noise words
- desc = desc.replace('-', ' ');
- desc = desc.replace('/', ' ');
- s = "from ";
- if (desc.contains(s)) {
- desc = removeString(desc, s);
- }
- s = "to ";
- if (desc.contains(s)) {
- desc = removeString(desc, s);
- }
- // Check tax
- for (String tax : kTaxTreatments) {
- if (desc.contains(tax)) {
- specialTaxTreatment = true;
- desc = removeString(desc, tax);
- break;
- }
- }
- // Compress the remaining string
- desc = compressString(desc);
- // Check sale/auction
- saleType = SaleType.Sale; // default
- for (Pair<String, SaleType> pair : kSaleTypeList) {
- if (desc.contains(pair.first())) {
- saleType = pair.second();
- desc = removeString(desc, pair.first());
- break;
- }
- }
- try {
- // Number of beds
- s = " Bed";
- if (desc.contains(s)) {
- int pos = desc.indexOf(s);
- String strNumBeds = desc.substring(0, pos);
- desc = desc.substring(pos + s.length());
- numBeds = intFormat.parse(compressString(strNumBeds)).intValue();
- }
- // Floor area
- desc = doFloorArea(desc);
- } catch (ParseException ex) {
- return;
- }
- // Residual string contains a valid house type
- // or the whole parse is considered invalid
- houseTypeStr = compressString(desc);
- houseType = houseTypeMap.get(houseTypeStr);
- }
- /**
- * Compute floor area if present.
- * @param desc the myhome.ie description containing the floor area
- * @return the description string after floor area parsed out
- * @throws ParseException
- */
- private String doFloorArea(String desc) throws ParseException {
- Matcher m = kPatMetres.matcher(desc);
- if (m.find()) {
- String strNum = m.group(1);
- desc = removeString(desc, m.group());
- double d = numFormat.parse(strNum).doubleValue();
- d *= 10.76; // metres to square ft.
- squareFt = (int)d; // truncate to integer
- }
- m = kPatFoot.matcher(desc);
- if (m.find()) {
- String strNum = m.group(1);
- desc = removeString(desc, m.group());
- if (squareFt == 0) {
- // Only use sq.ft. if we didn't have metres
- double d = numFormat.parse(strNum).doubleValue();
- squareFt = (int)d;
- }
- }
- // More matches to swallow unwanted duplicates/ranges e.g. x ft to y ft
- Pattern[] pats = {kPatMetres, kPatFoot, kPatAcre, kPatAcre};
- for (Pattern pat : pats) {
- m = pat.matcher(desc);
- if (m.find()) {
- desc = removeString(desc, m.group());
- }
- }
- return desc;
- }
- /**
- * The original myhome.ie description
- *
- * @return the description
- */
- public String getBedsType() {
- return bedsType;
- }
- /**
- * Number of beds
- *
- * @return num beds if present in description string, otherwise zero
- */
- public int getNumBeds() {
- return numBeds;
- }
- /**
- * Floor area in sq. ft.
- *
- * @return Floor area in sq. ft. if present, otherwise zero.
- */
- public int getSquareFt() {
- return squareFt;
- }
- /**
- * House type. If this returns null then all the rest of the data is
- * unreliable too.
- *
- * @return
- */
- public HouseType getHouseType() {
- return houseType;
- }
- /**
- * Extended house type string.
- *
- * @return house type string
- */
- public String getHouseTypeStr() {
- return houseTypeStr;
- }
- /**
- * Sale type.
- * @return the sale type.
- */
- public SaleType getSaleType() {
- return saleType;
- }
- /**
- * True if there was any special tax treatment e.g. section 23.
- * @return tax treatment
- */
- public boolean getSpecialTaxTreatment() {
- return specialTaxTreatment;
- }
- // Static Metadata
- static private Map<String, HouseType> houseTypeMap;
- static {
- Object[] oh = {
- "", HouseType.Unspecified,
- "Site", HouseType.Site,
- "Development Land", HouseType.Site,
- "Apartment", HouseType.Apartment,
- "Penthouse", HouseType.Apartment,
- "Studio", HouseType.Apartment,
- "Live Work Unit", HouseType.Apartment,
- "Duplex", HouseType.Apartment,
- "House", HouseType.House,
- "Detached House", HouseType.House,
- "End of Terrace House", HouseType.House,
- "End of Terrace Houses", HouseType.House,
- "Semi Detached House", HouseType.House,
- "Terraced House", HouseType.House,
- "Bungalow", HouseType.House,
- "Country House", HouseType.House,
- "Period House", HouseType.House,
- "Cottage", HouseType.House,
- "Townhouse", HouseType.House,
- "Dormer", HouseType.House,
- "Holiday Home", HouseType.House,
- "Mews", HouseType.House,
- "Investment Property", HouseType.Commercial,
- "Office", HouseType.Commercial,
- "Pubs & Restaurants", HouseType.Commercial,
- "Retail Units", HouseType.Commercial,
- "Industrial Units", HouseType.Commercial,
- "Farm Land", HouseType.Commercial,
- "Farm", HouseType.Commercial,
- "Businesses", HouseType.Commercial,
- };
- houseTypeMap = new HashMap<String, HouseType>();
- for (int i = 0; i < oh.length; i += 2) {
- houseTypeMap.put((String)oh[i], (HouseType)oh[i+1]);
- }
- }
- static final List<Pair<String, SaleType>> kSaleTypeList;
- static {
- kSaleTypeList = new ArrayList<Pair<String,SaleType>>();
- Object os[] = {
- "For Sale By Tender", SaleType.Sale,
- "For Sale By Auction", SaleType.Auction,
- "For Sale Rent To Buy", SaleType.RentToBuy,
- "For Auction", SaleType.Auction,
- "For Sale", SaleType.Sale,
- "Rent To Buy", SaleType.RentToBuy,
- };
- for (int i = 0; i < os.length; i += 2) {
- kSaleTypeList.add(new Pair<String, SaleType>((String)os[i], (SaleType)os[i+1]));
- }
- }
- static final String kTaxTreatments[] = {
- "Tax Section 23", "Tax Section 27",
- "Tax Section 50", "Tax Pre 63", "Tax Holiday Home"
- };
- static final Pattern kPatMetres = Pattern.compile("([[0-9][,.]]*) ?m²");
- static final Pattern kPatFoot = Pattern.compile("([[0-9][,.]]*) ?ft²");
- static final Pattern kPatAcre = Pattern.compile("([[0-9][,.]]*) ?((Acres?)|(Hectares))");
- static NumberFormat intFormat = NumberFormat.getIntegerInstance();
- static NumberFormat numFormat = NumberFormat.getNumberInstance();
- // Utilities
- /**
- * A convenience class to represent a pair of values.
- *
- * @param <FIRST>
- * the type of the first value.
- * @param <SECOND>
- * the type of the second value.
- */
- static class Pair<FIRST, SECOND> {
- private FIRST first;
- private SECOND second;
- public Pair(FIRST f, SECOND s) {
- first = f;
- second = s;
- }
- public FIRST first() {
- return first;
- }
- public SECOND second() {
- return second;
- }
- }
- // String methods
- /**
- * Trim leading and trailing spaces and compress internal runs of white space to a single space.
- * @param s string to compress
- * @return compressed string
- */
- public static String compressString(String s) {
- if (s == null) {
- return "";
- }
- StringBuilder sb = new StringBuilder();
- boolean inSpace = true;
- for (int i = 0; i < s.length(); i++) {
- char ch = s.charAt(i);
- if (Character.isWhitespace(ch)) {
- if (!inSpace) {
- sb.append(' ');
- inSpace = true;
- }
- } else {
- sb.append(ch);
- inSpace = false;
- }
- }
- int len = sb.length();
- if ((len > 0) && (sb.charAt(len - 1) == ' ')) {
- sb.delete(len - 1, len);
- }
- return sb.toString();
- }
- /**
- * Remove one string embedded in another.
- *
- * @param s
- * the input string
- * @param toRemove
- * the string to remove
- * @return if s contains toRemove then a new string with toRemove removed,
- * otherwise s
- */
- public static String removeString(String s, String toRemove) {
- int pos = s.indexOf(toRemove);
- return (pos < 0) ? s : s.substring(0, pos)
- + s.substring(pos + toRemove.length());
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement