Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public void readFromHTML(File sourceHTML)
- {
- try
- {
- Scanner inputFile = new Scanner(sourceHTML);
- try
- {
- this.name = findName(inputFile);
- }
- catch (RecipeException re)
- {
- System.out.println(re);
- //this recipe is illegal
- }
- try
- {
- this.image = new File(findImage(inputFile));
- }
- catch (RecipeException re)
- {
- System.out.println();
- this.image = null;
- }
- try
- {
- this.prep = findPrep(inputFile);
- }
- catch (RecipeException re)
- {
- System.out.println();
- this.prep = 0;
- }
- try
- {
- this.cook = findCook(inputFile);
- }
- catch (RecipeException re)
- {
- System.out.println();
- this.cook = 0;
- }
- try
- {
- this.ingredients = findIngredients(inputFile);
- }
- catch (RecipeException re)
- {
- System.out.println(re);
- //this recipe is illegal
- }
- try
- {
- this.method = findMethod(inputFile);
- }
- catch (RecipeException re)
- {
- System.out.println(re);
- //this recipe is illegal
- }
- }
- catch (FileNotFoundException fnfe)
- {
- System.out.println(sourceHTML + " file not found");
- }
- }
- private String findName(Scanner inputFile) throws RecipeException
- {
- String temp = inputFile.findWithinHorizon("<title>.+</title>", 0);
- if (temp == null)
- throw new RecipeException("Unable to find name");
- temp = temp.substring(7, temp.length()-23);
- return temp;
- }
- private String findImage(Scanner inputFile) throws RecipeException
- {
- String temp = inputFile.findWithinHorizon("og:image\".+/>", 0);
- if (temp == null)
- throw new RecipeException("Unable to find Image in the html... continuing");
- temp = temp.substring(temp.length()-14, temp.length()-10);
- temp = temp + ".jpg";
- return temp;
- }
- private int findPrep(Scanner inputFile) throws RecipeException
- {
- String temp = inputFile.findWithinHorizon("itemprop=\"prepTime\">\\d+", 0); //find the preptime
- if (temp == null)
- throw new RecipeException("Unable to find Preparation time... continuing");
- Pattern number = Pattern.compile("\\d+");//locate the integer relating to the preptime
- Matcher match = number.matcher(temp);
- match.find();
- String s = match.group();
- int i = Integer.parseInt(s);
- return i;
- }
- private int findCook(Scanner inputFile) throws RecipeException
- {
- String temp = inputFile.findWithinHorizon("itemprop=\"cookTime\" datetime=\".+\">\\d+", 0); //find the cooktime
- if (temp == null)
- throw new RecipeException("Unable to find Cooking time... continuing");
- int charnum = temp.indexOf('>');
- temp = temp.substring(charnum); //remove everything before the > (Because datetime includes an int)
- Pattern number = Pattern.compile("\\d+"); //locate the integer relating to the cooktime
- Matcher match = number.matcher(temp);
- match.find();
- String s = match.group();
- int i = Integer.parseInt(s);
- return i;
- }
- private String[] findIngredients(Scanner inputFile) throws RecipeException
- {
- Pattern broadsection = Pattern.compile("class=\"ingredients.*?module-content.*?div", Pattern.DOTALL); //compiling out here to allow for multi line expressions
- String temp = inputFile.findWithinHorizon(broadsection, 0); //locate the ingredient section
- if (temp == null)
- throw new RecipeException("Unable to find Ingredients");
- Pattern narrowsection = Pattern.compile("<ul><li>.*?</li></ul>", Pattern.DOTALL); //locate the actual ingredients
- Matcher narrow = narrowsection.matcher(temp);
- narrow.find();
- temp = narrow.group();
- String[] ingredientArray = temp.split("<.+?>"); //splits the ingredients into their components
- List<String> stringList = new ArrayList<String>(); //use a List to remove all null/empty strings
- for(String s : ingredientArray)
- {
- if(s.length() > 0)
- {
- stringList.add(s);
- }
- }
- ingredientArray = stringList.toArray(new String[stringList.size()]); //put back into the string array
- for (int i = 0; i<ingredientArray.length; i++)
- {
- ingredientArray[i] = ingredientArray[i].trim(); //remove whitespace
- }
- return ingredientArray;
- }
- private String[] findMethod(Scanner inputFile) throws RecipeException
- {
- Pattern broadsection = Pattern.compile("ol id=\"method\">.*?</ol>", Pattern.DOTALL); //compiling out here to allow for multi line expressions
- String temp = inputFile.findWithinHorizon(broadsection, 0); //locate the method section
- if (temp == null)
- throw new RecipeException("Unable to find Method");
- Pattern narrowsection = Pattern.compile("<li><p>.*?</p></li>\\s", Pattern.DOTALL); //locate the actual method
- Matcher narrow = narrowsection.matcher(temp);
- narrow.find();
- temp = narrow.group();
- String[] methodArray = temp.split("<.+?>"); //splits the method into its steps
- List<String> stringList = new ArrayList<String>(); //use a List to remove all null/empty strings
- for(String s : methodArray)
- {
- if(s.length() > 0)
- {
- stringList.add(s);
- }
- }
- methodArray = stringList.toArray(new String[stringList.size()]); //put back into the string array
- for (int i = 0; i<methodArray.length; i++)
- {
- methodArray[i] = methodArray[i].trim(); //remove whitespace
- }
- return methodArray;
- }
Add Comment
Please, Sign In to add comment