Guest User

Untitled

a guest
Feb 18th, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 5.33 KB | None | 0 0
  1.     public void readFromHTML(File sourceHTML)
  2.     {
  3.         try
  4.         {
  5.             Scanner inputFile = new Scanner(sourceHTML);
  6.            
  7.             try
  8.             {
  9.                 this.name = findName(inputFile);
  10.             }
  11.             catch (RecipeException re)
  12.             {
  13.                 System.out.println(re);
  14.                 //this recipe is illegal
  15.             }
  16.  
  17.             try
  18.             {
  19.                 this.image = new File(findImage(inputFile));
  20.             }
  21.             catch (RecipeException re)
  22.             {
  23.                 System.out.println();
  24.                 this.image = null;
  25.             }
  26.  
  27.             try
  28.             {
  29.                 this.prep = findPrep(inputFile);
  30.             }
  31.             catch (RecipeException re)
  32.             {
  33.                 System.out.println();
  34.                 this.prep = 0;
  35.             }
  36.  
  37.             try
  38.             {
  39.                 this.cook = findCook(inputFile);
  40.             }
  41.             catch (RecipeException re)
  42.             {
  43.                 System.out.println();
  44.                 this.cook = 0;
  45.             }
  46.  
  47.             try
  48.             {
  49.                 this.ingredients = findIngredients(inputFile);
  50.             }
  51.             catch (RecipeException re)
  52.             {
  53.                 System.out.println(re);
  54.                 //this recipe is illegal
  55.             }
  56.  
  57.             try
  58.             {
  59.                 this.method = findMethod(inputFile);
  60.             }
  61.             catch (RecipeException re)
  62.             {
  63.                 System.out.println(re);
  64.                 //this recipe is illegal
  65.             }
  66.         }
  67.         catch (FileNotFoundException fnfe)
  68.         {
  69.             System.out.println(sourceHTML + " file not found");
  70.         }
  71.     }
  72.    
  73.     private String findName(Scanner inputFile) throws RecipeException
  74.     {
  75.         String temp = inputFile.findWithinHorizon("<title>.+</title>", 0);
  76.        
  77.         if (temp == null)
  78.                 throw new RecipeException("Unable to find name");
  79.        
  80.         temp = temp.substring(7, temp.length()-23);
  81.        
  82.         return temp;
  83.     }
  84.    
  85.     private String findImage(Scanner inputFile) throws RecipeException
  86.     {
  87.         String temp = inputFile.findWithinHorizon("og:image\".+/>", 0);
  88.        
  89.         if (temp == null)
  90.                 throw new RecipeException("Unable to find Image in the html... continuing");
  91.        
  92.         temp = temp.substring(temp.length()-14, temp.length()-10);
  93.         temp = temp + ".jpg";
  94.  
  95.         return temp;
  96.     }
  97.    
  98.     private int findPrep(Scanner inputFile) throws RecipeException
  99.     {
  100.         String temp = inputFile.findWithinHorizon("itemprop=\"prepTime\">\\d+", 0); //find the preptime
  101.        
  102.         if (temp == null)
  103.                 throw new RecipeException("Unable to find Preparation time... continuing");
  104.  
  105.         Pattern number = Pattern.compile("\\d+");//locate the integer relating to the preptime
  106.         Matcher match = number.matcher(temp);
  107.         match.find();
  108.         String s = match.group();
  109.         int i = Integer.parseInt(s);
  110.        
  111.         return i;
  112.     }
  113.    
  114.     private int findCook(Scanner inputFile) throws RecipeException
  115.     {
  116.         String temp = inputFile.findWithinHorizon("itemprop=\"cookTime\" datetime=\".+\">\\d+", 0); //find the cooktime
  117.        
  118.         if (temp == null)
  119.                 throw new RecipeException("Unable to find Cooking time... continuing");
  120.        
  121.         int charnum = temp.indexOf('>');
  122.         temp = temp.substring(charnum); //remove everything before the > (Because datetime includes an int)
  123.  
  124.         Pattern number = Pattern.compile("\\d+"); //locate the integer relating to the cooktime
  125.         Matcher match = number.matcher(temp);
  126.         match.find();
  127.         String s = match.group();
  128.         int i = Integer.parseInt(s);
  129.        
  130.         return i;
  131.     }
  132.  
  133.     private String[] findIngredients(Scanner inputFile) throws RecipeException
  134.     {
  135.         Pattern broadsection = Pattern.compile("class=\"ingredients.*?module-content.*?div", Pattern.DOTALL); //compiling out here to allow for multi line expressions
  136.         String temp = inputFile.findWithinHorizon(broadsection, 0); //locate the ingredient section
  137.        
  138.         if (temp == null)
  139.                 throw new RecipeException("Unable to find Ingredients");
  140.        
  141.         Pattern narrowsection = Pattern.compile("<ul><li>.*?</li></ul>", Pattern.DOTALL); //locate the actual ingredients
  142.         Matcher narrow = narrowsection.matcher(temp);
  143.         narrow.find();
  144.         temp = narrow.group();
  145.        
  146.         String[] ingredientArray = temp.split("<.+?>"); //splits the ingredients into their components
  147.        
  148.         List<String> stringList = new ArrayList<String>(); //use a List to remove all null/empty strings
  149.  
  150.         for(String s : ingredientArray)
  151.         {
  152.            if(s.length() > 0)
  153.            {
  154.               stringList.add(s);
  155.            }
  156.         }
  157.  
  158.         ingredientArray = stringList.toArray(new String[stringList.size()]); //put back into the string array
  159.  
  160.        
  161.         for (int i = 0; i<ingredientArray.length; i++)
  162.         {
  163.             ingredientArray[i] = ingredientArray[i].trim(); //remove whitespace
  164.         }
  165.        
  166.         return ingredientArray;
  167.     }
  168.  
  169.     private String[] findMethod(Scanner inputFile) throws RecipeException
  170.     {
  171.         Pattern broadsection = Pattern.compile("ol id=\"method\">.*?</ol>", Pattern.DOTALL); //compiling out here to allow for multi line expressions
  172.         String temp = inputFile.findWithinHorizon(broadsection, 0); //locate the method section
  173.        
  174.         if (temp == null)
  175.                 throw new RecipeException("Unable to find Method");
  176.        
  177.         Pattern narrowsection = Pattern.compile("<li><p>.*?</p></li>\\s", Pattern.DOTALL); //locate the actual method
  178.         Matcher narrow = narrowsection.matcher(temp);
  179.         narrow.find();
  180.         temp = narrow.group();
  181.        
  182.         String[] methodArray = temp.split("<.+?>"); //splits the method into its steps
  183.        
  184.         List<String> stringList = new ArrayList<String>(); //use a List to remove all null/empty strings
  185.  
  186.         for(String s : methodArray)
  187.         {
  188.            if(s.length() > 0)
  189.            {
  190.               stringList.add(s);
  191.            }
  192.         }
  193.  
  194.         methodArray = stringList.toArray(new String[stringList.size()]); //put back into the string array
  195.  
  196.        
  197.         for (int i = 0; i<methodArray.length; i++)
  198.         {
  199.             methodArray[i] = methodArray[i].trim(); //remove whitespace
  200.         }
  201.        
  202.         return methodArray;
  203.     }
Add Comment
Please, Sign In to add comment