Advertisement
noam76

ExtractData_Site_1.php

May 28th, 2023
751
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.58 KB | None | 0 0
  <?php
  /**
   * Downloads one listing page, collects every <td> element that carries an
   * id attribute, and writes the result to output.xlsx:
   *   - column A: the id with its first four characters removed
   *   - column B: the text of the cell
   * The collected id suffixes are also printed with print_r().
   */

  // Anchor the autoloader to this file's directory so the script does not
  // depend on the working directory it is launched from.
  require __DIR__ . '/../vendor/autoload.php';

  use GuzzleHttp\Client;
  use PHPHtmlParser\Dom;
  use PhpOffice\PhpSpreadsheet\IOFactory;
  use PhpOffice\PhpSpreadsheet\Spreadsheet;

  // Fetch the webpage content.
  $client = new Client();
  $response = $client->get('https://www.duurzaamloket.nl/SolKey_X014/index.php?SchemeNo=0&Offset=1&SearchText=&PageCnt=500');
  $content = (string) $response->getBody();

  // Parse the markup. loadStr() always treats its argument as an HTML string,
  // whereas load() tries to guess between a file path, a URL and markup.
  $dom = new Dom();
  $dom->loadStr($content);

  // Find all <td> elements with an id attribute.
  $tdElements = $dom->find('td[id]');

  // Create a new spreadsheet and write the header row.
  $spreadsheet = new Spreadsheet();
  $sheet = $spreadsheet->getActiveSheet();
  $sheet->setCellValue('A1', 'ID');
  $sheet->setCellValue('B1', 'Data');

  // Data rows start directly below the header.
  $rowIndex = 2;
  $idStrings = [];
  foreach ($tdElements as $td) {
      $id = $td->getAttribute('id');

      // Defensive guard: the selector should already guarantee an id.
      if ($id === null) {
          continue;
      }

      // Drop the first four characters of the id.
      // NOTE(review): assumes every id has a 4-character prefix before the
      // number - confirm against the page markup.
      $idNumber = substr($id, 4);
      $idStrings[] = $idNumber;

      $sheet->setCellValue("A{$rowIndex}", $idNumber);
      $sheet->setCellValue("B{$rowIndex}", $td->text);
      $rowIndex++;
  }

  // Save the spreadsheet (path is relative to the current working directory).
  $writer = IOFactory::createWriter($spreadsheet, 'Xlsx');
  $writer->save('output.xlsx');

  // Print the collected id suffixes.
  print_r($idStrings);

  // The closing tag is kept on purpose: non-PHP text follows in this file.
  ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement