Guest User

Untitled

a guest
Nov 23rd, 2025
31
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 7.76 KB | None | 0 0
  1. <?php
  2.  
  3.  
// Try to find all mentions of time in the literary quotes.

// We first try to find simple formats with digits (like '04.23 P.M.' and '1am') using a regular expression.
// Instead of trying to catch everything else with more complicated regular expressions (and getting a lot of false positives),
// a function then tries several different time formats one after another,
// longest ('twenty-three minutes past four') first and then shorter ones (like 'At five').
// The first result found is saved in a CSV file.
  11.  
  12. $file = fopen("litclock_annotated_1.csv","w");
  13.  
  14. $row = 1;
  15. if (($handle = fopen("litclock.csv", "r")) !== FALSE) {
  16.     while (($data = fgetcsv($handle, 1000, ";")) !== FALSE) {
  17.         $num = count($data);
  18.         $row++;
  19.         $time = $data[0];
  20.         $time = new DateTime($time);
  21.         $quote = $data[1];
  22.         $quote = trim(preg_replace('/\s+/', ' ', $quote));
  23.         $title = $data[2];
  24.         $author = $data[3];
  25.  
  26.         // first try simple digital mentions of time in the quote
  27.         $result = findDigitalMentionsOfTime($quote, $time);
  28.         if ( $result !== FALSE ) {
  29.  
  30.             list($foundstring, $foundposition) = $result;
  31.             // echo $time->format('H:i') . "\t" . $foundposition . "\t" . $foundstring . "\t\t" . $quote . "\n";
  32.             fputcsv($file, array($time->format('H:i'), $foundposition, (string)$foundstring, $quote, $title, $author),'|');
  33.  
  34.         } else {
  35.  
  36.             $result = findTimeStrings($quote, $time);
  37.  
  38.             if ( $result !== FALSE ) {
  39.             // if that didn't get us anywhere, try strings with numerals
  40.  
  41.                 list($foundstring, $foundposition) = $result;
  42.                 // echo $time->format('H:i') . "\t" . $foundposition . "\t" . $foundstring . "\t\t" . $quote . "\n";
  43.                 fputcsv($file, array($time->format('H:i'), $foundposition, (string)$foundstring, $quote, $title, $author),'|');
  44.  
  45.             } else {
  46.  
  47.                 // echo $time->format('H:i') . "\tNO RESULT:\t\t" . $quote . "\n";
  48.                 fputcsv($file, array($time->format('H:i'), "", "", $quote, $title, $author),'|');
  49.  
  50.             }
  51.  
  52.         }
  53.  
  54.     }
  55.     fclose($handle);
  56. }
  57.  
  58. fclose($file);
  59.  
  60.  
  61.  
  62. function findDigitalMentionsOfTime($quote, $time) {
  63.  
  64.     // [0-9]{1,4}[:\.]?\d*(\s?[ap\.M]{2,})?h? -> this worked well, but also gave too many false negatives and false positives.
  65.     // To prevent that, we'll use the same regex but with the time we know should be in the quote.
  66.  
  67.     $preg_string = '/0?';
  68.  
  69.     if ( $time->format('A') == 'PM' ) {
  70.         $preg_string .= '(' . $time->format('g') . '|' . $time->format('G') . ')';
  71.     } else {
  72.         $preg_string .= $time->format('g');
  73.     }
  74.  
  75.     $preg_string .= '[:\.]?';
  76.  
  77.     // we need to specify the minutes too, otherwise "From 1am to 1.16am" will get us '1am' even if the quote is there for '1:16'
  78.     $preg_string .= $time->format('i');
  79.     $preg_string .= '(\s?[ap\.M]{2,})?h?/i';
  80.  
  81.     preg_match($preg_string, $quote, $found, PREG_OFFSET_CAPTURE);
  82.    
  83.     if ( count($found) > 0 ) {
  84.         $foundstring = $found[0][0];
  85.         $foundposition = $found[0][1];
  86.         return array($foundstring, $foundposition);
  87.     } else {
  88.         return FALSE;
  89.     }
  90.  
  91. }
  92.  
  93.  
  94. function findTimeStrings($quote, $time) {
  95.  
  96.  
  97.     $preg_string = '/0?';
  98.  
  99.     if ( $time->format('A') == 'PM' ) {
  100.         $preg_string .= '(' . $time->format('g') . '|' . $time->format('G') . ')';
  101.     } else {
  102.         $preg_string .= $time->format('g');
  103.     }
  104.  
  105.     $minutes = $time->format('i');
  106.  
  107.     $timestrings = formulateTimeStrings($time);
  108.  
  109.  
  110.     foreach ($timestrings as $timestring)
  111.     {
  112.         $foundposition=stripos($quote, $timestring);
  113.         if ( $foundposition !== FALSE ) {
  114.             // when a time is found
  115.             return array($timestring, $foundposition);
  116.             break;
  117.         }
  118.     }
  119.  
  120.     return FALSE;
  121. }
  122.  
  123.  
  124. function formulateTimeStrings($time) {
  125.  
  126.     $timestrings = array();
  127.  
  128.     $numeral = array('', 'one','two','three','four','five','six','seven','eight','nine','ten','eleven','twelve','thirteen','fourteen','fifteen','sixteen','seventeen','eighteen','nineteen','twenty','twenty-one','twenty-two','twenty-three','twenty-four','twenty-five','twenty-six','twenty-seven','twenty-eight','twenty-nine','thirty','thirty-one','thirty-two','thirty-three','thirty-four','thirty-five','thirty-six','thirty-seven','thirty-eight','thirty-nine','forty','forty-one','forty-two','forty-three','forty-four','forty-five','forty-six','forty-seven','forty-eight','forty-nine','fifty','fifty-one','fifty-two','fifty-three','fifty-four','fifty-five','fifty-six','fifty-seven','fifty-eight','fifty-nine');
  129.  
  130.     $minutes = (int)$time->format('i'); // minutes without leading zeroes
  131.     $minutesNumerals = $numeral[$minutes];
  132.  
  133.  
  134.     /*
  135.     g   12-hour format of an hour without leading zeros 1 through 12
  136.     G   24-hour format of an hour without leading zeros 0 through 23
  137.     i   Minutes with leading zeros  00 to 59
  138.     */
  139.  
  140.     $timestrings[] = $numeral[$time->format('g')] . "-" . $numeral[(int)$time->format('i')]; // 'Five-thirty'
  141.     $timestrings[] = $numeral[$time->format('g')] . " " . $numeral[(int)$time->format('i')]; // 'Five thirty'
  142.  
  143.     if ( $time->format('G') == 0 ) {
  144.         if ( $time->format('i') == '00' ) {
  145.             $timestrings[] = "midnight";
  146.         } else {
  147.             $timestrings[] = $numeral[(int)$time->format('i')] . " past midnight";
  148.             $timestrings[] = (int)$time->format('i') . " past midnight";
  149.             $timestrings[] = $numeral[(int)$time->format('i')] . " minutes past midnight";
  150.             $timestrings[] = (int)$time->format('i') . " minutes past midnight";
  151.         }
  152.     }
  153.  
  154.     if ( $time->format('G') == 12 ) {
  155.         if ( $time->format('i') == '00' ) {
  156.             $timestrings[] = "noon";
  157.         } else {
  158.             $timestrings[] = $numeral[(int)$time->format('i')] . " past noon";
  159.             $timestrings[] = (int)$time->format('i') . " past noon";
  160.         }
  161.     }
  162.  
  163.     if ( $time->format('i') == '00' ) {
  164.         $timestrings[] = $numeral[$time->format('g')] . " o'clock";
  165.         $timestrings[] = $time->format('g') . " o'clock";
  166.         $timestrings[] = "At " . $numeral[$time->format('g')];
  167.         $timestrings[] = $numeral[$time->format('g')];
  168.         $timestrings[] = $numeral[$time->format('G')];
  169.     } else {   
  170.         $timestrings[] = $numeral[(int)$time->format('i')] . " past " . $numeral[$time->format('g')];
  171.         $timestrings[] = (int)$time->format('i') . " past " . $numeral[$time->format('g')];
  172.         $timestrings[] = $numeral[(int)$time->format('i')] . " minutes past " . $numeral[$time->format('g')];
  173.         $timestrings[] = (int)$time->format('i') . " minutes past " . $numeral[$time->format('g')];
  174.     }
  175.  
  176.     if ($time->format('i') == 15) {
  177.         $timestrings[] = "quarter past " . $numeral[$time->format('g')];
  178.     }
  179.  
  180.     if ( $time->format('i') == 30 ) {
  181.         $timestrings[] = "half past " . $numeral[$time->format('g')];
  182.         $timestrings[] = "half-past " . $numeral[$time->format('g')];
  183.         $timestrings[] = "half past " . $time->format('g');    
  184.     }
  185.  
  186.     // add one hour for counting towards the next hour
  187.     date_add($time, date_interval_create_from_date_string('1 hours'));
  188.  
  189.     if ( $time->format('i') > 30 ) {
  190.         $timestrings[] = $numeral[60-(int)$time->format('i')] . " to " . $numeral[$time->format('g')];
  191.         $timestrings[] = (60-(int)$time->format('i')) . " to " . $numeral[$time->format('g')];
  192.         $timestrings[] = $numeral[60-(int)$time->format('i')] . " minutes to " . $numeral[$time->format('g')];
  193.         $timestrings[] = (60-(int)$time->format('i')) . " minutes to " . $numeral[$time->format('g')];
  194.         $timestrings[] = $numeral[60-(int)$time->format('i')] . " before " . $numeral[$time->format('g')];
  195.         $timestrings[] = (60-(int)$time->format('i')) . " before " . $numeral[$time->format('g')];
  196.         $timestrings[] = $numeral[60-(int)$time->format('i')] . " minutes before " . $numeral[$time->format('g')];
  197.         $timestrings[] = (60-(int)$time->format('i')) . " minutes before " . $numeral[$time->format('g')];
  198.     }
  199.  
  200.     if ($time->format('i') == '45') {
  201.         $timestrings[] = "quarter to " . $numeral[$time->format('g')];
  202.     }
  203.  
  204.     // reset the time to prevent confusion
  205.     date_sub($time, date_interval_create_from_date_string('1 hours'));
  206.  
  207.     usort($timestrings,'sortByLength');
  208.  
  209.     return $timestrings;
  210.  
  211. }
  212.  
  213. function sortByLength($a,$b){
  214.     return strlen($b)-strlen($a);
  215. }
  216.  
  217.  
  218.  
  219.  
  220.  
  221.  
  222. ?>
Advertisement
Add Comment
Please, Sign In to add comment