daily pastebin goal
9%
SHARE
TWEET

Pattern parser update

a guest Nov 10th, 2018 6 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <ctype.h>
  4. #include <stdint.h>
  5. #include <stdbool.h>
  6. #include <stddef.h>
  7.  
  8. #define ELEMENTS(x) (sizeof x/sizeof *x)
  9.  
  10. #define TENS(x)  (((char) x - '0') * 10)
  11. #define UNITS(x) ((char) x - '0')
  12.  
  13. #define ID_FOUND_MARK 255
  14.  
  15. /* The pattern string tell us what elements on the array units should we mark.
  16.  *
  17.  * The comma separator are used to mark single units, the dash separator are used
  18.  * for 'ranges' with the former number being the lower bound of the range and
  19.  * the latter number being the upper bound of the range.
  20.  *
  21.  * Numbers found on the pattern string will be marked with the 255 number.
  22.  *
  23.  * Assumptions made:
  24.  * - The former number on the range is smaller then the former number.
  25.  * - There is no spaces on the string.
  26.  * - There is no ranges with double dash, e.g. 1-5-7.
  27.  * - All numbers are 1 or 2 digits.
  28.  */
  29. void parse_units(const char *pattern, uint8_t *units, const size_t units_size)
  30. {
  31.     const size_t pattern_len = strlen(pattern);
  32.  
  33.     char *parser_cursor = NULL;
  34.     size_t parser_cursor_pos = 0;
  35.     size_t parser_step = 1;
  36.  
  37.     printf("\r\nPattern %s\r\n\r\n", pattern);
  38.  
  39.     // Begin the parsing
  40.     for (parser_cursor = pattern; parser_cursor_pos < pattern_len; parser_cursor += parser_step) {
  41.  
  42.         bool is_comma_separator = false;
  43.         bool is_dash_separator = false;
  44.         bool is_last_number = false;
  45.  
  46.         // Does the parser_cursor points to a digit character?
  47.         if (isdigit(*parser_cursor)) {
  48.             char *next_comma = NULL;
  49.             char *next_dash = NULL;
  50.  
  51.             // Found the closest separator (comma, dash o NULL):
  52.             next_comma = strchr(parser_cursor, ',');
  53.             next_dash = strchr(parser_cursor, '-');
  54.  
  55.             if ((NULL != next_comma) && (NULL != next_dash)) {
  56.                 if (next_comma < next_dash) {
  57.                     is_comma_separator = true;
  58.                 } else {
  59.                     is_dash_separator = true;
  60.                 }
  61.             } else if ((NULL == next_comma) && (NULL != next_dash)) {
  62.                 is_dash_separator = true;
  63.             } else if ((NULL != next_comma) && (NULL == next_dash)) {
  64.                 is_comma_separator = true;
  65.             } else {
  66.                 // No more comas nor dashes, so we reached
  67.                 // the last number on the pattern
  68.                 is_last_number = true;
  69.             }
  70.  
  71.             if (is_comma_separator) {
  72.                 uint8_t num = 0;
  73.                 ptrdiff_t num_digits = 0;
  74.  
  75.                 num_digits = next_comma - parser_cursor;
  76.  
  77.                 if (num_digits == 1) {
  78.                     num = UNITS(*parser_cursor);
  79.                 } else if (num_digits == 2) {
  80.                     num = TENS(*parser_cursor) + UNITS(*(parser_cursor + 1));
  81.                 }
  82.  
  83.                 printf("number found: %d\r\n", num);
  84.  
  85.                 // Mark the number found
  86.                 if (num < units_size) {
  87.                     units[num] = ID_FOUND_MARK;
  88.                 }
  89.                 // this will place the cursor right after the comma
  90.                 parser_step = (size_t) num_digits + 1;
  91.  
  92.             } else if (is_dash_separator) {
  93.                 // If a dash is being found then we need to parse the first
  94.                 // and second numbers.
  95.                 printf("Range found\r\n");
  96.  
  97.                 uint8_t first_number = 0;
  98.                 ptrdiff_t first_num_digits = 0;
  99.  
  100.                 ptrdiff_t second_num_digits = 0;
  101.                 uint8_t second_num = 0;
  102.                 char *second_comma = NULL;
  103.                 char *next_separator = NULL;
  104.  
  105.                 // first number
  106.                 first_num_digits = next_dash - parser_cursor;
  107.  
  108.                 if (first_num_digits == 1) {
  109.                     first_number = UNITS(*parser_cursor);
  110.                 } else if (first_num_digits == 2) {
  111.                     first_number = TENS(*parser_cursor) + UNITS(*(parser_cursor + 1));
  112.                 }
  113.  
  114.                 printf("\tfirst_number: %d\r\n", first_number);
  115.  
  116.                 // Move the parser_cursor after the dash separator
  117.                 parser_cursor = next_dash + 1;
  118.                 parser_cursor_pos += (size_t) first_num_digits + 1;
  119.  
  120.                 // ASSUMPTION:
  121.                 // When we found a dash separator the next separator can only be:
  122.                 // - comma
  123.                 // - NULL (end of the pattern string)
  124.                 second_comma = strchr(parser_cursor, ',');
  125.                 next_separator = pattern + strlen(pattern);
  126.  
  127.                 // If a comma was found then next_separator will point to it.
  128.                 if (second_comma != NULL) {
  129.                     next_separator = second_comma;
  130.                 }
  131.  
  132.                 second_num_digits = next_separator - parser_cursor;
  133.  
  134.                 if (second_num_digits == 1) {
  135.                     second_num = UNITS(*parser_cursor);
  136.                 } else if (second_num_digits == 2) {
  137.                     second_num = TENS(*parser_cursor) + UNITS(*(parser_cursor + 1));
  138.                 }
  139.  
  140.                 printf("\tsecond_number: %d\r\n", second_num);
  141.  
  142.                 // Mark the numbers found
  143.                 // TODO:
  144.                 // If the second_num is bigger than units_size should we
  145.                 // truncate marking the numbers up to the last element of the
  146.                 // units array?
  147.                 if (first_number < units_size || second_num < units_size) {
  148.                     for (size_t idx = first_number; idx <= second_num; idx++) {
  149.                         units[idx] = ID_FOUND_MARK;
  150.                     }
  151.                 }
  152.  
  153.                 // this will place the cursor right after the comma
  154.                 parser_step = (size_t) second_num_digits + 1;
  155.             } else if (is_last_number) {
  156.                 size_t num_size = 0;
  157.                 uint8_t num = 0;
  158.  
  159.                 // Get the amount of the remaining digits
  160.                 num_size = strlen(parser_cursor);
  161.  
  162.                 if (num_size == 1) {
  163.                     num = UNITS(*parser_cursor);
  164.                 } else if (num_size == 2) {
  165.                     num = TENS(*parser_cursor) + UNITS(*(parser_cursor + 1));
  166.                 }
  167.  
  168.                 printf("last number: %d\r\n", num);
  169.  
  170.                 // Mark the number found
  171.                 if (num < units_size) {
  172.                     units[num] = ID_FOUND_MARK;
  173.                 }
  174.                 parser_step = num_size;
  175.             }
  176.  
  177.         } else {
  178.             // The parser cursor was not pointing to a number character so we
  179.             // just move the pointer to the next character.
  180.             parser_step = 1;
  181.         }
  182.  
  183.         // Keep the parser_cursor_pos up to date
  184.         parser_cursor_pos += parser_step;
  185.     }
  186.  
  187.     printf("parsing done\r\n");
  188. }
  189.  
  190. int main(void)
  191. {
  192.     uint8_t objects[40] = {0};
  193.     parse_units("1-3,4,5,6,8,9,10,12-14", objects, ELEMENTS(objects));
  194.     memset(objects, 0, ELEMENTS(objects));
  195.     parse_units("4-10,26", objects, ELEMENTS(objects));
  196.  
  197.     return 0;
  198. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top