Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
def has_n_digits(dat, n):
    """Return the insurer IDs that contain a run of exactly ``n`` digits.

    A run qualifies only when it is not adjacent to any other digit, i.e. it
    is bounded on each side by a non-digit character or by the start/end of
    the string. Only the ASCII digits 0-9 are considered. The input order
    is preserved, and each ID appears in the result at most once per
    occurrence in ``dat`` no matter how many qualifying runs it contains.

    Examples:
        >>> insurers = ["ANV 123", "QBE 0123", "Bermuda", "12345", "1234/175"]
        >>> has_n_digits(dat=insurers, n=4)
        ['QBE 0123', '1234/175']
        >>> has_n_digits(dat=insurers, n=3)
        ['ANV 123', '1234/175']
        >>> has_n_digits(dat=insurers, n=2)
        []

    Args:
        dat: Iterable of insurer ID strings.
        n: Positive integer giving the exact digit-run length to look for.

    Returns:
        A list of the entries of ``dat`` that contain at least one isolated
        run of exactly ``n`` digits.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))

    # The lookbehind/lookahead assert that no digit touches the run on either
    # side. Searching the whole string this way checks every occurrence in
    # place, so a repeated substring (e.g. the second "234" in "1234 234")
    # is judged by its own neighbours rather than by those of the first one.
    isolated_run = re.compile(r"(?<![0-9])[0-9]{%d}(?![0-9])" % n)

    return [d for d in dat if isolated_run.search(d)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement