Advertisement
Guest User

Untitled

a guest
May 12th, 2017
562
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Latex 9.18 KB | None | 0 0
  1. \documentclass{beamer}
  2. \usepackage{beamerthemesplit}
  3. \usepackage{times}
  4. \usepackage[english]{babel}
  5. \usepackage[latin1]{inputenc}
  6. \usepackage{times}
  7. \usepackage[T1]{fontenc}
  8. \usepackage{graphicx}
  9. \input{pythonlisting}
  10.  
  11. \mode<presentation>{
  12.    \usetheme{Warsaw}
  13.    \setbeamercovered{invisible}
  14. }
  15.  
  16. \mode<handout>{
  17.  \usepackage{pgfpages}
  18.  \pgfpagesuselayout{4 on 1}[a4paper,border shrink=5mm,landscape]
  19.  \setbeamercolor{background canvas}{bg=black!5}
  20. }
  21.  
  22. \title[Regular Expressions]{UCT Algorithm Circle: Intermediate Class: Regular Expressions}
  23. \author{Yaseen Hamdulay}
  24. \institute{Wynberg Boys High School}
  25. \date{4 March 2010}
  26.  
  27. \pgfdeclareimage[height=20pt]{university-logo}{images/uct}
  28. \pgfdeclareimage[width=200px]{regular-expresions-xkcd}{images/regular-expressions-xkcd}
  29. \logo{\pgfuseimage{university-logo}}
  30.  
  31. \begin{document}
  32.  
  33. \begin{frame}
  34. \titlepage
  35. \end{frame}
  36.  
  37. \section{Introduction to Regular Expressions}
  38.  
  39. \begin{frame}{What are Regular Expressions}
  40.    \begin{itemize}
  41.        \item Regular Expressions are a powerful way to search for patterns in a string
  42.        \item Regular Expressions are described in their own language (Yup, new language coming through)
  43.        \pause
  44.        \item It's best to avoid them if it's possible to have the same result with the standard string functions
  45.        \begin{itemize}
  46.            \item Regular Expressions are hard to debug and understand in retrospect
  47.        \end{itemize}
  48.    \end{itemize}
  49. \end{frame}
  50.  
  51. \begin{frame}{Why use Regular Expressions?}
  52.    \begin{itemize}
  53.        \item Since Regular Expressions are so complicated why would we want to use them?
  54.        \pause
  55.        \item Well for one...
  56.        \pause
  57.        \item You could save someone from a serial-killer!
  58.    \end{itemize}
  59. \end{frame}
  60.  
  61. \begin{frame}
  62.        \begin{center}
  63.            \pgfuseimage{regular-expresions-xkcd}
  64.        \end{center}
  65. \end{frame}
  66.  
  67. \begin{frame}{Why use Regular Expressions?}
  68.    If somehow saving someone's life isn't a good enough reason for you\dots
  69.    \begin{itemize}
  70.        \item Regular expressions can match very complicated patterns in strings
  71.        \pause
  72.        \item They can do things that just aren't possible with standard string functions
  73.        \pause
  74.        \item Regular expressions can check that data is in a valid format
  75.        \pause
  76.        \item Regular expressions can separate a string into groups of useful information
  77.        \begin{itemize}
  78.            \item For example, separating a URI into its protocol, domain, directory, etc.
  79.        \end{itemize}
  80.    \end{itemize}
  81. \end{frame}
  82.  
  83. \section{Regular Expressions syntax}
  84. \begin{frame}{Simple Pattern Matching}
  85.    \begin{itemize}
  86.        \item Plain text characters in a pattern match themselves.
  87.        \pause
  88.        \item For example using a regular expression of 'name'
  89.        \item Would match \texttt{'My \textbf{name} is Yaseen'}
  90.        \pause
  91.        \item We have a problem if we have a string like this \texttt{'My sur\textbf{name} is Yaseen'}
  92.        \pause
  93.        \item This is where regular expressions become very useful
  94.        \item We can specify in exactly what context we want our match to be in
  95.    \end{itemize}
  96. \end{frame}
  97.  
  98. \begin{frame}
  99.    \begin{itemize}
  100.        \item In this case we want name to be a word on its own, not just some random part of a word
  101.        \pause
  102.        \item To do this we use a special symbol \texttt{\textbackslash{}b}, this matches a word boundary
  103.        \pause
  104.        \item We can now update our Regular Expression to \texttt{\textbackslash{}bname\textbackslash{}b}
  105.        \item Our pattern will now match name but not surname or anything else
  106.    \end{itemize}
  107. \end{frame}
  108.  
  109. \begin{frame}{Character Classes}
  110.    \begin{itemize}
  111.        \item Sometimes we have a pattern that we want to match but certain parts of it are unknown, but we know how it's supposed to look
  112.        \pause
  113.        \item To match the unknown characters we use character classes. Character classes match a range of characters that we specify
  114.        \item To match the letters of the alphabet we use the class \texttt{[a-zA-Z]}
  115.        \item The \texttt{[} and \texttt{]} make this a character class
  116.        \pause
  117.        \item Only \textbf{one} character in the character class gets matched
  118.        \pause
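  119.        \item Most special characters don't need to be escaped when inside a character class (the exceptions are \texttt{]}, \texttt{\textbackslash{}}, \texttt{\textasciicircum{}} and \texttt{-})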
  120.    \end{itemize}
  121. \end{frame}
  122.  
  123. \begin{frame}{Special Characters}
  124.    \begin{itemize}
  125.        \item Examples of special characters \texttt{. \textasciicircum{} \$ * + ? \{ \} [ ] \textbackslash{} | ( ) }
  126.        \item Most characters simply match themselves, special characters do special things
  127.        \item We have seen an example of this with the \texttt{\textbackslash{}b} symbol
  128.        \pause
  129.        \item If we want to match the character literal itself we will have to escape it first
  130.        \pause
  131.        \item Say we wanted to match \texttt{\textbackslash{}her} in \texttt{his\textbackslash{}her}
  132.        \pause
  133.        \item Our pattern would have to be \texttt{'\textbackslash{}\textbackslash{}her'}
  134.    \end{itemize}
  135. \end{frame}
  136. \begin{frame}{Summary of Special and Meta Characters}
  137.    \begin{tabular}{l l}
  138.        \texttt{.} & Match any character except a newline\\
  139.        \texttt{\textasciicircum{}} & Match the beginning of a string\\
  140.        \texttt{\$} & Match the end of a string\\
  141.        \texttt{\textbackslash{}b} & Match a word boundary\\
  142.        \texttt{\textbackslash{}B} & Match anything besides a word boundary\\
  143.        \texttt{\textbackslash{}d} & Match any decimal digit\\
  144.        \texttt{\textbackslash{}D} & Match anything besides a decimal digit\\
  145.        \texttt{\textbackslash{}s} & Match any whitespace\\
  146.        \texttt{\textbackslash{}S} & Match anything besides whitespace\\
  147.        \texttt{\textbackslash{}w} & Match anything in the class [a-zA-Z0-9\_]\\
  148.        \texttt{\textbackslash{}W} & Match anything not in the class [a-zA-Z0-9\_]\\
  149.    \end{tabular}
  150.    \begin{itemize}
  151.    \item NOTE: Most special characters match themselves when in a character class
  152.    \end{itemize}
  153. \end{frame}
  154.  
  155. \begin{frame}{Repetition}
  156.    \begin{itemize}
  157.        \item Sometimes we want to allow certain parts of our pattern to be repeated
  158.        \pause
  159.        \item We want to be able to match \texttt{This is really awesome} and \texttt{This is really really really awesome} with one pattern
  160.        \pause
  161.        \item To do this we use the repetition metacharacters
  162.        \begin{tabular}{l  l}
  163.        \texttt{*} & match the previous pattern zero or more times\\
  164.        \texttt{+} & match the previous pattern one or more times\\
  165.        \texttt{?} & match the previous pattern zero or one times\\
  166.        \texttt{\{a,b\}} & match the previous pattern between a and b times\\
  167.        \end{tabular}
  168.        \pause
  169.        \item Now to match the previous example we would use a pattern of \texttt{(really\textbackslash{}s)+}
  170.        \item The brackets group the characters so that the repetition metacharacter applies to the entire group and not just the last character
  171.    \end{itemize}
  172. \end{frame}
  173.  
  174. \section{Using Regular Expressions}
  175. \begin{frame}{Regular Expressions in Python}
  176.    \begin{itemize}
  177.        \item Regular Expression library is in module \texttt{re}
  178.        \item In order to use a pattern to match a string we first have to compile it, we use \texttt{re.compile(pattern)} for this. This gives us a pattern object
  179.        \item This pattern object contains all the methods we need to search and replace things in strings
  180.        \item \texttt{search()} and \texttt{match()} return \texttt{None} when no match was made; \texttt{findall()} returns an empty list
  181.    \end{itemize}
  182. \end{frame}
  183.  
  184. \begin{frame}[fragile]{Pattern Methods}
  185. \begin{python}
  186. import re
  187. p = re.compile(r'\b[a-z0-9.]+@[a-z0-9.]+\.[a-z]{2,4}\b')
  188. #Searches through the string for a match
  189. #and returns a match object
  190. p.search('username@email.com')  
  191. #Finds all substrings that match the pattern
  192. #and returns it in a list of strings
  193. p.findall('My email address is: username@email.com'+
  194.          ' or whateverelse@example.com')
  195. #Same as findall except it returns an iterator
  196. #that gives us match objects
  197. p.finditer('My email address is: '+
  198.        'username@email.com or whateverelse@example.com')
  199. \end{python}
  200. \end{frame}
  201.  
  202. \begin{frame}{Match methods}
  203.    \begin{tabular}{l l}
  204.    \texttt{group()} & Return the string matched by the Regular Expression\\
  205.    \texttt{start()} & Return the beginning index of the match\\
  206.    \texttt{end()} & Return the end index of the match\\
  207.    \texttt{span()} & Return a tuple (start(), end())\\
  208.    \end{tabular}
  209. \end{frame}
  210.  
  211. \begin{frame}[fragile]{Regular Expressions in Java}
  212. \begin{verbatim}
  213. import java.util.regex.Pattern;
  214. import java.util.regex.Matcher;
  215.  
  216. public class Regex {
  217.    public static void main(String [] args) {
  218.        Pattern p = Pattern.compile("\\b[a-z0-9.]+@[a-z0-9.]+\\.[a-z]{2,4}\\b");
  219.        Matcher m = p.matcher("some@email.com");
  220.        while(m.find()) {
  221.            System.out.println("Found match: "+m.group());
  222.            System.out.println("Begins at: "+m.start());
  223.            System.out.println("Ends at: "+m.end());
  224.        }
  225.    }
  226. }
  227. \end{verbatim}
  228. \end{frame}
  229. \end{document}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement