Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Option Explicit On
- Option Strict On
- Imports System.Drawing.Text
- Public Class Form1
- Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
- Show()
- 'Call this on the start to initialize the LUTs for the OCR
- BuildOCRLUTs()
- End Sub
- Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
- Dim Plate As New Bitmap(PicPlate.Image)
- Dim Result As String = Scan(Plate)
- 'The image now have the debugging rectangles with the regions
- PicPlate.Image = Plate
- 'Show result
- Label1.Text = Result
- End Sub
- 'With the blue filter enabled, only regions with a certain % of blue background will be accepted
- Const UseBlueFilter As Boolean = True
- 'This is the minimum color "luminosity" to make a pixel be threated as a character pixel
- Const MinColorMedian As Integer = 168
- 'This is the minimum color "luminosity" to make a pixel be threated as a character pixel on the OCR scan method
- 'Making it lower than the MinColorMedian may improve the results a bit on some cases
- Const MinColorMedianOCR As Integer = 152
- 'This is the list of characters that the OCR will try to match against (A-Z)
- Const CharList As String = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- 'Number of "tiles" on the X-axis used on the OCR character matching code
- Const SamplesX As Single = 24
- 'Number of "tiles" on the Y-axis used on the OCR character matching code
- Const SamplesY As Single = 48
- 'LookUp Tables, see method below
- Private SampleChars As Bitmap()
- Private SampleBounds As Rectangle()
- Private Sub BuildOCRLUTs()
- 'Here, we build LookUp tables that will be used as base characters when compared to characters used on the plate
- 'Those tables are used simple for speed reasons, since drawing those characters on each verification would be slow and unnecessary
- 'We are using the Arial font bold at size 40. You need to use a font similar to the one on the object that should be OCRed for good results
- Dim TestFont As New Font("Arial", 40, FontStyle.Bold)
- 'Create the array of Bitmap that will hold the image of each character
- SampleChars = New Bitmap(CharList.Length - 1) {}
- 'This creates the array that will round the boundary rectangle of each character
- SampleBounds = New Rectangle(CharList.Length - 1) {}
- 'Now loop through all characters in the characters list, and draw it on the SampleChars list one by one
- For i As Integer = 0 To CharList.Length - 1
- 'Each sample have a fixed size of 64x64, that is big enough to hold our font at size 40
- 'If you increase the font size, using bigger values here may also be necessary
- SampleChars(i) = New Bitmap(64, 64)
- 'Create our graphics that will be used to draw the character
- Using g As Graphics = Graphics.FromImage(SampleChars(i))
- 'Set the charcter background to the black color
- g.Clear(Color.Black)
- 'Disable all kinds of aliasing, they would just confuse the OCR
- g.TextRenderingHint = TextRenderingHint.SingleBitPerPixel
- 'Draw the character i with the white color at position [0, 0] of our Bitmap
- g.DrawString(CharList.Substring(i, 1), TestFont, Brushes.White, PointF.Empty)
- End Using
- 'Calculate the boundaries of the character, and assign it to the array
- Dim Bounds As Rectangle = GetRectangles(SampleChars(i))(0)
- SampleBounds(i) = Bounds
- Next
- End Sub
- Private Function Scan(Img As Bitmap) As String
- 'This function is the main "OCR" function, its responsible to convert the image in text
- 'This variable will hold the total text captured
- Dim Output As String = String.Empty
- Dim ChrFound As Boolean
- Dim ChrFoundX As Integer
- For Each Chr As Rectangle In GetRectangles(Img, UseBlueFilter)
- 'Check for a weird ratio of the white "font" pixels in relation to background pixels
- Dim WhitePixelsRatio As Single = CountWhitePixelsRatio(Img, Chr)
- 'This values (0.25) was choosen by trial and error
- 'The captured region needs to have at least 25% of white to be considered a "character"
- If WhitePixelsRatio < 0.25F Then Continue For
- 'Another check, if we processed at least one character already, and this one is too far away
- 'from the last character we found, then its probably unrelated, so we stop processing here
- If ChrFound AndAlso Chr.X - ChrFoundX > 32 Then Exit For
- Dim BestRatio As Single = 0
- Dim BestScore As Integer = 0
- Dim CharacterIdx As Integer = 0
- 'Compares the captured region with all characters on the list
- For i As Integer = 0 To CharList.Length - 1
- 'This variable holds the total "score" of the current character
- 'About the scoring system:
- 'The scoring system gives more weight to pixels into certain situations
- 'It was designed mostly by trial and error, but it shows improved results over the previous method based on the matches count only
- Dim Score As Integer = 0
- Dim Matches As Integer = 0
- 'Boundaries of the current character being processed
- Dim Bounds As Rectangle = SampleBounds(i)
- 'Gets the Width/Height of each "tile" on the captured region
- 'Detailed explanation:
- '- Suppose that we captured a region of size 40x40
- '- We want to know which characters this region have, so we will be comparing it with all characters on the list
- '- After, we choose the one with the best match ratio
- '- To do this, we divide both the captured region character, and the "sample" character from the font with the same number of tiles
- '- The SamplesX variable defines how many tiles we will use on the X axis, and SamplesY how many we use on the Y axis
- '- Due to the fact that characters are usually taller than wider, makes sense to use more tiles on the Y axis
- '- Following our 40x40 example, if we use 24 tiles on the X axis, then each tile will have ~1.6 pixels of width
- '- The font with size 64x64, will have 64 / 24 = ~2.6 of width for each tile
- '- The code will then compare the first pixel on the captured region with the first pixel on the font character
- '- Remember, both have the same number of "tiles", the only thing that changes between the font character and the captured region is the size of each tile
- Dim IStepX As Single = Chr.Width / SamplesX
- Dim IStepY As Single = Chr.Height / SamplesY
- Dim SStepX As Single = Bounds.Width / SamplesX
- Dim SStepY As Single = Bounds.Height / SamplesY
- For Y As Integer = 0 To Convert.ToInt32(SamplesY) - 1 'Loop through all Y-tiles
- For X As Integer = 0 To Convert.ToInt32(SamplesX) - 1 'Loop through all X-tiles
- 'This calculates the absolute position on the image of the first pixel of the current tile
- 'IStepX, IStepY = Width/Height of the tiles on the captured region (the character on the image to be OCRed) (I = Image)
- 'SStepX, SStepY = Width/Height of the tiles on the character font (S = Sample)
- Dim IX As Integer = Chr.X + Convert.ToInt32(X * IStepX)
- Dim IY As Integer = Chr.Y + Convert.ToInt32(Y * IStepY)
- Dim SX As Integer = Bounds.X + Convert.ToInt32(X * SStepX)
- Dim SY As Integer = Bounds.Y + Convert.ToInt32(Y * SStepY)
- 'Check if the pixel is white (or near white), where Px = True
- 'Or if it is a background pixel (on the character font this is always black), where Px = False
- Dim IPx As Boolean = GetMedian(Img.GetPixel(IX, IY)) > MinColorMedianOCR
- Dim SPx As Boolean = GetMedian(SampleChars(i).GetPixel(SX, SY)) > MinColorMedianOCR
- 'If both the character and the captured region pixel is white (or almost white)
- 'Or both the character and the captured region pixel is a background color (anything other than near-white colors)
- 'Then we have a match, and we increment our match count in this case
- If IPx = SPx Then
- 'This is the count of the actual number of matches
- Matches += 1
- 'Give more weight to "white" pixels that matches than background pixels that matches
- 'Because "white" pixels are more likely to be correct than background pixels, that may be actually noise
- If IPx Then Score += 2 Else Score += 1
- Else
- 'Give a penalty if the region have (bg) but the character have white
- If SPx Then Score -= 1
- End If
- Next
- Next
- 'Check if this character in particular, have a better number of matches than the last ones we tested
- 'If so, then update our best match count, calculate the match ratio of this character on a 0 to 1 value
- 'And finally, set the index of the matched character on the CharacterIdx variable
- If Score > BestScore Then
- BestRatio = Matches / (SamplesX * SamplesY)
- BestScore = Score
- CharacterIdx = i
- End If
- Next
- 'Calculates the Aspect Ratio of the captured region and the matched font character
- 'It then calculates the difference between the two
- Dim SB As Rectangle = SampleBounds(CharacterIdx)
- Dim SampleAR As Single = Convert.ToSingle(SB.Width / SB.Height)
- Dim ChrAR As Single = Convert.ToSingle(Chr.Width / Chr.Height)
- Dim ARDifference = Math.Abs(SampleAR - ChrAR)
- 'Check if at least 68% of the compared pixels matched
- 'If our ratio is smaller than that, its very unlikely that what we matched is a character
- 'If it isn't a character, it may be noise or some junk on the image that we captured by mistake
- 'The Aspect Ratio difference must be less than 0.4, this was also choosen by trial and error and may need to be tweaked
- If BestRatio > 0.68F AndAlso ARDifference < 0.4F Then
- ChrFound = True
- ChrFoundX = Chr.X
- Output += CharList.Substring(CharacterIdx, 1)
- End If
- Next
- Return Output
- End Function
- Private Function GetRectangles(Img As Bitmap, Optional BlueFilter As Boolean = False) As Rectangle()
- 'This function is responsible for finding where each character is
- 'It is also the main source of problems - it often adds junk to the list on noisy images
- Dim Characters As New List(Of Rectangle)()
- Dim W As Integer = Img.Width - 1
- Dim H As Integer = Img.Height - 1
- Dim Visited(W, H) As Boolean
- 'Loop through the entire images in search of characters
- For X As Integer = 0 To W
- For Y As Integer = 0 To H
- 'StartPt and EndPt are the start and end points [X, Y] on the image of the captured region
- Dim StartPt As New Point(Img.Width, Img.Height)
- Dim EndPt As Point = Point.Empty
- 'This is a simple 4-way flood fill algorithm
- 'It works in the following way:
- '- Grabs the pixel [X, Y] from the queue, check if it is white or near white
- '- If TRUE => Queue neighbour pixels [X - 1, Y] (left), [X + 1, Y] (right), [X, Y - 1] (up), [X, Y + 1] (down) for verification too
- '- If FALSE => Don't do anything
- '- And always => Mark the pixel [X, Y] as visited, so we don't waste time verifying it again
- 'The queue begins with the current pixel [X, Y] being processed, and the loops keeps going until the queue gets empty
- 'The queue will be empty when all pixels are processed and there is no matching neighbour found in any pixel
- Dim Points As New Queue(Of Point)()
- Points.Enqueue(New Point(X, Y))
- While Points.Count > 0
- Dim Pt As Point = Points.Dequeue()
- If Not Visited(Pt.X, Pt.Y) Then
- Visited(Pt.X, Pt.Y) = True
- If GetMedian(Img.GetPixel(Pt.X, Pt.Y)) > MinColorMedian Then
- StartPt.X = Math.Min(StartPt.X, Pt.X)
- StartPt.Y = Math.Min(StartPt.Y, Pt.Y)
- EndPt.X = Math.Max(EndPt.X, Pt.X)
- EndPt.Y = Math.Max(EndPt.Y, Pt.Y)
- If Pt.X > 0 Then Points.Enqueue(New Point(Pt.X - 1, Pt.Y))
- If Pt.Y > 0 Then Points.Enqueue(New Point(Pt.X, Pt.Y - 1))
- If Pt.X < Img.Width - 1 Then Points.Enqueue(New Point(Pt.X + 1, Pt.Y))
- If Pt.Y < Img.Height - 1 Then Points.Enqueue(New Point(Pt.X, Pt.Y + 1))
- End If
- End If
- End While
- 'Calculate the Width/Height of the captured region from the Start and End points
- Dim Width As Integer = EndPt.X - StartPt.X
- Dim Height As Integer = EndPt.Y - StartPt.Y
- 'Detects the amount of the blue color of the captured region
- 'If its at least 20%, then continue with the verifications
- 'Else, just ignore
- If BlueFilter Then
- Dim Matches As Integer = 0
- For TY As Integer = StartPt.Y To StartPt.Y + Height
- For TX As Integer = StartPt.X To StartPt.X + Width
- Dim Col As Color = Img.GetPixel(TX, TY)
- Dim R As Integer = Col.R
- Dim G As Integer = Col.G
- Dim B As Integer = Col.B
- If Col.B > 128 AndAlso (B - ((R + G) / 2)) > 64 Then Matches += 1
- Next
- Next
- Dim MatchRatio As Single = Convert.ToSingle(Matches / (Width * Height))
- If MatchRatio < 0.2F Then Continue For
- End If
- 'Check for faulty aspect ratios (probably not a character)
- 'Again, those values (1.5 and 8) were found by trial and error, and may need tweaking
- 'Also, 6x6 is the minimum size for a captured region to be considered a "character"
- Dim FaultyWidth As Boolean = Width > Height * 1.5F OrElse Width < 4
- Dim FaultyHeight As Boolean = Height > Width * 10.0F OrElse Height < 6
- 'Add the character to the rectangle list (if its valid)
- If Not FaultyWidth AndAlso Not FaultyHeight Then 'If the sizes of the character are invalid, then just ignore
- Dim Rect As New Rectangle(StartPt, New Size(Width, Height)) 'Creates a rectangle from the data we got
- Rect = Expand(Rect, 1) 'It usually cuts-off a bit of the aliased found border, so its better to expand the captured region a bit
- Characters.Add(Rect) 'And adds it to the character list
- '(Un)comment this code to draw (or hide, if commented) a visible rectangle around the captured region for debugging purposes
- Using g As Graphics = Graphics.FromImage(Img)
- g.DrawRectangle(Pens.Red, Expand(Rect, 1))
- End Using
- End If
- Next
- Next
- Return Characters.ToArray()
- End Function
- Private Function CountWhitePixelsRatio(Img As Bitmap, Bounds As Rectangle) As Single
- 'This will simply count white (or near white) pixels on the image inside the bounds region
- Dim Count As Integer
- For Y As Integer = Bounds.Y To Bounds.Y + Bounds.Height - 1
- For X As Integer = Bounds.X To Bounds.X + Bounds.Width - 1
- If GetMedian(Img.GetPixel(X, Y)) > MinColorMedian Then Count += 1
- Next
- Next
- Return Convert.ToSingle(Count / (Bounds.Width * Bounds.Height))
- End Function
- Private Function Expand(Input As Rectangle, Difference As Integer) As Rectangle
- 'This basically expands a rectangle with a given amount of pixels
- Dim Output As New Rectangle
- Output.X = Input.X - Difference
- Output.Y = Input.Y - Difference
- Output.Width = Input.Width + Difference * 2
- Output.Height = Input.Height + Difference * 2
- Return Output
- End Function
- Private Function GetMedian(Color As Color) As Integer
- 'Gets the median of a RGB color
- 'It's the lazy way to get the luminosity of a pixel, basically
- Return (
- Convert.ToInt32(Color.R) +
- Convert.ToInt32(Color.G) +
- Convert.ToInt32(Color.B)) \ 3
- End Function
- End Class
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement