Advertisement
Guest User

Untitled

a guest
Feb 5th, 2016
292
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.60 KB | None | 0 0
  1. Option Explicit On
  2. Option Strict On
  3.  
  4. Imports System.Drawing.Text
  5.  
  6. Public Class Form1
  7. Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
  8. Show()
  9.  
  10. 'Call this on the start to initialize the LUTs for the OCR
  11. BuildOCRLUTs()
  12. End Sub
  13.  
  14. Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
  15. Dim Plate As New Bitmap(PicPlate.Image)
  16. Dim Result As String = Scan(Plate)
  17.  
  18. 'The image now have the debugging rectangles with the regions
  19. PicPlate.Image = Plate
  20.  
  21. 'Show result
  22. Label1.Text = Result
  23. End Sub
  24.  
  25. 'With the blue filter enabled, only regions with a certain % of blue background will be accepted
  26. Const UseBlueFilter As Boolean = True
  27.  
  28. 'This is the minimum color "luminosity" to make a pixel be threated as a character pixel
  29. Const MinColorMedian As Integer = 168
  30.  
  31. 'This is the minimum color "luminosity" to make a pixel be threated as a character pixel on the OCR scan method
  32. 'Making it lower than the MinColorMedian may improve the results a bit on some cases
  33. Const MinColorMedianOCR As Integer = 152
  34.  
  35. 'This is the list of characters that the OCR will try to match against (A-Z)
  36. Const CharList As String = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  37.  
  38. 'Number of "tiles" on the X-axis used on the OCR character matching code
  39. Const SamplesX As Single = 24
  40.  
  41. 'Number of "tiles" on the Y-axis used on the OCR character matching code
  42. Const SamplesY As Single = 48
  43.  
  44. 'LookUp Tables, see method below
  45. Private SampleChars As Bitmap()
  46. Private SampleBounds As Rectangle()
  47.  
  48. Private Sub BuildOCRLUTs()
  49. 'Here, we build LookUp tables that will be used as base characters when compared to characters used on the plate
  50. 'Those tables are used simple for speed reasons, since drawing those characters on each verification would be slow and unnecessary
  51. 'We are using the Arial font bold at size 40. You need to use a font similar to the one on the object that should be OCRed for good results
  52. Dim TestFont As New Font("Arial", 40, FontStyle.Bold)
  53.  
  54. 'Create the array of Bitmap that will hold the image of each character
  55. SampleChars = New Bitmap(CharList.Length - 1) {}
  56.  
  57. 'This creates the array that will round the boundary rectangle of each character
  58. SampleBounds = New Rectangle(CharList.Length - 1) {}
  59.  
  60. 'Now loop through all characters in the characters list, and draw it on the SampleChars list one by one
  61. For i As Integer = 0 To CharList.Length - 1
  62. 'Each sample have a fixed size of 64x64, that is big enough to hold our font at size 40
  63. 'If you increase the font size, using bigger values here may also be necessary
  64. SampleChars(i) = New Bitmap(64, 64)
  65.  
  66. 'Create our graphics that will be used to draw the character
  67. Using g As Graphics = Graphics.FromImage(SampleChars(i))
  68. 'Set the charcter background to the black color
  69. g.Clear(Color.Black)
  70.  
  71. 'Disable all kinds of aliasing, they would just confuse the OCR
  72. g.TextRenderingHint = TextRenderingHint.SingleBitPerPixel
  73.  
  74. 'Draw the character i with the white color at position [0, 0] of our Bitmap
  75. g.DrawString(CharList.Substring(i, 1), TestFont, Brushes.White, PointF.Empty)
  76. End Using
  77.  
  78. 'Calculate the boundaries of the character, and assign it to the array
  79. Dim Bounds As Rectangle = GetRectangles(SampleChars(i))(0)
  80. SampleBounds(i) = Bounds
  81. Next
  82. End Sub
  83.  
  84. Private Function Scan(Img As Bitmap) As String
  85. 'This function is the main "OCR" function, its responsible to convert the image in text
  86. 'This variable will hold the total text captured
  87. Dim Output As String = String.Empty
  88.  
  89. Dim ChrFound As Boolean
  90. Dim ChrFoundX As Integer
  91. For Each Chr As Rectangle In GetRectangles(Img, UseBlueFilter)
  92. 'Check for a weird ratio of the white "font" pixels in relation to background pixels
  93. Dim WhitePixelsRatio As Single = CountWhitePixelsRatio(Img, Chr)
  94.  
  95. 'This values (0.25) was choosen by trial and error
  96. 'The captured region needs to have at least 25% of white to be considered a "character"
  97. If WhitePixelsRatio < 0.25F Then Continue For
  98.  
  99. 'Another check, if we processed at least one character already, and this one is too far away
  100. 'from the last character we found, then its probably unrelated, so we stop processing here
  101. If ChrFound AndAlso Chr.X - ChrFoundX > 32 Then Exit For
  102.  
  103. Dim BestRatio As Single = 0
  104. Dim BestScore As Integer = 0
  105. Dim CharacterIdx As Integer = 0
  106.  
  107. 'Compares the captured region with all characters on the list
  108. For i As Integer = 0 To CharList.Length - 1
  109. 'This variable holds the total "score" of the current character
  110. 'About the scoring system:
  111. 'The scoring system gives more weight to pixels into certain situations
  112. 'It was designed mostly by trial and error, but it shows improved results over the previous method based on the matches count only
  113. Dim Score As Integer = 0
  114. Dim Matches As Integer = 0
  115.  
  116. 'Boundaries of the current character being processed
  117. Dim Bounds As Rectangle = SampleBounds(i)
  118.  
  119. 'Gets the Width/Height of each "tile" on the captured region
  120. 'Detailed explanation:
  121. '- Suppose that we captured a region of size 40x40
  122. '- We want to know which characters this region have, so we will be comparing it with all characters on the list
  123. '- After, we choose the one with the best match ratio
  124. '- To do this, we divide both the captured region character, and the "sample" character from the font with the same number of tiles
  125. '- The SamplesX variable defines how many tiles we will use on the X axis, and SamplesY how many we use on the Y axis
  126. '- Due to the fact that characters are usually taller than wider, makes sense to use more tiles on the Y axis
  127. '- Following our 40x40 example, if we use 24 tiles on the X axis, then each tile will have ~1.6 pixels of width
  128. '- The font with size 64x64, will have 64 / 24 = ~2.6 of width for each tile
  129. '- The code will then compare the first pixel on the captured region with the first pixel on the font character
  130. '- Remember, both have the same number of "tiles", the only thing that changes between the font character and the captured region is the size of each tile
  131. Dim IStepX As Single = Chr.Width / SamplesX
  132. Dim IStepY As Single = Chr.Height / SamplesY
  133. Dim SStepX As Single = Bounds.Width / SamplesX
  134. Dim SStepY As Single = Bounds.Height / SamplesY
  135. For Y As Integer = 0 To Convert.ToInt32(SamplesY) - 1 'Loop through all Y-tiles
  136. For X As Integer = 0 To Convert.ToInt32(SamplesX) - 1 'Loop through all X-tiles
  137. 'This calculates the absolute position on the image of the first pixel of the current tile
  138. 'IStepX, IStepY = Width/Height of the tiles on the captured region (the character on the image to be OCRed) (I = Image)
  139. 'SStepX, SStepY = Width/Height of the tiles on the character font (S = Sample)
  140. Dim IX As Integer = Chr.X + Convert.ToInt32(X * IStepX)
  141. Dim IY As Integer = Chr.Y + Convert.ToInt32(Y * IStepY)
  142. Dim SX As Integer = Bounds.X + Convert.ToInt32(X * SStepX)
  143. Dim SY As Integer = Bounds.Y + Convert.ToInt32(Y * SStepY)
  144.  
  145. 'Check if the pixel is white (or near white), where Px = True
  146. 'Or if it is a background pixel (on the character font this is always black), where Px = False
  147. Dim IPx As Boolean = GetMedian(Img.GetPixel(IX, IY)) > MinColorMedianOCR
  148. Dim SPx As Boolean = GetMedian(SampleChars(i).GetPixel(SX, SY)) > MinColorMedianOCR
  149.  
  150. 'If both the character and the captured region pixel is white (or almost white)
  151. 'Or both the character and the captured region pixel is a background color (anything other than near-white colors)
  152. 'Then we have a match, and we increment our match count in this case
  153. If IPx = SPx Then
  154. 'This is the count of the actual number of matches
  155. Matches += 1
  156.  
  157. 'Give more weight to "white" pixels that matches than background pixels that matches
  158. 'Because "white" pixels are more likely to be correct than background pixels, that may be actually noise
  159. If IPx Then Score += 2 Else Score += 1
  160. Else
  161. 'Give a penalty if the region have (bg) but the character have white
  162. If SPx Then Score -= 1
  163. End If
  164. Next
  165. Next
  166.  
  167. 'Check if this character in particular, have a better number of matches than the last ones we tested
  168. 'If so, then update our best match count, calculate the match ratio of this character on a 0 to 1 value
  169. 'And finally, set the index of the matched character on the CharacterIdx variable
  170. If Score > BestScore Then
  171. BestRatio = Matches / (SamplesX * SamplesY)
  172. BestScore = Score
  173. CharacterIdx = i
  174. End If
  175. Next
  176.  
  177. 'Calculates the Aspect Ratio of the captured region and the matched font character
  178. 'It then calculates the difference between the two
  179. Dim SB As Rectangle = SampleBounds(CharacterIdx)
  180. Dim SampleAR As Single = Convert.ToSingle(SB.Width / SB.Height)
  181. Dim ChrAR As Single = Convert.ToSingle(Chr.Width / Chr.Height)
  182. Dim ARDifference = Math.Abs(SampleAR - ChrAR)
  183.  
  184. 'Check if at least 68% of the compared pixels matched
  185. 'If our ratio is smaller than that, its very unlikely that what we matched is a character
  186. 'If it isn't a character, it may be noise or some junk on the image that we captured by mistake
  187. 'The Aspect Ratio difference must be less than 0.4, this was also choosen by trial and error and may need to be tweaked
  188. If BestRatio > 0.68F AndAlso ARDifference < 0.4F Then
  189. ChrFound = True
  190. ChrFoundX = Chr.X
  191. Output += CharList.Substring(CharacterIdx, 1)
  192. End If
  193. Next
  194.  
  195. Return Output
  196. End Function
  197.  
  198. Private Function GetRectangles(Img As Bitmap, Optional BlueFilter As Boolean = False) As Rectangle()
  199. 'This function is responsible for finding where each character is
  200. 'It is also the main source of problems - it often adds junk to the list on noisy images
  201. Dim Characters As New List(Of Rectangle)()
  202.  
  203. Dim W As Integer = Img.Width - 1
  204. Dim H As Integer = Img.Height - 1
  205. Dim Visited(W, H) As Boolean
  206.  
  207. 'Loop through the entire images in search of characters
  208. For X As Integer = 0 To W
  209. For Y As Integer = 0 To H
  210. 'StartPt and EndPt are the start and end points [X, Y] on the image of the captured region
  211. Dim StartPt As New Point(Img.Width, Img.Height)
  212. Dim EndPt As Point = Point.Empty
  213.  
  214. 'This is a simple 4-way flood fill algorithm
  215. 'It works in the following way:
  216. '- Grabs the pixel [X, Y] from the queue, check if it is white or near white
  217. '- If TRUE => Queue neighbour pixels [X - 1, Y] (left), [X + 1, Y] (right), [X, Y - 1] (up), [X, Y + 1] (down) for verification too
  218. '- If FALSE => Don't do anything
  219. '- And always => Mark the pixel [X, Y] as visited, so we don't waste time verifying it again
  220. 'The queue begins with the current pixel [X, Y] being processed, and the loops keeps going until the queue gets empty
  221. 'The queue will be empty when all pixels are processed and there is no matching neighbour found in any pixel
  222. Dim Points As New Queue(Of Point)()
  223. Points.Enqueue(New Point(X, Y))
  224. While Points.Count > 0
  225. Dim Pt As Point = Points.Dequeue()
  226. If Not Visited(Pt.X, Pt.Y) Then
  227. Visited(Pt.X, Pt.Y) = True
  228. If GetMedian(Img.GetPixel(Pt.X, Pt.Y)) > MinColorMedian Then
  229. StartPt.X = Math.Min(StartPt.X, Pt.X)
  230. StartPt.Y = Math.Min(StartPt.Y, Pt.Y)
  231. EndPt.X = Math.Max(EndPt.X, Pt.X)
  232. EndPt.Y = Math.Max(EndPt.Y, Pt.Y)
  233.  
  234. If Pt.X > 0 Then Points.Enqueue(New Point(Pt.X - 1, Pt.Y))
  235. If Pt.Y > 0 Then Points.Enqueue(New Point(Pt.X, Pt.Y - 1))
  236. If Pt.X < Img.Width - 1 Then Points.Enqueue(New Point(Pt.X + 1, Pt.Y))
  237. If Pt.Y < Img.Height - 1 Then Points.Enqueue(New Point(Pt.X, Pt.Y + 1))
  238. End If
  239. End If
  240. End While
  241.  
  242. 'Calculate the Width/Height of the captured region from the Start and End points
  243. Dim Width As Integer = EndPt.X - StartPt.X
  244. Dim Height As Integer = EndPt.Y - StartPt.Y
  245.  
  246. 'Detects the amount of the blue color of the captured region
  247. 'If its at least 20%, then continue with the verifications
  248. 'Else, just ignore
  249. If BlueFilter Then
  250. Dim Matches As Integer = 0
  251. For TY As Integer = StartPt.Y To StartPt.Y + Height
  252. For TX As Integer = StartPt.X To StartPt.X + Width
  253. Dim Col As Color = Img.GetPixel(TX, TY)
  254. Dim R As Integer = Col.R
  255. Dim G As Integer = Col.G
  256. Dim B As Integer = Col.B
  257. If Col.B > 128 AndAlso (B - ((R + G) / 2)) > 64 Then Matches += 1
  258. Next
  259. Next
  260. Dim MatchRatio As Single = Convert.ToSingle(Matches / (Width * Height))
  261. If MatchRatio < 0.2F Then Continue For
  262. End If
  263.  
  264. 'Check for faulty aspect ratios (probably not a character)
  265. 'Again, those values (1.5 and 8) were found by trial and error, and may need tweaking
  266. 'Also, 6x6 is the minimum size for a captured region to be considered a "character"
  267. Dim FaultyWidth As Boolean = Width > Height * 1.5F OrElse Width < 4
  268. Dim FaultyHeight As Boolean = Height > Width * 10.0F OrElse Height < 6
  269.  
  270. 'Add the character to the rectangle list (if its valid)
  271. If Not FaultyWidth AndAlso Not FaultyHeight Then 'If the sizes of the character are invalid, then just ignore
  272. Dim Rect As New Rectangle(StartPt, New Size(Width, Height)) 'Creates a rectangle from the data we got
  273. Rect = Expand(Rect, 1) 'It usually cuts-off a bit of the aliased found border, so its better to expand the captured region a bit
  274. Characters.Add(Rect) 'And adds it to the character list
  275.  
  276. '(Un)comment this code to draw (or hide, if commented) a visible rectangle around the captured region for debugging purposes
  277. Using g As Graphics = Graphics.FromImage(Img)
  278. g.DrawRectangle(Pens.Red, Expand(Rect, 1))
  279. End Using
  280. End If
  281. Next
  282. Next
  283.  
  284. Return Characters.ToArray()
  285. End Function
  286.  
  287. Private Function CountWhitePixelsRatio(Img As Bitmap, Bounds As Rectangle) As Single
  288. 'This will simply count white (or near white) pixels on the image inside the bounds region
  289. Dim Count As Integer
  290. For Y As Integer = Bounds.Y To Bounds.Y + Bounds.Height - 1
  291. For X As Integer = Bounds.X To Bounds.X + Bounds.Width - 1
  292. If GetMedian(Img.GetPixel(X, Y)) > MinColorMedian Then Count += 1
  293. Next
  294. Next
  295. Return Convert.ToSingle(Count / (Bounds.Width * Bounds.Height))
  296. End Function
  297.  
  298. Private Function Expand(Input As Rectangle, Difference As Integer) As Rectangle
  299. 'This basically expands a rectangle with a given amount of pixels
  300. Dim Output As New Rectangle
  301.  
  302. Output.X = Input.X - Difference
  303. Output.Y = Input.Y - Difference
  304. Output.Width = Input.Width + Difference * 2
  305. Output.Height = Input.Height + Difference * 2
  306.  
  307. Return Output
  308. End Function
  309.  
  310. Private Function GetMedian(Color As Color) As Integer
  311. 'Gets the median of a RGB color
  312. 'It's the lazy way to get the luminosity of a pixel, basically
  313. Return (
  314. Convert.ToInt32(Color.R) +
  315. Convert.ToInt32(Color.G) +
  316. Convert.ToInt32(Color.B)) \ 3
  317. End Function
  318. End Class
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement