身份证识别
安装环境
tar -xzf leptonica-1.77.0.tar.gz && cd leptonica-1.77.0
./configure
make && make install
centos: yum install epel-release && yum install tesseract-devel leptonica-devel
debian: apt-get install libtesseract-dev libtesseract3 tesseract-ocr
pip install opencv-python pytesseract tesseract tesseract-ocr
代码
import re
import sys

import cv2
import pytesseract
from PIL import Image
# Pattern an OCR result must match in full to be accepted as an 18-character
# ID number. The date-like groups are presumably the birth date.
r = (
    r'^('
    r'[1-9]\d{5}'                # six digits, the first one non-zero
    r'[12]\d{3}'                 # four digits starting with 1 or 2 (year-like)
    r'(0[1-9]|1[012])'           # 01-12 (month-like)
    r'(0[1-9]|[12][0-9]|3[01])'  # 01-31 (day-like)
    r'\d{3}[0-9xX]'              # three digits, then a digit or x/X
    r')$'
)
def CalcIdRectByFaceRect_normal(x, y, w, h):
    """Derive the ID-number box from a face rectangle (baseline variant).

    Offsets are given for a reference face 95 pixels wide and are scaled
    linearly by the actual face width.

    Args:
        x: Left edge of the face rectangle.
        y: Top edge of the face rectangle.
        w: Width of the face rectangle; the scale factor is ``w / 95``.
        h: Height of the face rectangle (accepted but not used).

    Returns:
        ``(x1, y1, x2, y2)`` with every coordinate truncated by ``int``.
    """
    ratio = float(w) / 95

    # Reference-space offsets: the box starts 159 px left of and 149 px
    # below the face origin, and measures 275 x 45 px.
    left = int(x + -159 * ratio)
    top = int(y + 149 * ratio)
    right = int(x + (-159 + 275) * ratio)
    bottom = int(y + (149 + 45) * ratio)
    return (left, top, right, bottom)
def CalcIdRectByFaceRect_big(x, y, w, h):
    """Derive the ID-number box from a face rectangle ("big" variant).

    Uses the same 95-pixel reference face as the other variants, but with
    the left and right edges each pulled in by 10 reference pixels, the top
    edge raised by 3 and the box height reduced from 45 to 35.

    Args:
        x: Left edge of the face rectangle.
        y: Top edge of the face rectangle.
        w: Width of the face rectangle; the scale factor is ``w / 95``.
        h: Height of the face rectangle (accepted but not used).

    Returns:
        ``(x1, y1, x2, y2)`` with every coordinate truncated by ``int``.
    """
    ratio = float(w) / 95
    # (origin, reference offset) for x1, y1, x2, y2 in that order.
    anchors = (
        (x, -159 + 10),
        (y, 149 - 3),
        (x, -159 + (275 - 10)),
        (y, (149 - 3) + (45 - 10)),
    )
    return tuple(int(origin + offset * ratio) for origin, offset in anchors)
def CalcIdRectByFaceRect_small(x, y, w, h):
    """Derive the ID-number box from a face rectangle ("small" variant).

    Uses the same 95-pixel reference face as the other variants, but with
    the left and right edges each pushed out by 10 reference pixels and the
    box moved down and made taller.

    Args:
        x: Left edge of the face rectangle.
        y: Top edge of the face rectangle.
        w: Width of the face rectangle; the scale factor is ``w / 95``.
        h: Height of the face rectangle (accepted but not used).

    Returns:
        ``(x1, y1, x2, y2)`` with every coordinate truncated by ``int``.
    """
    factor = float(w) / 95
    offsets = {
        'left': -159 - 10,
        'top': 149 + 3,
        'right': -159 + (275 + 10),
        # NOTE(review): the bottom edge is based on 149 + 5 while the top
        # edge uses 149 + 3 -- confirm the mismatch is intentional.
        'bottom': (149 + 5) + (45 + 10),
    }
    return (
        int(x + offsets['left'] * factor),
        int(y + offsets['top'] * factor),
        int(x + offsets['right'] * factor),
        int(y + offsets['bottom'] * factor),
    )
def binarizing(img, threshold):
    """Threshold an image in place to pure black and white.

    Every pixel below ``threshold`` becomes 0; every other pixel becomes 255.

    Args:
        img: Object exposing ``load()`` (pixel access indexed by ``[x, y]``)
            and ``size`` as ``(width, height)``.
        threshold: Cut-off value compared against each pixel.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixels = img.load()
    width, height = img.size
    for row in range(height):
        for col in range(width):
            pixels[col, row] = 0 if pixels[col, row] < threshold else 255
    return img
def depoint(img):
    """Remove isolated dark specks from an image, in place.

    Each interior pixel (the one-pixel border is skipped) is set to 255 when
    more than two of its four direct neighbours are brighter than 245.

    Args:
        img: Object exposing ``load()`` (pixel access indexed by ``[x, y]``)
            and ``size`` as ``(width, height)``.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixels = img.load()
    width, height = img.size
    # Rows are scanned top to bottom and columns left to right; the order
    # matters because pixels whitened earlier influence later neighbours.
    for row in range(1, height - 1):
        for col in range(1, width - 1):
            neighbours = (
                pixels[col, row - 1],
                pixels[col, row + 1],
                pixels[col - 1, row],
                pixels[col + 1, row],
            )
            bright = sum(1 for value in neighbours if value > 245)
            if bright > 2:
                pixels[col, row] = 255
    return img
def identity_OCR_byFaceRect(oImg, faceRect):
    """OCR the ID number located relative to a detected face.

    The image is enlarged three times, then up to three candidate boxes
    (normal, small, big, in that order) are OCR'd until one result matches
    the module-level ID-number pattern ``r``.

    Args:
        oImg: PIL image of the whole picture.
        faceRect: ``(x, y, w, h)`` face rectangle in ``oImg`` coordinates.

    Returns:
        ``(code, (x1, y1, x2, y2))``. ``code`` is the recognised number, or
        ``'NONE'`` when no candidate matched. The rectangle is the last
        candidate box tried, in original (un-enlarged) coordinates.
    """
    upscale = 3
    x, y, w, h = faceRect
    iw, ih = oImg.size
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # resampling filter under its current name.
    largeImg = oImg.resize((iw * upscale, ih * upscale), Image.LANCZOS)

    candidates = (
        CalcIdRectByFaceRect_normal,
        CalcIdRectByFaceRect_small,
        CalcIdRectByFaceRect_big,
    )
    for calc_rect in candidates:
        x1, y1, x2, y2 = calc_rect(x, y, w, h)
        # Candidate boxes are in original coordinates; scale them to match
        # the enlarged image before cropping.
        region = (x1 * upscale, y1 * upscale, x2 * upscale, y2 * upscale)
        code = GetRegionString(largeImg, region)
        if re.match(r, code):
            return code, (x1, y1, x2, y2)

    return 'NONE', (x1, y1, x2, y2)
def GetRegionString(img, region):
    """Crop a region from an image, clean it up and OCR it.

    Args:
        img: PIL image to crop from.
        region: ``(left, top, right, bottom)`` box passed to ``img.crop``.

    Returns:
        The Tesseract output for the region after ``PostProc`` clean-up.
    """
    cropped = img.crop(region)
    # NOTE(review): the crop is always dumped to this fixed path, so it is
    # overwritten on every call and requires a writable /tmp.
    cropped.save('/tmp/2_crop.png')

    # Greyscale -> threshold at 100 -> speckle removal, then OCR.
    cleaned = depoint(binarizing(cropped.convert('L'), 100))
    return PostProc(pytesseract.image_to_string(cleaned))
# Characters removed or rewritten by PostProc: spaces are dropped and letters
# that the OCR output may contain in place of digits are mapped to digits.
_OCR_FIXUPS = str.maketrans({
    " ": None,
    "O": "0",
    "U": "0",
    "D": "0",
    "Z": "2",
    "S": "5",
    "s": "5",
    "o": "6",
    "f": "7",
    "H": "11",
})


def PostProc(s):
    """Normalise raw OCR text into an ID-number candidate.

    Leading and trailing whitespace (including the newline and form feed
    that Tesseract output can end with) is stripped, inner spaces are
    removed, and the look-alike letters in ``_OCR_FIXUPS`` are replaced.

    Args:
        s: Raw string returned by the OCR engine.

    Returns:
        The cleaned string.
    """
    # strip() handles trailing "\n" / "\x0c", which would otherwise make a
    # "$"-anchored pattern reject an otherwise valid number.
    return s.strip().translate(_OCR_FIXUPS)
def DetectFacesAndIDs(pic_path):
    """Detect faces in a picture and OCR the ID number next to one of them.

    Args:
        pic_path: Path of the image file to analyse.

    Returns:
        The first recognised ID number, or ``'NONE'`` when no face is found
        or no face yields a number matching the ID pattern.

    Raises:
        ValueError: If OpenCV cannot decode the image at ``pic_path``.
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(pic_path) as oImg:
        frame = cv2.imread(pic_path)
        if frame is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError("cannot read image: %r" % (pic_path,))

        # Use the cascade shipped inside the installed cv2 package instead
        # of a path tied to one specific Python installation.
        cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_alt.xml"
        classfier = cv2.CascadeClassifier(cascade_path)

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faceRects = classfier.detectMultiScale(
            gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))

        # Keep trying further detections when one gives no valid number, so
        # a spurious face does not hide the real one.
        for faceRect in faceRects:
            code, _rect = identity_OCR_byFaceRect(oImg, faceRect)
            if code != 'NONE':
                return code
    return 'NONE'
if __name__ == '__main__':
    # Take the image path from the command line, falling back to the
    # original sample file name, and print the result so that running the
    # script actually reports something.
    image_path = sys.argv[1] if len(sys.argv) > 1 else '1.jpg'
    print(DetectFacesAndIDs(image_path))