34
Passport MRZ Recognition with Dynamsoft C++ OCR SDK
- Download Dynamsoft C++ OCR dev package, which supports Windows and Linux.
- Get a 30-day FREE Trial License.
Windows
Linux
sudo apt install libopencv-dev cmake
In the following paragraphs, we first use the Dynamsoft OCR SDK to locate the machine-readable zone (MRZ) of a passport and recognize its text, and then extract the individual fields from that text according to the standard format of passport booklets.
I strongly recommend installing the CMake extension for Visual Studio Code to create and debug a CMake project on both Windows and Linux.
Let us configure the header files and link libraries of Dynamsoft OCR and OpenCV in CMakeLists.txt:
# Build configuration for the `mrz` sample: a single executable built from
# mrzcv.cpp and linked against the Dynamsoft Label Recognition library and
# OpenCV.
#
# The script uses generator expressions and target_link_directories(), neither
# of which exists in CMake 2.6; target_link_directories() needs 3.13, and
# CMake 4.x refuses minimum versions older than 3.5.
cmake_minimum_required(VERSION 3.13...3.29)
project(mrz)
message(STATUS "PROJECT_NAME: ${PROJECT_NAME}")

# Check platforms.
# WIN32/UNIX describe the platform being built FOR; the CMAKE_HOST_* variables
# describe the machine running CMake and give the wrong answer when
# cross-compiling. WINDOWS and LINUX are still set so that any other script
# code relying on them keeps working.
if(WIN32)
  set(WINDOWS 1)
elseif(UNIX AND NOT APPLE)
  set(LINUX 1)
else()
  message(WARNING
    "Unsupported platform: the bundled Dynamsoft libraries under platform/ "
    "are only referenced for Windows and Linux.")
endif()

message(STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}")

# Add the executable
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME} mrzcv.cpp)

# Add search paths for include and lib files, scoped to this target only so
# they do not leak into other targets defined in the same directory.
target_include_directories(${PROJECT_NAME} PRIVATE
  "${PROJECT_BINARY_DIR}"
  "${PROJECT_SOURCE_DIR}/include/"
)
if(WINDOWS)
  target_link_directories(${PROJECT_NAME} PRIVATE
    "${PROJECT_SOURCE_DIR}/platform/windows/lib/")
elseif(LINUX)
  target_link_directories(${PROJECT_NAME} PRIVATE
    "${PROJECT_SOURCE_DIR}/platform/linux/")
endif()

# The Windows import library carries an "x64" suffix; every other platform
# links the unsuffixed library name.
if(WINDOWS)
  target_link_libraries(${PROJECT_NAME} PRIVATE
    "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS})
else()
  target_link_libraries(${PROJECT_NAME} PRIVATE
    "DynamsoftLabelRecognition" ${OpenCV_LIBS})
endif()

# Copy DLLs next to the built executable so it can be started from the
# output directory on Windows.
if(WINDOWS)
  add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory
            "${PROJECT_SOURCE_DIR}/platform/windows/bin/"
            "$<TARGET_FILE_DIR:${PROJECT_NAME}>"
    COMMENT "Copying Dynamsoft DLLs to the output directory"
    VERBATIM)
endif()
The character model, trained with a deep neural network (DNN), is included in the C++ dev package. We need to copy the whole model folder to the output directory, and the template file as well.
# Copy template
# After each build, copy the contents of template/ into the directory that
# holds the built executable. VERBATIM keeps argument escaping identical on
# every platform and generator.
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          "${PROJECT_SOURCE_DIR}/template/"
          "$<TARGET_FILE_DIR:${PROJECT_NAME}>"
  COMMENT "Copying template files to the output directory"
  VERBATIM)

# Copy model files
# After each build, copy the CharacterModel folder into a CharacterModel
# subfolder of the directory that holds the built executable.
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          "${PROJECT_SOURCE_DIR}/CharacterModel"
          "$<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel"
  COMMENT "Copying character model files to the output directory"
  VERBATIM)
The online documentation can help you figure out how template parameters work.
Once the build configuration is done, we can move to the code part.
-
Initialize the OCR object. A valid license key is required.
// Create the recognizer object and pass it the license key.
// "LICENSE-KEY" is a placeholder; a valid key is required.
// NOTE(review): any status returned by InitLicense is not checked here.
CLabelRecognition dlr; dlr.InitLicense("LICENSE-KEY");
-
Append a template file.
// Load recognition settings from the template file ("template-file" is a
// placeholder path). The status is stored in ret but not checked in this snippet.
int ret = dlr.AppendSettingsFromFile("template-file");
Note: if the
DirectoryPath
configured in the template file is a relative path, you must place the template file in the same directory as the model folder.
"CharacterModelArray" : [ { "DirectoryPath": "CharacterModel", "FilterFilePath": "", "Name": "NumberUppercase" } ],
-
Call the OCR recognition method. We can use a built-in OpenCV class to measure the elapsed time. The
TickMeter
class simplifies the code of time calculation for Windows and Linux.
// Time the recognition call: start the TickMeter, run RecognizeByFile on the
// image at pszImageFile, stop the meter, and read the elapsed time in seconds.
// errorCode and pszImageFile are declared outside this snippet.
// NOTE(review): "locr" is presumably the name of a template defined in the
// settings file - confirm against the template.
TickMeter tm; tm.start(); errorCode = dlr.RecognizeByFile(pszImageFile, "locr"); tm.stop(); float costTime = tm.getTimeSec();
-
Get the text recognition results, which include the coordinates of the text zone, the text lines, and the text strings.
// Fetch all recognition results, print every recognized text line together
// with its four location points, then release the result array.
DLRResultArray* pDLRResults = NULL;
dlr.GetAllDLRResults(&pDLRResults);
if (pDLRResults == NULL)
{
    // Nothing was returned by the recognizer.
    printf("\r\nNo data detected.\r\n");
}
else
{
    const int resultTotal = pDLRResults->resultsCount;
    printf("\r\nRecognized %d results\r\n", resultTotal);

    for (int ri = 0; ri < resultTotal; ri++)
    {
        printf("\r\nResult %d :\r\n", ri);
        int startX = 50, startY = 50;
        DLRResult* result = pDLRResults->results[ri];
        const int lineTotal = result->lineResultsCount;

        for (int li = 0; li < lineTotal; li++)
        {
            // Recognized text of this line.
            printf("Line result %d: %s\r\n", li, result->lineResults[li]->text);

            // Four points describing where the line sits in the image.
            DLRPoint *quad = result->lineResults[li]->location.points;
            printf("x1: %d, y1: %d, x2: %d, y2: %d, x3: %d, y3: %d, x4: %d, y4: %d\r\n",
                   quad[0].x, quad[0].y,
                   quad[1].x, quad[1].y,
                   quad[2].x, quad[2].y,
                   quad[3].x, quad[3].y);
        }
    }
}
// Called on both paths, exactly as before.
dlr.FreeDLRResults(&pDLRResults);
-
Parse the MRZ string and extract the corresponding information.
string line1 = result->lineResults[0]->text; string line2 = result->lineResults[1]->text; // https://en.wikipedia.org/wiki/Machine-readable_passport // Type string tmp = "Type: "; tmp.insert(tmp.length(), 1, line1[0]); printf("%s\r\n", tmp.c_str()); // Issuing country tmp = "Issuing country: "; line1.substr(2, 5); tmp += line1.substr(2, 3); printf("%s\r\n", tmp.c_str()); // Surname int index = 5; tmp = "Surname: "; for (; index < 44; index++) { if (line1[index] != '<') { tmp.insert(tmp.length(), 1, line1[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); // Given names tmp = "Given Names: "; index += 2; for (; index < 44; index++) { if (line1[index] != '<') { tmp.insert(tmp.length(), 1, line1[index]); } else { tmp.insert(tmp.length(), 1, ' '); } } printf("%s\r\n", tmp.c_str()); // Passport number tmp = "Passport number: "; index = 0; for (; index < 9; index++) { if (line2[index] != '<') { tmp.insert(tmp.length(), 1, line2[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); // Nationality tmp = "Nationality: "; tmp += line2.substr(10, 3); printf("%s\r\n", tmp.c_str()); // Date of birth tmp = line2.substr(13, 6); tmp.insert(2, "/"); tmp.insert(5, "/"); tmp = "Date of birth (YYMMDD): " + tmp; printf("%s\r\n", tmp.c_str()); // Sex tmp = "Sex: "; tmp.insert(tmp.length(), 1, line2[20]); printf("%s\r\n", tmp.c_str()); // Expiration date of passport tmp = line2.substr(21, 6); tmp.insert(2, "/"); tmp.insert(5, "/"); tmp = "Expiration date of passport (YYMMDD): " + tmp; printf("%s\r\n", tmp.c_str()); // Personal number if (line2[28] != '<') { tmp = "Personal number: "; for (index = 28; index < 42; index++) { if (line2[index] != '<') { tmp.insert(tmp.length(), 1, line2[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); }
To make the program user-friendly, we use OpenCV to show the display window and draw relevant information on it.
line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness);
line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness);
line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness);
line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness);
drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10);
imshow("Passport MRZ Recognition", ori);
A further improvement is to use the hconcat
function to stitch the images side by side for easier comparison.
// Join the `before` and `after` images horizontally into newMat and show the
// combined image in a window titled "Comparison".
hconcat(before, after, newMat);
imshow("Comparison", newMat);
34