Passport MRZ Recognition with Dynamsoft C++ OCR SDK

About Dynamsoft OCR SDK

Download Dynamsoft C++ OCR dev package, which supports Windows and Linux.
Get a 30-day FREE Trial License.

Prerequisites

Windows

OpenCV
CMake

Linux

sudo apt install libopencv-dev cmake

Passport MRZ Recognition in C++

In the following paragraphs, we first use the Dynamsoft OCR SDK to locate the machine-readable zone (MRZ) of a passport and recognize its text, and then extract the individual fields from the recognized text according to the standard format of passport booklets.

Setting up CMake project

I strongly recommend installing the CMake extension for Visual Studio Code to create and debug a CMake project on both Windows and Linux.

Let us configure the header files and link libraries of Dynamsoft OCR and OpenCV in CMakeLists.txt:

# CMake 4.0 and later refuse projects that request compatibility with
# versions older than 3.5, so 3.5 is the lowest usable minimum.
cmake_minimum_required (VERSION 3.5)
project (mrz)
message(STATUS "PROJECT_NAME: " ${PROJECT_NAME})

# Check platforms (decided by the host that runs CMake)
if (CMAKE_HOST_WIN32)
    set(WINDOWS 1)
elseif(CMAKE_HOST_UNIX)
    set(LINUX 1)
endif()

# Add search path for include and lib files
message(STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}")
if(WINDOWS)
    link_directories("${PROJECT_SOURCE_DIR}/platform/windows/lib/")
elseif(LINUX)
    link_directories("${PROJECT_SOURCE_DIR}/platform/linux/")
endif()
include_directories("${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/include/")

# Add the executable and link it against the Dynamsoft library and OpenCV.
# The Dynamsoft library name differs between the Windows and non-Windows packages.
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME} mrzcv.cpp)
if(WINDOWS)
    target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS})
else()
    target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognition" ${OpenCV_LIBS})
endif()

# Copy DLLs next to the built executable so they are found at run time
if(WINDOWS)
    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        "${PROJECT_SOURCE_DIR}/platform/windows/bin/"
        $<TARGET_FILE_DIR:${PROJECT_NAME}>)
endif()

The character model trained with a deep neural network (DNN) can be found in the C++ dev package. We need to copy the whole model folder to the output directory, and the template file as well.

# After each build, copy the runtime assets next to the executable:
#   1. the contents of template/ go directly into the output directory
#   2. the CharacterModel folder is copied as a CharacterModel subdirectory
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory
            "${PROJECT_SOURCE_DIR}/template/"
            $<TARGET_FILE_DIR:${PROJECT_NAME}>
    COMMAND ${CMAKE_COMMAND} -E copy_directory
            "${PROJECT_SOURCE_DIR}/CharacterModel"
            $<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel)

The online documentation can help you figure out how template parameters work.

Coding for MRZ detection and information parsing

Once the build configuration is done, we can move to the code part.

Initialize the OCR object. A valid license key is required.
```
// Create the recognizer object and supply the license key.
// NOTE(review): the result of InitLicense is not checked here — confirm how failures are reported.
CLabelRecognition dlr;
dlr.InitLicense("LICENSE-KEY");
```

Append a template file.

// Load recognition settings from the template file; the call's return value is kept in ret.
int ret = dlr.AppendSettingsFromFile("template-file");

Note: if the DirectoryPath configured in the template file is a relative path, you must keep the template file in the same location as the model folder.

"CharacterModelArray" : [
{
  "DirectoryPath": "CharacterModel",
  "FilterFilePath": "",
  "Name": "NumberUppercase"
}
],

Call the OCR recognition method. We can use OpenCV's built-in TickMeter class to measure the elapsed time; it simplifies the timing code on both Windows and Linux.
```
// Time the recognition call with OpenCV's TickMeter.
TickMeter tm;
tm.start();
// "locr" is presumably the name of the template to apply — TODO confirm against the template file.
errorCode = dlr.RecognizeByFile(pszImageFile, "locr");
tm.stop();
// Elapsed time between start() and stop(), in seconds.
float costTime = tm.getTimeSec();
```

Get the text recognition results, which include the coordinates of the text zone, the text lines, and the text strings.

// Fetch all recognition results and print, for every result, each recognized
// text line together with the four corner points of its location.
DLRResultArray* pDLRResults = NULL;
dlr.GetAllDLRResults(&pDLRResults);
if (pDLRResults != NULL)
{
    int rCount = pDLRResults->resultsCount;
    printf("\r\nRecognized %d results\r\n", rCount);
    for (int ri = 0; ri < rCount; ++ri)
    {
        printf("\r\nResult %d :\r\n", ri);
        DLRResult* result = pDLRResults->results[ri];
        int lCount = result->lineResultsCount;
        for (int li = 0; li < lCount; ++li)
        {
            printf("Line result %d: %s\r\n", li, result->lineResults[li]->text);
            // Corner points of the quadrilateral that encloses this text line.
            DLRPoint *points = result->lineResults[li]->location.points;
            printf("x1: %d, y1: %d, x2: %d, y2: %d, x3: %d, y3: %d, x4: %d, y4: %d\r\n",
                   points[0].x, points[0].y, points[1].x, points[1].y,
                   points[2].x, points[2].y, points[3].x, points[3].y);
        }
    }
}
else
{
    printf("\r\nNo data detected.\r\n");
}
// Hand the result array back to the SDK once it is no longer needed.
dlr.FreeDLRResults(&pDLRResults);

Parse the MRZ string and extract the corresponding information.

// Copy the two recognized MRZ lines into strings for field extraction.
// Field layout reference: https://en.wikipedia.org/wiki/Machine-readable_passport
string line1(result->lineResults[0]->text);
string line2(result->lineResults[1]->text);

// Type: the first character of the first line.
string tmp("Type: ");
tmp.push_back(line1[0]);
printf("%s\r\n", tmp.c_str());

// Issuing country: the three characters at offsets 2-4 of the first line.
tmp = "Issuing country: ";
tmp += line1.substr(2, 3);
printf("%s\r\n", tmp.c_str());

// Surname
int index = 5;
tmp = "Surname: ";
for (; index < 44; index++)
{
  if (line1[index] != '<')
  {
    tmp.insert(tmp.length(), 1, line1[index]);
  }
  else 
  {
    break;
  }
}
printf("%s\r\n", tmp.c_str());

// Given names
tmp = "Given Names: ";
index += 2;
for (; index < 44; index++)
{
  if (line1[index] != '<')
  {
    tmp.insert(tmp.length(), 1, line1[index]);
  }
  else 
  {
    tmp.insert(tmp.length(), 1, ' ');
  }
}
printf("%s\r\n", tmp.c_str());

// Passport number
tmp = "Passport number: ";
index = 0;
for (; index < 9; index++)
{
  if (line2[index] != '<')
  {
    tmp.insert(tmp.length(), 1, line2[index]);
  }
  else 
  {
    break;
  }
}
printf("%s\r\n", tmp.c_str());

// Nationality: the three characters at offsets 10-12 of the second line.
tmp.assign("Nationality: ");
tmp.append(line2.substr(10, 3));
printf("%s\r\n", tmp.c_str());

// Date of birth: six characters at offsets 13-18 of the second line,
// printed with '/' separators after the 2nd and 4th characters.
tmp.assign(line2, 13, 6);
tmp.insert(2, 1, '/');
tmp.insert(5, 1, '/');
tmp.insert(0, "Date of birth (YYMMDD): ");
printf("%s\r\n", tmp.c_str());

// Sex: the single character at offset 20 of the second line.
tmp = "Sex: ";
tmp += line2[20];
printf("%s\r\n", tmp.c_str());

// Expiration date: six characters at offsets 21-26 of the second line,
// printed with '/' separators after the 2nd and 4th characters.
tmp.assign(line2, 21, 6);
tmp.insert(2, 1, '/');
tmp.insert(5, 1, '/');
tmp.insert(0, "Expiration date of passport (YYMMDD): ");
printf("%s\r\n", tmp.c_str());

// Personal number: offsets 28-41 of the second line, cut short at the first '<'.
// Nothing is printed when the field starts with '<'.
if (line2[28] != '<')
{
  tmp = "Personal number: ";
  index = 28;
  while (index < 42 && line2[index] != '<')
  {
    tmp += line2[index];
    ++index;
  }
  printf("%s\r\n", tmp.c_str());
}

To make the program user-friendly, we use OpenCV to show the display window and draw relevant information on it.

line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness);
line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness);
line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness);
line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness);
drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10);

imshow("Passport MRZ Recognition", ori);

A further improvement is to use the hconcat function to stitch the before and after images together for easier comparison.

// Stitch the "before" and "after" images into newMat and show the result in one window.
// NOTE(review): hconcat presumably requires both inputs to have the same height and type — confirm.
hconcat(before, after, newMat);
imshow("Comparison", newMat);

Source Code

Tags:

Ocr Mrz Passport Cpp

C++ is awesome, here's why...

LeetCode - Plus One

Inbuilt Functions in C++ for Strings 🐱‍👤

Generating Combinations using C++

The Number that Broke and Spoke – C++ Investigator

Arrays Questions: Reverse an array

const and smart pointers

8 Reasons to Learn C++