Passport MRZ Recognition with Dynamsoft C++ OCR SDK

About Dynamsoft OCR SDK
  • Download Dynamsoft C++ OCR dev package, which supports Windows and Linux.
  • Get a 30-day FREE Trial License.
  • Prerequisites
    Windows
    Linux
    sudo apt install libopencv-dev cmake
    Passport MRZ Recognition in C++
    In the following paragraphs, we first use the Dynamsoft OCR SDK to locate the machine-readable zone (MRZ) of a passport and recognize the corresponding text string, and then extract all the information from that string according to the standard format of passport booklets.
    Setting up CMake project
    I strongly recommend installing the CMake extension in Visual Studio Code to create and debug a CMake project on both Windows and Linux.
    Let us configure the header files and linking libraries of Dynamsoft OCR and OpenCV in CMakeLists:
    # The $<TARGET_FILE_DIR:...> generator expression used below does not exist
    # in CMake 2.6, and recent CMake releases have dropped compatibility with
    # such old minimum versions, so require a modern baseline.
    cmake_minimum_required (VERSION 3.10)
    project (mrz)
    MESSAGE( STATUS "PROJECT_NAME: " ${PROJECT_NAME} )
    
    # Check platforms
    if (CMAKE_HOST_WIN32)
        set(WINDOWS 1)
    elseif(CMAKE_HOST_UNIX)
        set(LINUX 1)
    endif()
    
    # Add search path for include and lib files
    MESSAGE( STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}" )
    if(WINDOWS)
        link_directories("${PROJECT_SOURCE_DIR}/platform/windows/lib/") 
    elseif(LINUX)
        link_directories("${PROJECT_SOURCE_DIR}/platform/linux/")
    endif()
    include_directories("${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/include/")
    
    # Add the executable and link it against Dynamsoft Label Recognition and OpenCV.
    # The Dynamsoft import library carries an "x64" suffix on Windows only.
    find_package(OpenCV REQUIRED)
    add_executable(${PROJECT_NAME} mrzcv.cpp)
    if(WINDOWS)
        target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS})
    else()
        target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognition" ${OpenCV_LIBS})
    endif()
    
    # Copy DLLs next to the executable after each build (Windows only)
    if(WINDOWS)
        add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD 
            COMMAND ${CMAKE_COMMAND} -E copy_directory
            "${PROJECT_SOURCE_DIR}/platform/windows/bin/"      
            $<TARGET_FILE_DIR:${PROJECT_NAME}>)
    endif()
    The character model trained by a deep neural network (DNN) can be found in the C++ dev package. We need to copy the whole model folder to the output directory, and the template file as well.
    # After each build, copy the template files next to the executable and the
    # character model files into a CharacterModel subfolder of the output directory.
    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
        # Step 1: template files
        COMMAND ${CMAKE_COMMAND} -E copy_directory
                "${PROJECT_SOURCE_DIR}/template/"
                $<TARGET_FILE_DIR:${PROJECT_NAME}>
        # Step 2: model files
        COMMAND ${CMAKE_COMMAND} -E copy_directory
                "${PROJECT_SOURCE_DIR}/CharacterModel"
                $<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel)
    The online documentation can help you figure out how template parameters work.
    Coding for MRZ detection and information parsing
    Once the build configuration is done, we can move to the code part.
  • Initialize the OCR object. A valid license key is required.

    // Create the recognizer object and activate it; replace "LICENSE-KEY" with a valid key.
    // NOTE(review): the result of InitLicense is discarded here — presumably it
    // reports an error code; confirm against the SDK docs and check it.
    CLabelRecognition dlr;
    dlr.InitLicense("LICENSE-KEY");
    
  • Append a template file.

    // Load the recognition settings from the template file; ret holds the call's result.
    // NOTE(review): presumably 0 means success — confirm against the SDK docs and check ret.
    int ret = dlr.AppendSettingsFromFile("template-file");
    

    Note: if the DirectoryPath configured in the template file is a relative path, the template file must be placed in the same directory as the model folder.

    "CharacterModelArray" : [
    {
      "DirectoryPath": "CharacterModel",
      "FilterFilePath": "",
      "Name": "NumberUppercase"
    }
    ],
    
  • Call the OCR recognition method. We can use OpenCV's built-in TickMeter class to measure the elapsed time; it keeps the timing code identical on Windows and Linux.

    // Measure how long the recognition call takes using OpenCV's TickMeter.
    TickMeter tm;
    tm.start();
    // "locr" is presumably the name of a template defined in the appended settings — TODO confirm.
    // errorCode is declared outside this snippet.
    errorCode = dlr.RecognizeByFile(pszImageFile, "locr");
    tm.stop();
    // Elapsed time in seconds.
    // NOTE(review): getTimeSec() is documented as returning double, so this narrows to float.
    float costTime = tm.getTimeSec();
    
  • Get the text recognition results that include the coordinate of the text zone, text lines, and text strings.

    // Fetch all recognition results and print, for every recognized line, its
    // text followed by the four corner points of its location.
    DLRResultArray* pDLRResults = NULL;
    dlr.GetAllDLRResults(&pDLRResults);
    if (pDLRResults == NULL)
    {
        // Nothing came back from the recognizer.
        printf("\r\nNo data detected.\r\n");
    }
    else
    {
        int rCount = pDLRResults->resultsCount;
        printf("\r\nRecognized %d results\r\n", rCount);
        for (int ri = 0; ri < rCount; ++ri)
        {
            printf("\r\nResult %d :\r\n", ri);
            int startX = 50, startY = 50;
            DLRResult* result = pDLRResults->results[ri];
            int lCount = result->lineResultsCount;
            for (int li = 0; li < lCount; ++li)
            {
                // Text of this line.
                printf("Line result %d: %s\r\n", li, result->lineResults[li]->text);

                // Corner coordinates of this line.
                DLRPoint *points = result->lineResults[li]->location.points;
                printf("x1: %d, y1: %d, x2: %d, y2: %d, x3: %d, y3: %d, x4: %d, y4: %d\r\n",
                       points[0].x, points[0].y,
                       points[1].x, points[1].y,
                       points[2].x, points[2].y,
                       points[3].x, points[3].y);
            }
        }
    }
    // Release the result array in both cases, exactly once.
    dlr.FreeDLRResults(&pDLRResults);
    
  • Parse the MRZ string and extract the corresponding information.

    string line1 = result->lineResults[0]->text;
    string line2 = result->lineResults[1]->text;
    // https://en.wikipedia.org/wiki/Machine-readable_passport
    // Type
    string tmp = "Type: ";
    tmp.insert(tmp.length(), 1, line1[0]);
    printf("%s\r\n", tmp.c_str());
    
    // Issuing country
    tmp = "Issuing country: "; line1.substr(2, 5);
    tmp += line1.substr(2, 3);      
    printf("%s\r\n", tmp.c_str());
    
    // Surname
    int index = 5;
    tmp = "Surname: ";
    for (; index < 44; index++)
    {
      if (line1[index] != '<')
      {
        tmp.insert(tmp.length(), 1, line1[index]);
      }
      else 
      {
        break;
      }
    }
    printf("%s\r\n", tmp.c_str());
    
    // Given names
    tmp = "Given Names: ";
    index += 2;
    for (; index < 44; index++)
    {
      if (line1[index] != '<')
      {
        tmp.insert(tmp.length(), 1, line1[index]);
      }
      else 
      {
        tmp.insert(tmp.length(), 1, ' ');
      }
    }
    printf("%s\r\n", tmp.c_str());
    
    // Passport number
    tmp = "Passport number: ";
    index = 0;
    for (; index < 9; index++)
    {
      if (line2[index] != '<')
      {
        tmp.insert(tmp.length(), 1, line2[index]);
      }
      else 
      {
        break;
      }
    }
    printf("%s\r\n", tmp.c_str());
    
    // Nationality
    tmp = "Nationality: ";
    tmp += line2.substr(10, 3);
    printf("%s\r\n", tmp.c_str());
    
    // Date of birth
    tmp = line2.substr(13, 6);
    tmp.insert(2, "/");
    tmp.insert(5, "/");
    tmp = "Date of birth (YYMMDD): " + tmp;
    printf("%s\r\n", tmp.c_str());
    
    // Sex
    tmp = "Sex: ";
    tmp.insert(tmp.length(), 1, line2[20]);
    printf("%s\r\n", tmp.c_str());
    
    // Expiration date of passport
    tmp = line2.substr(21, 6);
    tmp.insert(2, "/");
    tmp.insert(5, "/");
    tmp = "Expiration date of passport (YYMMDD): " + tmp;
    printf("%s\r\n", tmp.c_str());
    
    // Personal number
    if (line2[28] != '<')
    {
      tmp = "Personal number: ";
      for (index = 28; index < 42; index++)
      {
        if (line2[index] != '<')
        {
          tmp.insert(tmp.length(), 1, line2[index]);
        }
        else 
        {
          break;
        }
      }
      printf("%s\r\n", tmp.c_str());
    }
    
  • To make the program user-friendly, we use OpenCV to show the display window and draw relevant information on it.
    line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness);
    line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness);
    line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness);
    line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness);
    drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10);
    
    imshow("Passport MRZ Recognition", ori);
    A further improvement is to use the hconcat function to stitch the two images together for easier comparison.
    // Join the two images horizontally into newMat and show the result in one window.
    // NOTE(review): cv::hconcat expects inputs with the same number of rows and
    // the same type — confirm that before/after satisfy this.
    hconcat(before, after, newMat);
    imshow("Comparison", newMat);
    Source Code

    45

    This website collects cookies to deliver better user experience

    Passport MRZ Recognition with Dynamsoft C++ OCR SDK