sábado, 25 de julho de 2015

C++ Unicode Sample

#include
#include
#include



// Names of the data files this sample loads with loadFile() and hands to the
// Fine* engine calls: per-language dictionaries (.edc), the patterns file
// (.rom) and per-language business-card keywords (.akw). The names are
// relative, so fopen() resolves them against the current working directory.
static const char* const EnglishDictionary = "english.edc";
static const char* const GermanDictionary = "german.edc";
static const char* const PatternsFile = "patterns.rom";
static const char* const EnglishKeywords = "english.akw";
static const char* const GermanKeywords = "german.akw";

// Recommended buffer sizes, in bytes (they are passed straight to malloc()).
// The text/lines/fields sizes are only starting values: the recognition
// functions below reallocate those buffers and retry when the engine reports
// FINE_ERR_OUTPUT_BUFF_TOO_SMALL.
static const int RamSizeForLibraryInitialization = 1024;    // 1 KB
static const int RamSizeForRecognition = 2 * 1024 * 1024;   // 2 MB
static const int RamSizeForSuggestGeneration = 100 * 1024;  // 100 KB
static const int WholeTextBuffSize = 100 * 1024;                   // 100 KB
static const int OneLineTextBuffSize = 4 * 1024;                   // 4 KB
static const int InitialLinesBuffSize = 10 * 1024;                 // 10 KB
static const int InitialFieldsBuffSize = 10 * 1024;                // 10 KB

// Returns the size of fileName in bytes.
// Returns 0 (after printing a message to stderr) when the file cannot be
// opened, when its size cannot be determined, or when the size does not fit
// into an int.
static int fileSize( const char* fileName )
{
       FILE* f = fopen( fileName, "rb" );
       if( f == NULL ) {
             fprintf( stderr, "File %s not found!\n", fileName );
             return 0;
       }

       // ftell() reports failure as -1; only trust it after a successful seek.
       long size = -1;
       if( fseek( f, 0, SEEK_END ) == 0 ) {
             size = ftell( f );
       }
       fclose( f );

       // Reject errors and sizes that would be truncated by the int return type.
       const int result = (int)size;
       if( size < 0 || (long)result != size ) {
             fprintf( stderr, "Can't determine size of %s.\n", fileName );
             return 0;
       }
       return result;
}

// Reads the whole of fileName into a freshly malloc'ed buffer.
// Returns the buffer, which the caller releases with free()/unload(), or NULL
// when the file cannot be opened (a message is printed to stderr), is empty,
// cannot be measured or read completely, or when memory is exhausted.
static void* loadFile( const char* fileName )
{
       FILE* f = fopen( fileName, "rb" );
       if( f == NULL ) {
             fprintf( stderr, "File %s not found!\n", fileName );
             return NULL;
       }

       // Measure the file through the same handle that is read below, so the
       // size and the contents always belong to the same file.
       long size = -1;
       if( fseek( f, 0, SEEK_END ) == 0 ) {
             size = ftell( f );
       }
       if( size <= 0 || fseek( f, 0, SEEK_SET ) != 0 ) {
             fclose( f );
             return NULL;
       }

       void* ret = malloc( (size_t)size );
       if( ret == NULL ) {
             fclose( f );
             return NULL;
       }
       // Request the contents as a single item: a short read counts as failure.
       if( fread( ret, (size_t)size, 1, f ) != 1 ) {
             free( ret );
             fclose( f );
             return NULL;
       }
       fclose( f );
       return ret;
}

// Releases a buffer obtained from malloc() or loadFile().
// A null pointer is accepted and ignored.
static void unload( void* ptr )
{
       // free() is specified to do nothing for a null pointer.
       free( ptr );
}

// Number of recognition languages used by this sample (English and German).
// The per-language arrays below hold langCount entries plus one terminator.
static const int langCount = 2;

// Returns true when each of the first langCount entries of files is non-null,
// false as soon as a null entry is found.
static bool areLoaded( void** files ) {
       int checked = 0;
       while( checked < langCount && files[checked] != 0 ) {
             checked++;
       }
       return checked == langCount;
}

static void unloadAll( void** files )
{
       for( int i = 0; i < langCount; i++ ) {
             unload( files[i] );
       }
}

// Bitmap decoder defined outside this section.
// The callers in this file pass the raw contents of the input file (bmp) and
// its size in bytes (bmpsize), treat a null return value as "can't read
// image", and use the out-parameters to fill the FINE_IMAGE width, height,
// bits-per-pixel and byte-width fields. They release the returned pixel
// buffer with unload(), i.e. free().
// NOTE(review): the exact units and layout behind the out-parameters are not
// visible here -- confirm against the bmpRead implementation.
unsigned char* bmpRead( char* bmp, int bmpsize,
             int* xsize, int* ysize, int* bitsPerPix, int* imageByteWidth );

int callback( int processedPercentage, DWORD warning )
{
       printf("%d%% of the work is done.\n", processedPercentage);
       if( ( warning & FINE_WARN_PROBABLY_BAD_IMAGE ) != 0 ) {
             printf("Probably bad image.\n");
       }
       if( processedPercentage < 50 && ( warning & FINE_WARN_SLOW_RECOGNITION ) != 0 ) {
             return 0;
       } else {
             return 1;
       }
}

static FILE* rewriteUnicodeFile( const char* fileName )
{
       const WCHAR unicodeTag = L'\xFEFF';
       FILE* f;
       f = fopen(fileName, "wb");
       if( f == NULL ) {
             return NULL;
       }
       fwrite( &unicodeTag, sizeof(WCHAR), 1, f );
       return f;
}

// Writes one wide character to file in its in-memory representation.
// Returns the number of items fwrite() reports as written: 1 on success,
// 0 on failure.
static int writeUnicodeChar( wchar_t wchar, FILE* file )
{
       const size_t written = fwrite( &wchar, sizeof wchar, 1, file );
       return (int)written;
}

static void writeUnicodeString( const wchar_t* string, FILE* file )
{
       while( *string != L'\0' ) {
             writeUnicodeChar( *string, file );
             string++;
       }
}

// returns 0 on success, 1 otherwise.
static int writeRecognitionResults( const char* outputFileName, int lineCount, PFINE_TEXT_LINE textBuff )
{
       FILE* output = rewriteUnicodeFile( outputFileName );
       if( output == NULL ) {
             return 1;
       }
       for( int line = 0; line < lineCount; line++ ) {
             for( int ch = 0; ch < textBuff[line].CharCount; ch++ ) {
                    writeUnicodeChar( textBuff[line].Chars[ch].Unicode, output );
             }
             writeUnicodeString( L"\r\n", output );
       }
       fclose( output );
       return 0;
}

static int recognizeFileInEnglishAndGerman( const char* inputFileName, const char* outputFileName )
{
       // Load image
       void* bmpImage = loadFile( inputFileName );
       if( bmpImage == 0 ) {
             fprintf( stderr, "Can't load %s.\n", inputFileName );
             return 1;
       }
       FINE_IMAGE image;
       image.Image = bmpRead( (char*)bmpImage, fileSize(inputFileName), &image.ImageWidth, &image.ImageHeight,
             &image.BitsPerPixel, &image.ImageByteWidth );
       if( image.Image == 0 ) {
             fprintf( stderr, "Can't read image.\n" );
       }
       image.Resolution = 260;
       unload( bmpImage );

       // Load dictionaries
       TLanguageID languages[langCount + 1] = { LID_English, LID_German, LID_Undefined };
       PFINE_DICTIONARY dictionaries[langCount + 1] = { loadFile( EnglishDictionary ), loadFile( GermanDictionary ), 0 };
       if( dictionaries[0] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", EnglishDictionary );
       }
       if( dictionaries[1] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", GermanDictionary );
       }

       // Load patterns
       PFINE_PATTERNS patterns = loadFile( PatternsFile );
       if( patterns == 0 ) {
             fprintf( stderr, "Can't load %s.\n", PatternsFile );
       }

       // Allocate buffers
       void* libraryRam = malloc( RamSizeForLibraryInitialization );
       void* ram = malloc( RamSizeForRecognition );
       int textBuffSize = WholeTextBuffSize;
       PFINE_TEXT_LINE textBuff = (PFINE_TEXT_LINE) malloc( textBuffSize );
       if( libraryRam == 0 || ram == 0 || textBuff == 0 ) {
             fprintf( stderr, "Can't allocate buffers.\n" );
       }

       int errorCode = 1;

       if( areLoaded( dictionaries ) && patterns != 0 && image.Image != 0 &&
             libraryRam != 0 && ram != 0 && textBuff != 0 )
       {
             DWORD ret = FineInitialize( CustomerKey, libraryRam, RamSizeForLibraryInitialization );

             if( ret == FINE_ERR_NO_ERROR ) {
                    int lineCount = 0;
                    ret = FineRecognizeImage( languages, patterns, dictionaries, &image, ram, RamSizeForRecognition,
                           RM_Full, &lineCount, textBuff, &textBuffSize, callback );
                    if( ret == FINE_ERR_OUTPUT_BUFF_TOO_SMALL ) {
                           free( textBuff );
                           textBuff = (PFINE_TEXT_LINE) malloc( textBuffSize );
                           ret = FineRecognizeImage( languages, patterns, dictionaries, &image, ram, RamSizeForRecognition,
                                  RM_Full, &lineCount, textBuff, &textBuffSize, callback );
                    }

                    if( ret == FINE_ERR_NO_ERROR ) {
                           errorCode = writeRecognitionResults( outputFileName, lineCount, textBuff );
                           if( errorCode != 0 ) {
                                  fprintf( stderr, "Can't write recognition results to %s.\n", outputFileName );
                           }
                    } else {
                           fprintf( stderr, "Error %d on FineRecognizeImage.\n", ret );
                    }

                    FineDeinitialize();
             } else {
                    fprintf( stderr, "Error %d on FineInitialize.\n", ret );
             }
       }

       unloadAll( dictionaries );
       unload( patterns );
       unload( image.Image );
       unload( libraryRam );
       unload( ram );
       unload( textBuff );
       return errorCode;
}

static int recognizeFileInEnglishAndGermanByLines( const char* inputFileName, const char* outputFileName )
{
       // Load image
       void* bmpImage = loadFile( inputFileName );
       if( bmpImage == 0 ) {
             fprintf( stderr, "Can't load %s.\n", inputFileName );
             return 1;
       }
       FINE_IMAGE image;
       image.Image = bmpRead( (char*)bmpImage, fileSize(inputFileName), &image.ImageWidth, &image.ImageHeight,
             &image.BitsPerPixel, &image.ImageByteWidth );
       if( image.Image == 0 ) {
             fprintf( stderr, "Can't read image.\n" );
       }
       image.Resolution = 260;
       unload( bmpImage );

       // Load dictionaries
       TLanguageID languages[langCount + 1] = { LID_English, LID_German, LID_Undefined };
       PFINE_DICTIONARY dictionaries[langCount + 1] = { loadFile( EnglishDictionary ), loadFile( GermanDictionary ), 0 };
       if( dictionaries[0] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", EnglishDictionary );
       }
       if( dictionaries[1] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", GermanDictionary );
       }

       // Load patterns
       PFINE_PATTERNS patterns = loadFile( PatternsFile );
       if( patterns == 0 ) {
             fprintf( stderr, "Can't load %s.\n", PatternsFile );
       }

       // Allocate buffers
       void* libraryRam = malloc( RamSizeForLibraryInitialization );
       void* ram = malloc( RamSizeForRecognition );
       int linesBuffSize = InitialLinesBuffSize;
       RECT* linesBuff = (RECT*) malloc( linesBuffSize );
       int textBuffSize = OneLineTextBuffSize;
       PFINE_TEXT_LINE textBuff = (PFINE_TEXT_LINE) malloc( textBuffSize );
       if( libraryRam == 0 || ram == 0 || linesBuff == 0 || textBuff == 0 ) {
             fprintf( stderr, "Can't allocate buffers.\n" );
       }

       FILE* output = fopen( outputFileName, "wt" );
       if( output == 0 ) {
             fprintf( stderr, "Can't write recognition results to %s.\n", outputFileName );
       }

       int errorCode = 1;

       if( areLoaded( dictionaries ) && patterns != 0 && image.Image != 0 &&
             libraryRam != 0 && ram != 0 && linesBuff != 0 && textBuff != 0 && output != 0 )
       {
             DWORD ret = FineInitialize( CustomerKey, libraryRam, RamSizeForLibraryInitialization );

             if( ret == FINE_ERR_NO_ERROR ) {
                    int rectsCount;
                    ret = FineGetTextLines( &image, ram, RamSizeForRecognition, &rectsCount,
                           linesBuff, &linesBuffSize, callback );
                    if( ret == FINE_ERR_OUTPUT_BUFF_TOO_SMALL ) {
                           free( linesBuff );
                           linesBuff = (RECT*) malloc( linesBuffSize );
                           ret = FineGetTextLines( &image, ram, RamSizeForRecognition, &rectsCount,
                                  linesBuff, &linesBuffSize, callback );
                    }

                    if( ret == FINE_ERR_NO_ERROR ) {
                           int line;
                           for( line = 0; line < rectsCount; line++ ) {
                                  fprintf( output, "Line rectangle #%d: top=%d, bottom=%d, left=%d, right=%d.\n", line,
                                        linesBuff[line].top, linesBuff[line].bottom, linesBuff[line].left, linesBuff[line].right );

                                  int lineCount = 0;

                                  ret = FineRecognizeRegion( languages, patterns, dictionaries, &image, 1, &linesBuff[line],
                                        ram, RamSizeForRecognition, RM_Fast, &lineCount, textBuff, &textBuffSize, callback );
                                  if( ret == FINE_ERR_OUTPUT_BUFF_TOO_SMALL ) {
                                        free( textBuff );
                                        textBuff = (PFINE_TEXT_LINE) malloc( textBuffSize );
                                        ret = FineRecognizeRegion( languages, patterns, dictionaries, &image, 1, &linesBuff[line],
                                               ram, RamSizeForRecognition, RM_Fast, &lineCount, textBuff, &textBuffSize, callback );
                                  }
                                  if( ret != FINE_ERR_NO_ERROR ) {
                                        fprintf( stderr, "Error %d on FineRecognizeRegion.\n", ret );
                                        break;
                                  }

                                  fprintf( output, "Line text #%d: ", line );
                                  for( int line = 0; line < lineCount; line++ ) {
                                        for( int ch = 0; ch < textBuff[line].CharCount; ch++ ) {
                                               fprintf(output, "%C", textBuff[line].Chars[ch].Unicode);
                                        }
                                        fprintf(output, "\n");
                                  }
                           }
                           if( line == rectsCount ) {
                                  errorCode = 0;
                           }
                    } else {
                           fprintf( stderr, "Error %d on FineGetTextLines.\n", ret );
                    }

                    FineDeinitialize();
             } else {
                    fprintf( stderr, "Error %d on FineInitialize.\n", ret );
             }
       }

       unloadAll( dictionaries );
       unload( patterns );
       unload( image.Image );
       unload( libraryRam );
       unload( ram );
       unload( linesBuff );
       unload( textBuff );
       if( output != 0 ) {
             fclose( output );
       }
       return errorCode;
}

// Human-readable labels for business card fields; writeField() indexes this
// table with FINE_BCR_FIELD::Type.
// NOTE(review): the order presumably mirrors the SDK's field type enumeration
// (ending at BFT_Max) -- confirm against the SDK header before reordering or
// adding entries.
static const wchar_t* const fieldNames[BFT_Max] = {
       L"Phone",
       L"Fax",
       L"Mobile Phone",
       L"E-mail",
       L"Web address",
       L"Postal address",
       L"Name",
       L"CompanyName",
       L"Job",
       L"OtherText"
 };

static void writeField( const FINE_BCR_FIELD* field, FILE* file )
{
       writeUnicodeString( fieldNames[field->Type], file );
       writeUnicodeString( L": ", file );
       for( int i = 0; i < field->TextLength; i++ ) {
             writeUnicodeChar( field->Text[i], file );
       }
       writeUnicodeString( L".\r\n", file );
}

// Returns input with its low and high bytes exchanged.
// The shifts by 8 only swap cleanly if WORD is a 16-bit type.
inline static WORD swapBytes( WORD input )
{
       const WORD formerHigh = (WORD)( input >> 8 );
       const WORD formerLow = (WORD)( input << 8 );
       return (WORD)( formerHigh | formerLow );
}

void loadUnicodeString( const char* fileName, WCHAR** ret, int* length )
{
       const WCHAR UNICODE_FILE_TAG = L'\xFEFF';
       const WCHAR INVERSE_UNICODE_FILE_TAG = L'\xFFEF';
       WCHAR tag;
       const int size = fileSize( fileName );
       FILE* file = fopen( fileName, "rb" );
       if( file == 0 ) {
             *ret = 0;
             return;
       }
       fread( &tag, sizeof( tag ), 1, file );
       *length = size / sizeof( WCHAR );
       *ret = (WCHAR*)malloc( size );
       if( *ret == 0 ) {
             fclose( file );
             return;
       }
       fread( *ret, size - sizeof( WCHAR ), 1, file );
       (*ret)[ *length - 1 ] = L'\0';
       switch( tag ) {
             case UNICODE_FILE_TAG:
                    break;
             case INVERSE_UNICODE_FILE_TAG :
             {
                    for( int i = 0; i < *length - 1; i++ ) {
                           (*ret)[i] = swapBytes( (*ret)[i] );
                    }
                    break;
             }
             default:
                    unload( *ret );
                    fprintf( stderr, "File %s not unicode: no byte order tag found!\n", fileName );
                    break;
       }
       fclose(file);
       return;
}

static int recognizeBusinessCardInEnglishAndGerman( const char* inputFileName, const char* outputFileName )
{
       // Load image
       void* bmpImage = loadFile( inputFileName );
       if( bmpImage == 0 ) {
             fprintf( stderr, "Can't load %s.\n", inputFileName );
             return 1;
       }
       FINE_IMAGE image;
       image.Image = bmpRead( (char*)bmpImage, fileSize(inputFileName), &image.ImageWidth, &image.ImageHeight,
             &image.BitsPerPixel, &image.ImageByteWidth );
       if( image.Image == 0 ) {
             fprintf( stderr, "Can't read image.\n" );
       }
       image.Resolution = 260;
       unload( bmpImage );

       // Load dictionaries
       TLanguageID languages[langCount + 1] = { LID_English, LID_German, LID_Undefined };
       PFINE_DICTIONARY dictionaries[langCount + 1] = { loadFile( EnglishDictionary ), loadFile( GermanDictionary ), 0 };
       if( dictionaries[0] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", EnglishDictionary );
       }
       if( dictionaries[1] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", GermanDictionary );
       }

       // Load keywords
       PFINE_KEYWORDS keywords[langCount + 1] = { loadFile( EnglishKeywords ), loadFile( GermanKeywords ), 0 };
       if( keywords[0] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", EnglishKeywords );
       }
       if( keywords[1] == 0 ) {
             fprintf( stderr, "Can't load %s.\n", GermanKeywords );
       }

       // Load patterns
       PFINE_PATTERNS patterns = loadFile( PatternsFile );
       if( patterns == 0 ) {
             fprintf( stderr, "Can't load %s.\n", PatternsFile );
       }

       // Allocate buffers
       void* libraryRam = malloc( RamSizeForLibraryInitialization );
       void* ram = malloc( RamSizeForRecognition );
       int fieldsBuffSize = InitialFieldsBuffSize;
       PFINE_BCR_FIELD fieldsBuff = (PFINE_BCR_FIELD)malloc( fieldsBuffSize );
       if( libraryRam == 0 || ram == 0 || fieldsBuff == 0 ) {
             fprintf( stderr, "Can't allocate buffers.\n" );
       }

       FILE* output = rewriteUnicodeFile( outputFileName );
       if( output == 0 ) {
             fprintf( stderr, "Can't write recognition results to %s.\n", outputFileName );
       }

       int errorCode = 1;

       if( image.Image != 0 && areLoaded( dictionaries ) && areLoaded( keywords ) && patterns != 0 &&
             libraryRam != 0 && ram != 0 && fieldsBuff != 0 && output != 0 )
       {
             DWORD ret = FineInitialize( CustomerKey, libraryRam, RamSizeForLibraryInitialization );

             if( ret == FINE_ERR_NO_ERROR ) {
                    int fieldsCount;
                    ret = FineRecognizeBusinessCard( languages, patterns, dictionaries, keywords, &image,
                           ram, RamSizeForRecognition, RM_Fast, &fieldsCount, fieldsBuff, &fieldsBuffSize, callback );
                    if( ret == FINE_ERR_OUTPUT_BUFF_TOO_SMALL ) {
                           free( fieldsBuff );
                           fieldsBuff = (PFINE_BCR_FIELD) malloc( fieldsBuffSize );
                           ret = FineRecognizeBusinessCard( languages, patterns, dictionaries, keywords, &image,
                                  ram, RamSizeForRecognition, RM_Fast, &fieldsCount, fieldsBuff, &fieldsBuffSize, callback );
                    }

                    if( ret == FINE_ERR_NO_ERROR ) {
                           for( int j = 0; j < fieldsCount; j++ ) {
                                  writeField( fieldsBuff + j, output );
                           }
                    } else {
                           fprintf( stderr, "Error %d on FineRecognizeBusinessCard.\n", ret );
                    }

                    FineDeinitialize();
             } else {
                    fprintf( stderr, "Error %d on FineInitialize.\n", ret );
             }
       }

       unloadAll( dictionaries );
       unloadAll( keywords );
       unload( patterns );
       unload( image.Image );
       unload( libraryRam );
       unload( ram );
       unload( fieldsBuff );
       if( output != 0 ) {
             fclose( output );
       }
       return errorCode;
}

static int findSuggests( const char* inputFileName, const char* outputFileName )
{
       // Load dictionary
       PFINE_DICTIONARY dictionary = loadFile( EnglishDictionary );
       if( dictionary == 0 ) {
             fprintf( stderr, "Can't load %s.\n", EnglishDictionary );
       }

       // Allocate buffers
       void* libraryRam = malloc( RamSizeForLibraryInitialization );
       void* ram = malloc( RamSizeForSuggestGeneration );
       int suggestBuffLen = 100;
       WCHAR* suggestBuff = (WCHAR*) malloc( suggestBuffLen * sizeof(WCHAR) );
       if( libraryRam == 0 || ram == 0 || suggestBuff == 0 ) {
             fprintf( stderr, "Can't allocate buffers.\n" );
       }

       WCHAR* input = 0;
       int wordLength = 0;
       loadUnicodeString( inputFileName, &input, &wordLength );
       if( input == 0 ) {
             fprintf( stderr, "Can't load %s.\n", inputFileName );
       }

       FILE* output = rewriteUnicodeFile( outputFileName );
       if( output == 0 ) {
             fprintf( stderr, "Can't write results to %s.\n", outputFileName );
       }

       int errorCode = 1;

       if( ram != 0 && suggestBuff != 0 && input != 0 && dictionary != 0 && output != 0 ) {
             DWORD ret = FineInitialize( CustomerKey, libraryRam, RamSizeForLibraryInitialization );

             if( ret == FINE_ERR_NO_ERROR ) {
                    ret = FineGetWordSuggest(  dictionary, ram, RamSizeForSuggestGeneration,
                           input, wordLength, suggestBuff, &suggestBuffLen );
                    if( ret == FINE_ERR_NO_ERROR ) {
                           for( WCHAR* ptr = suggestBuff; ptr < suggestBuff + suggestBuffLen; ptr++ ) {
                                  writeUnicodeChar( *ptr, output );
                           }
                           errorCode = 0;
                    } else {
                           fprintf( stderr, "Error %d on FineGetWordSuggest.\n", ret );
                    }

                    FineDeinitialize();
             } else {
                    fprintf( stderr, "Error %d on FineInitialize.\n", ret );
             }
       }

       unload( dictionary );
       unload( libraryRam );
       unload( ram );
       unload( suggestBuff );
       unload( input );
       if( output != 0 ) {
             fclose( output );
       }
       return errorCode;
}

// Help text printed by main() when the command line is incomplete or the
// mode switch is not recognized. Each listed mode maps to one of the
// functions dispatched in main().
static const char* const usageString = "\n"
       "3 command line parameters required: mode input_file_name output_file_name.\n"
       "Possible modes:\n"
       "-r --- recognize text (input --- bitmap file, output --- unicode text file);\n"
       "-l --- recognize text by lines (input --- bitmap file, output --- ANSI text file);\n"
       "-b --- recognize and analyze business card (input --- bitmap file, output --- unicode text file);\n"
       "-s --- get word suggest (input --- unicode text file, output --- unicode text file).\n";

int main( int argc, char** argv, char** /*envp*/ )
{
       if( argc < 4 ) {
             printf( usageString );
             return 1;
       }
      
       if( strcmp( argv[1], "-r" ) == 0 ) {
             return recognizeFileInEnglishAndGerman( argv[2], argv[3] );
       } else if( strcmp( argv[1], "-l" ) == 0 ) {
             return recognizeFileInEnglishAndGermanByLines( argv[2], argv[3] );
       } else if( strcmp( argv[1], "-b" ) == 0 ) {
             return recognizeBusinessCardInEnglishAndGerman( argv[2], argv[3] );
       } else if( strcmp( argv[1], "-s" ) == 0 ) {
             return findSuggests( argv[2], argv[3] );
       } else {
             printf( usageString );
       }

       return 1;
}


Postar um comentário