Differences in Tesseract OCR Results between Command Line and Unity Wrapper

83 Views Asked by At

I'm encountering discrepancies in the OCR results when using Tesseract 5.3.3 from the command line compared to using a Unity wrapper, even though the configurations and trained data are identical. I suspect that the differences might be related to how images are imported into Unity.

To provide more context:

I'm using Tesseract 5.3.3 with the same trained data and configurations in both environments. However, despite these similarities, the OCR results differ on more complicated images. Could the differences be attributed to how images are imported and processed within Unity? If so, what should I consider when handling images in Unity to ensure consistency with the command-line results? I tried changing the format to PNG and adjusting the image import settings in Unity, but while I always get good results from the command line, they don't translate to Unity. I'd appreciate any kind of help and suggestions.

unity results

The wrapper I'm using: https://github.com/Neelarghya/tesseract-unity

image processed command results

This is the code used to preprocess the image

        /// <summary>
        /// Runs Tesseract OCR on <paramref name="texture"/> and returns the recognized words whose
        /// confidence is at least <c>MinimumConfidence</c>, each followed by a single space.
        /// </summary>
        /// <remarks>
        /// The texture is stored in <c>_highlightedTexture</c>, and <c>DrawLines</c> is called on it
        /// with a green rectangle for every word box that passes the confidence filter.
        /// </remarks>
        /// <param name="texture">Texture to recognize; its pixels are read with <c>GetPixels32</c>.</param>
        /// <returns>
        /// The filtered text, or <c>null</c> when the Tesseract handle is zero, recognition reports
        /// a non-zero status, or no text pointer is returned.
        /// </returns>
        public string Recognize(Texture2D texture)
        {
            if (_tessHandle.Equals(IntPtr.Zero))
            {
                return null;
            }

            _highlightedTexture = texture;

            int width = _highlightedTexture.width;
            int height = _highlightedTexture.height;
            Color32[] colors = _highlightedTexture.GetPixels32();
            const int bytesPerPixel = 4;
            int byteCount = width * height * bytesPerPixel;
            byte[] dataBytes = new byte[byteCount];
            int bytePtr = 0;

            // Rows are written from the last row index to the first, i.e. vertically flipped relative
            // to the order GetPixels32 returned them.
            // NOTE(review): presumably because Unity returns rows bottom-up while Tesseract expects
            // top-down — confirm against the Unity documentation.
            for (int y = height - 1; y >= 0; y--)
            {
                for (int x = 0; x < width; x++)
                {
                    Color32 pixel = colors[y * width + x];
                    dataBytes[bytePtr++] = pixel.r;
                    dataBytes[bytePtr++] = pixel.g;
                    dataBytes[bytePtr++] = pixel.b;
                    dataBytes[bytePtr++] = pixel.a;
                }
            }

            IntPtr imagePtr = Marshal.AllocHGlobal(byteCount);
            try
            {
                Marshal.Copy(dataBytes, 0, imagePtr, byteCount);
                MyTessBaseAPISetImage(_tessHandle, imagePtr, width, height, bytesPerPixel, width * bytesPerPixel);

                // Diagnostic output of the engine variables currently in effect.
                int languageModelMinCompoundLength = -1;
                MyTessBaseAPIGetIntVariable(_tessHandle, "language_model_min_compound_length", ref languageModelMinCompoundLength);
                Debug.Log("MyTessBaseAPIGetIntVariable: " + languageModelMinCompoundLength);

                double invertThreshold = 0.0;
                MyTessBaseAPIGetDoubleVariable(_tessHandle, "invert_threshold", ref invertThreshold);
                Debug.Log("invert_threshold: " + invertThreshold);

                if (MyTessBaseAPIRecognize(_tessHandle, IntPtr.Zero) != 0)
                {
                    return null;
                }

                // Per-word confidences: a native int array terminated by -1.
                // NOTE(review): the Tesseract C API says this array is caller-owned
                // (TessDeleteIntArray); no such binding is visible here — TODO confirm and free it.
                List<int> confidence = new List<int>();
                IntPtr confidencesPointer = MyTessBaseAPIAllWordConfidences(_tessHandle);
                if (!confidencesPointer.Equals(IntPtr.Zero))
                {
                    for (int offset = 0; ; offset += sizeof(int))
                    {
                        int value = Marshal.ReadInt32(confidencesPointer, offset);
                        if (value == -1)
                        {
                            break;
                        }

                        confidence.Add(value);
                    }
                }

                // Word bounding boxes.
                // NOTE(review): the returned Boxa is presumably caller-owned as well (boxaDestroy) —
                // TODO confirm against the wrapper's native bindings.
                IntPtr boxaPtr = MyTessBaseAPIGetWords(_tessHandle, IntPtr.Zero);
                Boxa boxa = default(Boxa);
                int boxCount = 0;
                if (!boxaPtr.Equals(IntPtr.Zero))
                {
                    boxa = Marshal.PtrToStructure<Boxa>(boxaPtr);
                    boxCount = boxa.n;
                }

                IntPtr stringPtr = MyTessBaseAPIGetUTF8Text(_tessHandle);
                if (stringPtr.Equals(IntPtr.Zero))
                {
                    return null;
                }

                string recognizedText;
                try
                {
                    // The native text is UTF-8 on every platform. Decoding it with the ANSI code
                    // page (as PtrToStringAnsi does on Windows) corrupts non-ASCII characters, so
                    // copy the NUL-terminated bytes and decode them explicitly.
                    int length = 0;
                    while (Marshal.ReadByte(stringPtr, length) != 0)
                    {
                        length++;
                    }

                    byte[] utf8 = new byte[length];
                    Marshal.Copy(stringPtr, utf8, 0, length);
                    recognizedText = Encoding.UTF8.GetString(utf8);
                }
                finally
                {
                    MyTessDeleteText(stringPtr);
                }

                MyTessBaseAPIClear(_tessHandle);

                string[] words = recognizedText.Split(new[] {' ', '\n'}, StringSplitOptions.RemoveEmptyEntries);
                StringBuilder result = new StringBuilder();

                // The split text, the confidence list and the box list are not guaranteed to have the
                // same length, so never index past the shortest of them.
                int wordCount = Math.Min(words.Length, Math.Min(boxCount, confidence.Count));
                for (int i = 0; i < wordCount; i++)
                {
                    Debug.Log(words[i] + " -> " + confidence[i]);
                    if (confidence[i] >= MinimumConfidence)
                    {
                        result.Append(words[i]);
                        result.Append(" ");
                    }
                }

                int drawableCount = Math.Min(boxCount, confidence.Count);
                for (int index = 0; index < drawableCount; index++)
                {
                    if (confidence[index] < MinimumConfidence)
                    {
                        continue;
                    }

                    // boxa.box is a native array of Box pointers.
                    IntPtr boxPtr = Marshal.ReadIntPtr(boxa.box, index * IntPtr.Size);
                    Box box = Marshal.PtrToStructure<Box>(boxPtr);

                    // Convert the box's y coordinate back into the texture's row order (the image was
                    // flipped vertically before being handed to Tesseract).
                    DrawLines(_highlightedTexture,
                        new Rect(box.x, _highlightedTexture.height - box.y - box.h, box.w, box.h),
                        Color.green);
                }

                return result.ToString();
            }
            finally
            {
                // Released on every exit path, including early returns and exceptions.
                Marshal.FreeHGlobal(imagePtr);
            }
        }
0

There are 0 best solutions below