I'm encountering discrepancies in the OCR results when using Tesseract 5.3.3 from the command line compared to using a Unity wrapper, even though the configurations and trained data are identical. I suspect that the differences might be related to how images are imported into Unity.
To provide more context:
I'm using Tesseract 5.3.3 with the same trained data and configurations in both environments. However, despite these similarities, the OCR results differ on more complicated images. Could the differences be attributed to how images are imported and processed within Unity? If so, what should I consider when handling images in Unity to ensure consistency with the command-line results? I tried changing the format to PNG and adjusting the image import settings in Unity, but while I always get good results from the command line, they don't translate to Unity. I'd appreciate any kind of help or suggestions.
The wrapper I'm using: https://github.com/Neelarghya/tesseract-unity
image processed command results
This is the code used to preprocess the image
/// <summary>
/// Runs OCR on <paramref name="texture"/> and returns the recognized words whose
/// confidence is at least <c>MinimumConfidence</c>, each followed by a single space.
/// The bounding box of every accepted word is drawn in green onto the texture.
/// </summary>
/// <param name="texture">Readable texture to recognize; also stored as the highlighted texture.</param>
/// <returns>
/// The filtered text, or <c>null</c> when the Tesseract handle is not initialised,
/// the texture is null, or any native call fails.
/// </returns>
public string Recognize(Texture2D texture)
{
    if (_tessHandle.Equals(IntPtr.Zero) || texture == null)
        return null;

    _highlightedTexture = texture;

    int width = _highlightedTexture.width;
    int height = _highlightedTexture.height;
    Color32[] colors = _highlightedTexture.GetPixels32();

    const int bytesPerPixel = 4;
    int byteCount = width * height * bytesPerPixel;
    byte[] dataBytes = new byte[byteCount];

    // Rows are emitted last-to-first, i.e. the image is flipped vertically while it
    // is packed as RGBA. Presumably this converts Unity's bottom-up row order into
    // the top-down order Tesseract expects (the box drawing below flips y back).
    // NOTE(review): alpha is forwarded unchanged; if the source has transparent
    // pixels, consider flattening onto an opaque background first - confirm.
    int bytePtr = 0;
    for (int y = height - 1; y >= 0; y--)
    {
        for (int x = 0; x < width; x++)
        {
            Color32 pixel = colors[y * width + x];
            dataBytes[bytePtr++] = pixel.r;
            dataBytes[bytePtr++] = pixel.g;
            dataBytes[bytePtr++] = pixel.b;
            dataBytes[bytePtr++] = pixel.a;
        }
    }

    IntPtr imagePtr = Marshal.AllocHGlobal(byteCount);
    IntPtr stringPtr = IntPtr.Zero;
    List<int> confidence = new List<int>();
    Boxa boxa;
    string recognizedText;

    try
    {
        Marshal.Copy(dataBytes, 0, imagePtr, byteCount);
        MyTessBaseAPISetImage(_tessHandle, imagePtr, width, height, bytesPerPixel, width * bytesPerPixel);

        // Diagnostic output of the engine parameters currently in effect.
        int language_model_min_compound_length = -1;
        MyTessBaseAPIGetIntVariable(_tessHandle, "language_model_min_compound_length", ref language_model_min_compound_length);
        Debug.Log("MyTessBaseAPIGetIntVariable: " + language_model_min_compound_length);

        double invert_threshold = .0f;
        MyTessBaseAPIGetDoubleVariable(_tessHandle, "invert_threshold", ref invert_threshold);
        Debug.Log("invert_threshold: " + invert_threshold);

        // NOTE(review): no source resolution is set on the handle here. If results
        // differ from the command-line tool, setting one explicitly may be worth
        // trying - confirm against the Tesseract documentation.

        if (MyTessBaseAPIRecognize(_tessHandle, IntPtr.Zero) != 0)
            return null;

        // The confidence array is terminated by -1.
        // NOTE(review): this array and the Boxa below are never released in this
        // method; the matching release bindings are not visible here - verify.
        IntPtr confidencesPointer = MyTessBaseAPIAllWordConfidences(_tessHandle);
        if (confidencesPointer.Equals(IntPtr.Zero))
            return null;

        for (int offset = 0; ; offset += sizeof(int))
        {
            int wordConfidence = Marshal.ReadInt32(confidencesPointer, offset);
            if (wordConfidence == -1) break;
            confidence.Add(wordConfidence);
        }

        IntPtr boxaPtr = MyTessBaseAPIGetWords(_tessHandle, IntPtr.Zero);
        if (boxaPtr.Equals(IntPtr.Zero))
            return null;
        boxa = Marshal.PtrToStructure<Boxa>(boxaPtr);

        stringPtr = MyTessBaseAPIGetUTF8Text(_tessHandle);
        if (stringPtr.Equals(IntPtr.Zero))
            return null;

        // The native text is UTF-8 on every platform; decoding it with the ANSI
        // code page corrupts any non-ASCII character.
        recognizedText = Utf8PtrToString(stringPtr);

        MyTessBaseAPIClear(_tessHandle);
    }
    finally
    {
        // Release unmanaged buffers on every exit path, including exceptions.
        if (!stringPtr.Equals(IntPtr.Zero))
            MyTessDeleteText(stringPtr);
        Marshal.FreeHGlobal(imagePtr);
    }

    string[] words = recognizedText.Split(new[] {' ', '\n'}, StringSplitOptions.RemoveEmptyEntries);

    // The three collections come from separate native calls and are not guaranteed
    // to have the same length, so every index is bounded by the shortest one.
    int boxCount = boxa.box.Equals(IntPtr.Zero) ? 0 : Math.Min(boxa.n, confidence.Count);
    int wordCount = Math.Min(Math.Min(boxa.n, confidence.Count), words.Length);

    StringBuilder result = new StringBuilder();
    for (int i = 0; i < wordCount; i++)
    {
        Debug.Log(words[i] + " -> " + confidence[i]);
        if (confidence[i] >= MinimumConfidence)
        {
            result.Append(words[i]);
            result.Append(" ");
        }
    }

    int pointerSize = Marshal.SizeOf(typeof(IntPtr));
    for (int index = 0; index < boxCount; index++)
    {
        if (confidence[index] < MinimumConfidence)
            continue;

        IntPtr boxPtr = Marshal.ReadIntPtr(boxa.box, index * pointerSize);
        Box box = Marshal.PtrToStructure<Box>(boxPtr);

        // Flip y back because the image was handed to Tesseract with rows reversed.
        DrawLines(_highlightedTexture,
            new Rect(box.x, _highlightedTexture.height - box.y - box.h, box.w, box.h),
            Color.green);
    }

    return result.ToString();
}

/// <summary>
/// Decodes a NUL-terminated UTF-8 string located at <paramref name="utf8Ptr"/>.
/// </summary>
/// <param name="utf8Ptr">Non-zero pointer to the first byte of the string.</param>
/// <returns>The decoded managed string (empty when the first byte is NUL).</returns>
private static string Utf8PtrToString(IntPtr utf8Ptr)
{
    int length = 0;
    while (Marshal.ReadByte(utf8Ptr, length) != 0)
        length++;

    byte[] buffer = new byte[length];
    Marshal.Copy(utf8Ptr, buffer, 0, length);
    return Encoding.UTF8.GetString(buffer);
}