How to highlight searched text in a result that may contain diacritic characters?

14 Views Asked by At

I want to show the searched text in the result by highlighting the sections that match it. But how can I do this if the result text contains diacritics? For example, I search for "رب" in the text "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ". I can normalize the text to remove all diacritics, but then how do I highlight the searched text at its exact position in the original text?

1

There is 1 best solution below

0
Hadi Ahmadi On
/**
 * Builds an [AnnotatedString] over [resultText] in which every occurrence of
 * [searchText] carries [highlightStyle].
 *
 * Each index of [resultText] is tried as the start of a match, first as a plain
 * case-insensitive comparison and, failing that, as a diacritic-insensitive
 * comparison via [findDiacriticAwareMatch]. Anchoring both checks at the same
 * index keeps the reported offsets exact, finds every occurrence, and adds each
 * span only once.
 *
 * The elapsed wall-clock time in milliseconds is passed to `log` before returning.
 *
 * @param searchText the query to look for; an empty query yields no highlights.
 * @param resultText the text to display and search in.
 * @param highlightStyle the style applied to every matched range.
 * @return [resultText] with [highlightStyle] added over each matched range.
 */
fun getDiacriticAwareHighlightedText(
    searchText: String,
    resultText: String,
    highlightStyle: SpanStyle
): AnnotatedString {

    val start = System.currentTimeMillis()

    val builder = AnnotatedString.Builder(resultText)

    if (searchText.isNotEmpty()) {
        // Heuristic inherited from the original code: a diacritic-bearing match is
        // expected to fit within three source chars per query char.
        val windowLength = searchText.length * 3

        for (i in resultText.indices) {
            if (resultText.startsWith(searchText, startIndex = i, ignoreCase = true)) {
                builder.addStyle(highlightStyle, i, i + searchText.length)
            } else {
                // The window is deliberately NOT trimmed: dropping leading whitespace
                // would shift the match relative to i and misplace the highlight.
                val windowEnd = (i + windowLength).coerceAtMost(resultText.length)
                findDiacriticAwareMatch(
                    resultText.substring(i, windowEnd),
                    searchText,
                    builder,
                    highlightStyle,
                    i
                )
            }
        }
    }

    val end = System.currentTimeMillis()
    log("getDiacriticAwareHighlightedText elapsed: " + (end - start))
    return builder.toAnnotatedString()
}

/**
 * Highlights a diacritic-insensitive match of [searchText] at the very start of
 * [sectionText], if there is one.
 *
 * The match test is `normalizeText(sectionText).startsWith(searchText)`. The
 * length of the highlighted range is derived from the same normalization, one
 * code point at a time, so the range always agrees with what was matched and the
 * scan can never run past the end of [sectionText].
 *
 * The highlighted range ends right after the source character that completes the
 * match; marks that follow that character are not included.
 *
 * @param sectionText slice of the full text that begins at offset [i].
 * @param searchText the query; it is compared as given and is not normalized here.
 * @param builder receives the highlight span; its text is the full text.
 * @param highlightStyle the style to add over the matched range.
 * @param i offset of [sectionText] within the builder's text.
 */
private fun findDiacriticAwareMatch(
    sectionText: String,
    searchText: String,
    builder: AnnotatedString.Builder,
    highlightStyle: SpanStyle,
    i: Int
) {
    if (sectionText.isEmpty() || searchText.isEmpty()) return

    // A section that opens on a character which normalization removes would match
    // as if that character were absent; such a match belongs to a different start
    // offset, so it is skipped here.
    val firstWidth = Character.charCount(sectionText.codePointAt(0))
    if (normalizeText(sectionText.substring(0, firstWidth)).isEmpty()) return

    if (!normalizeText(sectionText).startsWith(searchText)) return

    // Consume source code points until they account for the whole query in
    // normalized form. A single code point may contribute zero normalized chars
    // (a stripped mark) or several (a compatibility decomposition).
    var consumed = 0
    var matchedLength = 0
    while (matchedLength < searchText.length && consumed < sectionText.length) {
        val width = Character.charCount(sectionText.codePointAt(consumed))
        matchedLength += normalizeText(sectionText.substring(consumed, consumed + width)).length
        consumed += width
    }
    builder.addStyle(highlightStyle, i, i + consumed)
}



/**
 * Characters this file classifies as Arabic diacritics, kept in a fixed order:
 * three contiguous code-point ranges, two stand-alone code points, and a final
 * group of seven individually listed characters.
 *
 * NOTE(review): U+0640 and the seven trailing entries look like a tatweel and
 * ordinary letters rather than combining marks — confirm they are meant to be
 * treated as diacritics.
 */
val arabicDiacritics: List<Char> =
    ('\u0610'..'\u061A') +
        ('\u06D6'..'\u06ED') +
        '\u0640' +
        ('\u064B'..'\u065F') +
        '\u0670' +
        listOf('\u0624', '\u0629', '\u064A', '\u0626', '\u0622', '\u0623', '\u0625')

// Compiled once: normalizeText is called repeatedly while scanning a text, and
// rebuilding the pattern on every call is wasted work.
private val combiningMarksRegex = Regex("\\p{M}")

/**
 * Returns [inputText] decomposed with Unicode NFKD and with every character in
 * the Unicode "Mark" category (`\p{M}`) removed.
 *
 * @param inputText the text to normalize.
 * @return the decomposed text without mark characters.
 */
fun normalizeText(inputText: String): String {
    return Normalizer.normalize(inputText, Normalizer.Form.NFKD).replace(combiningMarksRegex, "")
}