How to convert a Perl index() script into a Bash function

168 Views Asked by At

I want to use Perl inside a Bash function to get the index of a substring within any given string.

Here is the example for getting the indexOf value from perl script:

https://www.geeksforgeeks.org/perl-index-function/

#!/usr/bin/perl
use strict;
use warnings;

# String in which the substring is searched.
my $string = "Geeks are the best";

# index() yields the zero-based position of the first occurrence of the
# substring, or -1 when the substring does not occur.
my $index = index($string, 'the');

# Print the position of the substring.
print "Position of 'the' in the string: $index\n";

Output:

Position of 'the' in the string: 10

Here is the test.sh:

#!/bin/bash

# Placeholder from the question: it is meant to print the position that the
# Perl snippet computes, but no perl interpreter is ever invoked here.
# NOTE(review): the Perl text is pasted into a bash double-quoted string, and
# the double quotes that belong to the Perl code end that string early, so
# bash does not even store the script verbatim in "index" (it appears to try
# to run the following word, "are", as a command) - confirm by running it.
bash_function_get_index_from_perl_script() {
    local index="-1"
    
    # Here is the dummy code 
    # as I don't know how to convert
    # the perl script to bash command lines
    # and get the result from perl script
    
    index="
    
        #!/usr/bin/perl
         
        # String from which Substring 
        # is to be searched 
        $string = "Geeks are the best";
         
        # Using index() to search for substring
        $index = index ($string, 'the');
         
        # Printing the position of the substring
        print "Position of 'the' in the string: $index\n";
    
    "
    
    printf "%s" "$index"
}

result="$(bash_function_get_index_from_perl_script)"

echo "result is: $result"

Here is the expected output:

result is: 10

How to implement the "bash_function_get_index_from_perl_script"?

Update:

Here is the bash function for testing:

#!/bin/bash

# bash_function_get_index STRING SEARCH
# Prints the length of STRING after removing the shortest trailing match of
# SEARCH followed by anything. SEARCH is expanded unquoted, so it acts as a
# glob pattern. When SEARCH does not occur nothing is removed and the full
# length of STRING is printed. Sets the global variables "before" and "index".
bash_function_get_index() {
    local string=$1 search_string=$2

    before="${string%${search_string}*}"
    index="${#before}"

    printf '%s' "${index}"
}

echo "test 1: $(bash_function_get_index "hello world" "wo")"
echo "test 2: $(bash_function_get_index "hello world" "wox")"

Here is the output:

test 1: 6
test 2: 11

The result of "test 2" is wrong, it should be:

test 2: -1

I want to implement the same "indexOf" function from Java:

https://www.w3schools.com/java/ref_string_indexof.asp

# index_of STRING SEARCH [FROM_INDEX]
# Prints the zero-based position of the first occurrence of SEARCH in STRING
# at or after FROM_INDEX (default 0), or -1 when there is none. This follows
# Java's String.indexOf(str, fromIndex):
#   - a negative FROM_INDEX is treated as 0;
#   - a FROM_INDEX past the end is treated as the length of STRING;
#   - an empty SEARCH matches at the (clamped) FROM_INDEX.
# SEARCH is matched literally; glob characters in it have no special meaning.
index_of() {
    local string="$1"
    local search_string="$2"
    local fromIndex="${3:-0}"

    local index=-1
    local tail before

    if (( fromIndex < 0 )); then
        fromIndex=0
    elif (( fromIndex > ${#string} )); then
        fromIndex=${#string}
    fi

    # Search only the part of the string that starts at fromIndex.
    tail=${string:fromIndex}
    if [[ $tail == *"$search_string"* ]]; then
        # "%%" removes the longest trailing match, leaving the text before
        # the FIRST occurrence; its length is the position inside "tail".
        before=${tail%%"$search_string"*}
        index=$(( fromIndex + ${#before} ))
    fi

    printf "%s" "$index"
}

UPDATE 2:

Here is a test based on the solution from "@F. Hauri - Give Up GitHub":

The following test_example is used to get the "inner code" between "if (feature_is_enabled(Feature_111111)) {" and "}".

I used a second variable "hello_world" to get a from_index for getting the real index of "}".

But the "test_example" is not working as expected. It seems the "indexOf()" function is not working as expected.

#!/bin/bash

# indexOf [-v VAR] SUBSTR STRING
# Echoes (or, with -v, stores in VAR) an offset of SUBSTR in STRING, or -1
# when the expansion below removes nothing.
indexOf() {
    if [[ $1 == -v ]]; then
        # -v NAME: make _iO_result a nameref to the caller's variable NAME.
        local -n _iO_result="$2"
        shift 2
    else
        local _iO_result
    fi
    local _iO_string="$2" _iO_substr="$1" _iO_lhs
    # "%" removes the SHORTEST trailing match of SUBSTR followed by anything,
    # so _iO_lhs is the text before the LAST occurrence of SUBSTR, not the
    # first one. With several occurrences (e.g. "}") this is a lastIndexOf.
    _iO_lhs=${_iO_string%"$_iO_substr"*}
    _iO_result=${#_iO_lhs}
    # Nothing removed means no match was found: report -1.
    ((_iO_result == ${#_iO_string} )) && _iO_result=-1
    # Echo only when the pattern "_iO_result=*" matches the @A expansion.
    # NOTE(review): presumably with -v the @A expansion names the referenced
    # variable instead, so nothing is echoed in that mode - confirm.
    case ${_iO_result@A} in _iO_result=* ) echo $_iO_result;;esac
}

# indexOf_my_params STRING SEARCH [FROM_INDEX]
# Wrapper around indexOf that adds an optional start position.
# Prints the value reported by indexOf for the part of STRING starting at
# FROM_INDEX, shifted by FROM_INDEX, or -1. A negative FROM_INDEX is treated
# as 0; a FROM_INDEX beyond the end of STRING yields -1 without calling
# indexOf. Returns 1 when the printed value is negative, 0 otherwise.
indexOf_my_params() {
    local string="$1"
    local search_string="$2"
    local from_index="$3"

    local offset=-1
    local index=-1

    if [[ -z "$from_index" ]]; then
        # No start position given: search the whole string.
        offset=0
    else
        if (( $from_index < 0 )); then from_index=0; fi

        local remaining=$(( ${#string} - $from_index ))
        if (( $remaining >= 0 )); then
            offset=$from_index
            string="${string:from_index:remaining}"
        fi
    fi

    # offset stays at -1 only when the start position is past the end.
    if [[ "$offset" != "-1" ]]; then
        index="$(indexOf "$search_string" "$string")"
        if (( $index != -1 )); then
            # Translate the position back into the original string.
            index=$(( $index + $offset ))
        fi
    fi

    printf '%s' "$index"
    return "$(( index < 0 ))"
}

# Extracts the text between the opening line of the Feature_111111 "if" and
# a closing brace located after the "hello world" statement, using
# indexOf_my_params for every lookup, and prints each index plus the result.
test_example() {
    local content='initialize(args, hw);
if (feature_is_enabled(Feature_111111)) {
    args.add("foo");
    if(1>2) {
        args.add("test");
    }else{
        args.add("x");
    }
    args.add("hello world");
}

if (feature_is_enabled(Feature_222222)) {
    args.add("bar");
}'

    local feature_1_if_begin='if (feature_is_enabled(Feature_111111)) {'
    local hello_world='args.add("hello world");'
    local feature_1_if_end='}'

    local begin_at hello_at end_at

    begin_at="$(indexOf_my_params "$content" "$feature_1_if_begin")"
    echo "feature_1_if_begin_index: $begin_at"

    # First character after the opening line of the block.
    local after_begin=$(( $begin_at + ${#feature_1_if_begin} ))

    hello_at="$(indexOf_my_params "$content" "$hello_world" "$after_begin")"
    echo "hello_world_index: $hello_at"

    # Look for the closing brace only after the "hello world" statement.
    end_at="$(indexOf_my_params "$content" "$feature_1_if_end" "$(( $hello_at + ${#hello_world} ))")"
    echo "feature_1_if_end_index: $end_at"

    local inner_len=$(( $end_at - $begin_at - ${#feature_1_if_begin} ))
    local feature_1_if_block_code="${content:after_begin:inner_len}"

    echo "------ feature_1_if_block_code (inner code) ------"
    printf "%s\n" "$feature_1_if_block_code"
}

test_example
exit 0

Here is the actual output (the output is wrong):

feature_1_if_begin_index: 22
hello_world_index: 169
feature_1_if_end_index: 260
------ feature_1_if_block_code (inner code) ------

    args.add("foo");
    if(1>2) {
        args.add("test");
    }else{
        args.add("x");
    }
    args.add("hello world");
}

if (feature_is_enabled(Feature_222222)) {
    args.add("bar");

Here is the expected output:

------ feature_1_if_block_code (inner code) ------

    args.add("foo");
    if(1>2) {
        args.add("test");
    }else{
        args.add("x");
    }
    args.add("hello world");

Why is the "indexOf()" not working when the search keyword is "}"?

UPDATE 3 (use the same "content" for testing as UPDATE 2):

Here is a solution from Paul Hodges but still not working as expected: Note that I renamed the function name "index" to "indexof_paul_hodges".

#!/bin/bash

# indexof_paul_hodges STRING KEY [FROM_INDEX]
# Echoes the position of the first literal occurrence of KEY in STRING, or
# -1 when KEY does not occur. With a non-zero FROM_INDEX the first FROM_INDEX
# characters are dropped before searching, and the echoed position is counted
# from that point, not from the start of STRING.
indexof_paul_hodges ()
{
    local string="$1" key="$2" fromIndex=${3:-0}
    local ndx

    if (( fromIndex )); then
        string=${string:$fromIndex}
    fi

    # Quoting the right-hand side of =~ makes the match literal.
    if ! [[ "$string" =~ "$key" ]]; then
        echo "-1"
        return
    fi

    ndx="${string%%"$key"*}"
    echo ${#ndx}
}

# Same extraction experiment as before, but every lookup goes through
# indexof_paul_hodges. The values it returns are used directly as positions
# in "content" when the inner code is cut out.
test_example() {
    local content='initialize(args, hw);
if (feature_is_enabled(Feature_111111)) {
    args.add("foo");
    if(1>2) {
        args.add("test");
    }else{
        args.add("x");
    }
    args.add("hello world");
}

if (feature_is_enabled(Feature_222222)) {
    args.add("bar");
}'

    local feature_1_if_begin='if (feature_is_enabled(Feature_111111)) {'
    local hello_world='args.add("hello world");'
    local feature_1_if_end='}'

    local begin_idx hello_idx end_idx

    begin_idx="$(indexof_paul_hodges "$content" "$feature_1_if_begin")"
    echo "feature_1_if_begin_index: $begin_idx"

    local inner_start=$(( $begin_idx + ${#feature_1_if_begin} ))

    hello_idx="$(indexof_paul_hodges "$content" "$hello_world" "$inner_start")"
    echo "hello_world_index: $hello_idx"

    end_idx="$(indexof_paul_hodges "$content" "$feature_1_if_end" "$(( $hello_idx + ${#hello_world} ))")"
    echo "feature_1_if_end_index: $end_idx"

    local inner_length=$(( $end_idx - $begin_idx - ${#feature_1_if_begin} ))
    local feature_1_if_block_code="${content:inner_start:inner_length}"

    echo "------ feature_1_if_block_code (inner code) ------"
    printf "%s\n" "$feature_1_if_block_code"
}

test_example
exit 0

Here is the output of "indexof_paul_hodges" (The result is wrong):

feature_1_if_begin_index: 22
hello_world_index: 106
feature_1_if_end_index: 33
------ feature_1_if_block_code (inner code) ------

    args.add("foo");
    if(1>2) {
        args.add("test");
    }else{
        args.add("x");
    }
    args.add("hello world");
}

if (feature_is_enabled(Feature_222

UPDATE 4 (Use the same test_example() of UPDATE 2 for testing):

Here is a solution from @choroba:

#!/bin/bash

# bash_function_get_index STRING SEARCH
# Prints the zero-based position of the first literal occurrence of SEARCH
# in STRING, or -1 when SEARCH does not occur. No trailing newline is printed.
bash_function_get_index() {
    local string=$1 search=$2 before

    if [[ $string != *"$search"* ]]; then
        printf '%s' -1
        return
    fi

    # Drop everything from the first occurrence onwards; what remains is the
    # text in front of it, and its length is the position.
    before=${string%%"$search"*}
    printf '%s' "${#before}"
}

# indexOf_my_params STRING SEARCH [FROM_INDEX]
# Adds an optional start position on top of bash_function_get_index.
# Prints the position of SEARCH in STRING at or after FROM_INDEX, or -1.
# A negative FROM_INDEX counts as 0; a FROM_INDEX beyond the end of STRING
# gives -1 without searching. Returns 1 when the printed value is negative.
indexOf_my_params() {
    local string="$1" search_string="$2" from_index="$3"
    local offset=-1 index=-1 count

    case "$from_index" in
        '')
            # No start position: search the whole string.
            offset=0
            ;;
        *)
            if (( $from_index < 0 )); then
                from_index=0
            fi
            count=$(( ${#string} - $from_index ))
            if (( $count >= 0 )); then
                offset=$from_index
                string="${string:from_index:count}"
            fi
            ;;
    esac

    # offset is still -1 only when the start position lies past the end.
    if [[ "$offset" != "-1" ]]; then
        index="$(bash_function_get_index "$string" "$search_string")"
        if (( $index != -1 )); then
            # Convert the position in the tail back to one in the full string.
            index=$(( $index + $offset ))
        fi
    fi

    printf '%s' "$index"
    return "$(( index < 0 ))"
}

Here is the output of UPDATE 4 (The result is correct)

feature_1_if_begin_index: 22
hello_world_index: 169
feature_1_if_end_index: 194
------ feature_1_if_block_code (inner code) ------

    args.add("foo");
    if(1>2) {
        args.add("test");
    }else{
        args.add("x");
    }
    args.add("hello world");
3

There are 3 best solutions below

8
choroba On BEST ANSWER

There are many ways. For example, you can send the script to the standard input of perl:

#!/bin/bash

# Feeds a small Perl program to perl on standard input and prints whatever
# the program wrote to standard output (trailing newlines removed by the
# command substitution). Returns perl's exit status if perl fails.
bash_function_get_index_from_perl_script() {
    local index
    # The delimiter is quoted so bash passes the Perl code through verbatim:
    # $string and $index are Perl variables, not shell expansions.
    # Plain "<<" is used with body and terminator at column 0, because "<<-"
    # strips leading TAB characters only; a terminator indented with spaces
    # would never end the here-document.
    index=$(perl <<'_PERL_'
$string = 'Geeks are the best';
$index = index $string, 'the';
print "Position of 'the' in the string: $index\n";
_PERL_
    ) || return
    printf "%s" "$index"
}

result=$(bash_function_get_index_from_perl_script)

echo "result is: $result"

But you don't need Perl for that, you can find the position using parameter expansion in bash itself:

#!/bin/bash

# bash_function_get_index STRING SEARCH
# Prints the zero-based position of the first literal occurrence of SEARCH
# in STRING, or -1 when SEARCH does not occur. No trailing newline is printed.
# All working variables are local, so calling the function directly (without
# a command substitution) does not overwrite the caller's variables.
bash_function_get_index() {
    local string=$1
    local search=$2
    local before

    # Quoting "$search" inside the pattern makes the match literal.
    if [[ $string != *"$search"* ]]; then
        printf %s -1
        return
    fi

    # "%%" removes the longest trailing match, i.e. everything from the first
    # occurrence onwards; the length of the remainder is the position.
    before=${string%%"$search"*}
    printf %s "${#before}"
}

for s in the thex ; do
    result=$(bash_function_get_index 'Geeks are the best' "$s")
    echo "Position of '$s' in the string: $result"
done

To include a start position, just remove everything before the start position from the string before trying to match the substring. Don't forget to add the start position to the result:

# bash_function_get_index STRING SEARCH [POS]
# Prints the zero-based position (counted from the start of STRING) of the
# first literal occurrence of SEARCH at or after POS (default 0), or -1 when
# there is none. A negative POS is treated as 0 and a POS past the end as the
# length of STRING. All working variables are local.
bash_function_get_index() {
    local string=$1
    local search=$2
    local pos=${3:-0}
    local before

    # Clamp POS. A negative value must not reach ${string:$pos}: bash would
    # read ${string:-N} as the "use default value" operator, not as an offset.
    if (( pos < 0 )); then
        pos=0
    elif (( pos > ${#string} )); then
        pos=${#string}
    fi

    string=${string:pos}
    if [[ $string != *"$search"* ]]; then
        printf %s -1
        return
    fi

    before=${string%%"$search"*}
    # Add the start position back so the result refers to the full string.
    printf %s "$(( pos + ${#before} ))"
}
8
F. Hauri  - Give Up GitHub On

Index of a substring in a string in Bash

Try this:

string="Geeks are the best"
sub=the
# "%%" removes the longest trailing match of "$sub" followed by anything,
# which leaves only the text before the FIRST occurrence of "$sub".
# (A single "%" would leave the text before the LAST occurrence.)
lhs=${string%%"$sub"*}
echo "${#lhs}"
10

As a function

With a -v option to assign the result to a variable instead of echoing it, in order to avoid useless forks:

# indexOf [-v VAR] SUBSTR STRING
# Finds the zero-based position of the FIRST literal occurrence of SUBSTR in
# STRING; the result is -1 when SUBSTR does not occur. An empty SUBSTR is
# found at position 0.
#   -v VAR  store the result in the caller's variable VAR instead of echoing
#           it, which avoids a fork for a command substitution.
# Requires bash 4.3 or later for namerefs.
indexOf() {
    local _iO_print=1
    if [[ $1 == -v ]]; then
        # Bind _iO_result to the caller's variable; nothing is echoed then.
        local -n _iO_result="$2"
        _iO_print=0
        shift 2
    else
        local _iO_result
    fi
    local _iO_string="$2" _iO_substr="$1" _iO_lhs
    if [[ $_iO_string == *"$_iO_substr"* ]]; then
        # "%%" removes the longest trailing match, i.e. everything from the
        # first occurrence onwards ("%" would stop at the last occurrence).
        _iO_lhs=${_iO_string%%"$_iO_substr"*}
        _iO_result=${#_iO_lhs}
    else
        _iO_result=-1
    fi
    if (( _iO_print )); then
        echo "$_iO_result"
    fi
}

With one exception: if the substring is not found, the answer is -1.

Tests and syntax

Then

indexOf the "Geeks are the best"
10

indexOf -v var the "Geeks are the best"
echo $var
10

indexOf bad "Hello good world!"
-1

indexOf '*' 'abc*efg???x'
3

Then, to avoid the fork caused by var=$(function...), use the -v var option!

# Each test case is "SUBSTR<TAB>STRING"; the result is fetched with -v so no
# command substitution is needed.
tcnt=1
for tests in $'wo\thello world' $'wox\thello world' $'*\tabc*efg???x'; do
    tststr=${tests%%$'\t'*}
    string=${tests#*$'\t'}
    indexOf -v idx "$tststr" "$string"
    printf 'Test %2d: result: %3d. string "%s" substring: "%s"\n' \
        "$tcnt" "$idx" "$string" "$tststr"
    tcnt=$((tcnt + 1))
done

should produce:

Test  1: result:   6. string "hello world" substring: "wo"
Test  2: result:  -1. string "hello world" substring: "wox"
Test  3: result:   3. string "abc*efg???x" substring: "*"

Why do I insist about avoiding forks

Just try running the job 1,000 times:

time for ((i=1000;i--;)){ idx=$(indexOf 'wo' 'Hello world') ;};echo $idx
real    0m0.948s
user    0m0.608s
sys     0m0.337s
6
time for ((i=1000;i--;)){ indexOf -v idx 'wo' 'Hello world' ;};echo $idx
real    0m0.030s
user    0m0.030s
sys     0m0.000s
6

From approximately 1 second down to approximately 3 hundredths of a second!

No comment!

3
Paul Hodges On

I don't think this is doing anything dramatically different from what others have done, but implements what you've asked for with native built-in bash-isms and is maybe a little easier to read?

$: declare -f index
# index STRING KEY [FROM_INDEX]
# Echoes the position of the first literal occurrence of KEY in STRING, or
# -1 when KEY does not occur. With a non-zero FROM_INDEX the search starts
# there and the echoed position is counted from FROM_INDEX, not from the
# start of STRING.
index ()
{
    local string="$1" key="$2" fromIndex=${3:-0}
    local ndx

    # Drop the first fromIndex characters before searching.
    if (( fromIndex )); then
        string=${string:$fromIndex}
    fi

    # A quoted right-hand side makes =~ match the key literally.
    if [[ "$string" =~ "$key" ]]; then
        ndx="${string%%"$key"*}"
        printf '%s\n' "${#ndx}"
    else
        printf '%s\n' "-1"
    fi
}

$: index "Geeks are the best" the
10

$: index "Geeks are the best" the 4
6

$: index "Geeks are the best" the 12
-1

$: index "Geeks are the best" foo
-1

Edit

This smacks of an XY Problem with a horribly fragile Y. What is it that you are actually trying to accomplish?

In general, unless you are ABSOLUTELY in COMPLETE and SOLE control of the code (and probably not even then), don't try to parse code this way. One small "harmless" change now breaks your parse.

Even so, your problem is that you are using offsets for reference that aren't counting from the same place. You have to index from the start of your source string, even if you had to use a point in the middle to identify the correct closing brace...which is what makes this so fragile.

$: cat tst
#!/bin/bash
# indexof_paul_hodges STRING KEY [FROM_INDEX]
# Echoes where KEY first occurs (literally) in STRING, or -1 if it does not.
# When FROM_INDEX is non-zero the leading FROM_INDEX characters are skipped
# and the echoed position is relative to FROM_INDEX; callers must add
# FROM_INDEX back to get a position in the original STRING.
indexof_paul_hodges () {
    local string="$1" key="$2" fromIndex=${3:-0}
    local ndx

    if (( fromIndex )); then
        string=${string:$fromIndex}
    fi

    if ! [[ "$string" =~ "$key" ]]; then
        echo "-1" # this should probably be "return 1"
        return
    fi

    ndx="${string%%"$key"*}"
    echo ${#ndx}
}
# test_example CONTENT
# Locates the Feature_111111 "if" block in CONTENT and prints it twice: once
# including the opening line and closing brace, once with only the text
# between them. The closing brace is the first "}" found after the
# "hello world" statement. Every position is counted from the start of
# CONTENT.
test_example() {
  local content="$1"
  local if_begin='if (feature_is_enabled(Feature_111111)) {'
  local hello_world='args.add("hello world");'
  local if_end='}'
  local begin_at begin_end hello_at hello_end end_rel end_at

  begin_at=$(indexof_paul_hodges "$content" "$if_begin")
  begin_end=$(( begin_at + ${#if_begin} ))

  hello_at=$(indexof_paul_hodges "$content" "$hello_world")   # index FROM START
  hello_end=$(( hello_at + ${#hello_world} ))

  # The helper reports this position relative to hello_end, so the offset is
  # added back to obtain a position counted from the start.
  end_rel=$(indexof_paul_hodges "$content" "$if_end" "$hello_end")
  end_at=$(( end_rel + hello_end ))

  local if_block="${content:begin_at:end_at - begin_at + 1}"      # include all
  local if_content="${content:begin_end:end_at - begin_end}"      # inside braces only

  printf "%s\n" "--- if block ---" "$if_block" "" "--- inner code ---" "$if_content"
}
# Sample input: comments (and braces inside them) can appear anywhere.
content='
    initialize(args, hw);
    if (feature_is_enabled(Feature_111111)) { /* comments can show up anywhere */
      if(1>2) { args.add("test"); } else { args.add("x"); }
      args.add("hello world"); /* comments can include braces... */
    }
    if (feature_is_enabled(Feature_222222)) { args.add("bar"); }
  '

# First run: the sample exactly as written above.
printf '%s\n' "As-is:"
test_example "$content"

# Second run: the word "braces" in the comment is replaced by a literal "}".
printf '%s\n' "with a comment edited:"
test_example "${content/braces/\}}"

when run:

$: ./tst
As-is:
--- if block ---
if (feature_is_enabled(Feature_111111)) { /* comments can show up anywhere */
      if(1>2) { args.add("test"); } else { args.add("x"); }
      args.add("hello world"); /* comments can include braces... */
    }

--- inner code ---
 /* comments can show up anywhere */
      if(1>2) { args.add("test"); } else { args.add("x"); }
      args.add("hello world"); /* comments can include braces... */

with a comment edited:
--- if block ---
if (feature_is_enabled(Feature_111111)) { /* comments can show up anywhere */
      if(1>2) { args.add("test"); } else { args.add("x"); }
      args.add("hello world"); /* comments can include }

--- inner code ---
 /* comments can show up anywhere */
      if(1>2) { args.add("test"); } else { args.add("x"); }
      args.add("hello world"); /* comments can include

So again: What is it that you are actually trying to accomplish?