How to replace multiple matches of reoccurring word in a .txt file with new words?

61 Views Asked by At

So I have a .txt file with several dataframes. It looks similar to the following example:

$stim
rt 82289.8878539, 82294.8309221, 82299.3357436, 82304.1822179
category 1, 2, 1, 1
orient 263, 313, 266, 253

$stim
rt 82289.887000, 82294.8309333, 82299.3357444, 82304.1822179
category 1, 2, 2, 2
orient 263, 310, 360, 250

Each dataframe in this .txt file corresponds to a filename, which I have stored in a list. What I would like to do is replace the $stim with a filename. I have constructed a for loop to do this, which looks like this:

library(stringr)
# Read the file into a character vector, one element per line.
text <- readLines("filepath")

# wrong_words holds a single string, so the loop below runs exactly once
# and only new_words[1] is ever used as the replacement.
wrong_words <-  ("$stim")
new_words <- (filenames)
for (i in seq_along(wrong_words)) {
  # NOTE(review): str_replace_all() treats the pattern as a regular
  # expression by default, so the "$" is an anchor here, not a literal
  # dollar sign.
  text <- str_replace_all(text, wrong_words[i], new_words[i])
}
text
# Write the lines back to the same path that was read above.
writeLines(text, con="filepath")

However, when I run this loop, nothing changes and I get the exact same .txt file as before. What am I doing wrong?

2

There are 2 best solutions below

0
jay.sf On

What you want here is grep to find the matching lines, and then to go through the matches one by one. stringi::stri_replace_all_regex and similar functions instead replace by dictionary, i.e. every "$stim" would be replaced with the same new word. We can wrap the grep approach in a function and still include a dictionary feature. To avoid feature creep, we leave readLines/writeLines out of the function.

#' Replace whole lines that match one or more regular expressions
#'
#' Lines of `text` matching any entry of `wrong_words` are located with
#' `grep()` and overwritten, in order of appearance, with the entries of
#' `new_words`.
#'
#' @param text Character vector, e.g. the result of `readLines()`.
#' @param wrong_words Character vector of regular expressions: either a
#'   single pattern, or as many patterns as there are `new_words`.
#' @param new_words Character vector of replacement lines.
#' @return `text` with the matching elements replaced. If nothing matches,
#'   a message is emitted and `text` is returned unchanged.
#' @details Stops with an error when `wrong_words` has more than one entry
#'   and its length differs from `new_words`, or when several patterns are
#'   given and the number of matching lines differs from the number of
#'   patterns.
batch_replace <- function(text, wrong_words, new_words) {
  len0 <- length(wrong_words)
  len1 <- length(new_words)
  # All patterns are combined into one alternation, so `pos` lists the
  # matching lines in file order regardless of which pattern matched.
  pos <- grep(paste(wrong_words, collapse = "|"), text)
  len2 <- length(pos)
  if (len0 != 1L && len0 != len1) {
    stop(sprintf("Counts of wrong_words (%s) must be 1 or match with new_words (%s).",
                 len0, len1))
  }
  if (len2 == 0L) {
    message("No matches found.")
    return(text)
  }
  if (len0 == 1L) {
    # Single pattern: hand the replacements to the matches in order.
    return(replace(text, pos, new_words))
  }
  if (len0 != len2) {
    # Report the counts in the same order as the message names them.
    stop(sprintf("Counts of found words (%s) and new_words (%s) must match.",
                 len2, len1))
  }
  # Plain vectorised assignment on the local copy; no `<<-` required.
  text[pos] <- new_words
  text
}

Usage

> text <- readLines('foo.txt')  ## one element per line of the file
> wrong_words <- c("^\\$stim$")  ## escaped `$` is a literal dollar sign; ^ and the final $ anchor the whole line
> new_words <- c("## WORD1", "## WORD2")  ## one replacement per matching line, in file order
> batch_replace(text, wrong_words, new_words)
 [1] "## WORD1"                                                     
 [2] "rt 82289.8878539, 82294.8309221, 82299.3357436, 82304.1822179"
 [3] "category 1, 2, 1, 1"                                          
 [4] "orient 263, 313, 266, 253"                                    
 [5] ""                                                             
 [6] "## WORD2"                                                     
 [7] "rt 82289.887000, 82294.8309333, 82299.3357444, 82304.1822179" 
 [8] "category 1, 2, 2, 2"                                          
 [9] "orient 263, 310, 360, 250"                                    
[10] ""  

You can also provide a dictionary.

> batch_replace(text, c("^\\$stim$", "^\\$stim$"), c("## WORD1", "## WORD2"))

Complete approach

> readLines('foo.txt') |>  ## read the input file line by line
+   batch_replace(c("\\$stim"), c("## WORD1", "## WORD2")) |>  ## replace each matching line with the next new word
+   writeLines('foo2.txt')  ## write the result to a separate file
>
> readLines('foo2.txt')  ## check
 [1] "## WORD1"                                                     
 [2] "rt 82289.8878539, 82294.8309221, 82299.3357436, 82304.1822179"
 [3] "category 1, 2, 1, 1"                                          
 [4] "orient 263, 313, 266, 253"                                    
 [5] ""                                                             
 [6] "## WORD2"                                                     
 [7] "rt 82289.887000, 82294.8309333, 82299.3357444, 82304.1822179" 
 [8] "category 1, 2, 2, 2"                                          
 [9] "orient 263, 310, 360, 250"                                    
[10] ""                             
0
margusl On

We can find rows with placeholders and replace that subset with a new character vector, at least for that example data:

# simulate a file for reprex
# A text connection stands in for a real file path, so readLines() below
# reads the embedded sample text instead of a file on disk.
# NOTE(review): the connection is not closed anywhere in this snippet.
filepath <- textConnection("$stim
rt 82289.8878539, 82294.8309221, 82299.3357436, 82304.1822179
category 1, 2, 1, 1
orient 263, 313, 266, 253

$stim
rt 82289.887000, 82294.8309333, 82299.3357444, 82304.1822179
category 1, 2, 2, 2
orient 263, 310, 360, 250
") 

# The outer parentheses make the assignment print its value as well.
(text <- readLines(filepath))
#>  [1] "$stim"                                                        
#>  [2] "rt 82289.8878539, 82294.8309221, 82299.3357436, 82304.1822179"
#>  [3] "category 1, 2, 1, 1"                                          
#>  [4] "orient 263, 313, 266, 253"                                    
#>  [5] ""                                                             
#>  [6] "$stim"                                                        
#>  [7] "rt 82289.887000, 82294.8309333, 82299.3357444, 82304.1822179" 
#>  [8] "category 1, 2, 2, 2"                                          
#>  [9] "orient 263, 310, 360, 250"                                    
#> [10] ""

# Placeholder line to look for. It is compared with `==` below, so it is
# taken literally and needs no regex escaping.
wrong_words <-  "$stim"
# Replacement lines, one per placeholder occurrence, in file order.
new_words <- c("file_01.txt", "file_02.txt")

# Indices of the lines that are exactly equal to the placeholder.
(wrong_idx <- which(text == wrong_words))
#> [1] 1 6
# Overwrite those lines with the new words, position by position.
text[wrong_idx] <- new_words
text
#>  [1] "file_01.txt"                                                  
#>  [2] "rt 82289.8878539, 82294.8309221, 82299.3357436, 82304.1822179"
#>  [3] "category 1, 2, 1, 1"                                          
#>  [4] "orient 263, 313, 266, 253"                                    
#>  [5] ""                                                             
#>  [6] "file_02.txt"                                                  
#>  [7] "rt 82289.887000, 82294.8309333, 82299.3357444, 82304.1822179" 
#>  [8] "category 1, 2, 2, 2"                                          
#>  [9] "orient 263, 310, 360, 250"                                    
#> [10] ""

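Another note regarding str_replace_all() and why there was no change at all:
by default it interprets the pattern argument as a regular expression, where $ has a special meaning: it anchors the match to the end of the string. The pattern "$stim" therefore asks for "stim" after the end of the string, which can never match, so nothing is replaced. To match a literal dollar sign, escape it ("\\$stim") or wrap the pattern in fixed("$stim").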