DP memoized approach for Longest common substring

9k Views Asked by At

can anyone provide the memoized approach for longest common substring between two strings.I know the bottom solution but I am not able to think in top-down manner. Expected time complexity-O(n^2)

9

There are 9 best solutions below

1
On

Memoization with recursion works with top-down approach. Taking LCS example using DP from Cormen into consideration below is the pseudo code describing how it will work.

MEMOIZED-LCS-LENGTH(X,Y)
 m<-length[X]
 n<-length[Y]
for(i<-1 to m)
  do for(j<-1 to n)
        c[i,j]<- -1

for(i<-1 to m)
    c[i,0]<-0
for(j<-1 to n)
    c[0,j]<-0
return RECURSIVE-LCS-LENGTH(X,Y,1,1)


RECURSIVE-LCS-LENGTH(X,Y,i,j)
if(c[i,j]!=-1) 
  return c[i,j]
//Above 2 line fetches the result if already present, instead of computing it again.
if(x[i]==y[j]) 
  then c[i,j]<-RECURSIVE-LCS-LENGTH(X,Y,i+1,j+1)+1 
else 
     c1<- RECURSIVE-LCS-LENGTH(X,Y,i+1,j)
     c2<-RECURSIVE-LCS-LENGTH(X,Y,i,j+1)
       if(c1<c2)
         then c[i,j]<-c1
       else c[i,j]<-c2

return c[i,j]
0
On

Recursion plus memoization in python. Please note this code is partially accepted on Hackerearth and Geeksforgeeks.For larger test cases, it is giving MLE.

import sys

sys.setrecursionlimit(1000000)
maxlen=0
t=None

def solve(s1, s2, n, m):
   global maxlen, t
   if n<=0 or m<=0:
       return 0
   if t[n][m]!=-1:
       return t[n][m]
   if s1[n-1]==s2[m-1]:
       temp=1+solve(s1, s2, n-1, m-1)
       maxlen=max(maxlen, temp)
       t[n][m]=temp
       return temp
   t[n][m]=0
   return 0

class Solution:
   def longestCommonSubstr(self, S1, S2, n, m):
       global maxlen, t
       maxlen=0
       t=[[-1]*(m+1) for i in range(n+1)]
       for i in range(n+1):
           for j in range(m+1):
               solve(S1, S2, i, j)
       return maxlen
   
if __name__=='__main__':
   S1=input().strip()
   S2=input().strip()
   n=len(S1)
   m=len(S2)
   ob = Solution()
   print(ob.longestCommonSubstr(S1, S2, n, m))
0
On

Here is a recursive and top-down approach:

       public int lcsSubstr(char[] s1, char[] s2, int m, int n, int c) {
            if (m == 0 || n == 0) {
                return c;
            }
            if (s1[m-1] == s2[n-1]) {
                c = lcsSubstr(s1, s2, m-1, n-1, c+1);
            } else {
                c2 = Math.max(lcsSubstr(s1, s2, m, n - 1, 0), lcsSubstr(s1, s2, m-1, n, 0));
            }
            return Math.max(c, c2);
        }

       public int lcsSubstrMemo(char[] s1, char[] s2, int m, int n, int c, int[][] t) {
            if(m == 0 || n == 0) {
                return c;
            }
            if (t[m-1][n-1] != -1) return t[m-1][n-1];
            if(s1[m - 1] == s2[n - 1]) {
                c = lcsSubstr(s1, s2, m - 1, n - 1, c + 1);
            } else {
                c2 = Math.max(lcsSubstr(s1, s2, m, n - 1, 0), lcsSubstr(s1, s2, m - 1, n, 0));
            }
            t[m - 1][n - 1] = Math.max(c, c2);
            return t[m-1][n-1];
        }
0
On

An easy solution is described below. Here memo[n][m] does not store the length of greatest substring but you can store the greatest substring in pointer maxi as follows:

#include<iostream>
#include<string>
using namespace std;
int lcs(string X,string Y,int n,int m,int *maxi,int memo[][8]) 
{
    if(n==0||m==0) {
    
        return 0;
    }
    int k=0;
    int j=0;
    
    if(memo[n-1][m-1]!=-1) {
    return memo[n-1][m-1];
    }
    if(X[n-1]==Y[m-1]) {
    
    memo[n-1][m-1] =  1+lcs(X,Y,n-1,m-1,maxi,memo);
    if(*maxi<memo[n-1][m-1]) 
    *maxi=memo[n-1][m-1];
    
    }
    else {
    memo[n-1][m-1]=0;
    }
    
    
    int l =  lcs(X,Y,n-1,m,maxi,memo);
    int i = lcs(X,Y,n,m-1,maxi,memo);
    
    return memo[n-1][m-1];
}
        
int main() 
{ 
    int n,m; 

    string X = "abcdxyze"; 
    //string X = "abcd";
    string Y = "xyzabcde"; 

    n=X.length(); 
    m=Y.length(); 
    int memo[n][8];
    for(int i=0;i<n;i++) {
    for(int j=0;j<m;j++) {
    memo[i][j]=-1;
    }
    }
    int maxi=0;
    int k = lcs(X,Y,n,m,&maxi,memo); 
    cout << maxi;
    return 0; 
} 
0
On
class Solution {
public:
    int t[1005][1005];
    int maxC = 0;
    int recur_memo(vector<int>& nums1, vector<int>& nums2, int m, int n) {
        if(t[m][n] != -1)
            return t[m][n];
    
        if(m == 0 || n == 0)
            return 0;
    
        int max_substring_ending_here = 0;
        //Example : "abcdezf"   "abcdelf"
        //You see that wowww, string1[m-1] = string2[n-1] = 'f' and you happily   
        go for (m-1, n-1)
        //But you see, in future after a gap of 'l' and 'z', you will find 
        "abcde" and "abcde"
        if(nums1[m-1] == nums2[n-1]) {
            max_substring_ending_here = 1 + recur_memo(nums1, nums2, m-1, n-1);
        }
    
        //May be you find better results if you do (m-1, n) and you end up 
        updating maxC with some LAAARGEST COMMON SUBSTRING LENGTH
        int decrease_m = recur_memo(nums1, nums2, m-1, n); //stage (m-1, n)
    
        //OR,
        //May be you find better results if you do (m, n-1) and you end up 
        updating maxC with some LAAARGEST COMMON SUBSTRING LENGTH
        int decrease_n  = recur_memo(nums1, nums2, m, n-1); //stage (m, n-1)
    
        //Like I said, you need to keep on finding the maxC in every call you 
        make throughout your journey.
        maxC = max({maxC, max_substring_ending_here, decrease_m, decrease_n});
    
    
        //BUT BUT BUT, you need to return the best you found at this stage (m, n)
        return t[m][n] = max_substring_ending_here;
    }

    int findLength(vector<int>& nums1, vector<int>& nums2) {
        int m = nums1.size();
        int n = nums2.size();
        memset(t, -1, sizeof(t));
        recur_memo(nums1, nums2, m, n); //resurive+memoization
        return maxC;
    }
};

Link : https://leetcode.com/problems/maximum-length-of-repeated-subarray/discuss/1169215/(1)-Recursive%2BMemo-(2)-Bottom-Up-(C%2B%2B)

0
On

Memoization refers to caching the solutions to subproblems in order to use them later. In the longest common subsequence problem, you try to match substrings of two subsequences to see if they match, maintaining in memory the longest one yet found. Here is the solution in Java you are looking for (memoized version of LCS):

    public class LongestCommonSubsequence {

    private static HashMap<Container, Integer> cache = new HashMap<>();
    private static int count=0, total=0;
    public static void main(String sargs[]){
        Scanner scanner = new Scanner(System.in);
        String x=scanner.nextLine();
        String y=scanner.nextLine();
        int max=0;
        String longest="";
        for(int j=0;j<x.length();j++){
            String common=commonSubsequence(j,0, x, y);
            if(max<common.length()){
                max=common.length();
                longest=common;
            }
        }
        for(int j=0;j<y.length();j++){
            String common=commonSubsequence(j,0, y, x);
            if(max<common.length()){
                max=common.length();
                longest=common;
            }
        }
        System.out.println(longest);
        System.out.println("cache used "+count+" / "+total);
    }
    public static String commonSubsequence(final int startPositionX, int startPositionY, String x, String y){
        StringBuilder commonSubsequence= new StringBuilder();
        for(int i=startPositionX;i<x.length();i++){
            Integer index=find(x.charAt(i),startPositionY,y);
            if(index!=null){
                commonSubsequence.append(x.charAt(i));              
                if(index!=y.length()-1)
                    startPositionY=index+1;
                else 
                    break;
            }               
        }
        return commonSubsequence.toString();        
    }
    public static Integer find(char query, int startIndex, String target){
        Integer pos=cache.get(new Container(query, startIndex));
        total++;
        if(pos!=null){
            count++;
            return pos;
        }else{
            for(int i=startIndex;i<target.length();i++){
                if(target.charAt(i)==query){
                    cache.put(new Container(query, startIndex), i);
                    return i;
                }                   
            }
            return null;
        }           
        }
        }
    class Container{
            private Character toMatch;
            private Integer indexToStartMatch;

        public Container(char t, int i){
        toMatch=t;
        indexToStartMatch=i;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime
                * result
                + ((indexToStartMatch == null) ? 0 : indexToStartMatch
                        .hashCode());
        result = prime * result + ((toMatch == null) ? 0 : toMatch.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        Container other = (Container) obj;
        if (indexToStartMatch == null) {
            if (other.indexToStartMatch != null)
                return false;
        } else if (!indexToStartMatch.equals(other.indexToStartMatch))
            return false;
        if (toMatch == null) {
            if (other.toMatch != null)
                return false;
        } else if (!toMatch.equals(other.toMatch))
            return false;
        return true;


        }       
    }
0
On

TOP-DOWN APPROACH

#include <iostream>
#include <algorithm>
#include <cstring>
using namespace std;

string X, Y;             //input strings
int ans, dp[105][105];   // ans : answer

int LCS(int n, int m)    //our function return value of (n,m) state
{                        // so that we can use the result in (n+1,m+1) state
  if(n == 0 || m == 0) return 0;   //in case of match in (n+1,m+1) state
  if(dp[n][m] != -1) return dp[n][m];

  LCS(n-1,m);          //to visit all n*m states          (try example:  X:ASDF
  LCS(n,m-1);          // we call these states first                     Y:ASDFF)

  if(X[n-1] == Y[m-1])
  {
    
    dp[n][m] =  LCS(n-1,m-1) + 1;
    ans = max(ans, dp[n][m]);
    return dp[n][m];
  }
    return dp[n][m] = 0;
}

int main()
{
    int t; cin>>t;
    while(t--)
    {
      int n, m; cin>>n>>m;  //length of strings
      cin>>X>>Y;
      memset(dp, -1, sizeof dp);
      ans = 0;
      LCS(n, m);
      cout<<ans<<'\n';
    }
    return 0;
}
0
On

Java Solution:

class Solution {
    private int ans = Integer.MIN_VALUE;
    
    public int dfs(int i, int j, String str1, String str2, int[][]dp){
        if(i < 0 || j < 0){
            return 0;
        }
        if(dp[i][j] != -1){
            return dp[i][j];
        }
        if(str1.charAt(i) == str2.charAt(j)){
            dp[i][j] = 1 + dfs(i - 1, j - 1, str1, str2, dp);
            ans = Math.max(ans, dp[i][j]);
        }else{
            dp[i][j] = 0 ;
        }
        dfs(i - 1, j, str1, str2, dp);
        dfs(i, j - 1, str1, str2, dp);
        return dp[i][j];
    }
    
    int longestCommonSubstr(String S1, String S2, int n, int m){
        int[][]dp = new int[n][m];
        for(int[]row : dp){
            Arrays.fill(row, -1);
        }
        dfs(n - 1, m - 1, S1, S2, dp);
        return ans == Integer.MIN_VALUE ? 0 : ans;
    }
}
0
On

Java Solution:

class Solution {
    public int findLength(int[] A, int[] B) {
    
        int[][] cache = new int[A.length][B.length];
        Arrays.stream(cache).forEach(a->Arrays.fill(a,-1));
        int[] res = new int[1];
        findLength(0, 0, A, B, cache, res);
        return res[0];
    }
    
    public static int findLength(int a, int b, int[] A, int[] B, int[][] cache, int[] res){
        
        if( a >= A.length || b >= B.length )
            return 0;
        
        if(cache[a][b] != -1){
            return cache[a][b];
        }
        if(A[a] == B[b]){   
            cache[a][b] = 1 + findLength(a+1,b+1,A,B,cache,res);
          //  remember you can not return here: why? see case: s1 = 1,2,3 s2=1,4,1,2,3
        }
        // try out other possiblities and update cache
        findLength(a+1,b,A,B,cache,res);
    
        findLength(a,b+1,A,B,cache,res);
    
        //you can avoid this and find max value at end in cache
        res[0] = Math.max(res[0],cache[a][b]);
    
        //at this point cache might have -1 or updated value, if its -1 make it to 0 as this location is visited and no common substring is there from here
        cache[a][b] = Math.max(0,cache[a][b]);
        
    return cache[a][b];
    }
}