Thursday, September 27, 2012

Write a program to find the longest common sub-string of two given strings

The approach:-
  1. Key to the LCS problem is to generate a LCS matrix based on which the longest common sub-string could be derived.
  2. There is a cool video explaining the LCS matrix construction from Jay Liew, Security Researcher @ Websense Security Labs.



The solution:-
#include <iostream>
#include <cstdlib>
using namespace std;

int main() {
    char input1[] = "philologist";
    char input2[] = "lollipop";

    int sz1 = sizeof(input1) / sizeof(char);
    int sz2 = sizeof(input2) / sizeof(char);
    sz1--;
    sz2--;

    cout << "Input strings" << endl;
    cout << "-------------" << endl;
    cout << input1 << endl;
    cout << input2 << endl;
    cout << endl;

    int max_sz = 0;
    int max_index = 0;
    char* out = (char*) malloc(min(sz1, sz2) + 1);

    int ** matrix = (int**)calloc(sizeof(int), sz1 + 1);
    for ( int i = 0; i < sz1 + 1; i++ ) 
            matrix[i] = (int*)calloc(sizeof(int), sz2 + 1);

    char* ptr1 = input1;
    char* ptr2 = input2;

    for ( int i = 0; i < sz1; i++ ) {
        for ( int j = 0; j < sz2; j++ ) {
            if ( input1[i] == input2[j] ) {
                int val = matrix[i][j] + 1;
                matrix[i+1][j+1] = val;
                if ( val > max_sz ) {
                    max_sz = val;
                    max_index = j;
                }
            }
        }
    }

    cout << "LCS matrix" << endl;
    cout << "----------" << endl;
    for ( int i = 0; i < sz1 + 1; i++ ) {
        for ( int j = 0; j < sz2 + 1; j++ ) {
                cout << matrix[i][j] << " ";
        }
        cout << endl;
    }

    cout << "Maximum size of substring = " << max_sz << endl;
    cout << "Maximum size of substring ends at index = " << max_index << endl;
    cout << "Longest common substring = ";
    for ( int i = max_index - max_sz + 1; i <= max_index; i++ ) 
        cout << input2[i];

    cout << endl;
}

Output:-
Input strings
-------------
philologist
lollipop

LCS matrix
----------
0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 0 1
0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0
0 1 0 1 1 0 0 0 0
0 0 2 0 0 0 0 1 0
0 1 0 3 1 0 0 0 0
0 0 2 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0
Maximum size of substring = 3
Maximum size of substring ends at index = 2
Longest common substring = lol

Contact Form

Name

Email *

Message *

Back to Top