# include # include # include # include "levenshtein_matrix.h" /******************************************************************************/ int i4_min ( int i1, int i2 ) /******************************************************************************/ /* Purpose: i4_min() returns the smaller of two I4's. Licensing: This code is distributed under the MIT license. Modified: 29 August 2006 Author: John Burkardt Input: int I1, I2, two integers to be compared. Output: int I4_MIN, the smaller of I1 and I2. */ { int value; if ( i1 < i2 ) { value = i1; } else { value = i2; } return value; } /******************************************************************************/ int *levenshtein_matrix ( int m, char *s, int n, char *t ) /******************************************************************************/ /* Purpose: levenshtein_matrix() computes the Levenshtein distance matrix between strings. Discussion: Let S and T be source and target strings. Consider the task of converting S to T in the minimal number of steps, involving * Insertion: insert a new character * Deletion: delete a character * Substitution: swap one character for another. The Levenshtein distance from S to T is the minimal number of such steps required to transform S into T. Licensing: This code is distributed under the MIT license. Modified: 18 March 2018 Author: John Burkardt Input: int M, the length of string S. char *S, the first string. int N, the length of string T. char *T, the second string. Output: int D[(M+1)*(N+1)], the Levenshtein matrix. */ { int *d; int i; int j; int substitution_cost; d = ( int * ) malloc ( ( m + 1 ) * ( n + 1 ) * sizeof ( int ) ); d[0+0*(m+1)] = 0; /* Source prefixes can be transformed into empty string by dropping all characters, */ for ( i = 1; i <= m; i++ ) { d[i+0*(m+1)] = i; } /* Target prefixes can be reached from empty source prefix by inserting every character. */ for ( j = 1; j <= n; j++ ) { d[0+j*(m+1)] = j; } for ( j = 1; j <= n; j++ ) { for ( i = 1; i <= m; i++ ) { if ( s[i-1] == t[j-1] ) { substitution_cost = 0; } else { substitution_cost = 1; } d[i+j*(m+1)] = i4_min ( d[i-1+j*(m+1)] + 1, i4_min ( d[i+(j-1)*(m+1)] + 1, d[i-1+(j-1)*(m+1)] + substitution_cost ) ); } } return d; }