LDIST: PROC(S1,S2,L,D,R) RETURNS(BIT(1)); /* given two strings the ldist procedure computes the following: l - length of the longer string d - Levenshtein distance between the strings r - the ratio of d to l the procedure returns a value of '1'b if successful and if not successful it returns '0'b . . . the Levenshtein distance between the two strings is simply the number of character additions, replacements or deletions needed to convert one string into the other if the strings are identical d and r are zero by definition if both strings are blank or null, then l is also zero by definition the ldist procedure can be used in solving matching problems in which the the input data are not error free if the value of r is less than some arbitrarily chosen threshold value, e.g., 0.3, then the two input strings can be considered to "match" -- otherwise they can be considered not to match for theoretical background see Teru Okuda, Eiichi Tanaka and Tamotsu Kasai, "A Method for the correction of garbled words based on the Levenshtein Metric" in IEEE Transactions on Computers, Vol C-25, No. 2 (Feb., 1976) pp. 172 - 178. AUTHOR: Chuck Eastlack DATE: 26 Feb. 1982 WESTAT PL/I PROCEDURE LIBRARY . . . . . . . . . . */ DCL OLDV(0:L) FIXED BINARY(31) BASED, OLDV_PTR POINTER; DCL NEWV(0:L) FIXED BINARY(31) BASED, NEWV_PTR POINTER; DCL R FLOAT BINARY(53); DCL (I,J,K,L,L1,L2,D) FIXED BINARY(31); DCL (LENGTH,SUBSTR,MIN,MAX) BUILTIN; DCL (S1,S2) CHAR(*) VAR; DCL HOLDSTRING CHAR(80) VAR; I = 0; J = LENGTH(S1); DO WHILE(I < J); I = I + 1; IF SUBSTR(S1,I) = ' ' THEN DO; S1 = SUBSTR(S1,1,I - 1); I = J; END; END; I = 0; J = LENGTH(S2); DO WHILE(I < J); I = I + 1; IF SUBSTR(S2,I) = ' ' THEN DO; S2 = SUBSTR(S2,1,I - 1); I = J; END; END; L1 = LENGTH(S1); L2 = LENGTH(S2); IF S1 = S2 THEN BEGIN; L = L1; D = 0; R = 0; RETURN('1'B); END; IF L1 = 0 THEN BEGIN; L = L2; D = L2; R = 1.0; RETURN('1'B); END; IF L2 = 0 THEN BEGIN; L = L1; D = L1; R = 1.0; RETURN('1'B); END; L = MAX(L1,L2); ALLOCATE OLDV SET (OLDV_PTR); ALLOCATE NEWV SET (NEWV_PTR); IF L1 > L2 THEN BEGIN; DO I = 0 TO L1; OLDV_PTR -> OLDV(I) = I; NEWV_PTR -> NEWV(I) = I; END; DO I = 1 TO L2; NEWV_PTR -> NEWV(0) = I; DO J = 1 TO L1; K = 1; IF SUBSTR(S1,J,1) = SUBSTR(S2,I,1) THEN K = 0; NEWV_PTR -> NEWV(J) = OLDV_PTR -> OLDV(J - 1) + K; NEWV_PTR -> NEWV(J) = MIN(NEWV_PTR -> NEWV(J),OLDV_PTR -> OLDV(J) + 1); NEWV_PTR -> NEWV(J) = MIN(NEWV_PTR -> NEWV(J),NEWV_PTR -> NEWV(J - 1) + 1); END; DO J = 1 TO L1; OLDV_PTR -> OLDV(J) = NEWV_PTR -> NEWV(J); END; END; L = L1; D = NEWV_PTR -> NEWV(L1); END; ELSE BEGIN; DO I = 0 TO L2; OLDV_PTR -> OLDV(I) = I; NEWV_PTR -> NEWV(I) = I; END; DO I = 1 TO L1; NEWV_PTR -> NEWV(0) = I; DO J = 1 TO L2; K = 1; IF SUBSTR(S2,J,1) = SUBSTR(S1,I,1) THEN K = 0; NEWV_PTR -> NEWV(J) = OLDV_PTR -> OLDV(J - 1) + K; NEWV_PTR -> NEWV(J) = MIN(NEWV_PTR -> NEWV(J),OLDV_PTR -> OLDV(J) + 1); NEWV_PTR -> NEWV(J) = MIN(NEWV_PTR -> NEWV(J),NEWV_PTR -> NEWV(J - 1) + 1); END; DO J = 1 TO L2; OLDV_PTR -> OLDV(J) = NEWV_PTR -> NEWV(J); END; END; L = L2; D = NEWV_PTR -> NEWV(L2); END; R = FLOAT(D,53) / FLOAT(L,53); RETURN('1'B); END LDIST;