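I think you should use the length of the longest common subsequence (LCS) instead of the Levenshtein distance. It seems like the better metric for your case. Essentially, it prioritizes insertions and deletions over replacements, as I suggested in my comment.
It clearly distinguishes "Ing" → "Ingersoll" from "Ing" → "Boylan" (scores of 3 and 1), and it copes with spaces ("New Ing" → "New Ingersoll" scores 7, while "New Ing" → "Boylan" scores 1), so it also works for abbreviated input such as "Ing".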
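The algorithm is simple. Given two strings of lengths m and n, build a score matrix (a table of integers, initially all zero) with dimensions m + 1 by n + 1. Walk through every pair of characters: if the two characters are equal, the cell's score is one more than the score diagonally up and to the left; otherwise it is the larger of the two neighbouring scores (the one above and the one to the left). When the table is full, the bottom-right cell holds the length of the LCS.
For example, here is the table for "Ingsll" and "Ingersoll":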
0 1 2 3 4 5 6
I n g s l l
---------------
0 | 0 0 0 0 0 0 0
1 I | 0 1 1 1 1 1 1
2 n | 0 1 2 2 2 2 2
3 g | 0 1 2 3 3 3 3
4 e | 0 1 2 3 3 3 3
5 r | 0 1 2 3 3 3 3
6 s | 0 1 2 3 4 4 4
7 o | 0 1 2 3 4 4 4
8 l | 0 1 2 3 4 5 5
9 l | 0 1 2 3 4 5 6
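Here is an implementation in ObjC. Note that it compares composed character sequences, so a character built from a base letter plus a combining mark - such as @"o̶" - is treated as a single unit.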
#import <Foundation/Foundation.h>
/// Length measured in composed character sequences rather than UTF-16 code units.
@interface NSString (WSSComposedLength)

/// Counts the composed character sequences in the receiver, as reported by
/// NSStringEnumerationByComposedCharacterSequences. A base letter followed by
/// combining marks (for example "o" + U+0336) is reported as one sequence, so it
/// contributes 1 to the result even though -length counts it as 2.
/// @return The number of composed character sequences; 0 for an empty string.
- (NSUInteger)WSSComposedLength;

@end

@implementation NSString (WSSComposedLength)

- (NSUInteger)WSSComposedLength
{
    // Only the number of callbacks matters, so ask Foundation not to build the
    // substring for each sequence.
    NSStringEnumerationOptions countingOptions =
        NSStringEnumerationByComposedCharacterSequences | NSStringEnumerationSubstringNotRequired;

    __block NSUInteger sequenceCount = 0;
    [self enumerateSubstringsInRange:NSMakeRange(0, self.length)
                             options:countingOptions
                          usingBlock:^(NSString *unusedSubstring,
                                       NSRange sequenceRange,
                                       NSRange enclosingRange,
                                       BOOL *stop) {
        sequenceCount += 1;
    }];
    return sequenceCount;
}

@end
/// Longest-common-subsequence (LCS) comparison between two strings.
///
/// Both strings are compared one composed character sequence at a time, so a base
/// letter with combining marks (for example "o" + U+0336) is a single unit that only
/// matches an identical unit; it does not match the bare letter.
@interface NSString (WSSLongestCommonSubsequence)

/// Length of the longest common subsequence of the receiver and `target`, counted in
/// composed character sequences.
/// @param target The string to compare against. A nil or empty string yields 0.
/// @return The LCS length; 0 when the strings share no unit.
- (NSUInteger)WSSLengthOfLongestCommonSubsequenceWithString:(NSString *)target;

/// One longest common subsequence of the receiver and `target`.
/// When several subsequences share the maximal length, the backtrace prefers moving
/// left through the receiver over moving up through `target`, which selects the match
/// that occurs latest in `target`.
/// @param target The string to compare against. A nil or empty string yields @"".
/// @return The subsequence as a string; @"" when nothing is shared.
- (NSString *)WSSLongestCommonSubsequenceWithString:(NSString *)target;

@end

/// Splits `string` into its composed character sequences, in order.
/// Returns an empty array for a nil or empty string.
static NSArray *WSSComposedCharacterSequences(NSString *string)
{
    NSMutableArray *units = [NSMutableArray array];
    [string enumerateSubstringsInRange:NSMakeRange(0, [string length])
                               options:NSStringEnumerationByComposedCharacterSequences
                            usingBlock:^(NSString *unit,
                                         NSRange unitRange,
                                         NSRange enclosingRange,
                                         BOOL *stop) {
        [units addObject:unit];
    }];
    return units;
}

/// Builds the LCS score table for two unit arrays.
///
/// The table has ([targetUnits count] + 1) rows and ([sourceUnits count] + 1) columns
/// of NSUInteger, stored row-major in ONE contiguous buffer owned by the returned
/// object: cell (row, col) lives at index row * columnCount + col. Row 0 and column 0
/// stay zero; the bottom-right cell is the LCS length. Because there is a single
/// owner, the scores remain valid exactly as long as the caller holds the NSData.
static NSData *WSSLongestCommonSubsequenceScores(NSArray *sourceUnits, NSArray *targetUnits)
{
    NSUInteger columnCount = [sourceUnits count] + 1;
    NSUInteger rowCount = [targetUnits count] + 1;

    // dataWithLength: hands back zero-filled storage, which is the required
    // initial state for the first row and first column.
    NSMutableData *table =
        [NSMutableData dataWithLength:rowCount * columnCount * sizeof(NSUInteger)];
    NSUInteger *scores = [table mutableBytes];

    for (NSUInteger row = 1; row < rowCount; row++) {
        NSString *targetUnit = targetUnits[row - 1];
        for (NSUInteger col = 1; col < columnCount; col++) {
            NSUInteger score;
            if ([sourceUnits[col - 1] isEqualToString:targetUnit]) {
                // A match extends the best subsequence of both prefixes.
                score = 1 + scores[(row - 1) * columnCount + (col - 1)];
            } else {
                // No match: carry over the better of "skip a target unit" (upper)
                // and "skip a source unit" (left).
                NSUInteger upperScore = scores[(row - 1) * columnCount + col];
                NSUInteger leftScore = scores[row * columnCount + (col - 1)];
                score = MAX(upperScore, leftScore);
            }
            scores[row * columnCount + col] = score;
        }
    }
    return table;
}

@implementation NSString (WSSLongestCommonSubsequence)

- (NSUInteger)WSSLengthOfLongestCommonSubsequenceWithString:(NSString *)target
{
    NSArray *sourceUnits = WSSComposedCharacterSequences(self);
    NSArray *targetUnits = WSSComposedCharacterSequences(target);

    // Keep the table in a local so the buffer has an owner while it is read.
    NSData *table = WSSLongestCommonSubsequenceScores(sourceUnits, targetUnits);
    const NSUInteger *scores = [table bytes];

    // The table always has at least one cell; the last one is the bottom-right.
    NSUInteger cellCount = [table length] / sizeof(NSUInteger);
    return scores[cellCount - 1];
}

- (NSString *)WSSLongestCommonSubsequenceWithString:(NSString *)target
{
    NSArray *sourceUnits = WSSComposedCharacterSequences(self);
    NSArray *targetUnits = WSSComposedCharacterSequences(target);
    NSData *table = WSSLongestCommonSubsequenceScores(sourceUnits, targetUnits);
    const NSUInteger *scores = [table bytes];
    NSUInteger columnCount = [sourceUnits count] + 1;

    // Walk back from the bottom-right cell, collecting matched units last-to-first.
    NSMutableArray *reversedUnits = [NSMutableArray array];
    NSUInteger row = [targetUnits count];
    NSUInteger col = [sourceUnits count];
    while (row > 0 && col > 0) {
        NSString *sourceUnit = sourceUnits[col - 1];
        if ([sourceUnit isEqualToString:targetUnits[row - 1]]) {
            [reversedUnits addObject:sourceUnit];
            row--;
            col--;
        } else if (scores[(row - 1) * columnCount + col] > scores[row * columnCount + (col - 1)]) {
            row--;
        } else {
            // Ties go left, i.e. skip a unit of the receiver first.
            col--;
        }
    }

    NSMutableString *subsequence = [NSMutableString string];
    for (NSString *unit in [reversedUnits reverseObjectEnumerator]) {
        [subsequence appendString:unit];
    }
    return [NSString stringWithString:subsequence];
}

@end
/// Test driver: checks the LCS length of every source/target pair in the table below
/// against its expected "score". The "sequence" entries record the expected
/// subsequence text for each pair but are not checked here.
int main(int argc, const char * argv[])
{
    @autoreleasepool {
        NSArray * testItems = @[@{@"source" : @"Ingso̶ll",
                                  @"targets": @[
                                      @{@"string" : @"Ingersoll",
                                        @"score" : @6,
                                        @"sequence" : @"Ingsll"},
                                      @{@"string" : @"Boylan",
                                        @"score" : @1,
                                        @"sequence" : @"n"},
                                      @{@"string" : @"New Ingersoll",
                                        @"score" : @6,
                                        @"sequence" : @"Ingsll"}]},
                                @{@"source" : @"Ing",
                                  @"targets": @[
                                      @{@"string" : @"Ingersoll",
                                        @"score" : @3,
                                        @"sequence" : @"Ing"},
                                      @{@"string" : @"Boylan",
                                        @"score" : @1,
                                        @"sequence" : @"n"},
                                      @{@"string" : @"New Ingersoll",
                                        @"score" : @3,
                                        @"sequence" : @"Ing"}]},
                                @{@"source" : @"New Ing",
                                  @"targets": @[
                                      @{@"string" : @"Ingersoll",
                                        @"score" : @3,
                                        @"sequence" : @"Ing"},
                                      @{@"string" : @"Boylan",
                                        @"score" : @1,
                                        @"sequence" : @"n"},
                                      @{@"string" : @"New Ingersoll",
                                        @"score" : @7,
                                        @"sequence" : @"New Ing"}]}];

        for( NSDictionary * item in testItems ){
            NSString * source = item[@"source"];
            for( NSDictionary * target in item[@"targets"] ){
                NSString * targetString = target[@"string"];
                // Compare unsigned with unsigned: the method returns NSUInteger.
                NSUInteger expectedScore = [target[@"score"] unsignedIntegerValue];
                NSUInteger actualScore =
                    [source WSSLengthOfLongestCommonSubsequenceWithString:targetString];
                NSCAssert(expectedScore == actualScore,
                          @"LCS length of \"%@\" and \"%@\": expected %lu, got %lu",
                          source, targetString,
                          (unsigned long)expectedScore, (unsigned long)actualScore);
            }
        }
    }
    return 0;
}