here is one solution:
get the token-index pairs of all tokens in the source string get the token-index pairs of all tokens in the target string from both sets remove the values that are in the other set. foreach token-index in the source set if the target set has the token at the same location remove it from both sets, this is a match created by a previous swap get the target token at the source index if the source set has the target token (at any index) create a swap command to swap the source token at source index with the target token at its index in the source remove the token-index from the source set remove the target token-index from the target set add a token-index for the target token at the new index to the source set loop without moving to the next token-index create remove commands for any remaining token-indexes in the source set create add commands for any remaining token-indexes in the target set
here's a quick C # implementation:
private static IEnumerable<ICommand> GetChangeCommands(string source, string target) { var unmatchedSourceTokens = GetUnmatchedTokenIndexes(source, target); var unmatchedTargetTokens = GetUnmatchedTokenIndexes(target, source); var commands = new List<ICommand>(); foreach (var tokenIndexList in unmatchedSourceTokens) { var sourceToken = tokenIndexList.Key; var sourceStringSourceTokenIndexes = unmatchedSourceTokens[sourceToken]; foreach (var sourceLoopIndex in tokenIndexList.Value.ToList()) { var sourceIndex = sourceLoopIndex; bool swapped; do { swapped = false; if (sourceIndex >= target.Length) { continue; } var targetToken = target[sourceIndex]; if (targetToken == sourceToken) { sourceStringSourceTokenIndexes.Remove(sourceIndex); unmatchedTargetTokens[targetToken].Remove(sourceIndex); continue; } List<int> sourceStringTargetTokenIndexes; if (!unmatchedSourceTokens.TryGetValue(targetToken, out sourceStringTargetTokenIndexes) || !sourceStringTargetTokenIndexes.Any()) { continue; } var targetIndex = sourceStringTargetTokenIndexes.First(); commands.Add(new SwapCommand(sourceIndex, targetIndex)); sourceStringTargetTokenIndexes.RemoveAt(0); sourceStringSourceTokenIndexes.Remove(sourceIndex); sourceStringSourceTokenIndexes.Add(targetIndex); unmatchedTargetTokens[targetToken].Remove(sourceIndex); swapped = true; sourceIndex = targetIndex; } while (swapped); } } var removalCommands = unmatchedSourceTokens .SelectMany(x => x.Value) .Select(x => new RemoveCommand(x)) .Cast<ICommand>() .OrderByDescending(x => x.Index) .ToList(); commands.AddRange(removalCommands); var insertCommands = unmatchedTargetTokens .SelectMany(x => x.Value.Select(y => new InsertCommand(y, x.Key))) .Cast<ICommand>() .OrderBy(x => x.Index) .ToList(); commands.AddRange(insertCommands); return commands; } private static IDictionary<char, List<int>> GetUnmatchedTokenIndexes(string source, string target) { var targetTokenIndexes = target.Select((x, i) => new { Token = x, Index = i }) .ToLookup(x => x.Token, x => x.Index) .ToDictionary(x => x.Key, x => x.ToList()); var distinctSourceTokenIndexes = new Dictionary<char, List<int>>(); foreach (var tokenInfo in source.Select((x, i) => new { Token = x, Index = i })) { List<int> indexes; if (!targetTokenIndexes.TryGetValue(tokenInfo.Token, out indexes) || !indexes.Contains(tokenInfo.Index)) { if (!distinctSourceTokenIndexes.TryGetValue(tokenInfo.Token, out indexes)) { indexes = new List<int>(); distinctSourceTokenIndexes.Add(tokenInfo.Token, indexes); } indexes.Add(tokenInfo.Index); } } return distinctSourceTokenIndexes; } internal class InsertCommand : ICommand { private readonly char _token; public InsertCommand(int index, char token) { Index = index; _token = token; } public int Index { get; private set; } public string Change(string input) { var chars = input.ToList(); chars.Insert(Index, _token); return new string(chars.ToArray()); } public override string ToString() { return string.Format("[\"add\", {0}, '{1}']", Index, _token); } } internal class RemoveCommand : ICommand { public RemoveCommand(int index) { Index = index; } public int Index { get; private set; } public string Change(string input) { var chars = input.ToList(); chars.RemoveAt(Index); return new string(chars.ToArray()); } public override string ToString() { return string.Format("[\"remove\", {0}]", Index); } } internal class SwapCommand : ICommand { private readonly int _targetIndex; public SwapCommand(int sourceIndex, int targetIndex) { Index = sourceIndex; _targetIndex = targetIndex; } public int Index { get; private set; } public string Change(string input) { var chars = input.ToArray(); var temp = chars[Index]; chars[Index] = chars[_targetIndex]; chars[_targetIndex] = temp; return new string(chars); } public override string ToString() { return string.Format("[\"swap\", {0}, {1}]", Index, _targetIndex); } } internal interface ICommand { int Index { get; } string Change(string input); }
Sample Usage:
const string source = "123"; const string target = "324"; var commands = GetChangeCommands(source, target); Execute(source, target, commands); private static void Execute(string current, string target, IEnumerable<ICommand> commands) { Console.WriteLine("converting".PadRight(19) + current + " to " + target); foreach (var command in commands) { Console.Write(command.ToString().PadRight(15)); Console.Write(" => "); current = command.Change(current); Console.WriteLine(current); } }
sample output:
converting 123 to 324 ["swap", 0, 2] => 321 ["remove", 2] => 32 ["add", 2, '4'] => 324 converting hello to world ["swap", 1, 4] => holle ["remove", 4] => holl ["remove", 2] => hol ["remove", 0] => ol ["add", 0, 'w'] => wol ["add", 2, 'r'] => worl ["add", 4, 'd'] => world converting something to smith ["swap", 1, 2] => smoething ["swap", 2, 6] => smiethong ["swap", 3, 4] => smitehong ["swap", 4, 5] => smitheong ["remove", 8] => smitheon ["remove", 7] => smitheo ["remove", 6] => smithe ["remove", 5] => smith converting something to sightseeing ["swap", 1, 6] => simethong ["swap", 6, 3] => simotheng ["swap", 3, 5] => simhtoeng ["swap", 2, 8] => sightoenm ["remove", 8] => sightoen ["remove", 7] => sightoe ["remove", 5] => sighte ["add", 5, 's'] => sightse ["add", 7, 'e'] => sightsee ["add", 8, 'i'] => sightseei ["add", 9, 'n'] => sightseein ["add", 10, 'g'] => sightseeing
here are some of the drawbacks obvious in the above samples: it changes the tokens that will be deleted it deletes and then re-adds the tokens