Find the longest substring in an array of strings and remove it from all elements of the array

I have this array, for example (size is variable):

x = ["10111", "10122", "10250", "10113"]

I need to find the longest string, which is a substring of each element of the array (in this case "10") (this should not be a string prefix). I have to remove it from all lines. The result for this example:

x=["111","222","250","113"] //common value = "10"
+1
source share
3 answers

This extension finds the longest most common substring. Please note that "1"each line also contains more often than "10". (Only with#):

public static class StringExtensions
{
    public static IEnumerable<string> GetMostCommonSubstrings(this IList<string> strings)
    {
        if (strings == null)
            throw new ArgumentNullException("strings");
        if (!strings.Any() || strings.Any(s => string.IsNullOrEmpty(s)))
            throw new ArgumentException("None string must be empty", "strings");

        var allSubstrings = new List<List<string>>();
        for (int i = 0; i < strings.Count; i++)
        {
            var substrings = new List<string>();
            string str = strings[i];
            for (int c = 0; c < str.Length - 1; c++)
            {
                for (int cc = 1; c + cc <= str.Length; cc++)
                {
                    string substr = str.Substring(c, cc);
                    if (allSubstrings.Count < 1 || allSubstrings.Last().Contains(substr))
                        substrings.Add(substr);
                }
            }
            allSubstrings.Add(substrings);
        }
        if (allSubstrings.Last().Any())
        {
            var mostCommon = allSubstrings.Last()
                .GroupBy(str => str)
                .OrderByDescending(g => g.Key.Length)
                .ThenByDescending(g => g.Count())
                .Select(g => g.Key);
            return mostCommon;
        }
        return Enumerable.Empty<string>();
    }
}

Now easy:

string[] x = new[] { "10111", "10122", "10250", "10113" };
string mostCommonSubstring = x.GetMostCommonSubstrings().FirstOrDefault();
if (mostCommonSubstring != null)
{
    for (int i = 0; i < x.Length; i++)
        x[i] = x[i].Replace(mostCommonSubstring, "");
}
Console.Write(string.Join(", ", x));

output:

111, 122, 250, 113

DEMO


. , , (O (n)) HashSet<string>:

public static string GetLongestCommonSubstring(this IList<string> strings)
{
    if (strings == null)
        throw new ArgumentNullException("strings");
    if (!strings.Any() || strings.Any(s => string.IsNullOrEmpty(s)))
        throw new ArgumentException("None string must be empty", "strings");

    var commonSubstrings = new HashSet<string>(strings[0].GetSubstrings());
    foreach (string str in strings.Skip(1))
    {
        commonSubstrings.IntersectWith(str.GetSubstrings());
        if (commonSubstrings.Count == 0)
            return null;
    }
    return commonSubstrings.OrderByDescending(s => s.Length).First();
}

public static IEnumerable<string> GetSubstrings(this string str)
{
    if (string.IsNullOrEmpty(str))
        throw new ArgumentException("str must not be null or empty", "str");

    for (int c = 0; c < str.Length - 1; c++)
    {
        for (int cc = 1; c + cc <= str.Length; cc++)
        {
            yield return str.Substring(c, cc);
        }
    }
}

:

string[] x = new[] { "101133110", "101233210", "102533010", "101331310" };
string longestCommon = x.GetLongestCommonSubstring();  // "10"
+6

: ( , ):

string[] x = {"10111","10222","10250","10113"};
string common = x[0];
foreach(var i in x){
  while(!i.StartsWith(common)){
    common = common.Substring(0,common.Length-1);
    if(common == "") break;
  }
}
x = x.Select(a=>a.Substring(common.Length)).ToArray();
+1

, 1. O (n ^ 2). K.

"1", "0" K = 5.

Now you know that all substrings of length 2 cannot be displayed in more than K input strings. In addition, any substring that occurs K times must be made of substrings of length 1 that have K times. Find the length 1 of the substring for substrings of length 2 that exist K times, this again is a simple search for O (n ^ 2).

Repeat for longer lengths until there are more substrings in the K inputs.

+1
source

All Articles