This should return the longest substring that starts at index startIndex, spans at most length characters, and consists only of "complete" graphemes. As a consequence, leading/trailing "split" surrogate pairs are removed, leading combining marks are removed, and trailing characters that are missing their combining marks are removed.
Note that this is probably not what you asked for. You seem to want to use graphemes as the unit of measure (or perhaps you want to include the last grapheme even if that makes the result longer than the length parameter).
public static class StringEx { public static string UnicodeSafeSubstring(this string str, int startIndex, int length) { if (str == null) { throw new ArgumentNullException("str"); } if (startIndex < 0 || startIndex > str.Length) { throw new ArgumentOutOfRangeException("startIndex"); } if (length < 0) { throw new ArgumentOutOfRangeException("length"); } if (startIndex + length > str.Length) { throw new ArgumentOutOfRangeException("length"); } if (length == 0) { return string.Empty; } var sb = new StringBuilder(length); int end = startIndex + length; var enumerator = StringInfo.GetTextElementEnumerator(str, startIndex); while (enumerator.MoveNext()) { string grapheme = enumerator.GetTextElement(); startIndex += grapheme.Length; if (startIndex > length) { break; }
A variant that simply includes the "extra" characters at the end of the substring when they are needed to complete the last grapheme:
public static class StringEx { public static string UnicodeSafeSubstring(this string str, int startIndex, int length) { if (str == null) { throw new ArgumentNullException("str"); } if (startIndex < 0 || startIndex > str.Length) { throw new ArgumentOutOfRangeException("startIndex"); } if (length < 0) { throw new ArgumentOutOfRangeException("length"); } if (startIndex + length > str.Length) { throw new ArgumentOutOfRangeException("length"); } if (length == 0) { return string.Empty; } var sb = new StringBuilder(length); int end = startIndex + length; var enumerator = StringInfo.GetTextElementEnumerator(str, startIndex); while (enumerator.MoveNext()) { if (startIndex >= length) { break; } string grapheme = enumerator.GetTextElement(); startIndex += grapheme.Length;
This returns what you asked for: "Hello😀 world!".UnicodeSafeSubstring(0, 6) == "Hello😀".
source share