Help better parse digits from a string in Java

I have a string that contains numbers and letters. I want to split a string into adjacent pieces of numbers and adjacent pieces of letters.

Consider the line "34A312O5M444123A".

I would like to output: ["34", "A", "312", "O", "5", "M", "444123", "A"]

I have a code that works and looks like this:

List<String> digitsAsElements(String str){
  StringBuilder digitCollector = new StringBuilder();

  List<String> output = new ArrayList<String>();

  for (int i = 0; i < str.length(); i++){
    char cChar = str.charAt(i);

    if (Character.isDigit(cChar))
       digitCollector.append(cChar);
    else{
      output.add(digitCollector.toString());
      output.add(""+cChar);

      digitCollector = new StringBuilder();
    }         
  }

  return output;
}

I examined the str split twice to get an array containing all pieces of numbers and an array containing all pieces of letters. Then merge the results. I shied away from this, as that would damage readability.

I deliberately avoided resolving this problem with the regex pattern, because I consider regex patterns to be the main barrier to readability.

  • .
  • , .
  • Overtime .
  • .

:

  • ?
  • ? Util, .
  • regEx -, , ?
  • / regExes?
+5
8

, ?

// Split at any position that either:
// preceded by a digit and followed by a non-digit, or
// preceded by a non-digit and followed by a digit.
String[] parts = str.split("(?<=\\d)(?=\\D)|(?<=\\D)(?=\\d)");

, , , , - ( , ).

+5

, - . , , , ( (\d+|[^\d]+), ).

, , . , , , , , ! , , ( ) . (imho) , .

, , , , , . - , , . - (, , ..), . , - , .

EDIT: mmyers.

+13

java.util.Scanner. , . .

( )

, , . , , , QuickREx Eclipse, ?

-, .

, , . , 50 , , , .

.

, , , , , , . , , , , (, , ANTLR)

.

, . , .

?

, .

? Util, .

, java.util.Scanner.

regEx -, , ?

- .

/ regExes?

, , , , , .

+7

- (, ). , . Regexps - , .

.

List<String> digitsAsElements(String str){
    Pattern p = Pattern.compile("(\\d+|\\w+)*");
    Matcher m = p.matcher(str);

    List<String> output = new ArrayList<String>();
    for(int i = 1; i <= m.groupCount(); i++) {
       output.add(m.group(i));
    }
    return output;
}
+2

Awww, - , . , /. ...

:

digitsAsElements1("34A312O5MNI444123A") = [34, A, 312, O, 5, M, , N, , I, 444123, A]
digitsAsElements2("34A312O5MNI444123A") = [34, A, 312, O, 5, MNI, 444123, A]
Expected: [34, A, 312, O, 5, MN, 444123, A]

:

DigitsAsElements.java:

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DigitsAsElements {

    static List<String> digitsAsElements1(String str){
        StringBuilder digitCollector = new StringBuilder();

        List<String> output = new ArrayList<String>();

        for (int i = 0; i < str.length(); i++){
          char cChar = str.charAt(i);

          if (Character.isDigit(cChar))
             digitCollector.append(cChar);
          else{
            output.add(digitCollector.toString());
            output.add(""+cChar);

            digitCollector = new StringBuilder();
          }         
        }

        return output;
      }

    static List<String> digitsAsElements2(String str){
        // Match a consecutive series of digits or non-digits
        final Pattern pattern = Pattern.compile("(\\d+|\\D+)");
        final Matcher matcher = pattern.matcher(str);

        final List<String> output = new ArrayList<String>();
        while (matcher.find()) {
            output.add(matcher.group());
        }

        return output;
      }

    /**
     * @param args
     */
    public static void main(String[] args) {
        System.out.println("digitsAsElements(\"34A312O5MNI444123A\") = " +
                digitsAsElements1("34A312O5MNI444123A"));
        System.out.println("digitsAsElements2(\"34A312O5MNI444123A\") = " +
                digitsAsElements2("34A312O5MNI444123A"));
        System.out.println("Expected: [" +
                "34, A, 312, O, 5, MN, 444123, A"+"]");
    }

}
+1

, :

public class StringIterator implements Iterator<Character> {

    private final char[] chars;
    private int i;

    private StringIterator(char[] chars) {
        this.chars = chars;
    }

    public boolean hasNext() {
        return i < chars.length;
    }

    public Character next() {
        return chars[i++];
    }

    public void remove() {
        throw new UnsupportedOperationException("Not supported.");
    }

    public static Iterable<Character> of(String string) {
        final char[] chars = string.toCharArray();

        return new Iterable<Character>() {

            @Override
            public Iterator<Character> iterator() {
                return new StringIterator(chars);
            }
        };
    }
}

:

for (int i = 0; i < str.length(); i++){
    char cChar = str.charAt(i);
    ...
}

:

for (Character cChar : StringIterator.of(str)) {
    ...
}

2

, .

+1

, , . , , , , , , , .

, "Grab block of numbers or letters", , , , , , , , , .

, , .

+1

, , , .

. , StringBuilder , ( ). , StringBuilder. , - , .

static List<String> digitsAsElements(String str) {
    StringBuilder collector = new StringBuilder();

    List<String> output = new ArrayList<String>();
    boolean lastWasDigit = false;
    for (int i = 0; i < str.length(); i++) {
        char cChar = str.charAt(i);

        boolean isDigit = Character.isDigit(cChar);
        if (isDigit != lastWasDigit) {
            if (collector.length() > 0) {
                output.add(collector.toString());
                collector = new StringBuilder();
            }
            lastWasDigit = isDigit;
        }
        collector.append(cChar);
    }
    if (collector.length() > 0)
        output.add(collector.toString());

    return output;
}

. , Juha S., .

private static final Pattern DIGIT_OR_NONDIGIT_STRING =
        Pattern.compile("(\\d+|[^\\d]+)");
static List<String> digitsAsElementsR(String str) {
    // Match a consecutive series of digits or non-digits
    final Matcher matcher = DIGIT_OR_NONDIGIT_STRING.matcher(str);
    final List<String> output = new ArrayList<String>();
    while (matcher.find()) {
        output.add(matcher.group());
    }
    return output;
}

, , - . , DIGIT_OR_NONDIGIT_STRING , () , , , , .

public static void main(String[] args) {
    System.out.println(digitsAsElements( "34A312O5MNI444123A"));
    System.out.println(digitsAsElementsR("34A312O5MNI444123A"));
}

:

[34, A, 312, O, 5, MNI, 444123, A]
[34, A, 312, O, 5, MNI, 444123, A]
+1
source

All Articles