Why are my string manipulations slow using a lambda expression?

The method accepts words separated by commas as a String; it returns a String of comma-separated words in natural sort order, containing no four-letter words, with all words in UPPER case and without duplicates. The first approach is rather slow compared to the second approach. Can you help me understand why, and how I can improve my approach?

Approach 1:

/**
 * Normalizes a comma-separated word list.
 *
 * <p>Drops every token whose length is exactly four, upper-cases the rest,
 * removes duplicates and returns the survivors in natural (String) order,
 * joined by commas.
 *
 * @param s comma-separated words; must not be {@code null}
 * @return the upper-cased, de-duplicated, sorted words joined by {@code ","}
 */
public String stringProcessing(String s){
      return Arrays.stream(s.split(","))
                   .filter(t -> t.length() != 4)
                   // Upper-case each token BEFORE sorting and de-duplicating. Doing it on the
                   // joined result let "b" and "B" both survive distinct() and ordered the
                   // output by the original mixed case instead of the upper-cased words.
                   .map(String::toUpperCase)
                   .sorted()
                   .distinct()
                   .collect(Collectors.joining(","));
}

Approach 2:

/**
 * Normalizes a comma-separated word list with a plain loop.
 *
 * <p>Tokens of length four are skipped; every other token is upper-cased and
 * collected into a {@link TreeSet}, which both removes duplicates and keeps
 * the words in natural order.
 *
 * @param s comma-separated words; must not be {@code null}
 * @return the upper-cased, de-duplicated, sorted words joined by {@code ","};
 *         the empty string when every token was filtered out
 */
public String processing(String s) {
    Set<String> resultSet = new TreeSet<>();
    for (String t : s.split(",")) {
        if (t.length() != 4) {
            resultSet.add(t.toUpperCase());
        }
    }
    // String.join copes with an empty set. The previous StringBuilder version
    // called deleteCharAt(-1) and threw when all tokens had four letters.
    return String.join(",", resultSet);
}
+6
source share
3 answers

JRE, - .

, . distinct(), , , , . , , . , .

, , . :

/**
 * Stream-based variant: split on commas, discard four-character tokens,
 * upper-case, sort, de-duplicate and re-join with commas.
 *
 * @param s comma-separated words
 * @return the processed words joined by {@code ","}
 */
public String variant1(String s){
    String[] words = s.split(",");
    return Arrays.stream(words)
            .filter(word -> !(word.length() == 4))
            .map(word -> word.toUpperCase())
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));
}

/**
 * Loop-based variant: collects the upper-cased tokens that are not four
 * characters long into a sorted set and joins them with commas.
 *
 * @param s comma-separated words
 * @return the processed words joined by {@code ","}
 */
public String variant2(String s) {
    // TreeSet gives natural ordering and de-duplication in one structure.
    Set<String> upperCased = new TreeSet<>();
    String[] words = s.split(",");
    for (int i = 0; i < words.length; i++) {
        String word = words[i];
        if (word.length() == 4) {
            continue;
        }
        upperCased.add(word.toUpperCase());
    }
    return String.join(",", upperCased);
}

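Note that this uses sorted() before distinct(); applying distinct() directly after sorted() lets the implementation exploit the sorted order of the stream instead of tracking every element seen so far in a hash set.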

, :

/**
 * Stream-based variant that splits lazily via {@link Pattern#splitAsStream}
 * instead of materializing a token array first.
 *
 * @param s comma-separated words
 * @return the upper-cased, sorted, de-duplicated words (four-character tokens
 *         removed) joined by {@code ","}
 */
public String variant1(String s){
    final Pattern separator = Pattern.compile(",");
    return separator.splitAsStream(s)
            .filter(word -> !(word.length() == 4))
            .map(word -> word.toUpperCase())
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));
}

,

/**
 * Hand-rolled variant that scans for commas with {@code indexOf} and only
 * creates substrings for tokens that are kept.
 *
 * @param s comma-separated words
 * @return the upper-cased, sorted, de-duplicated words (four-character tokens
 *         removed) joined by {@code ","}
 */
public String variant3(String s) {
    Set<String> words = new TreeSet<>();
    int start = 0;                 // index of the first character of the current token
    int comma = s.indexOf(',');    // index of the comma terminating the current token
    while (comma >= 0) {
        if (comma - start != 4) {
            words.add(s.substring(start, comma).toUpperCase());
        }
        start = comma + 1;
        comma = s.indexOf(',', start);
    }
    // The text after the last comma (or the whole input when there is none).
    if (s.length() - start != 4) {
        words.add(s.substring(start).toUpperCase());
    }
    return String.join(",", words);
}

, . , , , , , , , , , .

+9

To put numbers on this, I measured the variants with JMH, using Holger's code:

/**
 * JMH benchmark comparing stream pipelines against a plain loop for the
 * "split, drop four-character tokens, upper-case, sort, de-duplicate, join" task.
 *
 * <p>Reports average time per invocation in nanoseconds, using two warm-up and
 * two measurement iterations of two seconds each. Every benchmark method runs
 * in a single fork.
 */
@BenchmarkMode(value = { Mode.AverageTime })
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class StreamVsLoop {

    /** Runs every benchmark declared in this class through the JMH runner. */
    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder().include(StreamVsLoop.class.getSimpleName())
                .build();
        new Runner(opt).run();
    }

    // Inputs with 3, 8 and 11 comma-separated words.
    // NOTE(review): each comma is followed by a space and the benchmarks split on ","
    // only, so every token but the first keeps a leading space. The length-4 filter
    // therefore removes " ccc" rather than "dddd" - confirm this is intended.
    @Param(value = {
            "a, b, c",
            "a, bb, ccc, dddd, eeeee, ffffff, ggggggg, hhhhhhhh",
            "a, bb, ccc, dddd, eeeee, ffffff, ggggggg, hhhhhhhh, ooooooooo, tttttttttttttt, mmmmmmmmmmmmmmmmmm" })
    String s;

    /** Stream pipeline over the array produced by {@code String.split}; sorts before de-duplicating. */
    @Benchmark
    @Fork(1)
    public String stream() {
        Stream<String> tokens = Arrays.stream(s.split(","));
        return tokens.filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted().distinct()
                .collect(Collectors.joining(","));
    }

    /** Plain loop that collects the kept, upper-cased tokens into a {@code TreeSet} and joins them. */
    @Benchmark
    @Fork(1)
    public String loop() {
        String[] tokens = s.split(",");
        Set<String> resultSet = new TreeSet<>();
        for (String t : tokens) {
            if (t.length() != 4) {
                resultSet.add(t.toUpperCase());
            }
        }
        return String.join(",", resultSet);
    }

    /**
     * Same pipeline as {@link #stream()} but fed by {@code Pattern.splitAsStream};
     * the pattern is compiled on every invocation.
     */
    @Benchmark
    @Fork(1)
    public String sortedDistinct() {
        return Pattern.compile(",").splitAsStream(s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted()
                .distinct()
                .collect(Collectors.joining(","));
    }

    /** Like {@link #sortedDistinct()} but with {@code distinct()} applied before {@code sorted()}. */
    @Benchmark
    @Fork(1)
    public String distinctSorted() {
        return Pattern.compile(",").splitAsStream(s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .distinct()
                .sorted()
                .collect(Collectors.joining(","));
    }
}

Results (average time in ns/op):

 stream              3 args         574.042
 loop                3 args         393.364
 sortedDistinct      3 args         829.077
 distinctSorted      3 args         836.558

 stream              8 args         1144.488
 loop                8 args         1014.756
 sortedDistinct      8 args         1533.968
 distinctSorted      8 args         1745.055

 stream             11 args         1829.571
 loop               11 args         1514.138
 sortedDistinct     11 args         1940.256
 distinctSorted     11 args         2591.715

, , , , . , ( , -, )

+7

, , ; , ...

/**
 * JMH benchmark comparing stream pipelines, a plain loop and a hand-rolled
 * {@code indexOf} scanner on large generated inputs.
 *
 * <p>Reports average time per invocation in milliseconds, using two warm-up and
 * two measurement iterations of two seconds each. Every benchmark method runs
 * in a single fork so the results are directly comparable.
 */
@BenchmarkMode(value = { Mode.AverageTime })
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class StreamVsLoop {

    /**
     * Runs every benchmark declared in this class. Assertions are enabled in the
     * forked JVM so the sanity check in {@link StringInput#tearDown()} is active.
     */
    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder().include(StreamVsLoop.class.getSimpleName())
                .jvmArgs("-ea")
                .shouldFailOnError(true)
                .build();
        new Runner(opt).run();
    }

    /**
     * Per-thread input holder. A fresh random input of {@link #next} letters is
     * generated before every iteration and released afterwards.
     */
    @State(Scope.Thread)
    public static class StringInput {

        private String[] letters = { "q", "a", "z", "w", "s", "x", "e", "d", "c", "r", "f", "v", "t", "g", "b",
                "y", "h", "n", "u", "j", "m", "i", "k", "o", "l", "p" };

        /** The comma-separated input handed to the benchmarks. */
        public String s = "";

        /** Number of letters in the generated input (commas not counted). */
        @Param(value = { "1000", "10000", "100000" })
        int next;

        /**
         * Sanity-checks the generated input, then drops it.
         *
         * <p>The second half of the input consists of five-letter words. For the
         * configured sizes (multiples of 10) there are exactly {@code next / 10 - 1}
         * of them: 99, 999 and 9999 respectively.
         */
        @TearDown(Level.Iteration)
        public void tearDown() {
            long count = Arrays.stream(s.split(",")).filter(x -> x.length() == 5).count();
            assert count == next / 10 - 1 : "unexpected number of five-letter words: " + count;
            s = null;
        }

        /**
         * Builds an input in which roughly half of the words are filtered out by
         * the benchmarks: the first half is cut into four-letter words (dropped),
         * the second half into five-letter words (kept).
         */
        @Setup(Level.Iteration)
        public void setUp() {
            int half = next / 2;
            // Start from scratch on every iteration. tearDown() leaves s == null, and
            // appending to that produced inputs prefixed with the text "null".
            StringBuilder input = new StringBuilder(next + next / 4);
            for (int i = 0; i < next; i++) {
                int index = ThreadLocalRandom.current().nextInt(0, letters.length);
                String letter = letters[index];
                boolean startsFourLetterWord = i < half && i % 4 == 0;
                boolean startsFiveLetterWord = i > half && i % 5 == 0;
                if (startsFourLetterWord || startsFiveLetterWord) {
                    input.append(',');
                }
                input.append(letter);
            }
            s = input.toString();
        }
    }

    /** Stream pipeline over the array produced by {@code String.split}; sorts before de-duplicating. */
    @Benchmark
    @Fork(1) // was a bare @Fork (JMH default fork count), unlike every other benchmark here
    public String stream(StringInput si) {
        Stream<String> tokens = Arrays.stream(si.s.split(","));
        return tokens.filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted().distinct()
                .collect(Collectors.joining(","));
    }

    /** Plain loop that collects the kept, upper-cased tokens into a {@code TreeSet} and joins them. */
    @Benchmark
    @Fork(1)
    public String loop(StringInput si) {
        String[] tokens = si.s.split(",");
        Set<String> resultSet = new TreeSet<>();
        for (String t : tokens) {
            if (t.length() != 4) {
                resultSet.add(t.toUpperCase());
            }
        }
        return String.join(",", resultSet);
    }

    /**
     * Same pipeline as {@link #stream(StringInput)} but fed by
     * {@code Pattern.splitAsStream}; the pattern is compiled on every invocation.
     */
    @Benchmark
    @Fork(1)
    public String sortedDistinct(StringInput si) {
        return Pattern.compile(",").splitAsStream(si.s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted()
                .distinct()
                .collect(Collectors.joining(","));
    }

    /** Like {@link #sortedDistinct(StringInput)} but with {@code distinct()} applied before {@code sorted()}. */
    @Benchmark
    @Fork(1)
    public String distinctSorted(StringInput si) {
        return Pattern.compile(",").splitAsStream(si.s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .distinct()
                .sorted()
                .collect(Collectors.joining(","));
    }

    /**
     * Hand-rolled scanner: walks the commas with {@code indexOf} and creates
     * substrings only for tokens that are not four characters long.
     */
    @Benchmark
    @Fork(1)
    public String variant3(StringInput si) {
        String s = si.s;
        Set<String> resultSet = new TreeSet<>();
        int o = 0, p;
        // o = start of the current token, p = position of the comma ending it.
        for (p = s.indexOf(','); p >= 0; p = s.indexOf(',', o = p + 1)) {
            if (p - o == 4) {
                continue;
            }
            resultSet.add(s.substring(o, p).toUpperCase());
        }
        // Trailing token after the last comma.
        if (s.length() - o != 4) {
            resultSet.add(s.substring(o).toUpperCase());
        }
        return String.join(",", resultSet);
    }
}
streamvsLoop.StreamVsLoop.distinctSorted    1000   0.028
streamvsLoop.StreamVsLoop.sortedDistinct    1000   0.024
streamvsLoop.StreamVsLoop.loop              1000   0.016
streamvsLoop.StreamVsLoop.stream            1000   0.020 
streamvsLoop.StreamVsLoop.variant3          1000   0.012


streamvsLoop.StreamVsLoop.distinctSorted   10000   0.394
streamvsLoop.StreamVsLoop.sortedDistinct   10000   0.359
streamvsLoop.StreamVsLoop.loop             10000   0.274
streamvsLoop.StreamVsLoop.stream           10000   0.304  ± 0.006
streamvsLoop.StreamVsLoop.variant3         10000   0.234


streamvsLoop.StreamVsLoop.distinctSorted  100000   4.950
streamvsLoop.StreamVsLoop.sortedDistinct  100000   4.432
streamvsLoop.StreamVsLoop.loop            100000   5.457
streamvsLoop.StreamVsLoop.stream          100000   3.927 ± 0.048
streamvsLoop.StreamVsLoop.variant3        100000   3.595

, - , .

+1

All Articles