Created
October 19, 2015 07:13
-
-
Save smilingleo/84ff3d7819411344c12b to your computer and use it in GitHub Desktop.
word counter implementation by different approaches.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
import java.util.Arrays; | |
import java.util.List; | |
import java.util.Spliterator; | |
import java.util.function.Consumer; | |
import java.util.stream.IntStream; | |
import java.util.stream.Stream; | |
import java.util.stream.StreamSupport; | |
import org.junit.After; | |
import org.junit.Before; | |
import org.junit.Test; | |
public class WordCounterTest { | |
static String fileName = "/Users/leo/Downloads/java7_upgrade_patch.txt"; | |
private long begin; | |
@Before | |
public void setup(){ | |
begin = System.currentTimeMillis(); | |
} | |
@After | |
public void end() { | |
System.out.println("takes " + (System.currentTimeMillis() - begin) + " ms"); | |
} | |
@Test | |
public void sequential() throws IOException { | |
int count = 0; | |
boolean lastSpace = true; | |
byte[] bytes = Files.readAllBytes(Paths.get(fileName)); | |
for (int i=0; i<bytes.length; i++){ | |
if (Character.isWhitespace(bytes[i])){ | |
lastSpace = true; | |
} else { | |
// every time we met the first non-whitespace character, we add counter by 1. | |
if (lastSpace) | |
count ++; | |
lastSpace = false; | |
} | |
} | |
System.out.println("Count sequentially: " + count); | |
} | |
@Test | |
public void parallel_1() throws IOException { | |
// this is actually not good, since 'split' will sequentially go through the content completely. | |
long count = Files.lines(Paths.get(fileName)) | |
.parallel() | |
// problemmatic if the paragragh is huge | |
.flatMap(line -> ((List<String>)Arrays.asList(line.split("\\s"))).stream()) | |
.filter(s -> s != null && s.trim().length() > 0).count(); | |
System.out.println("Count parallelly #1: " + count); | |
} | |
@Test | |
public void functional() throws IOException { | |
Stream<Character> stream = Files.lines(Paths.get(fileName)) | |
// need to append a line-break to each line. | |
.flatMap(line -> IntStream.range(0, line.length() + 1).mapToObj(i -> i < line.length() ? line.charAt(i) : '\n')); | |
WordCounter result = stream.reduce(new WordCounter(0, true), WordCounter::accumulate, WordCounter::combine); | |
System.out.println("Count functionally: " + result.count); | |
} | |
private static class WordCounter { | |
private final int count; | |
private final boolean lastSpace; | |
public WordCounter(int count, boolean lastSpace) { | |
this.count = count; | |
this.lastSpace = lastSpace; | |
} | |
public WordCounter accumulate(Character c){ | |
// System.out.println("Thread-" + Thread.currentThread().getId() + " is handling " + c); | |
if (Character.isWhitespace(c)){ | |
return lastSpace ? this : new WordCounter(count, true); | |
} else { | |
return lastSpace ? new WordCounter(count + 1, false) : this; | |
} | |
} | |
public WordCounter combine(WordCounter counter){ | |
return new WordCounter(count + counter.count, counter.lastSpace); | |
} | |
} | |
@Test | |
public void parallel_2() throws IOException { | |
Stream<Character> stream = Files.lines(Paths.get(fileName)) | |
.flatMap(line -> StreamSupport.stream(new WordSpliterator(line + "\n"), true)); | |
WordCounter result = stream.parallel() | |
.reduce(new WordCounter(0, true), WordCounter::accumulate, WordCounter::combine); | |
System.out.println("Count parallelly functionally: " + result.count); | |
} | |
private static class WordSpliterator implements Spliterator<Character> { | |
private String sentence; | |
private int currentPos = 0; | |
public WordSpliterator(String sentence){ | |
this.sentence = sentence; | |
} | |
@Override | |
public boolean tryAdvance(Consumer<? super Character> action) { | |
// `action` here is the `WordCounter::accumulate` | |
action.accept(sentence.charAt(currentPos ++)); | |
return currentPos < sentence.length(); | |
} | |
@Override | |
public Spliterator<Character> trySplit() { | |
int currentSize = sentence.length() - currentPos; | |
if (currentSize < 10) | |
return null; | |
for (int pos = currentSize / 2 + currentPos; pos < sentence.length(); pos++){ | |
if (Character.isWhitespace(sentence.charAt(pos))){ | |
// current thread to handle right part, new thread to process the left part | |
WordSpliterator spliterator = new WordSpliterator(sentence.substring(currentPos, pos)); | |
// so we have to move the pos to handle the right part. | |
currentPos = pos; | |
return spliterator; | |
} | |
} | |
return null; | |
} | |
@Override | |
public long estimateSize() { | |
return sentence.length() - currentPos; | |
} | |
@Override | |
public int characteristics() { | |
return ORDERED + SIZED + SUBSIZED + NONNULL + IMMUTABLE; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment