The original code is verbose "thanks" to all the generics definitions.
Note added after first posting:
You can save considerable time (about 15%) by pre-compiling the regular expression. Add a line
Pattern splitOnWhitespace = Pattern.compile("[ \t]");
then change line 18, the line.split() code to
for (String s : splitOnWhitespace.split(line))
I've done this below, plus fixed one other issue about the definition of a "word".
I haven't done a full timing comparison, but hopefully Lau will run one shortly.
import java.io.*;
import java.util.*;
import java.util.regex.*;
/**
 * Counts how often each word occurs in the files of a two-level directory tree
 * and writes two reports: one in the counter's key order and one ordered by
 * decreasing count.
 */
public class WordCounter {
    /** A "word" is a maximal run of regex word characters ({@code \w+}); compiled once. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /**
     * Tallies the words of every regular file found in each sub-directory of
     * {@code C:/temp/20_newsgroups}, writes the two report files under
     * {@code C:/temp}, and prints the elapsed time to standard output.
     *
     * @param args ignored
     * @throws IOException if a directory cannot be listed, an input file cannot
     *                     be opened or read, or a report file cannot be created
     */
    public static void main(String[] args) throws IOException {
        // Primitive long: boxing the timestamp serves no purpose.
        long timeStart = System.currentTimeMillis();
        File rootDir = new File("C:/temp/20_newsgroups");
        CountingSet counter = new CountingSet();
        for (File groupDirectory : listOrThrow(rootDir)) {
            if (groupDirectory.isDirectory()) {
                for (File f : listOrThrow(groupDirectory)) {
                    if (f.isFile()) {
                        countWords(f, counter);
                    }
                }
            }
        }

        PrintWriter pw = new PrintWriter("C:/temp/counts-alphabetical-java.txt");
        try {
            for (Map.Entry<String, Integer> me : counter.entrySet()) {
                pw.println(me.getKey() + " : " + me.getValue());
            }
        } finally {
            // Close (and flush) even if writing fails part-way.
            pw.close();
        }

        pw = new PrintWriter("C:/temp/counts-decreasing-java.txt");
        try {
            spewInverted(counter, pw);
        } finally {
            pw.close();
        }

        System.out.println("Finished in " + 0.001
                * (System.currentTimeMillis() - timeStart) + " seconds");
    }

    /**
     * Lists the entries of {@code dir}, failing with a descriptive exception
     * instead of the NullPointerException a null listing would otherwise cause.
     *
     * @param dir the directory to list
     * @return the directory's entries, never null
     * @throws IOException if {@code File.listFiles()} returns null, which it does
     *                     when the path is not a directory or an I/O error occurs
     */
    private static File[] listOrThrow(File dir) throws IOException {
        File[] entries = dir.listFiles();
        if (entries == null) {
            throw new IOException("Cannot list directory: " + dir);
        }
        return entries;
    }

    /**
     * Adds every word found in {@code f} to {@code counter}, reading line by line.
     *
     * @param f       the file to scan
     * @param counter receives one {@code add} call per word occurrence
     * @throws IOException if the file cannot be opened or read
     */
    private static void countWords(File f, CountingSet counter) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(f));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = WORD_PATTERN.matcher(line);
                while (matcher.find()) {
                    counter.add(matcher.group());
                }
            }
        } finally {
            // Release the file handle even when a read fails.
            reader.close();
        }
    }

    /**
     * Prints the entries of {@code in} as {@code key : value} lines ordered by
     * decreasing value. The sort is stable, so entries with equal values keep
     * the iteration order of {@code in}.
     *
     * @param in the counts to print; not modified
     * @param pw destination for the lines; left open for the caller to close
     */
    static void spewInverted(Map<String, Integer> in, PrintWriter pw) {
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(in.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> o1,
                    Map.Entry<String, Integer> o2) {
                // Arguments reversed to obtain descending order.
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        for (Map.Entry<String, Integer> entry : list) {
            pw.println(entry.getKey() + " : " + entry.getValue());
        }
    }
}
/**
 * A sorted map from a string to the number of times it has been passed to
 * {@link #add(String)}.
 */
class CountingSet extends TreeMap<String, Integer> {
    /**
     * Records one more occurrence of {@code s}: a string not yet present is
     * stored with a count of 1, otherwise its stored count is increased by one.
     *
     * @param s the string to count
     */
    void add(String s) {
        Integer previous = get(s);
        int updated;
        if (previous == null) {
            // First sighting of this string.
            updated = 1;
        } else {
            updated = previous.intValue() + 1;
        }
        put(s, Integer.valueOf(updated));
    }
}