The original code is verbose "thanks" to all the generics definitions.
Note added after first posting:
You can save considerable time (about 15%) by pre-compiling the regular expression. Add a line
Pattern splitOnWhitespace = Pattern.compile("[ \t]");
then change line 18, the line.split() code to
for (String s : splitOnWhitespace.split(line))
I've done this below, plus fixed one other issue about the definition of a "word".
I haven't done a full timing comparison, but hopefully Lau will run one shortly.
import java.io.*;
import java.util.*;
import java.util.regex.*;

/**
 * Counts how often each word occurs in the files found one level below a root directory
 * and writes two reports: one ordered alphabetically by word, one ordered by decreasing count.
 */
public class WordCounter {
    /** Directory whose sub-directories are scanned for files to count. */
    private static final String ROOT_DIR = "C:/temp/20_newsgroups";

    /** Report listing every word with its count, in the map's (alphabetical) key order. */
    private static final String ALPHABETICAL_REPORT = "C:/temp/counts-alphabetical-java.txt";

    /** Report listing every word with its count, highest count first. */
    private static final String DECREASING_REPORT = "C:/temp/counts-decreasing-java.txt";

    /** A "word" is a maximal run of regex word characters; compiled once and reused for every line. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /**
     * Scans the corpus, writes both reports and prints the elapsed time.
     *
     * @param args ignored
     * @throws IOException if a directory cannot be listed or a file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        long timeStart = System.currentTimeMillis();
        CountingSet counter = new CountingSet();

        for (File groupDirectory : listFilesOrFail(new File(ROOT_DIR))) {
            if (!groupDirectory.isDirectory()) {
                continue;
            }
            for (File f : listFilesOrFail(groupDirectory)) {
                if (f.isFile()) {
                    countWords(f, counter);
                }
            }
        }

        PrintWriter pw = new PrintWriter(ALPHABETICAL_REPORT);
        try {
            for (Map.Entry<String, Integer> me : counter.entrySet()) {
                pw.println(me.getKey() + " : " + me.getValue());
            }
        } finally {
            pw.close();
        }

        pw = new PrintWriter(DECREASING_REPORT);
        try {
            spewInverted(counter, pw);
        } finally {
            pw.close();
        }

        System.out.println("Finished in " + 0.001 * (System.currentTimeMillis() - timeStart)
                + " seconds");
    }

    /**
     * Lists the entries of a directory, turning the {@code null} that
     * {@link File#listFiles()} returns on failure into a descriptive exception
     * instead of a later {@link NullPointerException}.
     */
    private static File[] listFilesOrFail(File dir) throws IOException {
        File[] entries = dir.listFiles();
        if (entries == null) {
            throw new IOException("Cannot list directory: " + dir);
        }
        return entries;
    }

    /**
     * Adds every word found in {@code f} to {@code counter}, reading line by line.
     * The reader is closed even when reading fails part-way through.
     */
    private static void countWords(File f, CountingSet counter) throws IOException {
        // NOTE(review): FileReader decodes with the JVM's default charset -- confirm that suits the corpus.
        BufferedReader reader = new BufferedReader(new FileReader(f));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = WORD_PATTERN.matcher(line);
                while (matcher.find()) {
                    counter.add(matcher.group());
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Prints every entry of {@code in} as {@code "key : value"}, one per line, ordered by
     * decreasing value. The sort is stable, so entries with equal values keep the iteration
     * order of {@code in.entrySet()}.
     *
     * @param in map from word to count; not modified
     * @param pw destination; neither flushed nor closed by this method
     */
    static void spewInverted(Map<String, Integer> in, PrintWriter pw) {
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(in.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                // Arguments reversed on purpose: larger counts sort first.
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        for (Map.Entry<String, Integer> entry : list) {
            pw.println(entry.getKey() + " : " + entry.getValue());
        }
    }
}

/** A sorted map from string to the number of times that string has been added. */
class CountingSet extends TreeMap<String, Integer> {
    private static final long serialVersionUID = 1L;

    /**
     * Records one more occurrence of {@code s}: the first call for a string stores 1,
     * each later call increments the stored count.
     */
    void add(String s) {
        Integer count = get(s);
        put(s, (count == null) ? Integer.valueOf(1) : Integer.valueOf(count + 1));
    }
}
No comments:
Post a Comment