import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; //import java.util.ArrayList; /** * @author gtowell * Written: Jan 29, 2020 * Modified: Sep 16, 2020 * Modified: gtowell Feb 24, 2021 * * A small class that counts the number of uses of each word in a file. * This class illustrates ArrayList or ArraList, splitting strings * and StringBuffer */ public class WordCounter { /** An arrayList holding all of the Word and Count objects */ private List151Impl> counts = new List151Impl<>(); /** * Does most of the heavy lifting. Reads a file and fills in the counts * arraylist appropriately * * @param filename the name of the file to be read. */ void countFile(String filename) { try (BufferedReader br = new BufferedReader(new FileReader(filename));) { String line; while (null != (line = br.readLine())) { // read line and test if there is a line to read String[] ss = line.replace("-", " ").split("\\s+"); // split the line by spaces for (String token : ss) { // take the token i.e. word, lower case it, then get rid of punctuation token = token.toLowerCase().replace(".", "").replace(",", "").replace("?", "").replace("!", ""); if (token.length() > 0) { KeyValue wordS = findWord(token); if (wordS == null) { // if have not already seen the word, add it to the arraylist wordS = new KeyValue<>(token, 0); try { counts.add(wordS); } catch (MAxSizeExceededException mex) { System.err.println("Stopping reading document, cannot hold more words"); return; } } wordS.setValue(wordS.getValue() + 1); // increment the number of times the word has been seen } } } } catch(FileNotFoundException e) { System.err.println("Error in opening the file:" + filename); System.exit(1); } catch (IOException ioe) { System.err.println("Error reading file " + ioe); System.exit(1); } } /** * Find a word in the list of seen words * * @param w the word to be found * @return the WordHolder object containing the word. * Or null if the word has not been seen */ private KeyValue findWord(String w) { KeyValue fakeInstance = new KeyValue<>(w, 0); return counts.getInstance(fakeInstance); } public String toString() { // StringBuffer is a modifiable string. If you are changing a string a lot, it // is much more efficient. StringBuffer sb = new StringBuffer(); for (int i = 0; i < counts.size(); i++) { KeyValue aWord = counts.get(i); sb.append(aWord); sb.append("\n"); } sb.append("Distinct words: " + counts.size()); return sb.toString(); } public static void main(String[] args) { WordCounter wc = new WordCounter(); wc.countFile("ham.txt"); System.out.println(wc); } }