Pattern match lines of a file

Here's an example of grep-like treatment of the contents of a text file.

In this example, the format of each line is verified against a regular expression, using a Pattern and Matcher.

Note that a String literal representing a regular expression needs to escape all backslashes. In effect, all of the backslashes are simply doubled.

Example

import java.io.BufferedReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Checks that every line of a text file follows a fixed, tab-separated layout.
 *
 * <p>Requires JDK 7+ (try-with-resources and java.nio.file).
 */
public final class LineMatcher {

  /** Charset used to decode the file being checked. */
  static final Charset ENCODING = StandardCharsets.UTF_8;

  /**
   * Expected start of each line:
   *   \N + tab + integer + tab + text
   * where "text" contains word characters (\w).
   *
   * The corresponding regular expression is:
   *   "^\\N\t(\d)+\t(\w)+"
   *
   * The String passed to Pattern needs to double every backslash:
   *   "^\\\\N\\t(\\d)+\\t(\\w)+"
   *
   * Compiled once for the class, so repeated calls to
   * {@link #findBadLines(String)} do not recompile the expression.
   */
  private static final Pattern LINE_FORMAT =
      Pattern.compile("^\\\\N\\t(\\d)+\\t(\\w)+");

  /**
   * Verifies that each line of a text file starts with the expected layout
   * (see {@code LINE_FORMAT}). Lines are read using {@link #ENCODING}, and
   * checking stops at the first line that does not conform.
   *
   * @param fileName path of the text file to check.
   * @throws IllegalStateException if a line is not of the expected pattern
   *     (the message carries the 1-based line number and the line itself),
   *     or if the file cannot be read (the underlying IOException is
   *     attached as the cause).
   */
  public void findBadLines(String fileName) {
    //Pattern and Matcher are used here, not String.matches(regexp),
    //since String.matches(regexp) would repeatedly compile the same
    //regular expression. A single Matcher is re-targeted at each line.
    Matcher matcher = LINE_FORMAT.matcher("");

    Path path = Paths.get(fileName);
    //another way of getting all the lines:
    //Files.readAllLines(path, ENCODING);
    try (
      BufferedReader reader = Files.newBufferedReader(path, ENCODING);
      LineNumberReader lineReader = new LineNumberReader(reader);
    ){
      String line = null;
      while ((line = lineReader.readLine()) != null) {
        matcher.reset(line); //reset the input
        if (!matcher.find()) {
          String msg = "Line " + lineReader.getLineNumber() + " is bad: " + line;
          throw new IllegalStateException(msg);
        }
      }
    }
    catch (IOException ex){
      //An unreadable file must not look like a file whose lines all passed,
      //so the failure is reported to the caller instead of being swallowed.
      throw new IllegalStateException("Cannot read file: " + fileName, ex);
    }
  }

  /** Test harness. */
  public static void main(String... arguments) {
    LineMatcher lineMatcher = new LineMatcher();
    lineMatcher.findBadLines("C:\\Temp\\RegexpTest.txt");
    System.out.println("Done.");
  }
}

See also:
Reading and writing text files