Pattern-match lines of a file

Here's an example of grep-like treatment of the contents of a text file.

In this example, the format of each line is verified against a regular expression, using a Pattern and Matcher.

Note that a String literal representing a regular expression needs to escape all backslashes. In effect, all of the backslashes are simply doubled.

Example


import java.io.BufferedReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Grep-like verification that every line of a text file has an expected format.
 *
 * <p>JDK 7+.
 */
public final class LineMatcher {

  /** Character encoding used to decode the file being verified. */
  static final Charset ENCODING = StandardCharsets.UTF_8;

  /**
  * Expected start of every line:
  *   \N + tab + integer + tab + text
  * where "text" contains word characters (\w).
  *
  * The corresponding regular expression is:
  *   "^\\N\t(\d)+\t(\w)+"
  *
  * The String passed to Pattern needs to double every backslash:
  *   "^\\\\N\\t(\\d)+\\t(\\w)+"
  *
  * Compiled once for the whole class: a Pattern is immutable and safe to
  * share, so there is no reason to recompile it on every call.
  */
  private static final Pattern LINE_FORMAT = Pattern.compile("^\\\\N\\t(\\d)+\\t(\\w)+");

  /**
  * Verifies that each line of a text file starts with the expected format
  * (\N + tab + integer + tab + word characters). Text following the matched
  * prefix is not examined. The file is decoded using {@link #ENCODING}.
  *
  * Verification stops at the first offending line.
  *
  * @param aFileName path of the text file to verify.
  * @throws IllegalStateException if a line is not of the expected pattern
  * (the message carries the 1-based line number and the line's text), or if
  * the file cannot be opened, read, or decoded (the underlying IOException
  * is attached as the cause). In both cases the file has not been verified.
  */
  public void findBadLines(String aFileName) {
    Path path = Paths.get(aFileName);
    try (
      BufferedReader reader = Files.newBufferedReader(path, ENCODING);
      LineNumberReader lineReader = new LineNumberReader(reader);
    ){
      //Pattern and Matcher are used here, not String.matches(regexp),
      //since String.matches(regexp) would repeatedly compile the same
      //regular expression. The Matcher is kept local to the call because,
      //unlike Pattern, a Matcher is not safe for use by multiple threads.
      Matcher matcher = LINE_FORMAT.matcher("");
      String line;
      while ((line = lineReader.readLine()) != null) {
        matcher.reset(line); //reuse the same Matcher for the new input
        if (!matcher.find()) {
          String msg = "Line " + lineReader.getLineNumber() + " is bad: " + line;
          throw new IllegalStateException(msg);
        }
      }
    }
    catch (IOException ex){
      //Do not just log and return: the caller would then believe that the
      //file had been verified, when some or all of it was never read.
      throw new IllegalStateException("Cannot verify file " + aFileName + ": " + ex, ex);
    }
  }

  /** Test harness. */
  public static void main(String... arguments) {
    LineMatcher lineMatcher = new LineMatcher();
    lineMatcher.findBadLines("C:\\Temp\\RegexpTest.txt");
    System.out.println("Done.");
  }
}



See Also :
Reading and writing text files
Would you use this technique?
Yes   No   Undecided   
© 2014 Hirondelle Systems | Source Code | Contact | License | RSS
Individual code snippets can be used under this BSD license - Last updated on September 21, 2013.
Over 2,000,000 unique IPs last year - Built with WEB4J.
- In Memoriam : Bill Dirani -