Reading and writing text files
This example shows a grep-like treatment of the contents of a text file.
In this example, the format of each line is verified against a regular expression, using a Pattern and a Matcher.
Note that a String literal representing a regular expression needs to escape all backslashes. In effect, every backslash in the regular expression is simply doubled.
Example
import java.io.BufferedReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Verifies, line by line, that a text file follows a fixed record layout.
 *
 * <p>JDK 7+.
 */
public final class LineMatcher {

  /** Character encoding used to decode the file being checked. */
  static final Charset ENCODING = StandardCharsets.UTF_8;

  /** File checked by {@link #main(String...)} when no argument is supplied. */
  private static final String DEFAULT_FILE = "C:\\Temp\\RegexpTest.txt";

  /**
   * Required start of every line: a literal backslash followed by N, a tab,
   * one or more digits, a tab, and one or more word characters.
   *
   * <p>The regular expression is {@code ^\\N\t(\d)+\t(\w)+}. A String literal
   * has to double every backslash, which gives the literal used below.
   *
   * <p>Compiled once for the class rather than on every call.
   */
  private static final Pattern LINE_FORMAT =
      Pattern.compile("^\\\\N\\t(\\d)+\\t(\\w)+");

  /**
   * Verifies that each line of a text file starts with:
   * \N + tab + integer + tab + text
   * where "text" contains word characters (\w).
   *
   * <p>Checking stops at the first line that does not match.
   *
   * @param fileName path of the file to check; it is decoded using
   *     {@link #ENCODING}
   * @throws IllegalStateException if a line is not of the expected pattern
   *     (the message carries the 1-based line number and the line itself),
   *     or if the file cannot be read (the underlying {@link IOException}
   *     is attached as the cause)
   */
  public void findBadLines(String fileName) {
    // Pattern and Matcher are used here, not String.matches(regexp),
    // since String.matches(regexp) would compile the same regular
    // expression again for every line. A Matcher holds per-input state, so
    // one is created per call and re-pointed at each line with reset().
    Matcher matcher = LINE_FORMAT.matcher("");
    Path path = Paths.get(fileName);
    // Another way of getting all the lines:
    // Files.readAllLines(path, ENCODING);
    try (
      BufferedReader reader = Files.newBufferedReader(path, ENCODING);
      LineNumberReader lineReader = new LineNumberReader(reader)
    ) {
      String line;
      while ((line = lineReader.readLine()) != null) {
        matcher.reset(line);
        if (!matcher.find()) {
          // getLineNumber() already counts the line just read.
          String msg = "Line " + lineReader.getLineNumber() + " is bad: " + line;
          throw new IllegalStateException(msg);
        }
      }
    } catch (IOException ex) {
      // Returning normally here would make an unreadable file look like a
      // file that passed verification, so the failure is propagated.
      throw new IllegalStateException("Cannot read file: " + fileName, ex);
    }
  }

  /**
   * Test harness.
   *
   * @param arguments optional; the first element, if present, is the path of
   *     the file to check, otherwise a built-in default path is used
   */
  public static void main(String... arguments) {
    String fileName = arguments.length > 0 ? arguments[0] : DEFAULT_FILE;
    LineMatcher lineMatcher = new LineMatcher();
    lineMatcher.findBadLines(fileName);
    System.out.println("Done.");
  }
}