001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.geometry.io.core.internal;
018
019import java.io.Reader;
020import java.util.Arrays;
021import java.util.List;
022import java.util.function.IntConsumer;
023import java.util.function.IntPredicate;
024
025/** Class providing basic text parsing capabilities. The goals of this class are to
026 * (1) provide a simple, flexible API for performing common text parsing operations and
027 * (2) provide a mechanism for creating consistent and informative parsing errors.
028 * This class is not intended as a replacement for grammar-based parsers and/or lexers.
029 */
030public class SimpleTextParser {
031
    /** Constant indicating that the end of the input has been reached. */
    private static final int EOF = -1;

    /** Carriage return character. */
    private static final char CR = '\r';

    /** Line feed character. */
    private static final char LF = '\n';

    /** Default value for the max string length property. */
    private static final int DEFAULT_MAX_STRING_LENGTH = 1024;

    /** Error message used when a string exceeds the configured maximum length. */
    private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of ";

    /** Initial token position number; used as the starting value for both the token
     * line number and the token column number.
     */
    private static final int INITIAL_TOKEN_POS = -1;

    /** Int consumer that does nothing; passed to the consume methods when characters
     * are to be discarded.
     */
    private static final IntConsumer NOOP_CONSUMER = ch -> { };

    /** Current line number; line numbers start counting at 1. */
    private int lineNumber = 1;

    /** Current character column on the current line; column numbers start at 1. */
    private int columnNumber = 1;

    /** Maximum length for strings returned by this instance. */
    private int maxStringLength = DEFAULT_MAX_STRING_LENGTH;

    /** The current token. */
    private String currentToken;

    /** The line number that the current token started on. */
    private int currentTokenLineNumber = INITIAL_TOKEN_POS;

    /** The character number that the current token started on. */
    private int currentTokenColumnNumber = INITIAL_TOKEN_POS;

    /** Flag used to indicate that at least one token has been read from the stream. */
    private boolean hasSetToken;

    /** Character read buffer used to access the character stream. */
    private final CharReadBuffer buffer;
076
    /** Construct a new instance that reads characters from the given reader. The reader
     * is wrapped in a new {@code CharReadBuffer} and will not be closed.
     * @param reader reader instance to read characters from
     */
    public SimpleTextParser(final Reader reader) {
        this(new CharReadBuffer(reader));
    }
084
    /** Construct a new instance that reads characters from the given character buffer.
     * The buffer is stored and used for all subsequent read and peek operations.
     * @param buffer read buffer to read characters from
     */
    public SimpleTextParser(final CharReadBuffer buffer) {
        this.buffer = buffer;
    }
091
092    /** Get the current line number. Line numbers start at 1.
093     * @return the current line number
094     */
095    public int getLineNumber() {
096        return lineNumber;
097    }
098
    /** Set the current line number. Only the value reported by {@link #getLineNumber()}
     * is changed; the position in the character stream is not affected.
     * @param lineNumber line number to set; line numbers start at 1
     */
    public void setLineNumber(final int lineNumber) {
        this.lineNumber = lineNumber;
    }
106
107    /** Get the current column number. This indicates the column position of the
108     * character that will returned by the next call to {@link #readChar()}. The first
109     * character of each line has a column number of 1.
110     * @return the current column number; column numbers start at 1
111     */
112    public int getColumnNumber() {
113        return columnNumber;
114    }
115
116    /** Set the current column number. This does not affect the character stream position,
117     * only the value returned by {@link #getColumnNumber()}.
118     * @param column the column number to set; column numbers start at 1
119     */
120    public void setColumnNumber(final int column) {
121        this.columnNumber = column;
122    }
123
124    /** Get the maximum length for strings returned by this instance. Operations
125     * that produce strings longer than this length will throw an exception.
126     * @return maximum length for strings returned by this instance
127     */
128    public int getMaxStringLength() {
129        return maxStringLength;
130    }
131
132    /** Set the maximum length for strings returned by this instance. Operations
133     * that produce strings longer than this length will throw an exception.
134     * @param maxStringLength maximum length for strings returned by this instance
135     * @throws IllegalArgumentException if the argument is less than zero
136     */
137    public void setMaxStringLength(final int maxStringLength) {
138        if (maxStringLength < 0) {
139            throw new IllegalArgumentException("Maximum string length cannot be less than zero; was " +
140                    maxStringLength);
141        }
142        this.maxStringLength = maxStringLength;
143    }
144
145    /** Get the current token. This is the most recent string read by one of the {@code nextXXX()}
146     * methods. This value will be null if no token has yet been read or if the end of content has
147     * been reached.
148     * @return the current token
149     * @see #next(int)
150     * @see #next(IntPredicate)
151     * @see #nextLine()
152     * @see #nextAlphanumeric()
153     */
154    public String getCurrentToken() {
155        return currentToken;
156    }
157
158    /** Return true if the current token is not null or empty.
159     * @return true if the current token is not null or empty
160     * @see #getCurrentToken()
161     */
162    public boolean hasNonEmptyToken() {
163        return currentToken != null && !currentToken.isEmpty();
164    }
165
166    /** Get the line number that the current token started on. This value will
167     * be -1 if no token has been read yet.
168     * @return current token starting line number or -1 if no token has been
169     *      read yet
170     * @see #getCurrentToken()
171     */
172    public int getCurrentTokenLineNumber() {
173        return currentTokenLineNumber;
174    }
175
176    /** Get the column position that the current token started on. This value will
177     * be -1 if no token has been read yet.
178     * @return current token column number or -1 if no oken has been read yet
179     * @see #getCurrentToken()
180     */
181    public int getCurrentTokenColumnNumber() {
182        return currentTokenColumnNumber;
183    }
184
185    /** Get the current token parsed as an integer.
186     * @return the current token parsed as an integer
187     * @throws IllegalStateException if no token has been read or the
188     *      current token cannot be parsed as an integer
189     */
190    public int getCurrentTokenAsInt() {
191        ensureHasSetToken();
192
193        Throwable cause = null;
194
195        if (currentToken != null) {
196            try {
197                return Integer.parseInt(currentToken);
198            } catch (NumberFormatException exc) {
199                cause = exc;
200            }
201        }
202
203        throw unexpectedToken("integer", cause);
204    }
205
206    /** Get the current token parsed as a double.
207     * @return the current token parsed as a double
208     * @throws IllegalStateException if no token has been read or the
209     *      current token cannot be parsed as a double
210     */
211    public double getCurrentTokenAsDouble() {
212        ensureHasSetToken();
213
214        Throwable cause = null;
215
216        if (currentToken != null) {
217            try {
218                return Double.parseDouble(currentToken);
219            } catch (NumberFormatException exc) {
220                cause = exc;
221            }
222        }
223
224        throw unexpectedToken("double", cause);
225    }
226
227    /** Return true if there are more characters to read from this instance.
228     * @return true if there are more characters to read from this instance
229     * @throws java.io.UncheckedIOException if an I/O error occurs
230     */
231    public boolean hasMoreCharacters() {
232        return buffer.hasMoreCharacters();
233    }
234
235    /** Return true if there are more characters to read on the current line.
236     * @return true if there are more characters to read on the current line
237     * @throws java.io.UncheckedIOException if an I/O error occurs
238     */
239    public boolean hasMoreCharactersOnLine() {
240        return hasMoreCharacters() && isNotNewLinePart(peekChar());
241    }
242
243    /** Read and return the next character in the stream and advance the parser position.
244     * This method updates the current line number and column number but does <strong>not</strong>
245     * set the {@link #getCurrentToken() current token}.
246     * @return the next character in the stream or -1 if the end of the stream has been
247     *      reached
248     * @throws java.io.UncheckedIOException if an I/O error occurs
249     * @see #peekChar()
250     */
251    public int readChar() {
252        final int value = buffer.read();
253        if (value == LF ||
254                (value == CR && peekChar() != LF)) {
255            ++lineNumber;
256            columnNumber = 1;
257        } else if (value != EOF) {
258            ++columnNumber;
259        }
260
261        return value;
262    }
263
264    /** Read a string containing at most {@code len} characters from the stream and
265     * set it as the current token. Characters are added to the string until the string
266     * has the specified length or the end of the stream is reached. The characters are
267     * consumed from the stream. The token is set to null if no more characters are available
268     * from the character stream when this method is called.
269     * @param len the maximum length of the extracted string
270     * @return this instance
271     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
272     *      configured {@link #getMaxStringLength() maximum string length}
273     * @throws java.io.UncheckedIOException if an I/O error occurs
274     * @see #getCurrentToken()
275     * @see #consume(int, IntConsumer)
276     */
277    public SimpleTextParser next(final int len) {
278        validateRequestedStringLength(len);
279
280        final int line = getLineNumber();
281        final int col = getColumnNumber();
282
283        String token = null;
284        if (hasMoreCharacters()) {
285            final StringBuilder sb = new StringBuilder(len);
286
287            consume(len, ch -> sb.append((char) ch));
288
289            token = sb.toString();
290        }
291
292        setToken(line, col, token);
293
294        return this;
295    }
296
297    /** Read a string containing at most {@code len} characters from the stream and
298     * set it as the current token. This is similar to {@link #next(int)} but with the exception
299     * that new line sequences beginning with {@code lineContinuationChar} are skipped.
300     * @param lineContinuationChar character used to indicate skipped new line sequences
301     * @param len the maximum length of the extracted string
302     * @return this instance
303     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
304     *      configured {@link #getMaxStringLength() maximum string length}
305     * @throws java.io.UncheckedIOException if an I/O error occurs
306     * @see #getCurrentToken()
307     * @see #consumeWithLineContinuation(char, int, IntConsumer)
308     */
309    public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) {
310        validateRequestedStringLength(len);
311
312        final int line = getLineNumber();
313        final int col = getColumnNumber();
314
315        String token = null;
316        if (hasMoreCharacters()) {
317            final StringBuilder sb = new StringBuilder(len);
318
319            consumeWithLineContinuation(lineContinuationChar, len,
320                    ch -> sb.append((char) ch));
321
322            token = sb.toString();
323        }
324
325        setToken(line, col, token);
326
327        return this;
328    }
329
330    /** Read characters from the stream while the given predicate returns true and set the result
331     * as the current token. The next call to {@link #readChar()} will return either a character
332     * that fails the predicate test or -1 if the end of the stream has been reached.
333     * The token will be null if the end of the stream has been reached prior to the method call.
334     * @param pred predicate function passed characters read from the input; reading continues
335     *      until the predicate returns false
336     * @return this instance
337     * @throws IllegalStateException if the length of the produced string exceeds the configured
338     *      {@link #getMaxStringLength() maximum string length}
339     * @throws java.io.UncheckedIOException if an I/O error occurs
340     * @see #getCurrentToken()
341     * @see #consume(IntPredicate, IntConsumer)
342     */
343    public SimpleTextParser next(final IntPredicate pred) {
344        final int line = getLineNumber();
345        final int col = getColumnNumber();
346
347        String token = null;
348        if (hasMoreCharacters()) {
349            final StringCollector collector = new StringCollector(line, col, pred);
350
351            consume(collector, collector);
352
353            token = collector.getString();
354        }
355
356        setToken(line, col, token);
357
358        return this;
359    }
360
361    /** Read characters from the stream while the given predicate returns true and set the result
362     * as the current token. This is similar to {@link #next(IntPredicate)} but with the exception
363     * that new line sequences prefixed with {@code lineContinuationChar} are skipped.
364     * @param lineContinuationChar character used to indicate skipped new line sequences
365     * @param pred predicate function passed characters read from the input; reading continues
366     *      until the predicate returns false
367     * @return this instance
368     * @throws IllegalStateException if the length of the produced string exceeds the configured
369     *      {@link #getMaxStringLength() maximum string length}
370     * @throws java.io.UncheckedIOException if an I/O error occurs
371     * @see #getCurrentToken()
372     * @see #consume(IntPredicate, IntConsumer)
373     */
374    public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) {
375        final int line = getLineNumber();
376        final int col = getColumnNumber();
377
378        String token = null;
379        if (hasMoreCharacters()) {
380            final StringCollector collector = new StringCollector(line, col, pred);
381
382            consumeWithLineContinuation(lineContinuationChar, collector, collector);
383
384            token = collector.getString();
385        }
386
387        setToken(line, col, token);
388
389        return this;
390    }
391
392    /** Read characters from the current parser position to the next new line sequence and
393     * set the result as the current token . The newline character sequence
394     * ('\r', '\n', or '\r\n') at the end of the line is consumed but is not included in the token.
395     * The token will be null if the end of the stream has been reached prior to the method call.
396     * @return this instance
397     * @throws IllegalStateException if the length of the produced string exceeds the configured
398     *      {@link #getMaxStringLength() maximum string length}
399     * @throws java.io.UncheckedIOException if an I/O error occurs
400     * @see #getCurrentToken()
401     */
402    public SimpleTextParser nextLine() {
403        next(SimpleTextParser::isNotNewLinePart);
404
405        discardNewLineSequence();
406
407        return this;
408    }
409
410    /** Read a sequence of alphanumeric characters starting from the current parser position
411     * and set the result as the current token. The token will be the empty string if the next
412     * character in the stream is not alphanumeric and will be null if the end of the stream has
413     * been reached prior to the method call.
414     * @return this instance
415     * @throws IllegalStateException if the length of the produced string exceeds the configured
416     *      {@link #getMaxStringLength() maximum string length}
417     * @throws java.io.UncheckedIOException if an I/O error occurs
418     * @see #getCurrentToken()
419     */
420    public SimpleTextParser nextAlphanumeric() {
421        return next(SimpleTextParser::isAlphanumeric);
422    }
423
424    /** Discard {@code len} number of characters from the character stream. The
425     * parser position is updated but the current token is not changed.
426     * @param len number of characters to discard
427     * @return this instance
428     * @throws java.io.UncheckedIOException if an I/O error occurs
429     */
430    public SimpleTextParser discard(final int len) {
431        return consume(len, NOOP_CONSUMER);
432    }
433
434    /** Discard {@code len} number of characters from the character stream. The
435     * parser position is updated but the current token is not changed. Lines beginning
436     * with {@code lineContinuationChar} are skipped.
437     * @param lineContinuationChar character used to indicate skipped new line sequences
438     * @param len number of characters to discard
439     * @return this instance
440     * @throws java.io.UncheckedIOException if an I/O error occurs
441     */
442    public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
443            final int len) {
444        return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER);
445    }
446
447    /** Discard characters from the stream while the given predicate returns true. The next call
448     * to {@link #readChar()} will return either a character that fails the predicate test or -1
449     * if the end of the stream has been reached. The parser position is updated but the current
450     * token is not changed.
451     * @param pred predicate test for characters to discard
452     * @return this instance
453     * @throws java.io.UncheckedIOException if an I/O error occurs
454     */
455    public SimpleTextParser discard(final IntPredicate pred) {
456        return consume(pred, NOOP_CONSUMER);
457    }
458
459    /** Discard characters from the stream while the given predicate returns true. New line sequences
460     * beginning with {@code lineContinuationChar} are skipped. The next call o {@link #readChar()}
461     * will return either a character that fails the predicate test or -1 if the end of the stream
462     * has been reached. The parser position is updated but the current token is not changed.
463     * @param lineContinuationChar character used to indicate skipped new line sequences
464     * @param pred predicate test for characters to discard
465     * @return this instance
466     * @throws java.io.UncheckedIOException if an I/O error occurs
467     */
468    public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
469            final IntPredicate pred) {
470        return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER);
471    }
472
473    /** Discard a sequence of whitespace characters from the character stream starting from the
474     * current parser position. The next call to {@link #readChar()} will return either a non-whitespace
475     * character or -1 if the end of the stream has been reached. The parser position is updated
476     * but the current token is not changed.
477     * @return this instance
478     * @throws java.io.UncheckedIOException if an I/O error occurs
479     */
480    public SimpleTextParser discardWhitespace() {
481        return discard(SimpleTextParser::isWhitespace);
482    }
483
484    /** Discard the next whitespace characters on the current line. The next call to
485     * {@link #readChar()} will return either a non-whitespace character on the current line,
486     * the newline character sequence (indicating the end of the line), or -1 (indicating the
487     * end of the stream). The parser position is updated but the current token is not changed.
488     * @return this instance
489     * @throws java.io.UncheckedIOException if an I/O error occurs
490     */
491    public SimpleTextParser discardLineWhitespace() {
492        return discard(SimpleTextParser::isLineWhitespace);
493    }
494
495    /** Discard the newline character sequence at the current reader position. The sequence
496     * is defined as one of "\r", "\n", or "\r\n". Does nothing if the reader is not positioned
497     * at a newline sequence. The parser position is updated but the current token is not changed.
498     * @return this instance
499     * @throws java.io.UncheckedIOException if an I/O error occurs
500     */
501    public SimpleTextParser discardNewLineSequence() {
502        final int value = peekChar();
503        if (value == LF) {
504            readChar();
505        } else if (value == CR) {
506            readChar();
507
508            if (peekChar() == LF) {
509                readChar();
510            }
511        }
512
513        return this;
514    }
515
516    /** Discard all remaining characters on the current line, including the terminating
517     * newline character sequence. The next call to {@link #readChar()} will return either the
518     * first character on the next line or -1 if the end of the stream has been reached.
519     * The parser position is updated but the current token is not changed.
520     * @return this instance
521     * @throws java.io.UncheckedIOException if an I/O error occurs
522     */
523    public SimpleTextParser discardLine() {
524        discard(SimpleTextParser::isNotNewLinePart);
525
526        discardNewLineSequence();
527
528        return this;
529    }
530
531    /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
532     * returns true. The operation ends when the predicate returns false or the end of the stream is
533     * reached.
534     * @param pred predicate test for characters to consume
535     * @param consumer object to be passed each consumed character
536     * @return this instance
537     * @throws java.io.UncheckedIOException if an I/O error occurs
538     */
539    public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) {
540        int ch;
541        while ((ch = peekChar()) != EOF && pred.test(ch)) {
542            consumer.accept(readChar());
543        }
544
545        return this;
546    }
547
548    /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
549     * This method is similar to {@link #consume(int, IntConsumer)} with the exception that new line
550     * sequences prefixed with {@code lineContinuationChar} are skipped.
551     * @param lineContinuationChar character used to indicate skipped new line sequences
552     * @param len number of characters to consume
553     * @param consumer function to be passed each consumed character
554     * @return this instance
555     * @throws java.io.UncheckedIOException if an I/O error occurs
556     */
557    public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
558            final int len, final IntConsumer consumer) {
559        int i = -1;
560        int ch;
561        while (++i < len && (ch = readChar()) != EOF) {
562            if (ch == lineContinuationChar && isNewLinePart(peekChar())) {
563                --i; // don't count the continuation char toward the total length
564                discardNewLineSequence();
565            } else {
566                consumer.accept(ch);
567            }
568        }
569
570        return this;
571    }
572
573    /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
574     * The operation continues until {@code len} number of characters have been read or the end of
575     * the stream has been reached.
576     * @param len number of characters to consume
577     * @param consumer object to be passed each consumed character
578     * @return this instance
579     * @throws java.io.UncheckedIOException if an I/O error occurs
580     */
581    public SimpleTextParser consume(final int len, final IntConsumer consumer) {
582        int ch;
583        for (int i = 0; i < len; ++i) {
584            ch = readChar();
585            if (ch != EOF) {
586                consumer.accept(ch);
587            } else {
588                break;
589            }
590        }
591
592        return this;
593    }
594
595    /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
596     * returns true. This method is similar to {@link #consume(IntPredicate, IntConsumer)} with the
597     * exception that new lines sequences beginning with {@code lineContinuationChar} are skipped.
598     * @param lineContinuationChar character used to indicate skipped new line sequences
599     * @param pred predicate test for characters to consume
600     * @param consumer object to be passed each consumed character
601     * @return this instance
602     * @throws java.io.UncheckedIOException if an I/O error occurs
603     */
604    public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
605            final IntPredicate pred, final IntConsumer consumer) {
606        int ch;
607        while ((ch = peekChar()) != EOF) {
608            if (ch == lineContinuationChar && isNewLinePart(buffer.charAt(1))) {
609                readChar();
610                discardNewLineSequence();
611            } else if (pred.test(ch)) {
612                consumer.accept(readChar());
613            } else {
614                break;
615            }
616        }
617
618        return this;
619    }
620
621    /** Return the next character in the stream but do not advance the parser position.
622     * @return the next character in the stream or -1 if the end of the stream has been
623     *      reached
624     * @throws java.io.UncheckedIOException if an I/O error occurs
625     * @see #readChar()
626     */
627    public int peekChar() {
628        return buffer.peek();
629    }
630
631    /** Return a string containing containing at most {@code len} characters from the stream but
632     * without changing the parser position. Characters are added to the string until the
633     * string has the specified length or the end of the stream is reached.
634     * @param len the maximum length of the returned string
635     * @return a string containing containing at most {@code len} characters from the stream
636     *      or null if the parser has already reached the end of the stream
637     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
638     *      configured {@link #getMaxStringLength() maximum string length}
639     * @throws java.io.UncheckedIOException if an I/O error occurs
640     * @see #next(int)
641     */
642    public String peek(final int len) {
643        validateRequestedStringLength(len);
644
645        return buffer.peekString(len);
646    }
647
648    /** Read characters from the stream while the given predicate returns true but do not
649     * change the current token or advance the parser position.
650     * @param pred predicate function passed characters read from the input; reading continues
651     *      until the predicate returns false
652     * @return string containing characters matching {@code pred} or null if the parser has already
653     *      reached the end of the stream
654     * @throws IllegalStateException if the length of the produced string exceeds the configured
655     *      {@link #getMaxStringLength() maximum string length}
656     * @throws java.io.UncheckedIOException if an I/O error occurs
657     * @see #getCurrentToken()
658     */
659    public String peek(final IntPredicate pred) {
660        String token = null;
661
662        if (hasMoreCharacters()) {
663            final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred);
664
665            int i = -1;
666            int ch = buffer.charAt(++i);
667            while (ch != EOF && collector.test(ch)) {
668                collector.accept(ch);
669
670                ch = buffer.charAt(++i);
671            }
672
673            token = collector.getString();
674        }
675
676        return token;
677    }
678
679    /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
680     * exception if they are not equal. The comparison is case-sensitive.
681     * @param expected expected token
682     * @return this instance
683     * @throws IllegalStateException if no token has been read or {@code expected} does not exactly
684     *      equal the current token
685     */
686    public SimpleTextParser match(final String expected) {
687        matchInternal(expected, true, true);
688        return this;
689    }
690
691    /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
692     * exception if they are not equal. The comparison is <em>not</em> case-sensitive.
693     * @param expected expected token
694     * @return this instance
695     * @throws IllegalStateException if no token has been read or {@code expected} does not equal
696     *      the current token (ignoring case)
697     */
698    public SimpleTextParser matchIgnoreCase(final String expected) {
699        matchInternal(expected, false, true);
700        return this;
701    }
702
703    /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
704     * The comparison is case-sensitive.
705     * @param expected expected token
706     * @return true if the argument exactly equals the current token
707     * @throws IllegalStateException if no token has been read
708     * @throws java.io.UncheckedIOException if an I/O error occurs
709     */
710    public boolean tryMatch(final String expected) {
711        return matchInternal(expected, true, false);
712    }
713
714    /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
715     * The comparison is <em>not</em> case-sensitive.
716     * @param expected expected token
717     * @return true if the argument equals the current token (ignoring case)
718     * @throws IllegalStateException if no token has been read
719     */
720    public boolean tryMatchIgnoreCase(final String expected) {
721        return matchInternal(expected, false, false);
722    }
723
724    /** Internal method to compare the current token with the argument.
725     * @param expected expected token
726     * @param caseSensitive if the comparison should be case-sensitive
727     * @param throwOnFailure if an exception should be thrown if the argument is not
728     *      equal to the current token
729     * @return true if the argument is equal to the current token
730     * @throws IllegalStateException if no token has been read or {@code expected} does not match the
731     *      current token and {@code throwOnFailure} is true
732     */
733    private boolean matchInternal(final String expected, final boolean caseSensitive,
734            final boolean throwOnFailure) {
735        ensureHasSetToken();
736
737        if (!stringsEqual(expected, currentToken, caseSensitive)) {
738            if (throwOnFailure) {
739                throw unexpectedToken("[" + expected + "]");
740            }
741
742            return false;
743        }
744
745        return true;
746    }
747
748    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
749     * An exception is thrown if no match is found. String comparisons are case-sensitive.
750     * @param expected strings to compare with the current token
751     * @return index of the argument that exactly matches the current token
752     * @throws IllegalStateException if no token has been read or no match is found among the arguments
753     */
754    public int choose(final String... expected) {
755        return choose(Arrays.asList(expected));
756    }
757
758    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
759     * An exception is thrown if no match is found. String comparisons are case-sensitive.
760     * @param expected strings to compare with the current token
761     * @return index of the argument that exactly matches the current token
762     * @throws IllegalStateException if no token has been read or no match is found among the arguments
763     */
764    public int choose(final List<String> expected) {
765        return chooseInternal(expected, true, true);
766    }
767
768    /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
769     * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
770     * case-sensitive.
771     * @param expected strings to compare with the current token
772     * @return index of the argument that matches the current token (ignoring case)
773     * @throws IllegalStateException if no token has been read or no match is found among the arguments
774     */
775    public int chooseIgnoreCase(final String... expected) {
776        return chooseIgnoreCase(Arrays.asList(expected));
777    }
778
779    /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
780     * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
781     * case-sensitive.
782     * @param expected strings to compare with the current token
783     * @return index of the argument that matches the current token (ignoring case)
784     * @throws IllegalStateException if no token has been read or no match is found among the arguments
785     */
786    public int chooseIgnoreCase(final List<String> expected) {
787        return chooseInternal(expected, false, true);
788    }
789
790    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
791     * or -1 if no match is found. String comparisons are case-sensitive.
792     * @param expected strings to compare with the current token
793     * @return index of the argument that exactly matches the current token or -1 if
794     *      no match is found
795     * @throws IllegalStateException if no token has been read
796     */
797    public int tryChoose(final String... expected) {
798        return tryChoose(Arrays.asList(expected));
799    }
800
801    /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
802     * or -1 if no match is found. String comparisons are case-sensitive.
803     * @param expected strings to compare with the current token
804     * @return index of the argument that exactly matches the current token or -1 if
805     *      no match is found
806     * @throws IllegalStateException if no token has been read
807     */
808    public int tryChoose(final List<String> expected) {
809        return chooseInternal(expected, true, false);
810    }
811
812    /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
813     * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
814     * @param expected strings to compare with the current token
815     * @return index of the argument that matches the current token (ignoring case) or -1 if
816     *      no match is found
817     * @throws IllegalStateException if no token has been read
818     */
819    public int tryChooseIgnoreCase(final String... expected) {
820        return tryChooseIgnoreCase(Arrays.asList(expected));
821    }
822
823    /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
824     * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
825     * @param expected strings to compare with the current token
826     * @return index of the argument that matches the current token (ignoring case) or -1 if
827     *      no match is found
828     * @throws IllegalStateException if no token has been read
829     */
830    public int tryChooseIgnoreCase(final List<String> expected) {
831        return chooseInternal(expected, false, false);
832    }
833
834    /** Internal method to compare the current token with a list of possible strings. The index of
835     * the matching argument is returned.
836     * @param expected strings to compare with the current token
837     * @param caseSensitive if the comparisons should be case-sensitive
838     * @param throwOnFailure if an exception should be thrown if no match is found
839     * @return the index of the matching argument or -1 if no match is found
840     * @throws IllegalStateException if no token has been read or no match is found and
841     *      {@code throwOnFailure} is true
842     */
843    private int chooseInternal(final List<String> expected, final boolean caseSensitive,
844            final boolean throwOnFailure) {
845        ensureHasSetToken();
846
847        int i = 0;
848        for (final String str : expected) {
849            if (stringsEqual(str, currentToken, caseSensitive)) {
850                return i;
851            }
852
853            ++i;
854        }
855
856        if (throwOnFailure) {
857            throw unexpectedToken("one of " + expected);
858        }
859
860        return -1;
861    }
862
    /** Get an exception indicating that the current token was unexpected. The returned
     * exception contains a message with the line number and column of the current token and
     * a description of its value. If no token has been set, the current parser position is
     * reported instead. The exception is returned, not thrown, and is created without a cause;
     * this is equivalent to calling {@link #unexpectedToken(String, Throwable)} with a
     * {@code null} cause.
     * @param expected string describing what was expected
     * @return exception indicating that the current token was unexpected
     */
    public IllegalStateException unexpectedToken(final String expected) {
        return unexpectedToken(expected, null);
    }
872
873    /** Get an exception indicating that the current token was unexpected. The returned
874     * exception contains a message with the line number and column of the current token and
875     * a description of its value.
876     * @param expected string describing what was expected
877     * @param cause cause of the error
878     * @return exception indicating that the current token was unexpected
879     */
880    public IllegalStateException unexpectedToken(final String expected, final Throwable cause) {
881
882        StringBuilder msg = new StringBuilder();
883        msg.append("expected ")
884            .append(expected)
885            .append(" but found ")
886            .append(getCurrentTokenDescription());
887
888        final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
889        final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;
890
891        return parseError(line, col, msg.toString(), cause);
892    }
893
    /** Get an exception indicating an error during parsing at the current token position.
     * If no token has been set, the current parser position is reported instead. The exception
     * is returned, not thrown, and is created without a cause; this is equivalent to calling
     * {@link #tokenError(String, Throwable)} with a {@code null} cause.
     * @param msg error message
     * @return an exception indicating an error during parsing at the current token position
     */
    public IllegalStateException tokenError(final String msg) {
        return tokenError(msg, null);
    }
901
902    /** Get an exception indicating an error during parsing at the current token position.
903     * @param msg error message
904     * @param cause the cause of the error; may be null
905     * @return an exception indicating an error during parsing at the current token position
906     */
907    public IllegalStateException tokenError(final String msg, final Throwable cause) {
908        final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
909        final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;
910
911        return parseError(line, col, msg, cause);
912    }
913
    /** Return an exception indicating an error occurring at the current parser position.
     * The exception is returned, not thrown, and is created without a cause; this is
     * equivalent to calling {@link #parseError(String, Throwable)} with a {@code null} cause.
     * @param msg error message
     * @return an exception indicating an error during parsing
     */
    public IllegalStateException parseError(final String msg) {
        return parseError(msg, null);
    }
921
922    /** Return an exception indicating an error occurring at the current parser position.
923     * @param msg error message
924     * @param cause the cause of the error; may be null
925     * @return an exception indicating an error during parsing
926     */
927    public IllegalStateException parseError(final String msg, final Throwable cause) {
928        return parseError(lineNumber, columnNumber, msg, cause);
929    }
930
    /** Return an exception indicating an error during parsing. The given position is included
     * in the exception message. The exception is returned, not thrown, and is created without
     * a cause; this is equivalent to calling {@link #parseError(int, int, String, Throwable)}
     * with a {@code null} cause.
     * @param line line number of the error
     * @param col column number of the error
     * @param msg error message
     * @return an exception indicating an error during parsing
     */
    public IllegalStateException parseError(final int line, final int col, final String msg) {
        return parseError(line, col, msg, null);
    }
940
941    /** Return an exception indicating an error during parsing.
942     * @param line line number of the error
943     * @param col column number of the error
944     * @param msg error message
945     * @param cause the cause of the error
946     * @return an exception indicating an error during parsing
947     */
948    public IllegalStateException parseError(final int line, final int col, final String msg,
949            final Throwable cause) {
950        final String fullMsg = String.format("Parsing failed at line %d, column %d: %s",
951                line, col, msg);
952        return GeometryIOUtils.parseError(fullMsg, cause);
953    }
954
955    /** Set the current token string and position.
956     * @param line line number for the start of the token
957     * @param col column number for the start of the token
958     * @param token token to set
959     */
960    private void setToken(final int line, final int col, final String token) {
961        currentTokenLineNumber = line;
962        currentTokenColumnNumber = col;
963        currentToken = token;
964
965        hasSetToken = true;
966    }
967
968    /** Get a user-friendly description of the current token.
969     * @return a user-friendly description of the current token.
970     */
971    private String getCurrentTokenDescription() {
972        if (currentToken == null || currentToken.isEmpty()) {
973            // attempt to return a more helpful message about the location
974            // of empty tokens by checking the buffer content; if this fails
975            // we'll ignore the error and continue with a more generic message
976            try {
977                if (!hasMoreCharacters()) {
978                    return "end of content";
979                } else if (currentToken != null) {
980                    if (!hasMoreCharactersOnLine()) {
981                        return "end of line";
982                    }
983                    return "empty token followed by [" + peek(1) + "]";
984                }
985            } catch (IllegalStateException exc) {
986                // ignore
987            }
988        }
989
990        if (currentToken == null) {
991            return "no current token";
992        } else if (currentToken.isEmpty()) {
993            return "empty token";
994        }
995
996        return "[" + currentToken + "]";
997    }
998
999    /** Validate the requested string length.
1000     * @param len requested string length
1001     * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength}
1002     */
1003    private void validateRequestedStringLength(final int len) {
1004        if (len < 0) {
1005            throw new IllegalArgumentException("Requested string length cannot be negative; was " + len);
1006        } else if (len > maxStringLength) {
1007            throw new IllegalArgumentException("Requested string length of " + len + " exceeds maximum value of " +
1008                    maxStringLength);
1009        }
1010    }
1011
1012    /** Ensure that a token read operation has been performed, throwing an exception if not.
1013     * @throws IllegalStateException if no token read operation has been performed
1014     */
1015    private void ensureHasSetToken() {
1016        if (!hasSetToken) {
1017            throw new IllegalStateException("No token has been read from the character stream");
1018        }
1019    }
1020
    /** Return true if the given character (Unicode code point) is whitespace. This method
     * delegates directly to {@link Character#isWhitespace(int)} and therefore uses that
     * method's definition of whitespace.
     * @param ch character (Unicode code point) to test
     * @return true if the given character is whitespace
     * @see Character#isWhitespace(int)
     */
    public static boolean isWhitespace(final int ch) {
        return Character.isWhitespace(ch);
    }
1029
    /** Return true if the given character (Unicode code point) is not whitespace. This is
     * the logical negation of {@link #isWhitespace(int)}.
     * @param ch character (Unicode code point) to test
     * @return true if the given character is not whitespace
     * @see #isWhitespace(int)
     */
    public static boolean isNotWhitespace(final int ch) {
        return !isWhitespace(ch);
    }
1038
1039    /** Return true if the given character (Unicode code point) is whitespace
1040     * that is not used in newline sequences (ie, not '\r' or '\n').
1041     * @param ch character (Unicode code point) to test
1042     * @return true if the given character is a whitespace character not used in newline
1043     *      sequences
1044     */
1045    public static boolean isLineWhitespace(final int ch) {
1046        return isWhitespace(ch) && isNotNewLinePart(ch);
1047    }
1048
1049    /** Return true if the given character (Unicode code point) is used
1050     * as part of newline sequences (ie, is either '\r' or '\n').
1051     * @param ch character (Unicode code point) to test
1052     * @return true if the given character is used as part of newline sequences
1053     */
1054    public static boolean isNewLinePart(final int ch) {
1055        return ch == CR || ch == LF;
1056    }
1057
    /** Return true if the given character (Unicode code point) is not used as
     * part of newline sequences (ie, not '\r' or '\n'). This is the logical negation
     * of {@link #isNewLinePart(int)}.
     * @param ch character (Unicode code point) to test
     * @return true if the given character is not used as part of newline sequences
     * @see #isNewLinePart(int)
     */
    public static boolean isNotNewLinePart(final int ch) {
        return !isNewLinePart(ch);
    }
1067
1068    /** Return true if the given character (Unicode code point) is alphanumeric.
1069     * @param ch character (Unicode code point) to test
1070     * @return true if the argument is alphanumeric
1071     * @see Character#isAlphabetic(int)
1072     * @see Character#isDigit(int)
1073     */
1074    public static boolean isAlphanumeric(final int ch) {
1075        return Character.isAlphabetic(ch) ||
1076                Character.isDigit(ch);
1077    }
1078
    /** Return true if the given character (Unicode code point) is not alphanumeric. This is
     * the logical negation of {@link #isAlphanumeric(int)}.
     * @param ch character (Unicode code point) to test
     * @return true if the argument is not alphanumeric
     * @see #isAlphanumeric(int)
     */
    public static boolean isNotAlphanumeric(final int ch) {
        return !isAlphanumeric(ch);
    }
1087
1088    /** Return true if the given character (Unicode code point) can be used as part of
1089     * the string representation of an integer. This will be true for the following types
1090     * of characters:
1091     * <ul>
1092     *  <li>{@link Character#isDigit(int) digits}</li>
1093     *  <li>the '-' (minus) character</li>
1094     *  <li>the '+' (plus) character</li>
1095     * </ul>
1096     * @param ch character (Unicode code point) to test
1097     * @return true if the given character can be used as part of an integer string
1098     */
1099    public static boolean isIntegerPart(final int ch) {
1100        return Character.isDigit(ch) ||
1101                ch == '-' ||
1102                ch == '+';
1103    }
1104
1105    /** Return true if the given character (Unicode code point) can be used as part of
1106     * the string representation of a decimal number. This will be true for the following types
1107     * of characters:
1108     * <ul>
1109     *  <li>{@link Character#isDigit(int) digits}</li>
1110     *  <li>the '-' (minus) character</li>
1111     *  <li>the '+' (plus) character</li>
1112     *  <li>the '.' (period) character</li>
1113     *  <li>the 'e' character</li>
1114     *  <li>the 'E' character</li>
1115     * </ul>
1116     * @param ch character (Unicode code point) to test
1117     * @return true if the given character can be used as part of a decimal number string
1118     */
1119    public static boolean isDecimalPart(final int ch) {
1120        return Character.isDigit(ch) ||
1121            ch == '-' ||
1122            ch == '+' ||
1123            ch == '.' ||
1124            ch == 'e' ||
1125            ch == 'E';
1126    }
1127
1128    /** Test two strings for equality. One or both arguments may be null.
1129     * @param a first string
1130     * @param b second string
1131     * @param caseSensitive comparison is case-sensitive if set to true
1132     * @return true if the string arguments are considered equal
1133     */
1134    private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) {
1135        if (a == null) {
1136            return b == null;
1137        }
1138
1139        return caseSensitive ?
1140                a.equals(b) :
1141                a.equalsIgnoreCase(b);
1142    }
1143
1144    /** Internal class used to collect strings from the character stream while ensuring that the
1145     * collected strings do not exceed the maximum configured string length.
1146     */
1147    private final class StringCollector implements IntPredicate, IntConsumer {
1148
1149        /** String builder instance. */
1150        private final StringBuilder sb = new StringBuilder();
1151
1152        /** Start position line. */
1153        private final int line;
1154
1155        /** Start position column. */
1156        private final int col;
1157
1158        /** Character predicate. */
1159        private final IntPredicate pred;
1160
1161        /** Construct a new instance with the given start position and character predicate.
1162         * @param line start position line
1163         * @param col start position col
1164         * @param pred character predicate
1165         */
1166        StringCollector(final int line, final int col, final IntPredicate pred) {
1167            this.line = line;
1168            this.col = col;
1169            this.pred = pred;
1170        }
1171
1172        /** {@inheritDoc} */
1173        @Override
1174        public boolean test(final int value) {
1175            return pred.test(value) && !hasExceededMaxStringLength();
1176        }
1177
1178        /** {@inheritDoc} */
1179        @Override
1180        public void accept(final int value) {
1181            sb.append((char) value);
1182        }
1183
1184        /** Get the string collected by this instance.
1185         * @return the string collected by this instance
1186         * @throws IllegalStateException if the string exceeds the maximum configured length
1187         */
1188        public String getString() {
1189            if (hasExceededMaxStringLength()) {
1190                throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength);
1191            }
1192
1193            return sb.toString();
1194        }
1195
1196        /** Return true if this collector has exceeded the maximum configured string length.
1197         * @return true if this collector has exceeded the maximum string length
1198         */
1199        private boolean hasExceededMaxStringLength() {
1200            return sb.length() > maxStringLength;
1201        }
1202    }
1203}