001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.geometry.io.euclidean.threed.txt;
018
019import java.io.Reader;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.List;
023
024import org.apache.commons.geometry.euclidean.threed.Vector3D;
025import org.apache.commons.geometry.io.core.internal.GeometryIOUtils;
026import org.apache.commons.geometry.io.core.internal.SimpleTextParser;
027import org.apache.commons.geometry.io.euclidean.threed.FacetDefinition;
028import org.apache.commons.geometry.io.euclidean.threed.FacetDefinitionReader;
029import org.apache.commons.geometry.io.euclidean.threed.SimpleFacetDefinition;
030
031/** Facet definition reader implementation that reads an extremely simple
032 * text format. The format simply consists of sequences of decimal numbers
033 * defining the vertices of each facet, with one facet defined per line.
034 * Facet vertices are defined by listing their {@code x}, {@code y}, and {@code z}
035 * components in that order. The format can be described as follows:
036 * <p>
037 * <code>
038 *      p1<sub>x</sub> p1<sub>y</sub> p1<sub>z</sub> p2<sub>x</sub> p2<sub>y</sub> p2<sub>z</sub> p3<sub>x</sub> p3<sub>y</sub> p3<sub>z</sub> ...
039 * </code>
040 * </p>
041 * <p>where the <em>p1</em> elements contain the coordinates of the first facet vertex,
042 * <em>p2</em> those of the second, and so on. At least 3 vertices are required for each
043 * facet but more can be specified as long as all {@code x, y, z} components are provided
044 * for each vertex. The facet normal is defined implicitly from the facet vertices using
045 * the right-hand rule (i.e. vertices are arranged counter-clockwise).</p>
046 *
047 * <p><strong>Delimiters</strong></p>
048 * <p>Vertex coordinate values may be separated by any character that is
049 * not a digit, alphabetic, '-' (minus), or '+' (plus). The character does
050 * not need to be consistent between (or even within) lines and does not
051 * need to be configured in the reader. This design provides configuration-free
052 * support for common formats such as CSV as well as other formats designed
053 * for human readability.</p>
054 *
055 * <p><strong>Comments</strong></p>
056 * <p>Comments are supported through use of the {@link #getCommentToken() comment token}
057 * property. Characters from the comment token through the end of the current line are
058 * discarded. Setting the comment token to null or the empty string disables comment parsing.
059 * The default comment token is {@value #DEFAULT_COMMENT_TOKEN}</p>
060 *
061 * <p><strong>Examples</strong></p>
062 * <p>The following examples demonstrate the definition of two facets,
063 * one with 3 vertices and one with 4 vertices, in different formats.</p>
064 * <p><em>CSV</em></p>
065 * <pre>
066 *  0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0
067 *  1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0
068 * </pre>
069 * <p><em>Whitespace and semicolons</em></p>
070 * <pre>
071 *  # line comment
072 *  0 0 0; 1 0 0; 1 1 0 # 3 vertices
073 *  1 0 0; 1 1 0; 1 1 1; 1 0 1 # 4 vertices
074 * </pre>
075 *
076 * @see TextFacetDefinitionWriter
077 */
078public class TextFacetDefinitionReader implements FacetDefinitionReader {
079
080    /** Default comment token string. */
081    public static final String DEFAULT_COMMENT_TOKEN = "#";
082
083    /** Reader for accessing the character stream. */
084    private final Reader reader;
085
086    /** Parser used to parse text content. */
087    private final SimpleTextParser parser;
088
089    /** Comment token string; may be null. */
090    private String commentToken;
091
092    /** True if the instance has a non-null, non-empty comment token. */
093    private boolean hasCommentToken;
094
095    /** First character of the comment token. */
096    private int commentStartChar;
097
098    /** Construct a new instance that reads characters from the argument and uses
099     * the default comment token value of {@value TextFacetDefinitionReader#DEFAULT_COMMENT_TOKEN}.
100     * @param reader reader to read characters from
101     */
102    public TextFacetDefinitionReader(final Reader reader) {
103        this(reader, DEFAULT_COMMENT_TOKEN);
104    }
105
106    /** Construct a new instance with the given reader and comment token.
107     * @param reader reader to read characters from
108     * @param commentToken comment token string; set to null to disable comment parsing
109     * @throws IllegalArgumentException if {@code commentToken} is non-null and contains whitespace
110     */
111    public TextFacetDefinitionReader(final Reader reader, final String commentToken) {
112        this.reader = reader;
113        this.parser = new SimpleTextParser(reader);
114
115        setCommentTokenInternal(commentToken);
116    }
117
118    /** Get the comment token string. If not null or empty, any characters from
119     * this token to the end of the current line are discarded during parsing.
120     * @return comment token string; may be null
121     */
122    public String getCommentToken() {
123        return commentToken;
124    }
125
126    /** Set the comment token string. If not null or empty, any characters from this
127     * token to the end of the current line are discarded during parsing. Set to null
128     * or the empty string to disable comment parsing. Comment tokens may not contain
129     * whitespace.
130     * @param commentToken token to set
131     * @throws IllegalArgumentException if the argument is non-null and contains whitespace
132     */
133    public void setCommentToken(final String commentToken) {
134        setCommentTokenInternal(commentToken);
135    }
136
137    /** {@inheritDoc} */
138    @Override
139    public FacetDefinition readFacet() {
140        discardNonDataLines();
141        if (parser.hasMoreCharacters()) {
142            try {
143                return readFacetInternal();
144            } finally {
145                // advance to the next line even if parsing failed for the
146                // current line
147                parser.discardLine();
148            }
149        }
150        return null;
151    }
152
153    /** {@inheritDoc} */
154    @Override
155    public void close() {
156        GeometryIOUtils.closeUnchecked(reader);
157    }
158
159    /** Internal method to read a facet definition starting from the current parser
160     * position. Empty lines (including lines containing only comments) are discarded.
161     * @return facet definition or null if the end of input is reached
162     * @throws IllegalStateException if a data format error occurs
163     * @throws java.io.UncheckedIOException if an I/O error occurs
164     */
165    private FacetDefinition readFacetInternal() {
166        final Vector3D p1 = readVector();
167        discardNonData();
168        final Vector3D p2 = readVector();
169        discardNonData();
170        final Vector3D p3 = readVector();
171
172        final List<Vector3D> vertices;
173
174        discardNonData();
175        if (parser.hasMoreCharactersOnLine()) {
176            vertices = new ArrayList<>();
177            vertices.add(p1);
178            vertices.add(p2);
179            vertices.add(p3);
180
181            do {
182                vertices.add(readVector());
183                discardNonData();
184            } while (parser.hasMoreCharactersOnLine());
185        } else {
186            vertices = Arrays.asList(p1, p2, p3);
187        }
188
189        return new SimpleFacetDefinition(vertices);
190    }
191
192    /** Read a vector starting from the current parser position.
193     * @return vector read from the parser
194     * @throws IllegalStateException if a data format error occurs
195     * @throws java.io.UncheckedIOException if an I/O error occurs
196     */
197    private Vector3D readVector() {
198        final double x = readDouble();
199        discardNonData();
200        final double y = readDouble();
201        discardNonData();
202        final double z = readDouble();
203
204        return Vector3D.of(x, y, z);
205    }
206
207    /** Read a double starting from the current parser position.
208     * @return double value read from the parser
209     * @throws IllegalStateException if a data format error occurs
210     * @throws java.io.UncheckedIOException if an I/O error occurs
211     */
212    private double readDouble() {
213        return parser
214                .next(TextFacetDefinitionReader::isDataTokenPart)
215                .getCurrentTokenAsDouble();
216    }
217
218    /** Discard lines that do not contain any data. This includes empty lines
219     * and lines that only contain comments.
220     * @throws IllegalStateException if a data format error occurs
221     * @throws java.io.UncheckedIOException if an I/O error occurs
222     */
223    private void discardNonDataLines() {
224        parser.discardLineWhitespace();
225        while (parser.hasMoreCharacters() &&
226                (!parser.hasMoreCharactersOnLine() ||
227                foundComment())) {
228
229            parser
230                .discardLine()
231                .discardLineWhitespace();
232        }
233    }
234
235    /** Discard a sequence of non-data characters on the current line starting
236     * from the current parser position.
237     * @throws IllegalStateException if a data format error occurs
238     * @throws java.io.UncheckedIOException if an I/O error occurs
239     */
240    private void discardNonData() {
241        parser.discard(c ->
242            !SimpleTextParser.isNewLinePart(c) &&
243            !isDataTokenPart(c) &&
244            c != commentStartChar);
245
246        if (foundComment()) {
247            // discard everything to the end of the line but do
248            // not read the new line sequence
249            parser.discard(SimpleTextParser::isNotNewLinePart);
250        }
251    }
252
253    /** Return true if the parser is positioned at the start of the comment token.
254     * @return true if the parser is positioned at the start of the comment token.
255     * @throws IllegalStateException if a data format error occurs
256     * @throws java.io.UncheckedIOException if an I/O error occurs
257     */
258    private boolean foundComment() {
259        return hasCommentToken &&
260                commentToken.equals(parser.peek(commentToken.length()));
261    }
262
263    /** Internal method called to set the comment token state.
264     * @param commentTokenStr comment token to set
265     * @throws IllegalArgumentException if the argument is non-null and contains whitespace
266     */
267    private void setCommentTokenInternal(final String commentTokenStr) {
268        if (commentTokenStr != null && containsWhitespace(commentTokenStr)) {
269            throw new IllegalArgumentException("Comment token cannot contain whitespace; was [" +
270                    commentTokenStr + "]");
271        }
272
273        this.commentToken = commentTokenStr;
274        this.hasCommentToken = commentTokenStr != null && commentTokenStr.length() > 0;
275        this.commentStartChar = this.hasCommentToken ?
276                commentTokenStr.charAt(0) :
277                -1;
278    }
279
280    /** Return true if the given character is considered as part of a data token
281     * for this reader.
282     * @param ch character to test
283     * @return true if {@code ch} is part of a data token
284     */
285    private static boolean isDataTokenPart(final int ch) {
286        // include all alphabetic characters in the data tokens, which will help
287        // to provide better error messages in case of failure (ie, tokens will
288        // be split more naturally)
289        return Character.isAlphabetic(ch) ||
290                SimpleTextParser.isDecimalPart(ch);
291    }
292
293    /** Return true if the given string contains any whitespace characters.
294     * @param str string to test
295     * @return true if {@code str} contains any whitespace characters
296     */
297    private static boolean containsWhitespace(final String str) {
298        for (final char ch : str.toCharArray()) {
299            if (Character.isWhitespace(ch)) {
300                return true;
301            }
302        }
303
304        return false;
305    }
306}