001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.commons.rdf.rdf4j.experimental;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.MalformedURLException;
023import java.net.URL;
024import java.nio.file.Files;
025import java.nio.file.Path;
026import java.util.Optional;
027import java.util.function.Consumer;
028import java.util.stream.Stream;
029
030import org.apache.commons.rdf.api.IRI;
031import org.apache.commons.rdf.api.Quad;
032import org.apache.commons.rdf.api.RDFSyntax;
033import org.apache.commons.rdf.rdf4j.RDF4J;
034import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI;
035import org.apache.commons.rdf.rdf4j.RDF4JDataset;
036import org.apache.commons.rdf.rdf4j.RDF4JGraph;
037import org.apache.commons.rdf.simple.experimental.AbstractRDFParser;
038import org.eclipse.rdf4j.model.Model;
039import org.eclipse.rdf4j.model.Resource;
040import org.eclipse.rdf4j.repository.util.RDFInserter;
041import org.eclipse.rdf4j.repository.util.RDFLoader;
042import org.eclipse.rdf4j.rio.ParserConfig;
043import org.eclipse.rdf4j.rio.RDFFormat;
044import org.eclipse.rdf4j.rio.RDFHandler;
045import org.eclipse.rdf4j.rio.RDFHandlerException;
046import org.eclipse.rdf4j.rio.Rio;
047import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
048
049/**
050 * RDF4J-based parser.
051 * <p>
052 * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
053 * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
054 * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE}
055 * - additional syntaxes can be supported by including the corresponding
056 * <em>rdf4j-rio-*</em> module on the classpath.
057 *
058 */
059public class RDF4JParser extends AbstractRDFParser<RDF4JParser> {
060
061    private final class AddToQuadConsumer extends AbstractRDFHandler {
062        private final Consumer<Quad> quadTarget;
063
064        private AddToQuadConsumer(final Consumer<Quad> quadTarget) {
065            this.quadTarget = quadTarget;
066        }
067
068        @Override
069        public void handleStatement(final org.eclipse.rdf4j.model.Statement st)
070                throws org.eclipse.rdf4j.rio.RDFHandlerException {
071            // TODO: if getRdfTermFactory() is a non-rdf4j factory, should
072            // we use factory.createQuad() instead?
073            // Unsure what is the promise of setting getRdfTermFactory() --
074            // does it go all the way down to creating BlankNode, IRI and
075            // Literal?
076            quadTarget.accept(rdf4jTermFactory.asQuad(st));
077            // Performance note:
078            // Graph/Quad.add should pick up again our
079            // RDF4JGraphLike.asStatement()
080            // and avoid double conversion.
081            // Additionally the RDF4JQuad and RDF4JTriple implementations
082            // are lazily converting subj/obj/pred/graph.s
083        }
084    }
085
086    private final static class AddToModel extends AbstractRDFHandler {
087        private final Model model;
088
089        public AddToModel(final Model model) {
090            this.model = model;
091        }
092
093        @Override
094        public void handleStatement(final org.eclipse.rdf4j.model.Statement st)
095                throws org.eclipse.rdf4j.rio.RDFHandlerException {
096            model.add(st);
097        }
098
099        @Override
100        public void handleNamespace(final String prefix, final String uri) throws RDFHandlerException {
101            model.setNamespace(prefix, uri);
102        }
103    }
104
105    private RDF4J rdf4jTermFactory;
106    private ParserConfig parserConfig = new ParserConfig();
107
108    @Override
109    protected RDF4J createRDFTermFactory() {
110        return new RDF4J();
111    }
112
113    @Override
114    protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException {
115        final RDF4JParser c = super.prepareForParsing();
116        // Ensure we have an RDF4J for conversion.
117        // We'll make a new one if user has provided a non-RDF4J factory
118        c.rdf4jTermFactory = (RDF4J) getRdfTermFactory().filter(RDF4J.class::isInstance)
119                .orElseGet(c::createRDFTermFactory);
120        return c;
121    }
122
123    @Override
124    protected void parseSynchronusly() throws IOException {
125        final Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType);
126        final String base = getBase().map(IRI::getIRIString).orElse(null);
127
128        final ParserConfig parserConfig = getParserConfig();
129        // TODO: Should we need to set anything?
130        final RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory());
131        final RDFHandler rdfHandler = makeRDFHandler();
132        if (getSourceFile().isPresent()) {
133            // NOTE: While we could have used
134            // loader.load(sourcePath.toFile()
135            // if the path fs provider == FileSystems.getDefault(),
136            // that RDFLoader method does not use absolute path
137            // as the base URI, so to be consistent
138            // we'll always do it with our own input stream
139            //
140            // That means we may have to guess format by extensions:
141            final Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
142                    .flatMap(Rio::getParserFormatForFileName);
143            // TODO: for the excited.. what about the extension after following
144            // symlinks?
145
146            final RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null));
147            try (InputStream in = Files.newInputStream(getSourceFile().get())) {
148                loader.load(in, base, format, rdfHandler);
149            }
150        } else if (getSourceIri().isPresent()) {
151            try {
152                // TODO: Handle international IRIs properly
153                // (Unicode support for for hostname, path and query)
154                final URL url = new URL(getSourceIri().get().getIRIString());
155                // TODO: This probably does not support https:// -> http://
156                // redirections
157                loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler());
158            } catch (final MalformedURLException ex) {
159                throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex);
160            }
161        }
162        // must be getSourceInputStream then, this is guaranteed by
163        // super.checkSource();
164        loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler);
165    }
166
167    /**
168     * Get the RDF4J {@link ParserConfig} to use.
169     * <p>
170     * If no parser config is set, the default configuration is provided.
171     * <p>
172     * <strong>Note:</strong> The parser config is mutable - changes in the
173     * returned config is reflected in this instance of the parser. To avoid
174     * mutation, create a new {@link ParserConfig} and set
175     * {@link #setParserConfig(ParserConfig)}.
176     *
177     * @return The RDF4J {@link ParserConfig}
178     */
179    public ParserConfig getParserConfig() {
180        return parserConfig;
181    }
182
183    /**
184     * Set an RDF4J {@link ParserConfig} to use
185     *
186     * @param parserConfig
187     *            Parser configuration
188     */
189    public void setParserConfig(final ParserConfig parserConfig) {
190        this.parserConfig = parserConfig;
191    }
192
193    protected RDFHandler makeRDFHandler() {
194
195        // TODO: Can we join the below DF4JDataset and RDF4JGraph cases
196        // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>>
197        // or will that need tricky generics types?
198
199        if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) {
200            // One of us, we can add them as Statements directly
201            final RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get();
202            if (dataset.asRepository().isPresent()) {
203                return new RDFInserter(dataset.asRepository().get().getConnection());
204            }
205            if (dataset.asModel().isPresent()) {
206                final Model model = dataset.asModel().get();
207                return new AddToModel(model);
208            }
209            // Not backed by Repository or Model?
210            // Third-party RDF4JDataset subclass, so we'll fall through to the
211            // getTarget() handling further down
212        } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) {
213            final RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get();
214
215            if (graph.asRepository().isPresent()) {
216                final RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection());
217                if (!graph.getContextMask().isEmpty()) {
218                    final Stream<RDF4JBlankNodeOrIRI> b = graph.getContextMask().stream();
219                    final Stream<Resource> c = b.map(RDF4JBlankNodeOrIRI::asValue);
220                    final Resource[] contexts = c.toArray(Resource[]::new);
221                    inserter.enforceContext(contexts);
222                }
223                return inserter;
224            }
225            if (graph.asModel().isPresent() && graph.getContextMask().isEmpty()) {
226                // the model accepts any quad
227                final Model model = graph.asModel().get();
228                return new AddToModel(model);
229            }
230            // else - fall through
231        }
232
233        // Fall thorough: let target() consume our converted quads.
234        return new AddToQuadConsumer(getTarget());
235    }
236
237}