001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.commons.rdf.rdf4j.experimental; 019 020import java.io.IOException; 021import java.io.InputStream; 022import java.net.MalformedURLException; 023import java.net.URL; 024import java.nio.file.Files; 025import java.nio.file.Path; 026import java.util.Optional; 027import java.util.function.Consumer; 028import java.util.stream.Stream; 029 030import org.apache.commons.rdf.api.IRI; 031import org.apache.commons.rdf.api.Quad; 032import org.apache.commons.rdf.api.RDFSyntax; 033import org.apache.commons.rdf.rdf4j.RDF4J; 034import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI; 035import org.apache.commons.rdf.rdf4j.RDF4JDataset; 036import org.apache.commons.rdf.rdf4j.RDF4JGraph; 037import org.apache.commons.rdf.simple.experimental.AbstractRDFParser; 038import org.eclipse.rdf4j.model.Model; 039import org.eclipse.rdf4j.model.Resource; 040import org.eclipse.rdf4j.repository.util.RDFInserter; 041import org.eclipse.rdf4j.repository.util.RDFLoader; 042import org.eclipse.rdf4j.rio.ParserConfig; 043import org.eclipse.rdf4j.rio.RDFFormat; 044import org.eclipse.rdf4j.rio.RDFHandler; 045import org.eclipse.rdf4j.rio.RDFHandlerException; 046import org.eclipse.rdf4j.rio.Rio; 047import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler; 048 049/** 050 * RDF4J-based parser. 051 * <p> 052 * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD}, 053 * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES}, 054 * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE} 055 * - additional syntaxes can be supported by including the corresponding 056 * <em>rdf4j-rio-*</em> module on the classpath. 057 * 058 */ 059public class RDF4JParser extends AbstractRDFParser<RDF4JParser> { 060 061 private final class AddToQuadConsumer extends AbstractRDFHandler { 062 private final Consumer<Quad> quadTarget; 063 064 private AddToQuadConsumer(final Consumer<Quad> quadTarget) { 065 this.quadTarget = quadTarget; 066 } 067 068 @Override 069 public void handleStatement(final org.eclipse.rdf4j.model.Statement st) 070 throws org.eclipse.rdf4j.rio.RDFHandlerException { 071 // TODO: if getRdfTermFactory() is a non-rdf4j factory, should 072 // we use factory.createQuad() instead? 073 // Unsure what is the promise of setting getRdfTermFactory() -- 074 // does it go all the way down to creating BlankNode, IRI and 075 // Literal? 076 quadTarget.accept(rdf4jTermFactory.asQuad(st)); 077 // Performance note: 078 // Graph/Quad.add should pick up again our 079 // RDF4JGraphLike.asStatement() 080 // and avoid double conversion. 081 // Additionally the RDF4JQuad and RDF4JTriple implementations 082 // are lazily converting subj/obj/pred/graph.s 083 } 084 } 085 086 private final static class AddToModel extends AbstractRDFHandler { 087 private final Model model; 088 089 public AddToModel(final Model model) { 090 this.model = model; 091 } 092 093 @Override 094 public void handleStatement(final org.eclipse.rdf4j.model.Statement st) 095 throws org.eclipse.rdf4j.rio.RDFHandlerException { 096 model.add(st); 097 } 098 099 @Override 100 public void handleNamespace(final String prefix, final String uri) throws RDFHandlerException { 101 model.setNamespace(prefix, uri); 102 } 103 } 104 105 private RDF4J rdf4jTermFactory; 106 private ParserConfig parserConfig = new ParserConfig(); 107 108 @Override 109 protected RDF4J createRDFTermFactory() { 110 return new RDF4J(); 111 } 112 113 @Override 114 protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException { 115 final RDF4JParser c = super.prepareForParsing(); 116 // Ensure we have an RDF4J for conversion. 117 // We'll make a new one if user has provided a non-RDF4J factory 118 c.rdf4jTermFactory = (RDF4J) getRdfTermFactory().filter(RDF4J.class::isInstance) 119 .orElseGet(c::createRDFTermFactory); 120 return c; 121 } 122 123 @Override 124 protected void parseSynchronusly() throws IOException { 125 final Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType); 126 final String base = getBase().map(IRI::getIRIString).orElse(null); 127 128 final ParserConfig parserConfig = getParserConfig(); 129 // TODO: Should we need to set anything? 130 final RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory()); 131 final RDFHandler rdfHandler = makeRDFHandler(); 132 if (getSourceFile().isPresent()) { 133 // NOTE: While we could have used 134 // loader.load(sourcePath.toFile() 135 // if the path fs provider == FileSystems.getDefault(), 136 // that RDFLoader method does not use absolute path 137 // as the base URI, so to be consistent 138 // we'll always do it with our own input stream 139 // 140 // That means we may have to guess format by extensions: 141 final Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString) 142 .flatMap(Rio::getParserFormatForFileName); 143 // TODO: for the excited.. what about the extension after following 144 // symlinks? 145 146 final RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null)); 147 try (InputStream in = Files.newInputStream(getSourceFile().get())) { 148 loader.load(in, base, format, rdfHandler); 149 } 150 } else if (getSourceIri().isPresent()) { 151 try { 152 // TODO: Handle international IRIs properly 153 // (Unicode support for for hostname, path and query) 154 final URL url = new URL(getSourceIri().get().getIRIString()); 155 // TODO: This probably does not support https:// -> http:// 156 // redirections 157 loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler()); 158 } catch (final MalformedURLException ex) { 159 throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex); 160 } 161 } 162 // must be getSourceInputStream then, this is guaranteed by 163 // super.checkSource(); 164 loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler); 165 } 166 167 /** 168 * Get the RDF4J {@link ParserConfig} to use. 169 * <p> 170 * If no parser config is set, the default configuration is provided. 171 * <p> 172 * <strong>Note:</strong> The parser config is mutable - changes in the 173 * returned config is reflected in this instance of the parser. To avoid 174 * mutation, create a new {@link ParserConfig} and set 175 * {@link #setParserConfig(ParserConfig)}. 176 * 177 * @return The RDF4J {@link ParserConfig} 178 */ 179 public ParserConfig getParserConfig() { 180 return parserConfig; 181 } 182 183 /** 184 * Set an RDF4J {@link ParserConfig} to use 185 * 186 * @param parserConfig 187 * Parser configuration 188 */ 189 public void setParserConfig(final ParserConfig parserConfig) { 190 this.parserConfig = parserConfig; 191 } 192 193 protected RDFHandler makeRDFHandler() { 194 195 // TODO: Can we join the below DF4JDataset and RDF4JGraph cases 196 // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>> 197 // or will that need tricky generics types? 198 199 if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) { 200 // One of us, we can add them as Statements directly 201 final RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get(); 202 if (dataset.asRepository().isPresent()) { 203 return new RDFInserter(dataset.asRepository().get().getConnection()); 204 } 205 if (dataset.asModel().isPresent()) { 206 final Model model = dataset.asModel().get(); 207 return new AddToModel(model); 208 } 209 // Not backed by Repository or Model? 210 // Third-party RDF4JDataset subclass, so we'll fall through to the 211 // getTarget() handling further down 212 } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) { 213 final RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get(); 214 215 if (graph.asRepository().isPresent()) { 216 final RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection()); 217 if (!graph.getContextMask().isEmpty()) { 218 final Stream<RDF4JBlankNodeOrIRI> b = graph.getContextMask().stream(); 219 final Stream<Resource> c = b.map(RDF4JBlankNodeOrIRI::asValue); 220 final Resource[] contexts = c.toArray(Resource[]::new); 221 inserter.enforceContext(contexts); 222 } 223 return inserter; 224 } 225 if (graph.asModel().isPresent() && graph.getContextMask().isEmpty()) { 226 // the model accepts any quad 227 final Model model = graph.asModel().get(); 228 return new AddToModel(model); 229 } 230 // else - fall through 231 } 232 233 // Fall thorough: let target() consume our converted quads. 234 return new AddToQuadConsumer(getTarget()); 235 } 236 237}