Package org.apache.xmpbox.xml
Class DomXmpParser
- java.lang.Object
-
- org.apache.xmpbox.xml.DomXmpParser
-
public class DomXmpParser extends java.lang.Object
-
-
Nested Class Summary
Nested Classes
Modifier and Type | Class | Description
protected static class | DomXmpParser.NamespaceFinder |
-
Field Summary
Fields
Modifier and Type | Field | Description
private javax.xml.parsers.DocumentBuilder | dBuilder |
private DomXmpParser.NamespaceFinder | nsFinder |
private boolean | strictParsing |
-
Constructor Summary
Constructors Constructor Description DomXmpParser()
-
Method Summary
All Methods / Instance Methods / Concrete Methods
Modifier and Type | Method | Description
private PropertyType | checkPropertyDefinition(TypeMapping tm, javax.xml.namespace.QName qName, java.lang.String parentTypeName) |
private void | createProperty(XMPMetadata xmp, org.w3c.dom.Element property, PropertyType type, ComplexPropertyContainer container) |
private void | expectNaming(org.w3c.dom.Element element, java.lang.String ns, java.lang.String prefix, java.lang.String ln) |
private org.w3c.dom.Element | findDescriptionsParent(org.w3c.dom.Element root) |
private AbstractStructuredType | instanciateStructured(TypeMapping tm, Types type, java.lang.String name, java.lang.String structuredNamespace) |
private boolean | isSchemaExtensionProperty(org.w3c.dom.Element element) |
boolean | isStrictParsing() | Tell if strict parsing mode is enabled.
private void | loadAttributes(AbstractField sp, org.w3c.dom.Element element) |
private void | manageArray(XMPMetadata xmp, org.w3c.dom.Element property, PropertyType type, ComplexPropertyContainer container) |
private void | manageDefinedType(XMPMetadata xmp, org.w3c.dom.Element property, java.lang.String prefix, ComplexPropertyContainer container) |
private void | manageLangAlt(XMPMetadata xmp, org.w3c.dom.Element property, ComplexPropertyContainer container) |
private void | manageSimpleType(XMPMetadata xmp, org.w3c.dom.Element property, Types type, ComplexPropertyContainer container) |
private void | manageStructuredType(XMPMetadata xmp, org.w3c.dom.Element property, java.lang.String prefix, ComplexPropertyContainer container) |
private void | maybeAddNonStandardNamespace(XMPMetadata xmp, org.w3c.dom.Attr attr) |
XMPMetadata | parse(byte[] xmp) |
XMPMetadata | parse(java.io.InputStream input) |
private void | parseChildrenAsProperties(XMPMetadata xmp, java.util.List<org.w3c.dom.Element> properties, TypeMapping tm, org.w3c.dom.Element description) |
private void | parseDescriptionInner(XMPMetadata xmp, org.w3c.dom.Element description, ComplexPropertyContainer parentContainer) |
private void | parseDescriptionRoot(XMPMetadata xmp, org.w3c.dom.Element description) |
private void | parseDescriptionRootAttr(XMPMetadata xmp, org.w3c.dom.Element description, org.w3c.dom.Attr attr, TypeMapping tm) |
private void | parseEndPacket(XMPMetadata metadata, org.w3c.dom.ProcessingInstruction pi) |
private XMPMetadata | parseInitialXpacket(org.w3c.dom.ProcessingInstruction pi) |
private AbstractStructuredType | parseLiDescription(XMPMetadata xmp, javax.xml.namespace.QName parentQName, org.w3c.dom.Element liDescriptionElement) |
private AbstractField | parseLiElement(XMPMetadata xmp, javax.xml.namespace.QName descriptor, org.w3c.dom.Element liElement, Types type) |
private void | parseSchemaExtensions(XMPMetadata xmp, org.w3c.dom.Element description) |
private void | removeCommentsAndBlanks(org.w3c.dom.Node root) | Remove all the comments and blank nodes in the parent element of the parameter.
void | setStrictParsing(boolean strictParsing) | Enable or disable strict parsing mode.
private AbstractStructuredType | tryParseAttributesAsProperties(TypeMapping tm, org.w3c.dom.Element liElement, AbstractStructuredType ast, PropertiesDescription pm, javax.xml.namespace.QName qName) | This attempts to run the same logic as in parseLiDescription() but with simple attributes that will be treated like children.
-
-
-
Field Detail
-
dBuilder
private javax.xml.parsers.DocumentBuilder dBuilder
-
nsFinder
private DomXmpParser.NamespaceFinder nsFinder
-
strictParsing
private boolean strictParsing
-
-
Constructor Detail
-
DomXmpParser
public DomXmpParser() throws XmpParsingException
- Throws:
XmpParsingException
-
-
Method Detail
-
isStrictParsing
public boolean isStrictParsing()
Tell if strict parsing mode is enabled.
- Returns:
- Whether strict parsing mode is enabled or not.
-
setStrictParsing
public void setStrictParsing(boolean strictParsing)
Enable or disable strict parsing mode.
- Parameters:
strictParsing - Whether to be strict or lenient when parsing XMP. True (the default) means that malformed XMP will result in an exception; false (lenient) means that if malformed content is encountered, the parser will continue its work if possible. Use strict mode if you want to work with PDF/A files. Use lenient mode if you care more about getting metadata.
-
parse
public XMPMetadata parse(byte[] xmp) throws XmpParsingException
- Throws:
XmpParsingException
-
parse
public XMPMetadata parse(java.io.InputStream input) throws XmpParsingException
- Throws:
XmpParsingException
-
maybeAddNonStandardNamespace
private void maybeAddNonStandardNamespace(XMPMetadata xmp, org.w3c.dom.Attr attr)
-
isSchemaExtensionProperty
private boolean isSchemaExtensionProperty(org.w3c.dom.Element element)
-
parseSchemaExtensions
private void parseSchemaExtensions(XMPMetadata xmp, org.w3c.dom.Element description) throws XmpParsingException
- Throws:
XmpParsingException
-
parseDescriptionRoot
private void parseDescriptionRoot(XMPMetadata xmp, org.w3c.dom.Element description) throws XmpParsingException
- Throws:
XmpParsingException
-
parseDescriptionRootAttr
private void parseDescriptionRootAttr(XMPMetadata xmp, org.w3c.dom.Element description, org.w3c.dom.Attr attr, TypeMapping tm) throws XmpSchemaException, XmpParsingException
-
parseChildrenAsProperties
private void parseChildrenAsProperties(XMPMetadata xmp, java.util.List<org.w3c.dom.Element> properties, TypeMapping tm, org.w3c.dom.Element description) throws XmpParsingException, XmpSchemaException
-
createProperty
private void createProperty(XMPMetadata xmp, org.w3c.dom.Element property, PropertyType type, ComplexPropertyContainer container) throws XmpParsingException
- Throws:
XmpParsingException
-
manageDefinedType
private void manageDefinedType(XMPMetadata xmp, org.w3c.dom.Element property, java.lang.String prefix, ComplexPropertyContainer container) throws XmpParsingException
- Throws:
XmpParsingException
-
manageStructuredType
private void manageStructuredType(XMPMetadata xmp, org.w3c.dom.Element property, java.lang.String prefix, ComplexPropertyContainer container) throws XmpParsingException
- Throws:
XmpParsingException
-
manageSimpleType
private void manageSimpleType(XMPMetadata xmp, org.w3c.dom.Element property, Types type, ComplexPropertyContainer container)
-
manageArray
private void manageArray(XMPMetadata xmp, org.w3c.dom.Element property, PropertyType type, ComplexPropertyContainer container) throws XmpParsingException
- Throws:
XmpParsingException
-
manageLangAlt
private void manageLangAlt(XMPMetadata xmp, org.w3c.dom.Element property, ComplexPropertyContainer container) throws XmpParsingException
- Throws:
XmpParsingException
-
parseDescriptionInner
private void parseDescriptionInner(XMPMetadata xmp, org.w3c.dom.Element description, ComplexPropertyContainer parentContainer) throws XmpParsingException
- Throws:
XmpParsingException
-
parseLiElement
private AbstractField parseLiElement(XMPMetadata xmp, javax.xml.namespace.QName descriptor, org.w3c.dom.Element liElement, Types type) throws XmpParsingException
- Throws:
XmpParsingException
-
loadAttributes
private void loadAttributes(AbstractField sp, org.w3c.dom.Element element)
-
parseLiDescription
private AbstractStructuredType parseLiDescription(XMPMetadata xmp, javax.xml.namespace.QName parentQName, org.w3c.dom.Element liDescriptionElement) throws XmpParsingException
- Throws:
XmpParsingException
-
parseInitialXpacket
private XMPMetadata parseInitialXpacket(org.w3c.dom.ProcessingInstruction pi) throws XmpParsingException
- Throws:
XmpParsingException
-
parseEndPacket
private void parseEndPacket(XMPMetadata metadata, org.w3c.dom.ProcessingInstruction pi) throws XmpParsingException
- Throws:
XmpParsingException
-
findDescriptionsParent
private org.w3c.dom.Element findDescriptionsParent(org.w3c.dom.Element root) throws XmpParsingException
- Throws:
XmpParsingException
-
expectNaming
private void expectNaming(org.w3c.dom.Element element, java.lang.String ns, java.lang.String prefix, java.lang.String ln) throws XmpParsingException
- Throws:
XmpParsingException
-
removeCommentsAndBlanks
private void removeCommentsAndBlanks(org.w3c.dom.Node root)
Remove all the comments and blank nodes in the parent element of the parameter.
- Parameters:
root - the first node of an element or document to clear
-
instanciateStructured
private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, java.lang.String name, java.lang.String structuredNamespace) throws XmpParsingException
- Throws:
XmpParsingException
-
checkPropertyDefinition
private PropertyType checkPropertyDefinition(TypeMapping tm, javax.xml.namespace.QName qName, java.lang.String parentTypeName) throws XmpParsingException
- Throws:
XmpParsingException
-
tryParseAttributesAsProperties
private AbstractStructuredType tryParseAttributesAsProperties(TypeMapping tm, org.w3c.dom.Element liElement, AbstractStructuredType ast, PropertiesDescription pm, javax.xml.namespace.QName qName) throws XmpParsingException
This attempts to run the same logic as in parseLiDescription() but with simple attributes that will be treated like children. This is inspired by loadAttributes() and parseDescriptionRootAttr(). This solves the problem in PDFBOX-3882 where properties appear as attributes in places lower than the descriptor root.
- Parameters:
tm -
liElement -
ast - An AbstractStructuredType object, can be null.
pm - A PropertiesDescription object, must be set if ast is not null.
qName - QName of the parent, will be used if instantiating an AbstractStructuredType object, must be set if ast is not null.
- Returns:
- An AbstractStructuredType, possibly created here if it was null as parameter.
- Throws:
XmpParsingException
-
-