ANY23-414 support reverse itemprops in microdata
author Hans <firedrake93@gmail.com>
Tue, 30 Oct 2018 18:56:06 +0000 (13:56 -0500)
committer Hans <firedrake93@gmail.com>
Tue, 30 Oct 2018 18:56:06 +0000 (13:56 -0500)
core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java

index d516c88..6b8072f 100644 (file)
@@ -42,6 +42,11 @@ public class ItemProp extends Item {
      * @param value item property value.
      */
     public ItemProp(String xpath, String name, ItemPropValue value) {
+        this(xpath, name, value, false);
+    }
+
+    final boolean reverse;
+    ItemProp(String xpath, String name, ItemPropValue value, boolean reverse) {
         super(xpath);
 
         if(name == null) {
@@ -55,6 +60,7 @@ public class ItemProp extends Item {
         }
         this.name = name;
         this.value = value;
+        this.reverse = reverse;
     }
 
     /**
index 50f880f..829866d 100644 (file)
@@ -190,13 +190,21 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
             value = toAbsoluteIRI(documentIRI, (String)propValue);
             //TODO: support registries so hardcoding not needed
             if (predicate.stringValue().equals("http://schema.org/additionalType")) {
-                out.writeTriple(subject, RDF.TYPE, value);
+                if (itemProp.reverse) {
+                    out.writeTriple((Resource)value, RDF.TYPE, subject);
+                } else {
+                    out.writeTriple(subject, RDF.TYPE, value);
+                }
             }
         } else {
             throw new RuntimeException("Invalid Type '" +
                     propType + "' for ItemPropValue with name: '" + predicate + "'");
         }
-        out.writeTriple(subject, predicate, value);
+        if (itemProp.reverse) {
+            out.writeTriple((Resource)value, predicate, subject);
+        } else {
+            out.writeTriple(subject, predicate, value);
+        }
     }
 
     private static final String hcardPrefix    = "http://microformats.org/profile/hcard";
index 8c3c641..0c993e1 100644 (file)
@@ -103,6 +103,7 @@ public class MicrodataParser {
 
     public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";
     public static final String ITEMPROP_ATTRIBUTE  = "itemprop";
+    private static final String REVERSE_ITEMPROP_ATTRIBUTE = "itemprop-reverse";
 
     /**
      * List of tags providing the <code>src</code> property.
@@ -198,7 +199,8 @@ public class MicrodataParser {
         final List<Node> topLevelItemScopes = new ArrayList<>();
         final List<Node> possibles = new ArrayList<>();
         for (Node itemScope : itemScopes) {
-            if (!isItemProp(itemScope)) {
+            if (!isItemProp(itemScope)
+                    && DomUtils.readAttribute(itemScope, REVERSE_ITEMPROP_ATTRIBUTE, null) == null) {
                 topLevelItemScopes.add(itemScope);
             } else if (!isContainedInItemScope(itemScope)) {
                 possibles.add(itemScope);
@@ -504,7 +506,8 @@ public class MicrodataParser {
         boolean skipRootChildren = false;
         if (!skipRoot) {
             NamedNodeMap attributes = scopeNode.getAttributes();
-            if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null) {
+            if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null
+                    || attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null) {
                 accepted.add(scopeNode);
             }
             if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
@@ -520,7 +523,8 @@ public class MicrodataParser {
                         public short acceptNode(Node node) {
                             if (node.getNodeType() == Node.ELEMENT_NODE) {
                                 NamedNodeMap attributes = node.getAttributes();
-                                if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) {
+                                if ((attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null
+                                        || attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null) && scopeNode != node) {
                                     accepted.add(node);
                                 }
 
@@ -541,8 +545,12 @@ public class MicrodataParser {
         final List<ItemProp> result = new ArrayList<>();
         for (Node itemPropNode : accepted) {
             final String itemProp = DomUtils.readAttribute(itemPropNode, ITEMPROP_ATTRIBUTE, null);
+            final String reverseProp = DomUtils.readAttribute(itemPropNode, REVERSE_ITEMPROP_ATTRIBUTE, null);
 
-            if (StringUtils.isBlank(itemProp)) {
+            boolean hasItemProp = StringUtils.isNotBlank(itemProp);
+            boolean hasReverseProp = StringUtils.isNotBlank(reverseProp);
+
+            if (!hasItemProp && !hasReverseProp) {
                 manageError(new MicrodataParserException("invalid property name '" + itemProp + "'", itemPropNode));
                 continue;
             }
@@ -554,14 +562,34 @@ public class MicrodataParser {
                 manageError(mpe);
                 continue;
             }
-            for (String propertyName : itemProp.trim().split("\\s+")) {
-                result.add(
-                        new ItemProp(
-                                DomUtils.getXPathForNode(itemPropNode),
-                                propertyName,
-                                itemPropValue
-                        )
-                );
+            if (hasItemProp) {
+                for (String propertyName : itemProp.trim().split("\\s+")) {
+                    result.add(
+                            new ItemProp(
+                                    DomUtils.getXPathForNode(itemPropNode),
+                                    propertyName,
+                                    itemPropValue,
+                                    false
+                            )
+                    );
+                }
+            }
+            if (hasReverseProp) {
+                if (itemPropValue.literal != null) {
+                    manageError(new MicrodataParserException(REVERSE_ITEMPROP_ATTRIBUTE
+                            + " cannot point to a literal", itemPropNode));
+                    continue;
+                }
+                for (String propertyName : reverseProp.trim().split("\\s+")) {
+                    result.add(
+                            new ItemProp(
+                                    DomUtils.getXPathForNode(itemPropNode),
+                                    propertyName,
+                                    itemPropValue,
+                                    true
+                            )
+                    );
+                }
             }
         }
         return result;
index 01a0585..9d27ffa 100644 (file)
@@ -127,10 +127,7 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase {
 
     private static final List<String> ignoredOnlineTestNames = Arrays.asList(
             "Test 0073", //Vocabulary Expansion test with rdfs:subPropertyOf
-            "Test 0074", //Vocabulary Expansion test with owl:equivalentProperty
-            "Test 0081", //Simple @itemprop-reverse (experimental)
-            "Test 0082", //@itemprop-reverse with @itemscope value (experimental)
-            "Test 0084"  //@itemprop-reverse with @itemprop (experimental)
+            "Test 0074" //Vocabulary Expansion test with owl:equivalentProperty
     );
 
     private static Any23 createRunner(String extractorName) {