ANY23-377 don't replace empty strings with 'Null'
author Hans <firedrake93@gmail.com>
Tue, 31 Jul 2018 21:37:25 +0000 (16:37 -0500)
committer Hans <firedrake93@gmail.com>
Tue, 31 Jul 2018 21:46:41 +0000 (16:46 -0500)
core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads

index f32b468..b4710de 100644 (file)
@@ -22,6 +22,8 @@ import java.net.URL;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.Date;
+import java.util.Objects;
+
 import org.apache.any23.util.StringUtils;
 
 /**
@@ -47,10 +49,24 @@ public class ItemPropValue {
      * Supported types.
      */
     public enum Type {
-        Plain,
-        Link,
-        Date,
-        Nested
+        Plain(String.class),
+        Link(String.class),
+        Date(Date.class),
+        Nested(ItemScope.class);
+
+        Type(Class<?> contentClass) {
+            this.contentClass = contentClass;
+        }
+
+        private final Class<?> contentClass;
+
+        private Object checkClass(Object content) {
+            Objects.requireNonNull(content, "content cannot be null");
+            if (!contentClass.isInstance(content)) {
+                throw new IllegalArgumentException("content must be a " + contentClass.getName() + " when type is " + this);
+            }
+            return content;
+        }
     }
 
     public static Date parseDateTime(String dateStr) throws ParseException {
@@ -77,31 +93,8 @@ public class ItemPropValue {
      * @param type content type.
      */
     public ItemPropValue(Object content, Type type) {
-        if(content == null) {
-            throw new NullPointerException("content cannot be null.");
-        }
-        if(type == null) {
-            throw new NullPointerException("type cannot be null.");
-        }
-        if(type == Type.Nested && ! (content instanceof ItemScope) ) {
-            throw new IllegalArgumentException(
-                    "content must be an " + ItemScope.class + " when type is " + Type.Nested
-            );
-        }
-        if(type == Type.Date && !(content instanceof Date) ) {
-            throw new IllegalArgumentException(
-                    "content must be a " + Date.class.getName() + " whe type is " + Type.Date
-            );
-        }
-        if(content instanceof String && ((String) content).trim().length() == 0) {
-            // ANY23-115 Empty spans seem to break ANY23
-            // instead of throwing the exception and in effect failing the entire
-            // parse job we wish to be lenient on web content publishers and add
-            // Null (String) as content.
-            content = "Null";
-        }
-        this.content = content;
-        this.type = type;
+        this.type = Objects.requireNonNull(type, "type cannot be null");
+        this.content = type.checkClass(content);
     }
 
     /**
index e5b6f29..b759d1b 100644 (file)
@@ -49,7 +49,7 @@ _:node1cjov1p83x7 <http://schema.org/name> "Begin to Knit Classes" <http://bob.e
 _:node1cjov1p83x7 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> .
 _:node1cjov1p83x8 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
 _:node1cjov1p83x9 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x9 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x9 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
 _:node1cjov1p83x8 <http://schema.org/address> _:node1cjov1p83x9 <http://bob.example.com/> .
 _:node1cjov1p83x8 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> .
 _:node1cjov1p83x7 <http://schema.org/location> _:node1cjov1p83x8 <http://bob.example.com/> .
@@ -62,7 +62,7 @@ _:node1cjov1p83x10 <http://schema.org/name> "Cambridge Historic School Museum To
 _:node1cjov1p83x10 <http://schema.org/description> "Built in 1906, the Cambridge Historic School - listed on the..." <http://bob.example.com/> .
 _:node1cjov1p83x11 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
 _:node1cjov1p83x12 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x12 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x12 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
 _:node1cjov1p83x11 <http://schema.org/address> _:node1cjov1p83x12 <http://bob.example.com/> .
 _:node1cjov1p83x11 <http://schema.org/name> "Cambridge Historic School" <http://bob.example.com/> .
 _:node1cjov1p83x10 <http://schema.org/location> _:node1cjov1p83x11 <http://bob.example.com/> .
@@ -75,7 +75,7 @@ _:node1cjov1p83x13 <http://schema.org/name> "Begin to Knit Classes" <http://bob.
 _:node1cjov1p83x13 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> .
 _:node1cjov1p83x14 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
 _:node1cjov1p83x15 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x15 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x15 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
 _:node1cjov1p83x14 <http://schema.org/address> _:node1cjov1p83x15 <http://bob.example.com/> .
 _:node1cjov1p83x14 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> .
 _:node1cjov1p83x13 <http://schema.org/location> _:node1cjov1p83x14 <http://bob.example.com/> .