sanity check: added another JSON-cleaning test case
authorHans <firedrake93@gmail.com>
Mon, 6 Aug 2018 23:00:33 +0000 (18:00 -0500)
committerHans <firedrake93@gmail.com>
Mon, 6 Aug 2018 23:00:33 +0000 (18:00 -0500)
core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
test-resources/src/test/resources/html/json-cleaning-test.json [new file with mode: 0644]

index f1338b4..5db98ae 100644 (file)
@@ -22,6 +22,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParser;
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
@@ -82,6 +84,21 @@ public class JSONLDExtractorTest {
     }
   }
 
+  @Test
+  public void testJsonCleaning() throws Exception {
+    JsonCleaningInputStream stream = new JsonCleaningInputStream(getClass().getResourceAsStream("/html/json-cleaning-test.json"));
+
+    JsonParser parser = new JsonFactory().createParser(stream);
+
+    int numTokens = 0;
+    while (parser.nextToken() != null) {
+      numTokens++;
+    }
+
+    Assert.assertEquals(numTokens, 41);
+
+  }
+
   public void extract(IRI uri, String filePath) 
     throws IOException, ExtractionException, TripleHandlerException {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
diff --git a/test-resources/src/test/resources/html/json-cleaning-test.json b/test-resources/src/test/resources/html/json-cleaning-test.json
new file mode 100644 (file)
index 0000000..09ec189
--- /dev/null
@@ -0,0 +1,26 @@
+{ /*  " ' # //*/
+  "a": { #comment <![CDATA[
+    "b": 1234;'c': {
+      "d": [
+        {
+          "f": {
+
+          } "g": {
+           'i':[1,2,3,//comment
+                                      ,4,5,,
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      6] 'j':[
+          1 ]]>
+
+        ] /**/
+          }, /*}
+          */} {
+    "key"
+            : 'value',,,
+  }
+
+      ] "e": {
+
+      },
+    }
+  }
+}
\ No newline at end of file