SQOOP-2849: Sqoop2: Job failure when writing parquet in hdfs with data coming from...
[sqoop.git] / connector / connector-sdk / src / test / java / org / apache / sqoop / connector / idf / TestAVROIntermediateDataFormat.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.sqoop.connector.idf;
20
21 import static org.apache.sqoop.connector.common.SqoopAvroUtils.createEnumSchema;
22 import static org.apache.sqoop.connector.common.SqoopIDFUtils.DEFAULT_NULL_VALUE;
23 import static org.apache.sqoop.connector.common.TestSqoopIDFUtils.getByteFieldString;
24 import static org.testng.Assert.assertNull;
25 import static org.testng.Assert.assertEquals;
26
27 import org.apache.avro.generic.GenericData;
28 import org.apache.avro.generic.GenericRecord;
29 import org.apache.avro.util.Utf8;
30 import org.apache.commons.lang.StringUtils;
31 import org.apache.sqoop.common.SqoopException;
32 import org.apache.sqoop.connector.common.SqoopAvroUtils;
33 import org.apache.sqoop.schema.Schema;
34 import org.apache.sqoop.schema.type.Array;
35 import org.apache.sqoop.schema.type.Binary;
36 import org.apache.sqoop.schema.type.Bit;
37 import org.apache.sqoop.schema.type.Column;
38 import org.apache.sqoop.schema.type.Decimal;
39 import org.apache.sqoop.schema.type.FixedPoint;
40 import org.apache.sqoop.schema.type.Text;
41 import org.joda.time.LocalDateTime;
42 import org.testng.Assert;
43 import org.testng.annotations.BeforeMethod;
44 import org.testng.annotations.Test;
45
46 import java.math.BigDecimal;
47 import java.nio.ByteBuffer;
48 import java.util.ArrayList;
49 import java.util.Arrays;
50 import java.util.HashMap;
51 import java.util.HashSet;
52 import java.util.List;
53 import java.util.Map;
54 import java.util.Set;
55
56 public class TestAVROIntermediateDataFormat {
57
58 private AVROIntermediateDataFormat dataFormat;
59 private org.apache.avro.Schema avroSchema;
60 private final static String csvArray = "'[[11,11],[14,15]]'";
61 private final static String map = "'{\"testKey\":\"testValue\"}'";
62 private final static String csvSet = "'[[11,12],[14,15]]'";
63 private final static String csvDate = "'2014-10-01'";
64 private final static String csvDateTime = "'2014-10-01 12:00:00.000'";
65 private final static String csvTime = "'12:59:59'";
66 private Column enumCol;
67 // no time zone
68 private final static LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12,
69 0, 0);
70 private final static org.joda.time.LocalTime time = new org.joda.time.LocalTime(12, 59, 59);
71 private final static org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
72
73 @BeforeMethod(alwaysRun = true)
74 public void setUp() {
75 createAvroIDF();
76 }
77
78 private void createAvroIDF() {
79 Schema sqoopSchema = new Schema("test");
80 Set<String> options = new HashSet<String>();
81 options.add("ENUM");
82 options.add("NUME");
83 enumCol = new org.apache.sqoop.schema.type.Enum("seven").setOptions(options);
84 sqoopSchema
85 .addColumn(new FixedPoint("one", 8L, true))
86 .addColumn(new Decimal("two", 4, 2))
87 .addColumn(new Text("three"))
88 .addColumn(new Text("four"))
89 .addColumn(new Binary("five"))
90 .addColumn(new Text("six"))
91 .addColumn(enumCol)
92 .addColumn(new Array("eight", new Array("array", new FixedPoint("ft",2L, false))))
93 .addColumn(new org.apache.sqoop.schema.type.Map("nine", new Text("t1"), new Text("t2")))
94 .addColumn(new Bit("ten"))
95 .addColumn(new org.apache.sqoop.schema.type.DateTime("eleven", true, false))
96 .addColumn(new org.apache.sqoop.schema.type.Time("twelve", false))
97 .addColumn(new org.apache.sqoop.schema.type.Date("thirteen"))
98 .addColumn(new org.apache.sqoop.schema.type.FloatingPoint("fourteen", 4L))
99 .addColumn(
100 new org.apache.sqoop.schema.type.Set("fifteen", new Array("set", new FixedPoint("ftw", 2L, false))));
101 dataFormat = new AVROIntermediateDataFormat(sqoopSchema);
102 avroSchema = SqoopAvroUtils.createAvroSchema(sqoopSchema);
103 }
104
105 /**
106 * setCSVGetData setCSVGetObjectArray setCSVGetCSV
107 */
108 @Test
109 public void testInputAsCSVTextInAndDataOut() {
110
111 String csvText = "10,34.56,'54','random data',"
112 + getByteFieldString(new byte[] { (byte) -112, (byte) 54 }) + ",'" + String.valueOf(0x0A)
113 + "','ENUM'," + csvArray + "," + map + ",true," + csvDateTime + "," + csvTime + ","
114 + csvDate + ",13.44," + csvSet;
115 dataFormat.setCSVTextData(csvText);
116 GenericRecord avroObject = createAvroGenericRecord();
117 assertEquals(avroObject.toString(), dataFormat.getData().toString());
118 }
119
120 @Test
121 public void testInputAsCSVTextInAndObjectArrayOut() {
122 String csvText = "10,34.56,'54','random data',"
123 + getByteFieldString(new byte[] { (byte) -112, (byte) 54 }) + ",'" + String.valueOf(0x0A)
124 + "','ENUM'," + csvArray + "," + map + ",true," + csvDateTime + "," + csvTime + ","
125 + csvDate + ",13.44," + csvSet;
126 dataFormat.setCSVTextData(csvText);
127 assertEquals(dataFormat.getObjectData().length, 15);
128 assertObjectArray();
129
130 }
131
132 private void assertObjectArray() {
133 Object[] out = dataFormat.getObjectData();
134 assertEquals(10L, out[0]);
135 assertEquals(new BigDecimal("34.56"), out[1]);
136 assertEquals("54", out[2]);
137 assertEquals("random data", out[3]);
138 assertEquals(-112, ((byte[]) out[4])[0]);
139 assertEquals(54, ((byte[]) out[4])[1]);
140 assertEquals("10", out[5]);
141 assertEquals("ENUM", out[6]);
142
143 Object[] givenArrayOne = new Object[2];
144 givenArrayOne[0] = 11;
145 givenArrayOne[1] = 11;
146 Object[] givenArrayTwo = new Object[2];
147 givenArrayTwo[0] = 14;
148 givenArrayTwo[1] = 15;
149 Object[] arrayOfArrays = new Object[2];
150 arrayOfArrays[0] = givenArrayOne;
151 arrayOfArrays[1] = givenArrayTwo;
152 Map<Object, Object> map = new HashMap<Object, Object>();
153 map.put("testKey", "testValue");
154 Object[] set0 = new Object[2];
155 set0[0] = 11;
156 set0[1] = 12;
157 Object[] set1 = new Object[2];
158 set1[0] = 14;
159 set1[1] = 15;
160 Object[] set = new Object[2];
161 set[0] = set0;
162 set[1] = set1;
163 out[14] = set;
164 assertEquals(arrayOfArrays.length, 2);
165 assertEquals(Arrays.deepToString(arrayOfArrays), Arrays.deepToString((Object[]) out[7]));
166 assertEquals(map, out[8]);
167 assertEquals(true, out[9]);
168 assertEquals(dateTime, out[10]);
169 assertEquals(time, out[11]);
170 assertEquals(date, out[12]);
171 assertEquals(13.44f, out[13]);
172 assertEquals(set.length, 2);
173 assertEquals(Arrays.deepToString(set), Arrays.deepToString((Object[]) out[14]));
174
175 }
176
177 @Test
178 public void testInputAsCSVTextInCSVTextOut() {
179 String csvText = "10,34.56,'54','random data',"
180 + getByteFieldString(new byte[] { (byte) -112, (byte) 54 }) + ",'" + String.valueOf(0x0A)
181 + "','ENUM'," + csvArray + "," + map + ",true," + csvDateTime + "," + csvTime + ","
182 + csvDate + ",13.44," + csvSet;
183 dataFormat.setCSVTextData(csvText);
184 assertEquals(csvText, dataFormat.getCSVTextData());
185 }
186
187 private GenericRecord createAvroGenericRecord() {
188 GenericRecord avroObject = new GenericData.Record(avroSchema);
189 avroObject.put("one", 10L);
190 avroObject.put("two", "34.56");
191 avroObject.put("three", new Utf8("54"));
192 avroObject.put("four", new Utf8("random data"));
193 // store byte array in byte buffer
194 byte[] b = new byte[] { (byte) -112, (byte) 54 };
195 avroObject.put("five", ByteBuffer.wrap(b));
196 avroObject.put("six", new Utf8(String.valueOf(0x0A)));
197 avroObject.put("seven", new GenericData.EnumSymbol(createEnumSchema(enumCol), "ENUM"));
198
199 List<Object> givenArrayOne = new ArrayList<Object>();
200 givenArrayOne.add(11);
201 givenArrayOne.add(11);
202 List<Object> givenArrayTwo = new ArrayList<Object>();
203 givenArrayTwo.add(14);
204 givenArrayTwo.add(15);
205 List<Object> arrayOfArrays = new ArrayList<Object>();
206
207 arrayOfArrays.add(givenArrayOne);
208 arrayOfArrays.add(givenArrayTwo);
209
210 Map<Object, Object> map = new HashMap<Object, Object>();
211 map.put("testKey", "testValue");
212
213 avroObject.put("eight", arrayOfArrays);
214 avroObject.put("nine", map);
215 avroObject.put("ten", true);
216
217 // expect dates as strings
218 avroObject.put("eleven", dateTime.toDate().getTime());
219 avroObject.put("twelve", time.toDateTimeToday().getMillis());
220 avroObject.put("thirteen", date.toDate().getTime());
221 avroObject.put("fourteen", 13.44f);
222 List<Object> givenSetOne = new ArrayList<Object>();
223 givenSetOne.add(11);
224 givenSetOne.add(12);
225 List<Object> givenSetTwo = new ArrayList<Object>();
226 givenSetTwo.add(14);
227 givenSetTwo.add(15);
228 List<Object> set = new ArrayList<Object>();
229 set.add(givenSetOne);
230 set.add(givenSetTwo);
231 avroObject.put("fifteen", set);
232 return avroObject;
233 }
234
235 /**
236 * setDataGetCSV setDataGetObjectArray setDataGetData
237 */
238 @Test
239 public void testInputAsDataInAndCSVOut() {
240
241 String csvExpected = "10,34.56,'54','random data',"
242 + getByteFieldString(new byte[] { (byte) -112, (byte) 54 }) + ",'" + String.valueOf(0x0A)
243 + "','ENUM'," + csvArray + "," + map + ",true," + csvDateTime + "," + csvTime + ","
244 + csvDate + ",13.44," + csvSet;
245 dataFormat.setData(createAvroGenericRecord());
246 assertEquals(csvExpected, dataFormat.getCSVTextData());
247 }
248
249 @Test
250 public void testInputAsDataInAndObjectArrayOut() {
251 GenericRecord avroObject = createAvroGenericRecord();
252 dataFormat.setData(avroObject);
253 assertObjectArray();
254 }
255
256 @Test
257 public void testInputAsDataInAndDataOut() {
258 GenericRecord avroObject = createAvroGenericRecord();
259 dataFormat.setData(avroObject);
260 assertEquals(avroObject, dataFormat.getData());
261 }
262
263 private Object[] createObjectArray() {
264 Object[] out = new Object[15];
265 out[0] = 10L;
266 out[1] = new BigDecimal("34.56");
267 out[2] = "54";
268 out[3] = "random data";
269 out[4] = new byte[] { (byte) -112, (byte) 54 };
270 out[5] = String.valueOf(0x0A);
271 out[6] = "ENUM";
272
273 Object[] givenArrayOne = new Object[2];
274 givenArrayOne[0] = 11;
275 givenArrayOne[1] = 11;
276 Object[] givenArrayTwo = new Object[2];
277 givenArrayTwo[0] = 14;
278 givenArrayTwo[1] = 15;
279
280 Object[] arrayOfArrays = new Object[2];
281 arrayOfArrays[0] = givenArrayOne;
282 arrayOfArrays[1] = givenArrayTwo;
283
284 Map<Object, Object> map = new HashMap<Object, Object>();
285 map.put("testKey", "testValue");
286
287 out[7] = arrayOfArrays;
288 out[8] = map;
289 out[9] = true;
290 out[10] = dateTime;
291 out[11] = time;
292 out[12] = date;
293
294 out[13] = 13.44f;
295 Object[] set0 = new Object[2];
296 set0[0] = 11;
297 set0[1] = 12;
298 Object[] set1 = new Object[2];
299 set1[0] = 14;
300 set1[1] = 15;
301
302 Object[] set = new Object[2];
303 set[0] = set0;
304 set[1] = set1;
305 out[14] = set;
306 return out;
307 }
308
309 /**
310 * setObjectArrayGetData setObjectArrayGetCSV setObjectArrayGetObjectArray
311 */
312 @Test
313 public void testInputAsObjectArrayInAndDataOut() {
314
315 Object[] out = createObjectArray();
316 dataFormat.setObjectData(out);
317 GenericRecord avroObject = createAvroGenericRecord();
318 // SQOOP-SQOOP-1975: direct object compare will fail unless we use the Avro
319 // complex types
320 assertEquals(avroObject.toString(), dataFormat.getData().toString());
321
322 }
323
324 @Test
325 public void testInputAsObjectArrayInAndCSVOut() {
326 Object[] out = createObjectArray();
327 dataFormat.setObjectData(out);
328 String csvText = "10,34.56,'54','random data',"
329 + getByteFieldString(new byte[] { (byte) -112, (byte) 54 }) + ",'" + String.valueOf(0x0A)
330 + "','ENUM'," + csvArray + "," + map + ",true," + csvDateTime + "," + csvTime + ","
331 + csvDate + ",13.44," + csvSet;
332 assertEquals(csvText, dataFormat.getCSVTextData());
333 }
334
335 @Test
336 public void testInputAsObjectArrayInAndObjectArrayOut() {
337 Object[] out = createObjectArray();
338 dataFormat.setObjectData(out);
339 assertObjectArray();
340 }
341
342 // **************test cases for empty and null schema*******************
343 @Test(expectedExceptions = SqoopException.class)
344 public void testEmptySchema() {
345 String testData = "10,34.56,'54','random data',"
346 + getByteFieldString(new byte[] { (byte) -112, (byte) 54 }) + ",'\\n'";
347 // no coumns
348 Schema schema = new Schema("Test");
349 dataFormat = new AVROIntermediateDataFormat(schema);
350 dataFormat.setCSVTextData(testData);
351
352 @SuppressWarnings("unused")
353 Object[] out = dataFormat.getObjectData();
354 }
355
356 @Test(expectedExceptions = SqoopException.class)
357 public void testNullSchema() {
358 dataFormat = new AVROIntermediateDataFormat(null);
359 dataFormat.getObjectData();
360 }
361
362 @Test(expectedExceptions = SqoopException.class)
363 public void testNotSettingSchemaAndGetObjectData() {
364 dataFormat = new AVROIntermediateDataFormat();
365 dataFormat.getObjectData();
366 }
367
368 @Test(expectedExceptions = SqoopException.class)
369 public void testNotSettingSchemaAndGetData() {
370 dataFormat = new AVROIntermediateDataFormat();
371 dataFormat.getData();
372 }
373
374 @Test(expectedExceptions = SqoopException.class)
375 public void testNotSettingSchemaAndGetCSVData() {
376 dataFormat = new AVROIntermediateDataFormat();
377 dataFormat.getCSVTextData();
378 }
379
380 @Test(expectedExceptions = SqoopException.class)
381 public void testNotSettingSchemaAndSetObjectData() {
382 dataFormat = new AVROIntermediateDataFormat();
383 dataFormat.setObjectData(null);
384 }
385
386 @Test(expectedExceptions = SqoopException.class)
387 public void testNotSettingSchemaAndSetData() {
388 dataFormat = new AVROIntermediateDataFormat();
389 dataFormat.setData(null);
390 }
391
392 @Test(expectedExceptions = SqoopException.class)
393 public void testNotSettingSchemaAndSetCSVData() {
394 dataFormat = new AVROIntermediateDataFormat();
395 dataFormat.setCSVTextData(null);
396 }
397
398 // **************test cases for null and empty input*******************
399
400 @Test
401 public void testNullInputAsCSVTextInObjectArrayOut() {
402
403 dataFormat.setCSVTextData(null);
404 Object[] out = dataFormat.getObjectData();
405 assertNull(out);
406 }
407
408 @Test(expectedExceptions = SqoopException.class)
409 public void testEmptyInputAsCSVTextInObjectArrayOut() {
410 dataFormat.setCSVTextData("");
411 dataFormat.getObjectData();
412 }
413
414 @Test
415 public void testNullValueAsObjectArrayInAndCSVTextOut() {
416
417 Object[] in = { null, null, null, null, null, null, null, null, null, null, null, null, null,
418 null, null };
419 dataFormat.setObjectData(in);
420
421 String csvText = dataFormat.getCSVTextData();
422 String[] textValues = csvText.split(",");
423 assertEquals(15, textValues.length);
424 for (String text : textValues) {
425 assertEquals(text, DEFAULT_NULL_VALUE);
426 }
427 }
428
429 @Test
430 public void testNullValueAsObjectArrayInAndObjectArrayOut() {
431 Object[] in = { null, null, null, null, null, null, null, null, null, null, null, null, null,
432 null, null };
433 dataFormat.setObjectData(in);
434
435 Object[] out = dataFormat.getObjectData();
436 assertEquals(15, out.length);
437 for (Object obj : out) {
438 assertEquals(obj, null);
439 }
440 }
441
442 @Test
443 public void testNullValueAsCSVTextInAndObjectArrayOut() {
444 String[] test = { "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL",
445 "NULL", "NULL", "NULL", "NULL", "NULL", "NULL" };
446 dataFormat.setCSVTextData(StringUtils.join(test, ","));
447 Object[] out = dataFormat.getObjectData();
448 assertEquals(15, out.length);
449 for (Object obj : out) {
450 assertEquals(obj, null);
451 }
452 }
453
454 @Test
455 public void testNullValueAsCSVTextInAndCSVTextOut() {
456
457 String[] test = { "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL",
458 "NULL", "NULL", "NULL", "NULL", "NULL", "NULL" };
459 dataFormat.setCSVTextData(StringUtils.join(test, ","));
460
461 String csvText = dataFormat.getCSVTextData();
462 String[] textValues = csvText.split(",");
463 assertEquals(15, textValues.length);
464 for (String text : textValues) {
465 assertEquals(text, DEFAULT_NULL_VALUE);
466 }
467 }
468
469 @Test
470 public void testNullValueAsDataInAndCSVTextOut() {
471
472 GenericRecord avroObject = new GenericData.Record(avroSchema);
473 avroObject = setAvroRecordWithNulls();
474 dataFormat.setData(avroObject);
475
476 String csvText = dataFormat.getCSVTextData();
477 String[] textValues = csvText.split(",");
478 assertEquals(15, textValues.length);
479 for (String text : textValues) {
480 assertEquals(text, DEFAULT_NULL_VALUE);
481 }
482 }
483
484 @Test
485 public void testNullValueAsDataInAndObjectArrayOut() {
486 GenericRecord avroObject = new GenericData.Record(avroSchema);
487 avroObject = setAvroRecordWithNulls();
488 dataFormat.setData(avroObject);
489
490 Object[] out = dataFormat.getObjectData();
491 assertEquals(15, out.length);
492 for (Object obj : out) {
493 assertEquals(obj, null);
494 }
495
496 }
497
498 private GenericRecord setAvroRecordWithNulls() {
499 GenericRecord avroObject = new GenericData.Record(avroSchema);
500 avroObject.put("one", null);
501 avroObject.put("two", null);
502 avroObject.put("three", null);
503 avroObject.put("four", null);
504 avroObject.put("five", null);
505 avroObject.put("six", null);
506 avroObject.put("seven", null);
507
508 avroObject.put("eight", null);
509 avroObject.put("nine", null);
510 avroObject.put("ten", null);
511
512 // expect dates as strings
513 avroObject.put("eleven", null);
514 avroObject.put("twelve", null);
515 avroObject.put("thirteen", null);
516 avroObject.put("fourteen", null);
517
518 avroObject.put("fifteen", null);
519 return avroObject;
520 }
521 @Test(expectedExceptions = SqoopException.class)
522 public void testSchemaNotNullableWithObjectArray() {
523 Schema overrideSchema = new Schema("Test").addColumn(new Text("t").setNullable(false));
524 AVROIntermediateDataFormat dataFormat = new AVROIntermediateDataFormat(overrideSchema);
525 Object[] out = new Object[1];
526 out[0] = null;
527 dataFormat.setObjectData(out);
528 }
529
530 @Test(expectedExceptions = SqoopException.class)
531 public void testSchemaNotNullableWithCSV() {
532 Schema overrideSchema = new Schema("Test").addColumn(new Text("one").setNullable(false));
533 AVROIntermediateDataFormat dataFormat = new AVROIntermediateDataFormat(overrideSchema);
534 dataFormat.setCSVTextData(DEFAULT_NULL_VALUE);
535 }
536
537 // no validation happens when the setAvro and getAvro is used
538 @Test
539 public void testSchemaNotNullableWithAvro() {
540 Schema overrideSchema = new Schema("Test").addColumn(new Text("one").setNullable(false));
541 AVROIntermediateDataFormat dataFormat = new AVROIntermediateDataFormat(overrideSchema);
542 org.apache.avro.Schema avroSchema = SqoopAvroUtils.createAvroSchema(overrideSchema);
543 GenericRecord avroObject = new GenericData.Record(avroSchema);
544 avroObject.put("one", null);
545 dataFormat.setData(avroObject);
546 dataFormat.getData();
547 }
548
549 @Test
550 public void testSchemaWithBadCharacters() {
551 Schema schema = new Schema("9`\" blah`^&*(^&*(%$^&").addColumn(new Text("one").setNullable(false));
552 AVROIntermediateDataFormat dataFormat = new AVROIntermediateDataFormat(schema);
553 Assert.assertEquals(dataFormat.getAvroSchema().getName(), "blah");
554 }
555 }