SQOOP-519. Update the version of hsqldb dependency to 2.X.
[sqoop.git] src/test/com/cloudera/sqoop/hive/TestHiveImport.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.hive;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.tool.BaseSqoopTool;
import com.cloudera.sqoop.tool.CodeGenTool;
import com.cloudera.sqoop.tool.CreateHiveTableTool;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.tool.SqoopTool;
import org.apache.commons.cli.ParseException;

/**
 * Test HiveImport capability after an import to HDFS.
 */
public class TestHiveImport extends ImportJobTestCase {

  public static final Log LOG = LogFactory.getLog(
      TestHiveImport.class.getName());

  /**
   * Sets the expected number of columns in the table being manipulated
   * by the test. Under the hood, this sets the expected column names
   * to DATA_COLi for 0 <= i < numCols.
   * @param numCols the number of columns to be created.
   */
  private void setNumCols(int numCols) {
    String [] cols = new String[numCols];
    for (int i = 0; i < numCols; i++) {
      cols[i] = "DATA_COL" + i;
    }

    setColNames(cols);
  }

  /**
   * Create the argv to pass to Sqoop.
   * @return the argv as an array of strings.
   */
  protected String [] getArgv(boolean includeHadoopFlags, String [] moreArgs) {
    ArrayList<String> args = new ArrayList<String>();

    if (includeHadoopFlags) {
      CommonArgs.addHadoopFlags(args);
    }

    if (null != moreArgs) {
      for (String arg: moreArgs) {
        args.add(arg);
      }
    }

    args.add("--table");
    args.add(getTableName());
    args.add("--warehouse-dir");
    args.add(getWarehouseDir());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());
    args.add("--hive-import");
    String [] colNames = getColNames();
    if (null != colNames) {
      args.add("--split-by");
      args.add(colNames[0]);
    } else {
      fail("Could not determine column names.");
    }

    args.add("--num-mappers");
    args.add("1");

    for (String a : args) {
      LOG.debug("ARG : "+ a);
    }

    return args.toArray(new String[0]);
  }

  /**
   * @return the argv to supply to a create-table only job for Hive imports.
   */
  protected String [] getCreateTableArgv(boolean includeHadoopFlags,
      String [] moreArgs) {

    ArrayList<String> args = new ArrayList<String>();

    if (null != moreArgs) {
      for (String arg: moreArgs) {
        args.add(arg);
      }
    }

    args.add("--table");
    args.add(getTableName());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());

    return args.toArray(new String[0]);
  }

  /**
   * @return the argv to supply to a code-gen only job for Hive imports.
   */
  protected String [] getCodeGenArgs() {
    ArrayList<String> args = new ArrayList<String>();

    args.add("--table");
    args.add(getTableName());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());
    args.add("--hive-import");

    return args.toArray(new String[0]);
  }

  /**
   * @return the argv to supply to a ddl-executing-only job for Hive imports.
   */
  protected String [] getCreateHiveTableArgs(String [] extraArgs) {
    ArrayList<String> args = new ArrayList<String>();

    args.add("--table");
    args.add(getTableName());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());

    if (null != extraArgs) {
      for (String arg : extraArgs) {
        args.add(arg);
      }
    }

    return args.toArray(new String[0]);
  }

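  /**
   * Parse the given argument array with the given Sqoop tool, failing the
   * test if the arguments cannot be parsed.
   */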
  private SqoopOptions getSqoopOptions(String [] args, SqoopTool tool) {
    SqoopOptions opts = null;
    try {
      opts = tool.parseArguments(args, null, null, true);
    } catch (Exception e) {
      fail("Invalid options: " + e.toString());
    }

    return opts;
  }

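  /**
   * Create and populate the test table, point the mock hive shell at the
   * expected verification script, and run the given Sqoop tool over the
   * given arguments.
   */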
  private void runImportTest(String tableName, String [] types,
      String [] values, String verificationScript, String [] args,
      SqoopTool tool) throws IOException {

    // create a table and populate it with a row...
    createTableWithColTypes(types, values);

    // set up our mock hive shell to compare our generated script
    // against the correct expected one.
    SqoopOptions options = getSqoopOptions(args, tool);
    String hiveHome = options.getHiveHome();
    assertNotNull("hive.home was not set", hiveHome);
    Path testDataPath = new Path(new Path(hiveHome),
        "scripts/" + verificationScript);
    System.setProperty("expected.script", testDataPath.toString());

    // verify that we can import it correctly into hive.
    runImport(tool, args);
  }

  /** Test that we can generate a file containing the DDL and not import. */
  @Test
  public void testGenerateOnly() throws IOException {
    final String TABLE_NAME = "GenerateOnly";
    setCurTableName(TABLE_NAME);
    setNumCols(1);

    // Figure out where our target generated .q file is going to be.
    SqoopOptions options = getSqoopOptions(getArgv(false, null),
        new ImportTool());
    Path ddlFile = new Path(new Path(options.getCodeOutputDir()),
        TABLE_NAME + ".q");
    FileSystem fs = FileSystem.getLocal(new Configuration());

    // If it's already there, remove it before running the test to ensure
    // that it's the current test that generated the file.
    if (fs.exists(ddlFile)) {
      if (!fs.delete(ddlFile, false)) {
        LOG.warn("Could not delete previous ddl file: " + ddlFile);
      }
    }

    // Run a basic import, but specify that we're just generating definitions.
    String [] types = { "INTEGER" };
    String [] vals = { "42" };
    runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(),
        new CodeGenTool());

    // Test that the generated definition file exists.
    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));

    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()),
        TABLE_NAME);
    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
  }

  /** Test that strings and ints are handled in the normal fashion. */
  @Test
  public void testNormalHiveImport() throws IOException {
    final String TABLE_NAME = "NORMAL_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
    runImportTest(TABLE_NAME, types, vals, "normalImport.q",
        getArgv(false, null), new ImportTool());
  }

  /** Test that table is created in hive with no data import. */
  @Test
  public void testCreateOnlyHiveImport() throws IOException {
    final String TABLE_NAME = "CREATE_ONLY_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
    runImportTest(TABLE_NAME, types, vals,
        "createOnlyImport.q", getCreateHiveTableArgs(null),
        new CreateHiveTableTool());
  }

  /**
   * Test that table is created in hive and replaces the existing table if
   * any.
   */
  @Test
  public void testCreateOverwriteHiveImport() throws IOException {
    final String TABLE_NAME = "CREATE_OVERWRITE_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
    String [] extraArgs = {"--hive-overwrite", "--create-hive-table"};
    runImportTest(TABLE_NAME, types, vals,
        "createOverwriteImport.q", getCreateHiveTableArgs(extraArgs),
        new CreateHiveTableTool());
  }

  /** Test that dates are coerced properly to strings. */
  @Test
  public void testDate() throws IOException {
    final String TABLE_NAME = "DATE_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(2);
    String [] types = { "VARCHAR(32)", "DATE" };
    String [] vals = { "'test'", "'2009-05-12'" };
    runImportTest(TABLE_NAME, types, vals, "dateImport.q",
        getArgv(false, null), new ImportTool());
  }

  /** Test that NUMERICs are coerced to doubles. */
  @Test
  public void testNumeric() throws IOException {
    final String TABLE_NAME = "NUMERIC_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(2);
    String [] types = { "NUMERIC", "CHAR(64)" };
    String [] vals = { "3.14159", "'foo'" };
    runImportTest(TABLE_NAME, types, vals, "numericImport.q",
        getArgv(false, null), new ImportTool());
  }

  /** If bin/hive returns an error exit status, we should get an IOException. */
  @Test
  public void testHiveExitFails() {
    // The expected script is different from the one this test would
    // generate, so we expect an IOException out.
    final String TABLE_NAME = "FAILING_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(2);
    String [] types = { "NUMERIC", "CHAR(64)" };
    String [] vals = { "3.14159", "'foo'" };
    try {
      runImportTest(TABLE_NAME, types, vals, "failingImport.q",
          getArgv(false, null), new ImportTool());
      // If we get here, then the run succeeded -- which is incorrect.
      fail("FAILING_HIVE_IMPORT test should have thrown IOException");
    } catch (IOException ioe) {
      // expected; ok.
    }
  }

  /** Test that we can set delimiters how we want them. */
  @Test
  public void testCustomDelimiters() throws IOException {
    final String TABLE_NAME = "CUSTOM_DELIM_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
    String [] extraArgs = {
      "--fields-terminated-by", ",",
      "--lines-terminated-by", "|",
    };
    runImportTest(TABLE_NAME, types, vals, "customDelimImport.q",
        getArgv(false, extraArgs), new ImportTool());
  }

  /**
   * Test hive import with a row that has a newline in it.
   */
  @Test
  public void testFieldWithHiveDelims() throws IOException,
      InterruptedException {
    final String TABLE_NAME = "FIELD_WITH_NL_HIVE_IMPORT";

    LOG.info("Doing import of single row into FIELD_WITH_NL_HIVE_IMPORT table");
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String[] vals = { "'test with \n new lines \n'", "42",
        "'oh no " + '\01' + " field delims " + '\01' + "'", };
    String[] moreArgs = { "--"+ BaseSqoopTool.HIVE_DROP_DELIMS_ARG };

    runImportTest(TABLE_NAME, types, vals, "fieldWithNewlineImport.q",
        getArgv(false, moreArgs), new ImportTool());

    LOG.info("Validating data in single row is present in: "
        + "FIELD_WITH_NL_HIVE_IMPORT table");

    // Ideally, we would actually invoke hive code to verify that records with
    // record and field delimiters have their values replaced, and that we
    // have the proper number of hive records. Unfortunately, this is a
    // non-trivial task, and better dealt with at an integration-test level.
    //
    // Instead, this assumes the path of the generated table and just
    // validates the map job output.

    // Get and read the raw output file
    String whDir = getWarehouseDir();
    File p = new File(new File(whDir, TABLE_NAME), "part-m-00000");
    File f = new File(p.toString());
    FileReader fr = new FileReader(f);
    BufferedReader br = new BufferedReader(fr);
    try {
      // verify the output
      assertEquals("test with new lines " + '\01' + "42" + '\01'
          + "oh no field delims ",
          br.readLine());
      assertEquals(br.readLine(), null); // should only be one line
    } catch (IOException ioe) {
      fail("Unable to read files generated from hive");
    } finally {
      br.close();
    }
  }

  /**
   * Test hive import with a row that has a newline in it, using delimiter
   * replacement.
   */
  @Test
  public void testFieldWithHiveDelimsReplacement() throws IOException,
      InterruptedException {
    final String TABLE_NAME = "FIELD_WITH_NL_REPLACEMENT_HIVE_IMPORT";

    LOG.info("Doing import of single row into "
        + "FIELD_WITH_NL_REPLACEMENT_HIVE_IMPORT table");
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String[] vals = { "'test with\nnew lines\n'", "42",
        "'oh no " + '\01' + " field delims " + '\01' + "'", };
    String[] moreArgs = { "--"+BaseSqoopTool.HIVE_DELIMS_REPLACEMENT_ARG, " "};

    runImportTest(TABLE_NAME, types, vals,
        "fieldWithNewlineReplacementImport.q", getArgv(false, moreArgs),
        new ImportTool());

    LOG.info("Validating data in single row is present in: "
        + "FIELD_WITH_NL_REPLACEMENT_HIVE_IMPORT table");

    // Ideally, we would actually invoke hive code to verify that records with
    // record and field delimiters have their values replaced, and that we
    // have the proper number of hive records. Unfortunately, this is a
    // non-trivial task, and better dealt with at an integration-test level.
    //
    // Instead, this assumes the path of the generated table and just
    // validates the map job output.

    // Get and read the raw output file
    String whDir = getWarehouseDir();
    File p = new File(new File(whDir, TABLE_NAME), "part-m-00000");
    File f = new File(p.toString());
    FileReader fr = new FileReader(f);
    BufferedReader br = new BufferedReader(fr);
    try {
      // verify the output
      assertEquals("test with new lines " + '\01' + "42" + '\01'
          + "oh no field delims ",
          br.readLine());
      assertEquals(br.readLine(), null); // should only be one line
    } catch (IOException ioe) {
      fail("Unable to read files generated from hive");
    } finally {
      br.close();
    }
  }

  /**
   * Test hive drop and replace option validation.
   */
  @Test
  public void testHiveDropAndReplaceOptionValidation() throws ParseException {
    LOG.info("Testing conflicting Hive delimiter drop/replace options");

    setNumCols(3);
    String[] moreArgs = { "--"+BaseSqoopTool.HIVE_DELIMS_REPLACEMENT_ARG, " ",
        "--"+BaseSqoopTool.HIVE_DROP_DELIMS_ARG, };

    ImportTool tool = new ImportTool();
    try {
      tool.validateOptions(tool.parseArguments(getArgv(false, moreArgs), null,
          null, true));
      fail("Expected InvalidOptionsException");
    } catch (InvalidOptionsException ex) {
      /* success */
    }
  }

  /**
   * Test hive import into a partitioned table.
   */
  @Test
  public void testImportHiveWithPartitions() throws IOException,
      InterruptedException {
    final String TABLE_NAME = "PARTITION_HIVE_IMPORT";

    LOG.info("Doing import of single row into PARTITION_HIVE_IMPORT table");
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)", };
    String[] vals = { "'whoop'", "42", "'I am a row in a partition'", };
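    // Import into a single Hive partition: key "ds", value "20110413".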
    String[] moreArgs = { "--" + BaseSqoopTool.HIVE_PARTITION_KEY_ARG, "ds",
        "--" + BaseSqoopTool.HIVE_PARTITION_VALUE_ARG, "20110413", };

    runImportTest(TABLE_NAME, types, vals, "partitionImport.q",
        getArgv(false, moreArgs), new ImportTool());
  }

  /**
   * If the partition key is set to one of the imported columns, we should
   * get an IOException.
   */
  @Test
  public void testImportWithBadPartitionKey() {
    final String TABLE_NAME = "FAILING_PARTITION_HIVE_IMPORT";

    LOG.info("Doing import of single row into " + TABLE_NAME + " table");
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String[] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)", };
    String[] vals = { "'key'", "42", "'I am a row in a partition'", };

    String partitionKey = getColNames()[0];

    // Specify 1st column as partition key and import every column of the
    // table by default (i.e. no --columns option).
    String[] moreArgs1 = {
      "--" + BaseSqoopTool.HIVE_PARTITION_KEY_ARG,
      partitionKey,
    };

    // Specify 1st column as both partition key and importing column.
    String[] moreArgs2 = {
      "--" + BaseSqoopTool.HIVE_PARTITION_KEY_ARG,
      partitionKey,
      "--" + BaseSqoopTool.COLUMNS_ARG,
      partitionKey,
    };

    // Test hive-import with the 1st args.
    try {
      runImportTest(TABLE_NAME, types, vals, "partitionImport.q",
          getArgv(false, moreArgs1), new ImportTool());
      fail(TABLE_NAME + " test should have thrown IOException");
    } catch (IOException ioe) {
      // expected; ok.
    }

    // Test hive-import with the 2nd args.
    try {
      runImportTest(TABLE_NAME, types, vals, "partitionImport.q",
          getArgv(false, moreArgs2), new ImportTool());
      fail(TABLE_NAME + " test should have thrown IOException");
    } catch (IOException ioe) {
      // expected; ok.
    }

    // Test create-hive-table with the 1st args.
    try {
      runImportTest(TABLE_NAME, types, vals, "partitionImport.q",
          getCreateTableArgv(false, moreArgs1), new CreateHiveTableTool());
      fail(TABLE_NAME + " test should have thrown IOException");
    } catch (IOException ioe) {
      // expected; ok.
    }
  }
}