Move org.apache.hadoop.sqoop to com.cloudera.sqoop
sqoop.git: src/test/com/cloudera/sqoop/hive/TestHiveImport.java
/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.hive;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.tool.CodeGenTool;
import com.cloudera.sqoop.tool.CreateHiveTableTool;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.tool.SqoopTool;

/**
 * Test HiveImport capability after an import to HDFS.
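 * The import tests point the mock hive shell at an expected .q script (via
 * the "expected.script" system property) and verify that the script Sqoop
 * generates matches it.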
 */
public class TestHiveImport extends ImportJobTestCase {

  public static final Log LOG = LogFactory.getLog(
      TestHiveImport.class.getName());

  /**
   * Sets the expected number of columns in the table being manipulated
   * by the test. Under the hood, this sets the expected column names
   * to DATA_COLi for 0 <= i < numCols.
   * @param numCols the number of columns to be created.
   */
  private void setNumCols(int numCols) {
    String [] cols = new String[numCols];
    for (int i = 0; i < numCols; i++) {
      cols[i] = "DATA_COL" + i;
    }

    setColNames(cols);
  }

  /**
   * Create the argv to pass to Sqoop.
   * @return the argv as an array of strings.
   */
  protected String [] getArgv(boolean includeHadoopFlags, String [] moreArgs) {
    ArrayList<String> args = new ArrayList<String>();

    if (includeHadoopFlags) {
      CommonArgs.addHadoopFlags(args);
    }

    if (null != moreArgs) {
      for (String arg : moreArgs) {
        args.add(arg);
      }
    }

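    // Core arguments for a Hive import: the source table, the HDFS warehouse
    // directory, the HSQLDB connect string, and the --hive-import flag itself.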
    args.add("--table");
    args.add(getTableName());
    args.add("--warehouse-dir");
    args.add(getWarehouseDir());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());
    args.add("--hive-import");
    String [] colNames = getColNames();
    if (null != colNames) {
      args.add("--split-by");
      args.add(colNames[0]);
    } else {
      fail("Could not determine column names.");
    }

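    // One map task is plenty for the single-row tables these tests create.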
    args.add("--num-mappers");
    args.add("1");

    for (String a : args) {
      LOG.debug("ARG : " + a);
    }

    return args.toArray(new String[0]);
  }

  /**
   * @return the argv to supply to a code-gen only job for Hive imports.
   */
  protected String [] getCodeGenArgs() {
    ArrayList<String> args = new ArrayList<String>();

    args.add("--table");
    args.add(getTableName());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());
    args.add("--hive-import");

    return args.toArray(new String[0]);
  }

  /**
   * @return the argv to supply to a ddl-executing-only job for Hive imports.
   */
  protected String [] getCreateHiveTableArgs(String [] extraArgs) {
    ArrayList<String> args = new ArrayList<String>();

    args.add("--table");
    args.add(getTableName());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());

    if (null != extraArgs) {
      for (String arg : extraArgs) {
        args.add(arg);
      }
    }

    return args.toArray(new String[0]);
  }

  private SqoopOptions getSqoopOptions(String [] args, SqoopTool tool) {
    SqoopOptions opts = null;
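    // Treat an unparseable command line as a test failure rather than letting
    // the exception propagate.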
    try {
      opts = tool.parseArguments(args, null, null, true);
    } catch (Exception e) {
      fail("Invalid options: " + e.toString());
    }

    return opts;
  }

  private void runImportTest(String tableName, String [] types,
      String [] values, String verificationScript, String [] args,
      SqoopTool tool) throws IOException {

    // Create a table and populate it with a row...
    createTableWithColTypes(types, values);

    // Set up our mock hive shell to compare our generated script
    // against the correct expected one.
    SqoopOptions options = getSqoopOptions(args, tool);
    String hiveHome = options.getHiveHome();
    assertNotNull("hive.home was not set", hiveHome);
    Path testDataPath = new Path(new Path(hiveHome),
        "scripts/" + verificationScript);
    System.setProperty("expected.script", testDataPath.toString());

    // Verify that we can import it correctly into Hive.
    runImport(tool, args);
  }

  /** Test that we can generate a file containing the DDL and not import. */
  @Test
  public void testGenerateOnly() throws IOException {
    final String TABLE_NAME = "GenerateOnly";
    setCurTableName(TABLE_NAME);
    setNumCols(1);

    // Figure out where our target generated .q file is going to be.
    SqoopOptions options = getSqoopOptions(getArgv(false, null),
        new ImportTool());
    Path ddlFile = new Path(new Path(options.getCodeOutputDir()),
        TABLE_NAME + ".q");
    FileSystem fs = FileSystem.getLocal(new Configuration());

    // If it's already there, remove it before running the test to ensure
    // that it's the current test that generated the file.
    if (fs.exists(ddlFile)) {
      if (!fs.delete(ddlFile, false)) {
        LOG.warn("Could not delete previous ddl file: " + ddlFile);
      }
    }

    // Run a basic import, but specify that we're just generating definitions.
    String [] types = { "INTEGER" };
    String [] vals = { "42" };
    runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(),
        new CodeGenTool());

    // Test that the generated definition file exists.
    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));

    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()),
        TABLE_NAME);
    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
  }

  /** Test that strings and ints are handled in the normal fashion. */
  @Test
  public void testNormalHiveImport() throws IOException {
    final String TABLE_NAME = "NORMAL_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
    runImportTest(TABLE_NAME, types, vals, "normalImport.q",
        getArgv(false, null), new ImportTool());
  }

  /** Test that the table is created in Hive with no data import. */
  @Test
  public void testCreateOnlyHiveImport() throws IOException {
    final String TABLE_NAME = "CREATE_ONLY_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
    runImportTest(TABLE_NAME, types, vals,
        "createOnlyImport.q", getCreateHiveTableArgs(null),
        new CreateHiveTableTool());
  }

  /**
   * Test that the table is created in Hive, replacing any existing table
   * definition.
   */
  @Test
  public void testCreateOverwriteHiveImport() throws IOException {
    final String TABLE_NAME = "CREATE_OVERWRITE_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
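    // --hive-overwrite asks Sqoop to replace the table definition in Hive if
    // one already exists; createOverwriteImport.q holds the expected script.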
    String [] extraArgs = {"--hive-overwrite"};
    runImportTest(TABLE_NAME, types, vals,
        "createOverwriteImport.q", getCreateHiveTableArgs(extraArgs),
        new CreateHiveTableTool());
  }

  /** Test that dates are coerced properly to strings. */
  @Test
  public void testDate() throws IOException {
    final String TABLE_NAME = "DATE_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(2);
    String [] types = { "VARCHAR(32)", "DATE" };
    String [] vals = { "'test'", "'2009-05-12'" };
    runImportTest(TABLE_NAME, types, vals, "dateImport.q",
        getArgv(false, null), new ImportTool());
  }

  /** Test that NUMERICs are coerced to doubles. */
  @Test
  public void testNumeric() throws IOException {
    final String TABLE_NAME = "NUMERIC_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(2);
    String [] types = { "NUMERIC", "CHAR(64)" };
    String [] vals = { "3.14159", "'foo'" };
    runImportTest(TABLE_NAME, types, vals, "numericImport.q",
        getArgv(false, null), new ImportTool());
  }

  /**
   * If bin/hive returns an error exit status, we should get an IOException.
   */
  @Test
  public void testHiveExitFails() {
    // The expected script is different from the one this run would generate,
    // so we expect an IOException.
    final String TABLE_NAME = "FAILING_HIVE_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(2);
    String [] types = { "NUMERIC", "CHAR(64)" };
    String [] vals = { "3.14159", "'foo'" };
    try {
      runImportTest(TABLE_NAME, types, vals, "failingImport.q",
          getArgv(false, null), new ImportTool());
      // If we get here, then the run succeeded -- which is incorrect.
      fail("FAILING_HIVE_IMPORT test should have thrown IOException");
    } catch (IOException ioe) {
      // Expected; ok.
    }
  }

  /** Test that we can set the delimiters to whatever we want. */
  @Test
  public void testCustomDelimiters() throws IOException {
    final String TABLE_NAME = "CUSTOM_DELIM_IMPORT";
    setCurTableName(TABLE_NAME);
    setNumCols(3);
    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
    String [] vals = { "'test'", "42", "'somestring'" };
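    // The custom delimiters should be reflected in the script Sqoop generates;
    // customDelimImport.q encodes the expected output.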
    String [] extraArgs = {
      "--fields-terminated-by", ",",
      "--lines-terminated-by", "|",
    };
    runImportTest(TABLE_NAME, types, vals, "customDelimImport.q",
        getArgv(false, extraArgs), new ImportTool());
  }
}