SQOOP-319. Support for replacing Hive delimiters.
[sqoop.git] / src / java / com / cloudera / sqoop / tool / BaseSqoopTool.java
1 /**
2 * Licensed to Cloudera, Inc. under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. Cloudera, Inc. licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package com.cloudera.sqoop.tool;
20
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.sql.SQLException;
26 import java.util.Arrays;
27 import java.util.Properties;
28
29 import org.apache.commons.cli.CommandLine;
30 import org.apache.commons.cli.Option;
31 import org.apache.commons.cli.OptionBuilder;
32 import org.apache.commons.cli.OptionGroup;
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.util.StringUtils;
36 import org.apache.log4j.Category;
37 import org.apache.log4j.Level;
38 import org.apache.log4j.Logger;
39
40 import com.cloudera.sqoop.ConnFactory;
41 import com.cloudera.sqoop.Sqoop;
42 import com.cloudera.sqoop.SqoopOptions;
43 import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
44 import com.cloudera.sqoop.cli.RelatedOptions;
45 import com.cloudera.sqoop.cli.ToolOptions;
46 import com.cloudera.sqoop.lib.DelimiterSet;
47 import com.cloudera.sqoop.manager.ConnManager;
48 import com.cloudera.sqoop.metastore.JobData;
49
50 /**
51 * Layer on top of SqoopTool that provides some basic common code
52 * that most SqoopTool implementations will use.
53 *
54 * Subclasses should call init() at the top of their run() method,
55 * and call destroy() at the end in a finally block.
56 */
57 public abstract class BaseSqoopTool extends SqoopTool {
58
59 public static final Log LOG = LogFactory.getLog(
60 BaseSqoopTool.class.getName());
61
62 public static final String HELP_STR = "\nTry --help for usage instructions.";
63
64 // Here are all the arguments that are used by the standard sqoop tools.
65 // Their names are recorded here so that tools can share them and their
66 // use consistently. The argument parser applies the leading '--' to each
67 // string.
68 public static final String CONNECT_STRING_ARG = "connect";
69 public static final String CONN_MANAGER_CLASS_NAME =
70 "connection-manager";
71 public static final String CONNECT_PARAM_FILE = "connection-param-file";
72 public static final String DRIVER_ARG = "driver";
73 public static final String USERNAME_ARG = "username";
74 public static final String PASSWORD_ARG = "password";
75 public static final String PASSWORD_PROMPT_ARG = "P";
76 public static final String DIRECT_ARG = "direct";
77 public static final String BATCH_ARG = "batch";
78 public static final String TABLE_ARG = "table";
79 public static final String STAGING_TABLE_ARG = "staging-table";
80 public static final String CLEAR_STAGING_TABLE_ARG = "clear-staging-table";
81 public static final String COLUMNS_ARG = "columns";
82 public static final String SPLIT_BY_ARG = "split-by";
83 public static final String WHERE_ARG = "where";
84 public static final String HADOOP_HOME_ARG = "hadoop-home";
85 public static final String HIVE_HOME_ARG = "hive-home";
86 public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
87 public static final String TARGET_DIR_ARG = "target-dir";
88 public static final String APPEND_ARG = "append";
89 public static final String NULL_STRING = "null-string";
90 public static final String INPUT_NULL_STRING = "input-null-string";
91 public static final String NULL_NON_STRING = "null-non-string";
92 public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
93
94 public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
95 public static final String FMT_TEXTFILE_ARG = "as-textfile";
96 public static final String FMT_AVRODATAFILE_ARG = "as-avrodatafile";
97 public static final String HIVE_IMPORT_ARG = "hive-import";
98 public static final String HIVE_TABLE_ARG = "hive-table";
99 public static final String HIVE_OVERWRITE_ARG = "hive-overwrite";
100 public static final String HIVE_DROP_DELIMS_ARG = "hive-drop-import-delims";
101 public static final String HIVE_DELIMS_REPLACEMENT_ARG =
102 "hive-delims-replacement";
103 public static final String HIVE_PARTITION_KEY_ARG = "hive-partition-key";
104 public static final String HIVE_PARTITION_VALUE_ARG = "hive-partition-value";
105 public static final String CREATE_HIVE_TABLE_ARG =
106 "create-hive-table";
107 public static final String NUM_MAPPERS_ARG = "num-mappers";
108 public static final String NUM_MAPPERS_SHORT_ARG = "m";
109 public static final String COMPRESS_ARG = "compress";
110 public static final String COMPRESSION_CODEC_ARG = "compression-codec";
111 public static final String COMPRESS_SHORT_ARG = "z";
112 public static final String DIRECT_SPLIT_SIZE_ARG = "direct-split-size";
113 public static final String INLINE_LOB_LIMIT_ARG = "inline-lob-limit";
114 public static final String FETCH_SIZE_ARG = "fetch-size";
115 public static final String EXPORT_PATH_ARG = "export-dir";
116 public static final String FIELDS_TERMINATED_BY_ARG = "fields-terminated-by";
117 public static final String LINES_TERMINATED_BY_ARG = "lines-terminated-by";
118 public static final String OPTIONALLY_ENCLOSED_BY_ARG =
119 "optionally-enclosed-by";
120 public static final String ENCLOSED_BY_ARG = "enclosed-by";
121 public static final String ESCAPED_BY_ARG = "escaped-by";
122 public static final String MYSQL_DELIMITERS_ARG = "mysql-delimiters";
123 public static final String INPUT_FIELDS_TERMINATED_BY_ARG =
124 "input-fields-terminated-by";
125 public static final String INPUT_LINES_TERMINATED_BY_ARG =
126 "input-lines-terminated-by";
127 public static final String INPUT_OPTIONALLY_ENCLOSED_BY_ARG =
128 "input-optionally-enclosed-by";
129 public static final String INPUT_ENCLOSED_BY_ARG = "input-enclosed-by";
130 public static final String INPUT_ESCAPED_BY_ARG = "input-escaped-by";
131 public static final String CODE_OUT_DIR_ARG = "outdir";
132 public static final String BIN_OUT_DIR_ARG = "bindir";
133 public static final String PACKAGE_NAME_ARG = "package-name";
134 public static final String CLASS_NAME_ARG = "class-name";
135 public static final String JAR_FILE_NAME_ARG = "jar-file";
136 public static final String SQL_QUERY_ARG = "query";
137 public static final String SQL_QUERY_SHORT_ARG = "e";
138 public static final String VERBOSE_ARG = "verbose";
139 public static final String HELP_ARG = "help";
140 public static final String UPDATE_KEY_ARG = "update-key";
141
142 // Arguments for incremental imports.
143 public static final String INCREMENT_TYPE_ARG = "incremental";
144 public static final String INCREMENT_COL_ARG = "check-column";
145 public static final String INCREMENT_LAST_VAL_ARG = "last-value";
146
147 // HBase arguments.
148 public static final String HBASE_TABLE_ARG = "hbase-table";
149 public static final String HBASE_COL_FAM_ARG = "column-family";
150 public static final String HBASE_ROW_KEY_ARG = "hbase-row-key";
151 public static final String HBASE_CREATE_TABLE_ARG = "hbase-create-table";
152
153
154 // Arguments for the saved job management system.
155 public static final String STORAGE_METASTORE_ARG = "meta-connect";
156 public static final String JOB_CMD_CREATE_ARG = "create";
157 public static final String JOB_CMD_DELETE_ARG = "delete";
158 public static final String JOB_CMD_EXEC_ARG = "exec";
159 public static final String JOB_CMD_LIST_ARG = "list";
160 public static final String JOB_CMD_SHOW_ARG = "show";
161
162 // Arguments for the metastore.
163 public static final String METASTORE_SHUTDOWN_ARG = "shutdown";
164
165
166 // Arguments for merging datasets.
167 public static final String NEW_DATASET_ARG = "new-data";
168 public static final String OLD_DATASET_ARG = "onto";
169 public static final String MERGE_KEY_ARG = "merge-key";
170
/** Connection manager used by this tool; assigned by init() or setManager(). */
protected ConnManager manager;

/** Creates a tool without passing a name to the superclass. */
public BaseSqoopTool() {
  super();
}

/**
 * Creates a tool with the given name.
 * @param toolName the name handed to the SqoopTool constructor.
 */
public BaseSqoopTool(String toolName) {
  super(toolName);
}

/** @return the connection manager currently held by this tool. */
public ConnManager getManager() {
  return this.manager;
}

/**
 * Replaces the connection manager held by this tool.
 * @param mgr the manager to use from now on.
 */
public void setManager(ConnManager mgr) {
  manager = mgr;
}
187
188 /**
189 * Should be called at the beginning of the run() method to initialize
190 * the connection manager, etc. If this succeeds (returns true), it should
191 * be paired with a call to destroy().
192 * @return true on success, false on failure.
193 */
194 protected boolean init(SqoopOptions sqoopOpts) {
195 // Get the connection to the database.
196 try {
197 JobData data = new JobData(sqoopOpts, this);
198 this.manager = new ConnFactory(sqoopOpts.getConf()).getManager(data);
199 return true;
200 } catch (Exception e) {
201 LOG.error("Got error creating database manager: "
202 + StringUtils.stringifyException(e));
203 if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) {
204 throw new RuntimeException(e);
205 }
206 }
207
208 return false;
209 }
210
211 /**
212 * Should be called in a 'finally' block at the end of the run() method.
213 */
214 protected void destroy(SqoopOptions sqoopOpts) {
215 if (null != manager) {
216 try {
217 manager.close();
218 } catch (SQLException sqlE) {
219 LOG.warn("Error while closing connection: " + sqlE);
220 }
221 }
222 }
223
224 /**
225 * Examines a subset of the arrray presented, and determines if it
226 * contains any non-empty arguments. If so, logs the arguments
227 * and returns true.
228 *
229 * @param argv an array of strings to check.
230 * @param offset the first element of the array to check
231 * @param len the number of elements to check
232 * @return true if there are any non-null, non-empty argument strings
233 * present.
234 */
235 protected boolean hasUnrecognizedArgs(String [] argv, int offset, int len) {
236 if (argv == null) {
237 return false;
238 }
239
240 boolean unrecognized = false;
241 boolean printedBanner = false;
242 for (int i = offset; i < Math.min(argv.length, offset + len); i++) {
243 if (argv[i] != null && argv[i].length() > 0) {
244 if (!printedBanner) {
245 LOG.error("Error parsing arguments for " + getToolName() + ":");
246 printedBanner = true;
247 }
248 LOG.error("Unrecognized argument: " + argv[i]);
249 unrecognized = true;
250 }
251 }
252
253 return unrecognized;
254 }
255
256 protected boolean hasUnrecognizedArgs(String [] argv) {
257 if (null == argv) {
258 return false;
259 }
260 return hasUnrecognizedArgs(argv, 0, argv.length);
261 }
262
263
264 /**
265 * If argv contains an entry "--", return an array containing all elements
266 * after the "--" separator. Otherwise, return null.
267 * @param argv a set of arguments to scan for the subcommand arguments.
268 */
269 protected String [] getSubcommandArgs(String [] argv) {
270 if (null == argv) {
271 return null;
272 }
273
274 for (int i = 0; i < argv.length; i++) {
275 if (argv[i].equals("--")) {
276 return Arrays.copyOfRange(argv, i + 1, argv.length);
277 }
278 }
279
280 return null;
281 }
282
283 /**
284 * @return RelatedOptions used by job management tools.
285 */
286 protected RelatedOptions getJobOptions() {
287 RelatedOptions relatedOpts = new RelatedOptions(
288 "Job management arguments");
289 relatedOpts.addOption(OptionBuilder.withArgName("jdbc-uri")
290 .hasArg()
291 .withDescription("Specify JDBC connect string for the metastore")
292 .withLongOpt(STORAGE_METASTORE_ARG)
293 .create());
294
295 // Create an option-group surrounding the operations a user
296 // can perform on jobs.
297 OptionGroup group = new OptionGroup();
298 group.addOption(OptionBuilder.withArgName("job-id")
299 .hasArg()
300 .withDescription("Create a new saved job")
301 .withLongOpt(JOB_CMD_CREATE_ARG)
302 .create());
303 group.addOption(OptionBuilder.withArgName("job-id")
304 .hasArg()
305 .withDescription("Delete a saved job")
306 .withLongOpt(JOB_CMD_DELETE_ARG)
307 .create());
308 group.addOption(OptionBuilder.withArgName("job-id")
309 .hasArg()
310 .withDescription("Show the parameters for a saved job")
311 .withLongOpt(JOB_CMD_SHOW_ARG)
312 .create());
313
314 Option execOption = OptionBuilder.withArgName("job-id")
315 .hasArg()
316 .withDescription("Run a saved job")
317 .withLongOpt(JOB_CMD_EXEC_ARG)
318 .create();
319 group.addOption(execOption);
320
321 group.addOption(OptionBuilder
322 .withDescription("List saved jobs")
323 .withLongOpt(JOB_CMD_LIST_ARG)
324 .create());
325
326 relatedOpts.addOptionGroup(group);
327
328 // Since the "common" options aren't used in the job tool,
329 // add these settings here.
330 relatedOpts.addOption(OptionBuilder
331 .withDescription("Print more information while working")
332 .withLongOpt(VERBOSE_ARG)
333 .create());
334 relatedOpts.addOption(OptionBuilder
335 .withDescription("Print usage instructions")
336 .withLongOpt(HELP_ARG)
337 .create());
338
339 return relatedOpts;
340 }
341
342 /**
343 * @return RelatedOptions used by most/all Sqoop tools.
344 */
345 protected RelatedOptions getCommonOptions() {
346 // Connection args (common)
347 RelatedOptions commonOpts = new RelatedOptions("Common arguments");
348 commonOpts.addOption(OptionBuilder.withArgName("jdbc-uri")
349 .hasArg().withDescription("Specify JDBC connect string")
350 .withLongOpt(CONNECT_STRING_ARG)
351 .create());
352 commonOpts.addOption(OptionBuilder.withArgName("class-name")
353 .hasArg().withDescription("Specify connection manager class name")
354 .withLongOpt(CONN_MANAGER_CLASS_NAME)
355 .create());
356 commonOpts.addOption(OptionBuilder.withArgName("properties-file")
357 .hasArg().withDescription("Specify connection parameters file")
358 .withLongOpt(CONNECT_PARAM_FILE)
359 .create());
360 commonOpts.addOption(OptionBuilder.withArgName("class-name")
361 .hasArg().withDescription("Manually specify JDBC driver class to use")
362 .withLongOpt(DRIVER_ARG)
363 .create());
364 commonOpts.addOption(OptionBuilder.withArgName("username")
365 .hasArg().withDescription("Set authentication username")
366 .withLongOpt(USERNAME_ARG)
367 .create());
368 commonOpts.addOption(OptionBuilder.withArgName("password")
369 .hasArg().withDescription("Set authentication password")
370 .withLongOpt(PASSWORD_ARG)
371 .create());
372 commonOpts.addOption(OptionBuilder
373 .withDescription("Read password from console")
374 .create(PASSWORD_PROMPT_ARG));
375
376 commonOpts.addOption(OptionBuilder.withArgName("dir")
377 .hasArg().withDescription("Override $HADOOP_HOME")
378 .withLongOpt(HADOOP_HOME_ARG)
379 .create());
380
381 // misc (common)
382 commonOpts.addOption(OptionBuilder
383 .withDescription("Print more information while working")
384 .withLongOpt(VERBOSE_ARG)
385 .create());
386 commonOpts.addOption(OptionBuilder
387 .withDescription("Print usage instructions")
388 .withLongOpt(HELP_ARG)
389 .create());
390
391 return commonOpts;
392 }
393
394 /**
395 * @param explicitHiveImport true if the user has an explicit --hive-import
396 * available, or false if this is implied by the tool.
397 * @return options governing interaction with Hive
398 */
399 protected RelatedOptions getHiveOptions(boolean explicitHiveImport) {
400 RelatedOptions hiveOpts = new RelatedOptions("Hive arguments");
401 if (explicitHiveImport) {
402 hiveOpts.addOption(OptionBuilder
403 .withDescription("Import tables into Hive "
404 + "(Uses Hive's default delimiters if none are set.)")
405 .withLongOpt(HIVE_IMPORT_ARG)
406 .create());
407 }
408
409 hiveOpts.addOption(OptionBuilder.withArgName("dir")
410 .hasArg().withDescription("Override $HIVE_HOME")
411 .withLongOpt(HIVE_HOME_ARG)
412 .create());
413 hiveOpts.addOption(OptionBuilder
414 .withDescription("Overwrite existing data in the Hive table")
415 .withLongOpt(HIVE_OVERWRITE_ARG)
416 .create());
417 hiveOpts.addOption(OptionBuilder
418 .withDescription("Fail if the target hive table exists")
419 .withLongOpt(CREATE_HIVE_TABLE_ARG)
420 .create());
421 hiveOpts.addOption(OptionBuilder.withArgName("table-name")
422 .hasArg()
423 .withDescription("Sets the table name to use when importing to hive")
424 .withLongOpt(HIVE_TABLE_ARG)
425 .create());
426 hiveOpts.addOption(OptionBuilder
427 .withDescription("Drop Hive record \\0x01 and row delimiters "
428 + "(\\n\\r) from imported string fields")
429 .withLongOpt(HIVE_DROP_DELIMS_ARG)
430 .create());
431 hiveOpts.addOption(OptionBuilder
432 .hasArg()
433 .withDescription("Replace Hive record \\0x01 and row delimiters "
434 + "(\\n\\r) from imported string fields with user-defined string")
435 .withLongOpt(HIVE_DELIMS_REPLACEMENT_ARG)
436 .create());
437 hiveOpts.addOption(OptionBuilder.withArgName("partition-key")
438 .hasArg()
439 .withDescription("Sets the partition key to use when importing to hive")
440 .withLongOpt(HIVE_PARTITION_KEY_ARG)
441 .create());
442 hiveOpts.addOption(OptionBuilder.withArgName("partition-value")
443 .hasArg()
444 .withDescription("Sets the partition value to use when importing "
445 + "to hive")
446 .withLongOpt(HIVE_PARTITION_VALUE_ARG)
447 .create());
448 return hiveOpts;
449 }
450
451 /**
452 * @return options governing output format delimiters
453 */
454 protected RelatedOptions getOutputFormatOptions() {
455 RelatedOptions formatOpts = new RelatedOptions(
456 "Output line formatting arguments");
457 formatOpts.addOption(OptionBuilder.withArgName("char")
458 .hasArg()
459 .withDescription("Sets the field separator character")
460 .withLongOpt(FIELDS_TERMINATED_BY_ARG)
461 .create());
462 formatOpts.addOption(OptionBuilder.withArgName("char")
463 .hasArg()
464 .withDescription("Sets the end-of-line character")
465 .withLongOpt(LINES_TERMINATED_BY_ARG)
466 .create());
467 formatOpts.addOption(OptionBuilder.withArgName("char")
468 .hasArg()
469 .withDescription("Sets a field enclosing character")
470 .withLongOpt(OPTIONALLY_ENCLOSED_BY_ARG)
471 .create());
472 formatOpts.addOption(OptionBuilder.withArgName("char")
473 .hasArg()
474 .withDescription("Sets a required field enclosing character")
475 .withLongOpt(ENCLOSED_BY_ARG)
476 .create());
477 formatOpts.addOption(OptionBuilder.withArgName("char")
478 .hasArg()
479 .withDescription("Sets the escape character")
480 .withLongOpt(ESCAPED_BY_ARG)
481 .create());
482 formatOpts.addOption(OptionBuilder
483 .withDescription("Uses MySQL's default delimiter set: "
484 + "fields: , lines: \\n escaped-by: \\ optionally-enclosed-by: '")
485 .withLongOpt(MYSQL_DELIMITERS_ARG)
486 .create());
487
488 return formatOpts;
489 }
490
491 /**
492 * @return options governing input format delimiters.
493 */
494 protected RelatedOptions getInputFormatOptions() {
495 RelatedOptions inputFormatOpts =
496 new RelatedOptions("Input parsing arguments");
497 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
498 .hasArg()
499 .withDescription("Sets the input field separator")
500 .withLongOpt(INPUT_FIELDS_TERMINATED_BY_ARG)
501 .create());
502 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
503 .hasArg()
504 .withDescription("Sets the input end-of-line char")
505 .withLongOpt(INPUT_LINES_TERMINATED_BY_ARG)
506 .create());
507 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
508 .hasArg()
509 .withDescription("Sets a field enclosing character")
510 .withLongOpt(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)
511 .create());
512 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
513 .hasArg()
514 .withDescription("Sets a required field encloser")
515 .withLongOpt(INPUT_ENCLOSED_BY_ARG)
516 .create());
517 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
518 .hasArg()
519 .withDescription("Sets the input escape character")
520 .withLongOpt(INPUT_ESCAPED_BY_ARG)
521 .create());
522
523 return inputFormatOpts;
524 }
525
526 /**
527 * @param multiTable true if these options will be used for bulk code-gen.
528 * @return options related to code generation.
529 */
530 protected RelatedOptions getCodeGenOpts(boolean multiTable) {
531 RelatedOptions codeGenOpts =
532 new RelatedOptions("Code generation arguments");
533 codeGenOpts.addOption(OptionBuilder.withArgName("dir")
534 .hasArg()
535 .withDescription("Output directory for generated code")
536 .withLongOpt(CODE_OUT_DIR_ARG)
537 .create());
538 codeGenOpts.addOption(OptionBuilder.withArgName("dir")
539 .hasArg()
540 .withDescription("Output directory for compiled objects")
541 .withLongOpt(BIN_OUT_DIR_ARG)
542 .create());
543 codeGenOpts.addOption(OptionBuilder.withArgName("name")
544 .hasArg()
545 .withDescription("Put auto-generated classes in this package")
546 .withLongOpt(PACKAGE_NAME_ARG)
547 .create());
548 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
549 .hasArg()
550 .withDescription("Null string representation")
551 .withLongOpt(NULL_STRING)
552 .create());
553 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
554 .hasArg()
555 .withDescription("Input null string representation")
556 .withLongOpt(INPUT_NULL_STRING)
557 .create());
558 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
559 .hasArg()
560 .withDescription("Null non-string representation")
561 .withLongOpt(NULL_NON_STRING)
562 .create());
563 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
564 .hasArg()
565 .withDescription("Input null non-string representation")
566 .withLongOpt(INPUT_NULL_NON_STRING)
567 .create());
568 if (!multiTable) {
569 codeGenOpts.addOption(OptionBuilder.withArgName("name")
570 .hasArg()
571 .withDescription("Sets the generated class name. "
572 + "This overrides --" + PACKAGE_NAME_ARG + ". When combined "
573 + "with --" + JAR_FILE_NAME_ARG + ", sets the input class.")
574 .withLongOpt(CLASS_NAME_ARG)
575 .create());
576 }
577 return codeGenOpts;
578 }
579
580 protected RelatedOptions getHBaseOptions() {
581 RelatedOptions hbaseOpts =
582 new RelatedOptions("HBase arguments");
583 hbaseOpts.addOption(OptionBuilder.withArgName("table")
584 .hasArg()
585 .withDescription("Import to <table> in HBase")
586 .withLongOpt(HBASE_TABLE_ARG)
587 .create());
588 hbaseOpts.addOption(OptionBuilder.withArgName("family")
589 .hasArg()
590 .withDescription("Sets the target column family for the import")
591 .withLongOpt(HBASE_COL_FAM_ARG)
592 .create());
593 hbaseOpts.addOption(OptionBuilder.withArgName("col")
594 .hasArg()
595 .withDescription("Specifies which input column to use as the row key")
596 .withLongOpt(HBASE_ROW_KEY_ARG)
597 .create());
598 hbaseOpts.addOption(OptionBuilder
599 .withDescription("If specified, create missing HBase tables")
600 .withLongOpt(HBASE_CREATE_TABLE_ARG)
601 .create());
602
603 return hbaseOpts;
604 }
605
606
607
608 /**
609 * Apply common command-line to the state.
610 */
611 protected void applyCommonOptions(CommandLine in, SqoopOptions out)
612 throws InvalidOptionsException {
613
614 // common options.
615 if (in.hasOption(VERBOSE_ARG)) {
616 // Immediately switch into DEBUG logging.
617 Category sqoopLogger = Logger.getLogger(
618 Sqoop.class.getName()).getParent();
619 sqoopLogger.setLevel(Level.DEBUG);
620 LOG.debug("Enabled debug logging.");
621 }
622
623 if (in.hasOption(HELP_ARG)) {
624 ToolOptions toolOpts = new ToolOptions();
625 configureOptions(toolOpts);
626 printHelp(toolOpts);
627 throw new InvalidOptionsException("");
628 }
629
630 if (in.hasOption(CONNECT_STRING_ARG)) {
631 out.setConnectString(in.getOptionValue(CONNECT_STRING_ARG));
632 }
633
634 if (in.hasOption(CONN_MANAGER_CLASS_NAME)) {
635 out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
636 }
637
638 if (in.hasOption(CONNECT_PARAM_FILE)) {
639 File paramFile = new File(in.getOptionValue(CONNECT_PARAM_FILE));
640 if (!paramFile.exists()) {
641 throw new InvalidOptionsException(
642 "Specified connection parameter file not found: " + paramFile);
643 }
644 InputStream inStream = null;
645 Properties connectionParams = new Properties();
646 try {
647 inStream = new FileInputStream(
648 new File(in.getOptionValue(CONNECT_PARAM_FILE)));
649 connectionParams.load(inStream);
650 } catch (IOException ex) {
651 LOG.warn("Failed to load connection parameter file", ex);
652 throw new InvalidOptionsException(
653 "Error while loading connection parameter file: "
654 + ex.getMessage());
655 } finally {
656 if (inStream != null) {
657 try {
658 inStream.close();
659 } catch (IOException ex) {
660 LOG.warn("Failed to close input stream", ex);
661 }
662 }
663 }
664 LOG.debug("Loaded connection parameters: " + connectionParams);
665 out.setConnectionParams(connectionParams);
666 }
667
668 if (in.hasOption(NULL_STRING)) {
669 out.setNullStringValue(in.getOptionValue(NULL_STRING));
670 }
671
672 if (in.hasOption(INPUT_NULL_STRING)) {
673 out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
674 }
675
676 if (in.hasOption(NULL_NON_STRING)) {
677 out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
678 }
679
680 if (in.hasOption(INPUT_NULL_NON_STRING)) {
681 out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
682 }
683
684 if (in.hasOption(DRIVER_ARG)) {
685 out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
686 }
687
688 if (in.hasOption(USERNAME_ARG)) {
689 out.setUsername(in.getOptionValue(USERNAME_ARG));
690 if (null == out.getPassword()) {
691 // Set password to empty if the username is set first,
692 // to ensure that they're either both null or neither is.
693 out.setPassword("");
694 }
695 }
696
697 if (in.hasOption(PASSWORD_ARG)) {
698 LOG.warn("Setting your password on the command-line is insecure. "
699 + "Consider using -" + PASSWORD_PROMPT_ARG + " instead.");
700 out.setPassword(in.getOptionValue(PASSWORD_ARG));
701 }
702
703 if (in.hasOption(PASSWORD_PROMPT_ARG)) {
704 out.setPasswordFromConsole();
705 }
706
707 if (in.hasOption(HADOOP_HOME_ARG)) {
708 out.setHadoopHome(in.getOptionValue(HADOOP_HOME_ARG));
709 }
710
711 }
712
713 protected void applyHiveOptions(CommandLine in, SqoopOptions out)
714 throws InvalidOptionsException {
715
716 if (in.hasOption(HIVE_HOME_ARG)) {
717 out.setHiveHome(in.getOptionValue(HIVE_HOME_ARG));
718 }
719
720 if (in.hasOption(HIVE_IMPORT_ARG)) {
721 out.setHiveImport(true);
722 }
723
724 if (in.hasOption(HIVE_OVERWRITE_ARG)) {
725 out.setOverwriteHiveTable(true);
726 }
727
728 if (in.hasOption(CREATE_HIVE_TABLE_ARG)) {
729 out.setFailIfHiveTableExists(true);
730 }
731
732 if (in.hasOption(HIVE_TABLE_ARG)) {
733 out.setHiveTableName(in.getOptionValue(HIVE_TABLE_ARG));
734 }
735
736 if (in.hasOption(HIVE_DROP_DELIMS_ARG)) {
737 out.setHiveDropDelims(true);
738 }
739
740 if (in.hasOption(HIVE_DELIMS_REPLACEMENT_ARG)) {
741 out.setHiveDelimsReplacement(
742 in.getOptionValue(HIVE_DELIMS_REPLACEMENT_ARG));
743 }
744
745 if (in.hasOption(HIVE_PARTITION_KEY_ARG)) {
746 out.setHivePartitionKey(in.getOptionValue(HIVE_PARTITION_KEY_ARG));
747 }
748
749 if (in.hasOption(HIVE_PARTITION_VALUE_ARG)) {
750 out.setHivePartitionValue(in.getOptionValue(HIVE_PARTITION_VALUE_ARG));
751 }
752 }
753
754 protected void applyOutputFormatOptions(CommandLine in, SqoopOptions out)
755 throws InvalidOptionsException {
756 if (in.hasOption(FIELDS_TERMINATED_BY_ARG)) {
757 out.setFieldsTerminatedBy(SqoopOptions.toChar(
758 in.getOptionValue(FIELDS_TERMINATED_BY_ARG)));
759 out.setExplicitDelims(true);
760 }
761
762 if (in.hasOption(LINES_TERMINATED_BY_ARG)) {
763 out.setLinesTerminatedBy(SqoopOptions.toChar(
764 in.getOptionValue(LINES_TERMINATED_BY_ARG)));
765 out.setExplicitDelims(true);
766 }
767
768 if (in.hasOption(OPTIONALLY_ENCLOSED_BY_ARG)) {
769 out.setEnclosedBy(SqoopOptions.toChar(
770 in.getOptionValue(OPTIONALLY_ENCLOSED_BY_ARG)));
771 out.setOutputEncloseRequired(false);
772 out.setExplicitDelims(true);
773 }
774
775 if (in.hasOption(ENCLOSED_BY_ARG)) {
776 out.setEnclosedBy(SqoopOptions.toChar(
777 in.getOptionValue(ENCLOSED_BY_ARG)));
778 out.setOutputEncloseRequired(true);
779 out.setExplicitDelims(true);
780 }
781
782 if (in.hasOption(ESCAPED_BY_ARG)) {
783 out.setEscapedBy(SqoopOptions.toChar(
784 in.getOptionValue(ESCAPED_BY_ARG)));
785 out.setExplicitDelims(true);
786 }
787
788 if (in.hasOption(MYSQL_DELIMITERS_ARG)) {
789 out.setOutputEncloseRequired(false);
790 out.setFieldsTerminatedBy(',');
791 out.setLinesTerminatedBy('\n');
792 out.setEscapedBy('\\');
793 out.setEnclosedBy('\'');
794 out.setExplicitDelims(true);
795 }
796 }
797
798 protected void applyInputFormatOptions(CommandLine in, SqoopOptions out)
799 throws InvalidOptionsException {
800 if (in.hasOption(INPUT_FIELDS_TERMINATED_BY_ARG)) {
801 out.setInputFieldsTerminatedBy(SqoopOptions.toChar(
802 in.getOptionValue(INPUT_FIELDS_TERMINATED_BY_ARG)));
803 }
804
805 if (in.hasOption(INPUT_LINES_TERMINATED_BY_ARG)) {
806 out.setInputLinesTerminatedBy(SqoopOptions.toChar(
807 in.getOptionValue(INPUT_LINES_TERMINATED_BY_ARG)));
808 }
809
810 if (in.hasOption(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)) {
811 out.setInputEnclosedBy(SqoopOptions.toChar(
812 in.getOptionValue(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)));
813 out.setInputEncloseRequired(false);
814 }
815
816 if (in.hasOption(INPUT_ENCLOSED_BY_ARG)) {
817 out.setInputEnclosedBy(SqoopOptions.toChar(
818 in.getOptionValue(INPUT_ENCLOSED_BY_ARG)));
819 out.setInputEncloseRequired(true);
820 }
821
822 if (in.hasOption(INPUT_ESCAPED_BY_ARG)) {
823 out.setInputEscapedBy(SqoopOptions.toChar(
824 in.getOptionValue(INPUT_ESCAPED_BY_ARG)));
825 }
826 }
827
828 protected void applyCodeGenOptions(CommandLine in, SqoopOptions out,
829 boolean multiTable) throws InvalidOptionsException {
830 if (in.hasOption(CODE_OUT_DIR_ARG)) {
831 out.setCodeOutputDir(in.getOptionValue(CODE_OUT_DIR_ARG));
832 }
833
834 if (in.hasOption(BIN_OUT_DIR_ARG)) {
835 out.setJarOutputDir(in.getOptionValue(BIN_OUT_DIR_ARG));
836 }
837
838 if (in.hasOption(PACKAGE_NAME_ARG)) {
839 out.setPackageName(in.getOptionValue(PACKAGE_NAME_ARG));
840 }
841
842 if (!multiTable && in.hasOption(CLASS_NAME_ARG)) {
843 out.setClassName(in.getOptionValue(CLASS_NAME_ARG));
844 }
845 }
846
847 protected void applyHBaseOptions(CommandLine in, SqoopOptions out) {
848 if (in.hasOption(HBASE_TABLE_ARG)) {
849 out.setHBaseTable(in.getOptionValue(HBASE_TABLE_ARG));
850 }
851
852 if (in.hasOption(HBASE_COL_FAM_ARG)) {
853 out.setHBaseColFamily(in.getOptionValue(HBASE_COL_FAM_ARG));
854 }
855
856 if (in.hasOption(HBASE_ROW_KEY_ARG)) {
857 out.setHBaseRowKeyColumn(in.getOptionValue(HBASE_ROW_KEY_ARG));
858 }
859
860 if (in.hasOption(HBASE_CREATE_TABLE_ARG)) {
861 out.setCreateHBaseTable(true);
862 }
863 }
864
865 protected void validateCommonOptions(SqoopOptions options)
866 throws InvalidOptionsException {
867 if (options.getConnectString() == null) {
868 throw new InvalidOptionsException(
869 "Error: Required argument --connect is missing."
870 + HELP_STR);
871 }
872 }
873
874 protected void validateCodeGenOptions(SqoopOptions options)
875 throws InvalidOptionsException {
876 if (options.getClassName() != null && options.getPackageName() != null) {
877 throw new InvalidOptionsException(
878 "--class-name overrides --package-name. You cannot use both."
879 + HELP_STR);
880 }
881 }
882
883 protected void validateOutputFormatOptions(SqoopOptions options)
884 throws InvalidOptionsException {
885 if (options.doHiveImport()) {
886 if (!options.explicitDelims()) {
887 // user hasn't manually specified delimiters, and wants to import
888 // straight to Hive. Use Hive-style delimiters.
889 LOG.info("Using Hive-specific delimiters for output. You can override");
890 LOG.info("delimiters with --fields-terminated-by, etc.");
891 options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS);
892 }
893
894 if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) {
895 LOG.warn("Hive does not support escape characters in fields;");
896 LOG.warn("parse errors in Hive may result from using --escaped-by.");
897 }
898
899 if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) {
900 LOG.warn("Hive does not support quoted strings; parse errors");
901 LOG.warn("in Hive may result from using --enclosed-by.");
902 }
903 }
904 }
905
906 protected void validateHiveOptions(SqoopOptions options)
907 throws InvalidOptionsException {
908 // Empty; this method is present to maintain API consistency, and
909 // is reserved for future constraints on Hive options.
910 if (options.getHiveDelimsReplacement() != null
911 && options.doHiveDropDelims()) {
912 throw new InvalidOptionsException("The " + HIVE_DROP_DELIMS_ARG
913 + " option conflicts with the " + HIVE_DELIMS_REPLACEMENT_ARG
914 + " option." + HELP_STR);
915 }
916 }
917
918 protected void validateHBaseOptions(SqoopOptions options)
919 throws InvalidOptionsException {
920 if ((options.getHBaseColFamily() != null && options.getHBaseTable() == null)
921 || (options.getHBaseColFamily() == null
922 && options.getHBaseTable() != null)) {
923 throw new InvalidOptionsException(
924 "Both --hbase-table and --column-family must be set together."
925 + HELP_STR);
926 }
927 }
928
929 /**
930 * Given an array of extra arguments (usually populated via
931 * this.extraArguments), determine the offset of the first '--'
932 * argument in the list. Return 'extra.length' if there is none.
933 */
934 protected int getDashPosition(String [] extra) {
935 int dashPos = extra.length;
936 for (int i = 0; i < extra.length; i++) {
937 if (extra[i].equals("--")) {
938 dashPos = i;
939 break;
940 }
941 }
942
943 return dashPos;
944 }
945 }
946