c307907f3ddc1753153880cd1783de9be8b1f442
[sqoop.git] / src / java / com / cloudera / sqoop / tool / BaseSqoopTool.java
1 /**
2 * Licensed to Cloudera, Inc. under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. Cloudera, Inc. licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package com.cloudera.sqoop.tool;
20
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.sql.SQLException;
26 import java.util.Arrays;
27 import java.util.Properties;
28
29 import org.apache.commons.cli.CommandLine;
30 import org.apache.commons.cli.Option;
31 import org.apache.commons.cli.OptionBuilder;
32 import org.apache.commons.cli.OptionGroup;
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.util.StringUtils;
36 import org.apache.log4j.Category;
37 import org.apache.log4j.Level;
38 import org.apache.log4j.Logger;
39
40 import com.cloudera.sqoop.ConnFactory;
41 import com.cloudera.sqoop.Sqoop;
42 import com.cloudera.sqoop.SqoopOptions;
43 import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
44 import com.cloudera.sqoop.cli.RelatedOptions;
45 import com.cloudera.sqoop.cli.ToolOptions;
46 import com.cloudera.sqoop.lib.DelimiterSet;
47 import com.cloudera.sqoop.manager.ConnManager;
48 import com.cloudera.sqoop.metastore.JobData;
49
50 /**
51 * Layer on top of SqoopTool that provides some basic common code
52 * that most SqoopTool implementations will use.
53 *
54 * Subclasses should call init() at the top of their run() method,
55 * and call destroy() at the end in a finally block.
56 */
57 public abstract class BaseSqoopTool extends SqoopTool {
58
/** Logger shared by this class; named after BaseSqoopTool. */
public static final Log LOG = LogFactory.getLog(
    BaseSqoopTool.class.getName());

/** Hint appended to the option-validation error messages in this class. */
public static final String HELP_STR = "\nTry --help for usage instructions.";

// Here are all the arguments that are used by the standard sqoop tools.
// Their names are recorded here so that tools can share them and their
// use consistently. The argument parser applies the leading '--' to each
// string.

// Connection, table and directory arguments.
public static final String CONNECT_STRING_ARG = "connect";
public static final String CONN_MANAGER_CLASS_NAME =
    "connection-manager";
public static final String CONNECT_PARAM_FILE = "connection-param-file";
public static final String DRIVER_ARG = "driver";
public static final String USERNAME_ARG = "username";
public static final String PASSWORD_ARG = "password";
public static final String PASSWORD_PROMPT_ARG = "P";
public static final String DIRECT_ARG = "direct";
public static final String BATCH_ARG = "batch";
public static final String TABLE_ARG = "table";
public static final String STAGING_TABLE_ARG = "staging-table";
public static final String CLEAR_STAGING_TABLE_ARG = "clear-staging-table";
public static final String COLUMNS_ARG = "columns";
public static final String SPLIT_BY_ARG = "split-by";
public static final String WHERE_ARG = "where";
public static final String HADOOP_HOME_ARG = "hadoop-home";
public static final String HIVE_HOME_ARG = "hive-home";
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
public static final String TARGET_DIR_ARG = "target-dir";
public static final String APPEND_ARG = "append";

// Null-representation arguments (registered by getCodeGenOpts() and
// applied by applyCommonOptions()).
public static final String NULL_STRING = "null-string";
public static final String INPUT_NULL_STRING = "input-null-string";
public static final String NULL_NON_STRING = "null-non-string";
public static final String INPUT_NULL_NON_STRING = "input-null-non-string";

// File format, Hive, parallelism, compression, delimiter, code generation
// and miscellaneous arguments.
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
public static final String FMT_TEXTFILE_ARG = "as-textfile";
public static final String FMT_AVRODATAFILE_ARG = "as-avrodatafile";
public static final String HIVE_IMPORT_ARG = "hive-import";
public static final String HIVE_TABLE_ARG = "hive-table";
public static final String HIVE_OVERWRITE_ARG = "hive-overwrite";
public static final String HIVE_DROP_DELIMS_ARG = "hive-drop-import-delims";
public static final String HIVE_PARTITION_KEY_ARG = "hive-partition-key";
public static final String HIVE_PARTITION_VALUE_ARG = "hive-partition-value";
public static final String CREATE_HIVE_TABLE_ARG =
    "create-hive-table";
public static final String NUM_MAPPERS_ARG = "num-mappers";
public static final String NUM_MAPPERS_SHORT_ARG = "m";
public static final String COMPRESS_ARG = "compress";
public static final String COMPRESSION_CODEC_ARG = "compression-codec";
public static final String COMPRESS_SHORT_ARG = "z";
public static final String DIRECT_SPLIT_SIZE_ARG = "direct-split-size";
public static final String INLINE_LOB_LIMIT_ARG = "inline-lob-limit";
public static final String FETCH_SIZE_ARG = "fetch-size";
public static final String EXPORT_PATH_ARG = "export-dir";
public static final String FIELDS_TERMINATED_BY_ARG = "fields-terminated-by";
public static final String LINES_TERMINATED_BY_ARG = "lines-terminated-by";
public static final String OPTIONALLY_ENCLOSED_BY_ARG =
    "optionally-enclosed-by";
public static final String ENCLOSED_BY_ARG = "enclosed-by";
public static final String ESCAPED_BY_ARG = "escaped-by";
public static final String MYSQL_DELIMITERS_ARG = "mysql-delimiters";
public static final String INPUT_FIELDS_TERMINATED_BY_ARG =
    "input-fields-terminated-by";
public static final String INPUT_LINES_TERMINATED_BY_ARG =
    "input-lines-terminated-by";
public static final String INPUT_OPTIONALLY_ENCLOSED_BY_ARG =
    "input-optionally-enclosed-by";
public static final String INPUT_ENCLOSED_BY_ARG = "input-enclosed-by";
public static final String INPUT_ESCAPED_BY_ARG = "input-escaped-by";
public static final String CODE_OUT_DIR_ARG = "outdir";
public static final String BIN_OUT_DIR_ARG = "bindir";
public static final String PACKAGE_NAME_ARG = "package-name";
public static final String CLASS_NAME_ARG = "class-name";
public static final String JAR_FILE_NAME_ARG = "jar-file";
public static final String SQL_QUERY_ARG = "query";
public static final String SQL_QUERY_SHORT_ARG = "e";
public static final String VERBOSE_ARG = "verbose";
public static final String HELP_ARG = "help";
public static final String UPDATE_KEY_ARG = "update-key";

// Arguments for incremental imports.
public static final String INCREMENT_TYPE_ARG = "incremental";
public static final String INCREMENT_COL_ARG = "check-column";
public static final String INCREMENT_LAST_VAL_ARG = "last-value";

// HBase arguments.
public static final String HBASE_TABLE_ARG = "hbase-table";
public static final String HBASE_COL_FAM_ARG = "column-family";
public static final String HBASE_ROW_KEY_ARG = "hbase-row-key";
public static final String HBASE_CREATE_TABLE_ARG = "hbase-create-table";


// Arguments for the saved job management system.
public static final String STORAGE_METASTORE_ARG = "meta-connect";
public static final String JOB_CMD_CREATE_ARG = "create";
public static final String JOB_CMD_DELETE_ARG = "delete";
public static final String JOB_CMD_EXEC_ARG = "exec";
public static final String JOB_CMD_LIST_ARG = "list";
public static final String JOB_CMD_SHOW_ARG = "show";

// Arguments for the metastore.
public static final String METASTORE_SHUTDOWN_ARG = "shutdown";


// Arguments for merging datasets.
public static final String NEW_DATASET_ARG = "new-data";
public static final String OLD_DATASET_ARG = "onto";
public static final String MERGE_KEY_ARG = "merge-key";
168
/** Creates a tool using the no-argument SqoopTool constructor. */
public BaseSqoopTool() {
  super();
}

/**
 * Creates a tool, forwarding the given name to the SqoopTool constructor.
 *
 * @param toolName the name handed to the superclass constructor.
 */
public BaseSqoopTool(String toolName) {
  super(toolName);
}
175
176 protected ConnManager manager;
177
178 public ConnManager getManager() {
179 return manager;
180 }
181
182 public void setManager(ConnManager mgr) {
183 this.manager = mgr;
184 }
185
186 /**
187 * Should be called at the beginning of the run() method to initialize
188 * the connection manager, etc. If this succeeds (returns true), it should
189 * be paired with a call to destroy().
190 * @return true on success, false on failure.
191 */
192 protected boolean init(SqoopOptions sqoopOpts) {
193 // Get the connection to the database.
194 try {
195 JobData data = new JobData(sqoopOpts, this);
196 this.manager = new ConnFactory(sqoopOpts.getConf()).getManager(data);
197 return true;
198 } catch (Exception e) {
199 LOG.error("Got error creating database manager: "
200 + StringUtils.stringifyException(e));
201 if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) {
202 throw new RuntimeException(e);
203 }
204 }
205
206 return false;
207 }
208
209 /**
210 * Should be called in a 'finally' block at the end of the run() method.
211 */
212 protected void destroy(SqoopOptions sqoopOpts) {
213 if (null != manager) {
214 try {
215 manager.close();
216 } catch (SQLException sqlE) {
217 LOG.warn("Error while closing connection: " + sqlE);
218 }
219 }
220 }
221
222 /**
223 * Examines a subset of the arrray presented, and determines if it
224 * contains any non-empty arguments. If so, logs the arguments
225 * and returns true.
226 *
227 * @param argv an array of strings to check.
228 * @param offset the first element of the array to check
229 * @param len the number of elements to check
230 * @return true if there are any non-null, non-empty argument strings
231 * present.
232 */
233 protected boolean hasUnrecognizedArgs(String [] argv, int offset, int len) {
234 if (argv == null) {
235 return false;
236 }
237
238 boolean unrecognized = false;
239 boolean printedBanner = false;
240 for (int i = offset; i < Math.min(argv.length, offset + len); i++) {
241 if (argv[i] != null && argv[i].length() > 0) {
242 if (!printedBanner) {
243 LOG.error("Error parsing arguments for " + getToolName() + ":");
244 printedBanner = true;
245 }
246 LOG.error("Unrecognized argument: " + argv[i]);
247 unrecognized = true;
248 }
249 }
250
251 return unrecognized;
252 }
253
254 protected boolean hasUnrecognizedArgs(String [] argv) {
255 if (null == argv) {
256 return false;
257 }
258 return hasUnrecognizedArgs(argv, 0, argv.length);
259 }
260
261
262 /**
263 * If argv contains an entry "--", return an array containing all elements
264 * after the "--" separator. Otherwise, return null.
265 * @param argv a set of arguments to scan for the subcommand arguments.
266 */
267 protected String [] getSubcommandArgs(String [] argv) {
268 if (null == argv) {
269 return null;
270 }
271
272 for (int i = 0; i < argv.length; i++) {
273 if (argv[i].equals("--")) {
274 return Arrays.copyOfRange(argv, i + 1, argv.length);
275 }
276 }
277
278 return null;
279 }
280
281 /**
282 * @return RelatedOptions used by job management tools.
283 */
284 protected RelatedOptions getJobOptions() {
285 RelatedOptions relatedOpts = new RelatedOptions(
286 "Job management arguments");
287 relatedOpts.addOption(OptionBuilder.withArgName("jdbc-uri")
288 .hasArg()
289 .withDescription("Specify JDBC connect string for the metastore")
290 .withLongOpt(STORAGE_METASTORE_ARG)
291 .create());
292
293 // Create an option-group surrounding the operations a user
294 // can perform on jobs.
295 OptionGroup group = new OptionGroup();
296 group.addOption(OptionBuilder.withArgName("job-id")
297 .hasArg()
298 .withDescription("Create a new saved job")
299 .withLongOpt(JOB_CMD_CREATE_ARG)
300 .create());
301 group.addOption(OptionBuilder.withArgName("job-id")
302 .hasArg()
303 .withDescription("Delete a saved job")
304 .withLongOpt(JOB_CMD_DELETE_ARG)
305 .create());
306 group.addOption(OptionBuilder.withArgName("job-id")
307 .hasArg()
308 .withDescription("Show the parameters for a saved job")
309 .withLongOpt(JOB_CMD_SHOW_ARG)
310 .create());
311
312 Option execOption = OptionBuilder.withArgName("job-id")
313 .hasArg()
314 .withDescription("Run a saved job")
315 .withLongOpt(JOB_CMD_EXEC_ARG)
316 .create();
317 group.addOption(execOption);
318
319 group.addOption(OptionBuilder
320 .withDescription("List saved jobs")
321 .withLongOpt(JOB_CMD_LIST_ARG)
322 .create());
323
324 relatedOpts.addOptionGroup(group);
325
326 // Since the "common" options aren't used in the job tool,
327 // add these settings here.
328 relatedOpts.addOption(OptionBuilder
329 .withDescription("Print more information while working")
330 .withLongOpt(VERBOSE_ARG)
331 .create());
332 relatedOpts.addOption(OptionBuilder
333 .withDescription("Print usage instructions")
334 .withLongOpt(HELP_ARG)
335 .create());
336
337 return relatedOpts;
338 }
339
340 /**
341 * @return RelatedOptions used by most/all Sqoop tools.
342 */
343 protected RelatedOptions getCommonOptions() {
344 // Connection args (common)
345 RelatedOptions commonOpts = new RelatedOptions("Common arguments");
346 commonOpts.addOption(OptionBuilder.withArgName("jdbc-uri")
347 .hasArg().withDescription("Specify JDBC connect string")
348 .withLongOpt(CONNECT_STRING_ARG)
349 .create());
350 commonOpts.addOption(OptionBuilder.withArgName("class-name")
351 .hasArg().withDescription("Specify connection manager class name")
352 .withLongOpt(CONN_MANAGER_CLASS_NAME)
353 .create());
354 commonOpts.addOption(OptionBuilder.withArgName("properties-file")
355 .hasArg().withDescription("Specify connection parameters file")
356 .withLongOpt(CONNECT_PARAM_FILE)
357 .create());
358 commonOpts.addOption(OptionBuilder.withArgName("class-name")
359 .hasArg().withDescription("Manually specify JDBC driver class to use")
360 .withLongOpt(DRIVER_ARG)
361 .create());
362 commonOpts.addOption(OptionBuilder.withArgName("username")
363 .hasArg().withDescription("Set authentication username")
364 .withLongOpt(USERNAME_ARG)
365 .create());
366 commonOpts.addOption(OptionBuilder.withArgName("password")
367 .hasArg().withDescription("Set authentication password")
368 .withLongOpt(PASSWORD_ARG)
369 .create());
370 commonOpts.addOption(OptionBuilder
371 .withDescription("Read password from console")
372 .create(PASSWORD_PROMPT_ARG));
373
374 commonOpts.addOption(OptionBuilder.withArgName("dir")
375 .hasArg().withDescription("Override $HADOOP_HOME")
376 .withLongOpt(HADOOP_HOME_ARG)
377 .create());
378
379 // misc (common)
380 commonOpts.addOption(OptionBuilder
381 .withDescription("Print more information while working")
382 .withLongOpt(VERBOSE_ARG)
383 .create());
384 commonOpts.addOption(OptionBuilder
385 .withDescription("Print usage instructions")
386 .withLongOpt(HELP_ARG)
387 .create());
388
389 return commonOpts;
390 }
391
392 /**
393 * @param explicitHiveImport true if the user has an explicit --hive-import
394 * available, or false if this is implied by the tool.
395 * @return options governing interaction with Hive
396 */
397 protected RelatedOptions getHiveOptions(boolean explicitHiveImport) {
398 RelatedOptions hiveOpts = new RelatedOptions("Hive arguments");
399 if (explicitHiveImport) {
400 hiveOpts.addOption(OptionBuilder
401 .withDescription("Import tables into Hive "
402 + "(Uses Hive's default delimiters if none are set.)")
403 .withLongOpt(HIVE_IMPORT_ARG)
404 .create());
405 }
406
407 hiveOpts.addOption(OptionBuilder.withArgName("dir")
408 .hasArg().withDescription("Override $HIVE_HOME")
409 .withLongOpt(HIVE_HOME_ARG)
410 .create());
411 hiveOpts.addOption(OptionBuilder
412 .withDescription("Overwrite existing data in the Hive table")
413 .withLongOpt(HIVE_OVERWRITE_ARG)
414 .create());
415 hiveOpts.addOption(OptionBuilder
416 .withDescription("Fail if the target hive table exists")
417 .withLongOpt(CREATE_HIVE_TABLE_ARG)
418 .create());
419 hiveOpts.addOption(OptionBuilder.withArgName("table-name")
420 .hasArg()
421 .withDescription("Sets the table name to use when importing to hive")
422 .withLongOpt(HIVE_TABLE_ARG)
423 .create());
424 hiveOpts.addOption(OptionBuilder
425 .withDescription("Drop Hive record \\0x01 and row delimiters "
426 + "(\\n\\r) from imported string fields")
427 .withLongOpt(HIVE_DROP_DELIMS_ARG)
428 .create());
429 hiveOpts.addOption(OptionBuilder.withArgName("partition-key")
430 .hasArg()
431 .withDescription("Sets the partition key to use when importing to hive")
432 .withLongOpt(HIVE_PARTITION_KEY_ARG)
433 .create());
434 hiveOpts.addOption(OptionBuilder.withArgName("partition-value")
435 .hasArg()
436 .withDescription("Sets the partition value to use when importing "
437 + "to hive")
438 .withLongOpt(HIVE_PARTITION_VALUE_ARG)
439 .create());
440 return hiveOpts;
441 }
442
443 /**
444 * @return options governing output format delimiters
445 */
446 protected RelatedOptions getOutputFormatOptions() {
447 RelatedOptions formatOpts = new RelatedOptions(
448 "Output line formatting arguments");
449 formatOpts.addOption(OptionBuilder.withArgName("char")
450 .hasArg()
451 .withDescription("Sets the field separator character")
452 .withLongOpt(FIELDS_TERMINATED_BY_ARG)
453 .create());
454 formatOpts.addOption(OptionBuilder.withArgName("char")
455 .hasArg()
456 .withDescription("Sets the end-of-line character")
457 .withLongOpt(LINES_TERMINATED_BY_ARG)
458 .create());
459 formatOpts.addOption(OptionBuilder.withArgName("char")
460 .hasArg()
461 .withDescription("Sets a field enclosing character")
462 .withLongOpt(OPTIONALLY_ENCLOSED_BY_ARG)
463 .create());
464 formatOpts.addOption(OptionBuilder.withArgName("char")
465 .hasArg()
466 .withDescription("Sets a required field enclosing character")
467 .withLongOpt(ENCLOSED_BY_ARG)
468 .create());
469 formatOpts.addOption(OptionBuilder.withArgName("char")
470 .hasArg()
471 .withDescription("Sets the escape character")
472 .withLongOpt(ESCAPED_BY_ARG)
473 .create());
474 formatOpts.addOption(OptionBuilder
475 .withDescription("Uses MySQL's default delimiter set: "
476 + "fields: , lines: \\n escaped-by: \\ optionally-enclosed-by: '")
477 .withLongOpt(MYSQL_DELIMITERS_ARG)
478 .create());
479
480 return formatOpts;
481 }
482
483 /**
484 * @return options governing input format delimiters.
485 */
486 protected RelatedOptions getInputFormatOptions() {
487 RelatedOptions inputFormatOpts =
488 new RelatedOptions("Input parsing arguments");
489 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
490 .hasArg()
491 .withDescription("Sets the input field separator")
492 .withLongOpt(INPUT_FIELDS_TERMINATED_BY_ARG)
493 .create());
494 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
495 .hasArg()
496 .withDescription("Sets the input end-of-line char")
497 .withLongOpt(INPUT_LINES_TERMINATED_BY_ARG)
498 .create());
499 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
500 .hasArg()
501 .withDescription("Sets a field enclosing character")
502 .withLongOpt(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)
503 .create());
504 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
505 .hasArg()
506 .withDescription("Sets a required field encloser")
507 .withLongOpt(INPUT_ENCLOSED_BY_ARG)
508 .create());
509 inputFormatOpts.addOption(OptionBuilder.withArgName("char")
510 .hasArg()
511 .withDescription("Sets the input escape character")
512 .withLongOpt(INPUT_ESCAPED_BY_ARG)
513 .create());
514
515 return inputFormatOpts;
516 }
517
518 /**
519 * @param multiTable true if these options will be used for bulk code-gen.
520 * @return options related to code generation.
521 */
522 protected RelatedOptions getCodeGenOpts(boolean multiTable) {
523 RelatedOptions codeGenOpts =
524 new RelatedOptions("Code generation arguments");
525 codeGenOpts.addOption(OptionBuilder.withArgName("dir")
526 .hasArg()
527 .withDescription("Output directory for generated code")
528 .withLongOpt(CODE_OUT_DIR_ARG)
529 .create());
530 codeGenOpts.addOption(OptionBuilder.withArgName("dir")
531 .hasArg()
532 .withDescription("Output directory for compiled objects")
533 .withLongOpt(BIN_OUT_DIR_ARG)
534 .create());
535 codeGenOpts.addOption(OptionBuilder.withArgName("name")
536 .hasArg()
537 .withDescription("Put auto-generated classes in this package")
538 .withLongOpt(PACKAGE_NAME_ARG)
539 .create());
540 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
541 .hasArg()
542 .withDescription("Null string representation")
543 .withLongOpt(NULL_STRING)
544 .create());
545 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
546 .hasArg()
547 .withDescription("Input null string representation")
548 .withLongOpt(INPUT_NULL_STRING)
549 .create());
550 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
551 .hasArg()
552 .withDescription("Null non-string representation")
553 .withLongOpt(NULL_NON_STRING)
554 .create());
555 codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
556 .hasArg()
557 .withDescription("Input null non-string representation")
558 .withLongOpt(INPUT_NULL_NON_STRING)
559 .create());
560 if (!multiTable) {
561 codeGenOpts.addOption(OptionBuilder.withArgName("name")
562 .hasArg()
563 .withDescription("Sets the generated class name. "
564 + "This overrides --" + PACKAGE_NAME_ARG + ". When combined "
565 + "with --" + JAR_FILE_NAME_ARG + ", sets the input class.")
566 .withLongOpt(CLASS_NAME_ARG)
567 .create());
568 }
569 return codeGenOpts;
570 }
571
572 protected RelatedOptions getHBaseOptions() {
573 RelatedOptions hbaseOpts =
574 new RelatedOptions("HBase arguments");
575 hbaseOpts.addOption(OptionBuilder.withArgName("table")
576 .hasArg()
577 .withDescription("Import to <table> in HBase")
578 .withLongOpt(HBASE_TABLE_ARG)
579 .create());
580 hbaseOpts.addOption(OptionBuilder.withArgName("family")
581 .hasArg()
582 .withDescription("Sets the target column family for the import")
583 .withLongOpt(HBASE_COL_FAM_ARG)
584 .create());
585 hbaseOpts.addOption(OptionBuilder.withArgName("col")
586 .hasArg()
587 .withDescription("Specifies which input column to use as the row key")
588 .withLongOpt(HBASE_ROW_KEY_ARG)
589 .create());
590 hbaseOpts.addOption(OptionBuilder
591 .withDescription("If specified, create missing HBase tables")
592 .withLongOpt(HBASE_CREATE_TABLE_ARG)
593 .create());
594
595 return hbaseOpts;
596 }
597
598
599
600 /**
601 * Apply common command-line to the state.
602 */
603 protected void applyCommonOptions(CommandLine in, SqoopOptions out)
604 throws InvalidOptionsException {
605
606 // common options.
607 if (in.hasOption(VERBOSE_ARG)) {
608 // Immediately switch into DEBUG logging.
609 Category sqoopLogger = Logger.getLogger(
610 Sqoop.class.getName()).getParent();
611 sqoopLogger.setLevel(Level.DEBUG);
612 LOG.debug("Enabled debug logging.");
613 }
614
615 if (in.hasOption(HELP_ARG)) {
616 ToolOptions toolOpts = new ToolOptions();
617 configureOptions(toolOpts);
618 printHelp(toolOpts);
619 throw new InvalidOptionsException("");
620 }
621
622 if (in.hasOption(CONNECT_STRING_ARG)) {
623 out.setConnectString(in.getOptionValue(CONNECT_STRING_ARG));
624 }
625
626 if (in.hasOption(CONN_MANAGER_CLASS_NAME)) {
627 out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
628 }
629
630 if (in.hasOption(CONNECT_PARAM_FILE)) {
631 File paramFile = new File(in.getOptionValue(CONNECT_PARAM_FILE));
632 if (!paramFile.exists()) {
633 throw new InvalidOptionsException(
634 "Specified connection parameter file not found: " + paramFile);
635 }
636 InputStream inStream = null;
637 Properties connectionParams = new Properties();
638 try {
639 inStream = new FileInputStream(
640 new File(in.getOptionValue(CONNECT_PARAM_FILE)));
641 connectionParams.load(inStream);
642 } catch (IOException ex) {
643 LOG.warn("Failed to load connection parameter file", ex);
644 throw new InvalidOptionsException(
645 "Error while loading connection parameter file: "
646 + ex.getMessage());
647 } finally {
648 if (inStream != null) {
649 try {
650 inStream.close();
651 } catch (IOException ex) {
652 LOG.warn("Failed to close input stream", ex);
653 }
654 }
655 }
656 LOG.debug("Loaded connection parameters: " + connectionParams);
657 out.setConnectionParams(connectionParams);
658 }
659
660 if (in.hasOption(NULL_STRING)) {
661 out.setNullStringValue(in.getOptionValue(NULL_STRING));
662 }
663
664 if (in.hasOption(INPUT_NULL_STRING)) {
665 out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
666 }
667
668 if (in.hasOption(NULL_NON_STRING)) {
669 out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
670 }
671
672 if (in.hasOption(INPUT_NULL_NON_STRING)) {
673 out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
674 }
675
676 if (in.hasOption(DRIVER_ARG)) {
677 out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
678 }
679
680 if (in.hasOption(USERNAME_ARG)) {
681 out.setUsername(in.getOptionValue(USERNAME_ARG));
682 if (null == out.getPassword()) {
683 // Set password to empty if the username is set first,
684 // to ensure that they're either both null or neither is.
685 out.setPassword("");
686 }
687 }
688
689 if (in.hasOption(PASSWORD_ARG)) {
690 LOG.warn("Setting your password on the command-line is insecure. "
691 + "Consider using -" + PASSWORD_PROMPT_ARG + " instead.");
692 out.setPassword(in.getOptionValue(PASSWORD_ARG));
693 }
694
695 if (in.hasOption(PASSWORD_PROMPT_ARG)) {
696 out.setPasswordFromConsole();
697 }
698
699 if (in.hasOption(HADOOP_HOME_ARG)) {
700 out.setHadoopHome(in.getOptionValue(HADOOP_HOME_ARG));
701 }
702
703 }
704
705 protected void applyHiveOptions(CommandLine in, SqoopOptions out)
706 throws InvalidOptionsException {
707
708 if (in.hasOption(HIVE_HOME_ARG)) {
709 out.setHiveHome(in.getOptionValue(HIVE_HOME_ARG));
710 }
711
712 if (in.hasOption(HIVE_IMPORT_ARG)) {
713 out.setHiveImport(true);
714 }
715
716 if (in.hasOption(HIVE_OVERWRITE_ARG)) {
717 out.setOverwriteHiveTable(true);
718 }
719
720 if (in.hasOption(CREATE_HIVE_TABLE_ARG)) {
721 out.setFailIfHiveTableExists(true);
722 }
723
724 if (in.hasOption(HIVE_TABLE_ARG)) {
725 out.setHiveTableName(in.getOptionValue(HIVE_TABLE_ARG));
726 }
727
728 if (in.hasOption(HIVE_DROP_DELIMS_ARG)) {
729 out.setHiveDropDelims(true);
730 }
731
732 if (in.hasOption(HIVE_PARTITION_KEY_ARG)) {
733 out.setHivePartitionKey(in.getOptionValue(HIVE_PARTITION_KEY_ARG));
734 }
735
736 if (in.hasOption(HIVE_PARTITION_VALUE_ARG)) {
737 out.setHivePartitionValue(in.getOptionValue(HIVE_PARTITION_VALUE_ARG));
738 }
739 }
740
741 protected void applyOutputFormatOptions(CommandLine in, SqoopOptions out)
742 throws InvalidOptionsException {
743 if (in.hasOption(FIELDS_TERMINATED_BY_ARG)) {
744 out.setFieldsTerminatedBy(SqoopOptions.toChar(
745 in.getOptionValue(FIELDS_TERMINATED_BY_ARG)));
746 out.setExplicitDelims(true);
747 }
748
749 if (in.hasOption(LINES_TERMINATED_BY_ARG)) {
750 out.setLinesTerminatedBy(SqoopOptions.toChar(
751 in.getOptionValue(LINES_TERMINATED_BY_ARG)));
752 out.setExplicitDelims(true);
753 }
754
755 if (in.hasOption(OPTIONALLY_ENCLOSED_BY_ARG)) {
756 out.setEnclosedBy(SqoopOptions.toChar(
757 in.getOptionValue(OPTIONALLY_ENCLOSED_BY_ARG)));
758 out.setOutputEncloseRequired(false);
759 out.setExplicitDelims(true);
760 }
761
762 if (in.hasOption(ENCLOSED_BY_ARG)) {
763 out.setEnclosedBy(SqoopOptions.toChar(
764 in.getOptionValue(ENCLOSED_BY_ARG)));
765 out.setOutputEncloseRequired(true);
766 out.setExplicitDelims(true);
767 }
768
769 if (in.hasOption(ESCAPED_BY_ARG)) {
770 out.setEscapedBy(SqoopOptions.toChar(
771 in.getOptionValue(ESCAPED_BY_ARG)));
772 out.setExplicitDelims(true);
773 }
774
775 if (in.hasOption(MYSQL_DELIMITERS_ARG)) {
776 out.setOutputEncloseRequired(false);
777 out.setFieldsTerminatedBy(',');
778 out.setLinesTerminatedBy('\n');
779 out.setEscapedBy('\\');
780 out.setEnclosedBy('\'');
781 out.setExplicitDelims(true);
782 }
783 }
784
785 protected void applyInputFormatOptions(CommandLine in, SqoopOptions out)
786 throws InvalidOptionsException {
787 if (in.hasOption(INPUT_FIELDS_TERMINATED_BY_ARG)) {
788 out.setInputFieldsTerminatedBy(SqoopOptions.toChar(
789 in.getOptionValue(INPUT_FIELDS_TERMINATED_BY_ARG)));
790 }
791
792 if (in.hasOption(INPUT_LINES_TERMINATED_BY_ARG)) {
793 out.setInputLinesTerminatedBy(SqoopOptions.toChar(
794 in.getOptionValue(INPUT_LINES_TERMINATED_BY_ARG)));
795 }
796
797 if (in.hasOption(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)) {
798 out.setInputEnclosedBy(SqoopOptions.toChar(
799 in.getOptionValue(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)));
800 out.setInputEncloseRequired(false);
801 }
802
803 if (in.hasOption(INPUT_ENCLOSED_BY_ARG)) {
804 out.setInputEnclosedBy(SqoopOptions.toChar(
805 in.getOptionValue(INPUT_ENCLOSED_BY_ARG)));
806 out.setInputEncloseRequired(true);
807 }
808
809 if (in.hasOption(INPUT_ESCAPED_BY_ARG)) {
810 out.setInputEscapedBy(SqoopOptions.toChar(
811 in.getOptionValue(INPUT_ESCAPED_BY_ARG)));
812 }
813 }
814
815 protected void applyCodeGenOptions(CommandLine in, SqoopOptions out,
816 boolean multiTable) throws InvalidOptionsException {
817 if (in.hasOption(CODE_OUT_DIR_ARG)) {
818 out.setCodeOutputDir(in.getOptionValue(CODE_OUT_DIR_ARG));
819 }
820
821 if (in.hasOption(BIN_OUT_DIR_ARG)) {
822 out.setJarOutputDir(in.getOptionValue(BIN_OUT_DIR_ARG));
823 }
824
825 if (in.hasOption(PACKAGE_NAME_ARG)) {
826 out.setPackageName(in.getOptionValue(PACKAGE_NAME_ARG));
827 }
828
829 if (!multiTable && in.hasOption(CLASS_NAME_ARG)) {
830 out.setClassName(in.getOptionValue(CLASS_NAME_ARG));
831 }
832 }
833
834 protected void applyHBaseOptions(CommandLine in, SqoopOptions out) {
835 if (in.hasOption(HBASE_TABLE_ARG)) {
836 out.setHBaseTable(in.getOptionValue(HBASE_TABLE_ARG));
837 }
838
839 if (in.hasOption(HBASE_COL_FAM_ARG)) {
840 out.setHBaseColFamily(in.getOptionValue(HBASE_COL_FAM_ARG));
841 }
842
843 if (in.hasOption(HBASE_ROW_KEY_ARG)) {
844 out.setHBaseRowKeyColumn(in.getOptionValue(HBASE_ROW_KEY_ARG));
845 }
846
847 if (in.hasOption(HBASE_CREATE_TABLE_ARG)) {
848 out.setCreateHBaseTable(true);
849 }
850 }
851
852 protected void validateCommonOptions(SqoopOptions options)
853 throws InvalidOptionsException {
854 if (options.getConnectString() == null) {
855 throw new InvalidOptionsException(
856 "Error: Required argument --connect is missing."
857 + HELP_STR);
858 }
859 }
860
861 protected void validateCodeGenOptions(SqoopOptions options)
862 throws InvalidOptionsException {
863 if (options.getClassName() != null && options.getPackageName() != null) {
864 throw new InvalidOptionsException(
865 "--class-name overrides --package-name. You cannot use both."
866 + HELP_STR);
867 }
868 }
869
870 protected void validateOutputFormatOptions(SqoopOptions options)
871 throws InvalidOptionsException {
872 if (options.doHiveImport()) {
873 if (!options.explicitDelims()) {
874 // user hasn't manually specified delimiters, and wants to import
875 // straight to Hive. Use Hive-style delimiters.
876 LOG.info("Using Hive-specific delimiters for output. You can override");
877 LOG.info("delimiters with --fields-terminated-by, etc.");
878 options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS);
879 }
880
881 if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) {
882 LOG.warn("Hive does not support escape characters in fields;");
883 LOG.warn("parse errors in Hive may result from using --escaped-by.");
884 }
885
886 if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) {
887 LOG.warn("Hive does not support quoted strings; parse errors");
888 LOG.warn("in Hive may result from using --enclosed-by.");
889 }
890 }
891 }
892
/**
 * Validation hook for Hive options. This implementation performs no checks.
 *
 * @param options the options to check; not read by this implementation.
 * @throws InvalidOptionsException never thrown by this implementation.
 */
protected void validateHiveOptions(SqoopOptions options)
    throws InvalidOptionsException {
  // Empty; this method is present to maintain API consistency, and
  // is reserved for future constraints on Hive options.
}
898
899 protected void validateHBaseOptions(SqoopOptions options)
900 throws InvalidOptionsException {
901 if ((options.getHBaseColFamily() != null && options.getHBaseTable() == null)
902 || (options.getHBaseColFamily() == null
903 && options.getHBaseTable() != null)) {
904 throw new InvalidOptionsException(
905 "Both --hbase-table and --column-family must be set together."
906 + HELP_STR);
907 }
908 }
909
910 /**
911 * Given an array of extra arguments (usually populated via
912 * this.extraArguments), determine the offset of the first '--'
913 * argument in the list. Return 'extra.length' if there is none.
914 */
915 protected int getDashPosition(String [] extra) {
916 int dashPos = extra.length;
917 for (int i = 0; i < extra.length; i++) {
918 if (extra[i].equals("--")) {
919 dashPos = i;
920 break;
921 }
922 }
923
924 return dashPos;
925 }
926 }
927