src/java/org/apache/sqoop/hive/TableDefWriter.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.hive;

import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.Date;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Properties;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.sqoop.io.CodecMap;

import org.apache.sqoop.SqoopOptions;
import org.apache.sqoop.manager.ConnManager;
import org.apache.sqoop.util.FileSystemUtil;

/**
 * Creates (Hive-specific) SQL DDL statements to create tables to hold data
 * we're importing from another source.
 *
 * After we import the database into HDFS, we can inject it into Hive using
 * the CREATE TABLE and LOAD DATA INPATH statements generated by this object.
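 *
 * <p>A minimal usage sketch. The table names and option objects below are
 * placeholders, not values taken from any particular import job:</p>
 * <pre>{@code
 *   TableDefWriter writer = new TableDefWriter(options, connManager,
 *       "employees", "employees", conf, true);
 *   String createTableSql = writer.getCreateTableStmt();
 *   String loadDataSql = writer.getLoadDataStmt();
 * }</pre>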
 */
public class TableDefWriter {

  public static final Log LOG = LogFactory.getLog(
      TableDefWriter.class.getName());

  private SqoopOptions options;
  private ConnManager connManager;
  private Configuration configuration;
  private String inputTableName;
  private String outputTableName;
  private boolean commentsEnabled;

  /**
   * Creates a new TableDefWriter to generate a Hive CREATE TABLE statement.
   * @param opts program-wide options
   * @param connMgr the connection manager used to describe the table.
   * @param inputTable the name of the table to load.
   * @param outputTable the name of the Hive table to create.
   * @param config the Hadoop configuration to use to connect to the dfs
   * @param withComments if true, then tables will be created with a
   *        timestamp comment.
   */
  public TableDefWriter(final SqoopOptions opts, final ConnManager connMgr,
      final String inputTable, final String outputTable,
      final Configuration config, final boolean withComments) {
    this.options = opts;
    this.connManager = connMgr;
    this.inputTableName = inputTable;
    this.outputTableName = outputTable;
    this.configuration = config;
    this.commentsEnabled = withComments;
  }

  /**
   * Get the column names to import.
   */
  private String [] getColumnNames() {
    String [] colNames = options.getColumns();
    if (null != colNames) {
      return colNames; // user-specified column names.
    } else if (null != inputTableName) {
      return connManager.getColumnNames(inputTableName);
    } else {
      return connManager.getColumnNamesForQuery(options.getSqlQuery());
    }
  }

  /**
   * @return the CREATE TABLE statement for the table to load into Hive.
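   *
   * <p>Illustrative output only; the actual DDL depends on the configured
   * options, column types, delimiters, and compression codec:</p>
   * <pre>
   *   CREATE TABLE IF NOT EXISTS `employees` ( `id` INT, `name` STRING)
   *   ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
   *   LINES TERMINATED BY '\012' STORED AS TEXTFILE
   * </pre>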
   */
  public String getCreateTableStmt() throws IOException {
    Map<String, Integer> columnTypes;
    Properties userMapping = options.getMapColumnHive();
    boolean isHiveExternalTableSet =
        !StringUtils.isBlank(options.getHiveExternalTableDir());
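    // When an external table directory has been configured, emit
    // CREATE EXTERNAL TABLE and add a LOCATION clause at the end.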
    // Get these from the database.
    if (null != inputTableName) {
      columnTypes = connManager.getColumnTypes(inputTableName);
    } else {
      columnTypes = connManager.getColumnTypesForQuery(options.getSqlQuery());
    }

    String [] colNames = getColumnNames();
    StringBuilder sb = new StringBuilder();
    if (options.doFailIfHiveTableExists()) {
      if (isHiveExternalTableSet) {
        sb.append("CREATE EXTERNAL TABLE `");
      } else {
        sb.append("CREATE TABLE `");
      }
    } else {
      if (isHiveExternalTableSet) {
        sb.append("CREATE EXTERNAL TABLE IF NOT EXISTS `");
      } else {
        sb.append("CREATE TABLE IF NOT EXISTS `");
      }
    }

    if (options.getHiveDatabaseName() != null) {
      sb.append(options.getHiveDatabaseName()).append("`.`");
    }
    sb.append(outputTableName).append("` ( ");

    // Check that all explicitly mapped columns are present in result set
    for (Object column : userMapping.keySet()) {
      boolean found = false;
      for (String c : colNames) {
        if (c.equals(column)) {
          found = true;
          break;
        }
      }

      if (!found) {
        throw new IllegalArgumentException("No column by the name " + column
            + " found while importing data");
      }
    }

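    // Emit one "`name` type" entry per imported column, preferring any
    // user-supplied Hive type mapping over the JDBC-derived default.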
    boolean first = true;
    String partitionKey = options.getHivePartitionKey();
    for (String col : colNames) {
      if (col.equals(partitionKey)) {
        throw new IllegalArgumentException("Partition key " + col + " cannot "
            + "be a column to import.");
      }

      if (!first) {
        sb.append(", ");
      }

      first = false;

      Integer colType = columnTypes.get(col);
      String hiveColType = userMapping.getProperty(col);
      if (hiveColType == null) {
        hiveColType = connManager.toHiveType(inputTableName, col, colType);
      }
      if (null == hiveColType) {
        throw new IOException("Hive does not support the SQL type for column "
            + col);
      }

      sb.append('`').append(col).append("` ").append(hiveColType);

      if (HiveTypes.isHiveTypeImprovised(colType)) {
        LOG.warn(
            "Column " + col + " had to be cast to a less precise type in Hive");
      }
    }

    sb.append(") ");

    if (commentsEnabled) {
      DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
      String curDateStr = dateFormat.format(new Date());
      sb.append("COMMENT 'Imported by sqoop on " + curDateStr + "' ");
    }

    if (partitionKey != null) {
      sb.append("PARTITIONED BY (")
        .append(partitionKey)
        .append(" STRING) ");
    }

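    // Field and line delimiters are written in Hive's octal escape syntax.
    // LZOP-compressed data additionally needs the LZO text input format.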
    sb.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '");
    sb.append(getHiveOctalCharCode((int) options.getOutputFieldDelim()));
    sb.append("' LINES TERMINATED BY '");
    sb.append(getHiveOctalCharCode((int) options.getOutputRecordDelim()));
    String codec = options.getCompressionCodec();
    if (codec != null && (codec.equals(CodecMap.LZOP)
        || codec.equals(CodecMap.getCodecClassName(CodecMap.LZOP)))) {
      sb.append("' STORED AS INPUTFORMAT "
          + "'com.hadoop.mapred.DeprecatedLzoTextInputFormat'");
      sb.append(" OUTPUTFORMAT "
          + "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'");
    } else {
      sb.append("' STORED AS TEXTFILE");
    }

    if (isHiveExternalTableSet) {
      // add location
      sb.append(" LOCATION '" + options.getHiveExternalTableDir() + "'");
    }

    LOG.debug("Create statement: " + sb.toString());
    return sb.toString();
  }

  /**
   * @return the LOAD DATA statement to import the data in HDFS into Hive.
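   *
   * <p>Illustrative output only; the actual path, table name, and partition
   * clause depend on the configured options:</p>
   * <pre>
   *   LOAD DATA INPATH 'hdfs://namenode/user/hive/employees'
   *   INTO TABLE `employees`
   * </pre>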
   */
  public String getLoadDataStmt() throws IOException {
    Path finalPath = getFinalPath();

    StringBuilder sb = new StringBuilder();
    sb.append("LOAD DATA INPATH '");
    sb.append(finalPath.toString() + "'");
    if (options.doOverwriteHiveTable()) {
      sb.append(" OVERWRITE");
    }
    sb.append(" INTO TABLE `");
    if (options.getHiveDatabaseName() != null) {
      sb.append(options.getHiveDatabaseName()).append("`.`");
    }
    sb.append(outputTableName);
    sb.append('`');

    if (options.getHivePartitionKey() != null) {
      sb.append(" PARTITION (")
        .append(options.getHivePartitionKey())
        .append("='").append(options.getHivePartitionValue())
        .append("')");
    }

    LOG.debug("Load statement: " + sb.toString());
    return sb.toString();
  }

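  /**
   * @return the fully qualified HDFS path holding the imported data:
   * the warehouse directory (if any) combined with either the user-specified
   * target directory or the input table name.
   */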
  public Path getFinalPath() throws IOException {
    String warehouseDir = options.getWarehouseDir();
    if (null == warehouseDir) {
      warehouseDir = "";
    } else if (!warehouseDir.endsWith(File.separator)) {
      warehouseDir = warehouseDir + File.separator;
    }

    // Final path is determined in the following order:
    // 1. Use target dir if the user specified.
    // 2. Use input table name.
    String tablePath = null;
    String targetDir = options.getTargetDir();
    if (null != targetDir) {
      tablePath = warehouseDir + targetDir;
    } else {
      tablePath = warehouseDir + inputTableName;
    }
    return FileSystemUtil.makeQualified(new Path(tablePath), configuration);
  }

  /**
   * Return a string identifying the character to use as a delimiter
   * in Hive, in octal representation.
   * Hive can specify delimiter characters in the form '\ooo' where
   * ooo is a three-digit octal number between 000 and 177. Values
   * may not be truncated ('\12' is wrong; '\012' is ok) nor may they
   * be zero-prefixed (e.g., '\0177' is wrong).
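   * For example, a comma (decimal 44) is rendered as '\054' and a tab
   * character (decimal 9) as '\011'.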
   *
   * @param charNum the character to use as a delimiter
   * @return a string of the form "\ooo" where ooo is an octal number
   * in [000, 177].
   * @throws IllegalArgumentException if charNum &gt; 0177.
   */
  public static String getHiveOctalCharCode(int charNum) {
    if (charNum > 0177) {
      throw new IllegalArgumentException(
          "Character " + charNum + " is an out-of-range delimiter");
    }

    return String.format("\\%03o", charNum);
  }

}