SQOOP-319. Support for replacing Hive delimiters.
[sqoop.git] / src / java / com / cloudera / sqoop / lib / FieldFormatter.java
1 /**
2 * Licensed to Cloudera, Inc. under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. Cloudera, Inc. licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package com.cloudera.sqoop.lib;
20
21 /**
22 * Static helper class that will help format data with quotes and escape chars.
23 */
24 public final class FieldFormatter {
25
26 private FieldFormatter() { }
27
28 /**
29 * only pass fields that are strings when --hive-drop-delims option is on.
30 * @param str
31 * @param delimiters
32 * @return
33 */
34 public static String hiveStringDropDelims(String str,
35 DelimiterSet delimiters) {
36 return hiveStringReplaceDelims(str, "", delimiters);
37 }
38
39 /**
40 * replace hive delimiters with a user-defined string passed to the
41 * --hive-delims-replacement option.
42 * @param str
43 * @param delimiters
44 * @return
45 */
46 public static String hiveStringReplaceDelims(String str, String replacement,
47 DelimiterSet delimiters) {
48 String droppedDelims = str.replaceAll("\\n|\\r|\01", replacement);
49 return escapeAndEnclose(droppedDelims, delimiters);
50 }
51
52 /**
53 * Takes an input string representing the value of a field, encloses it in
54 * enclosing chars, and escapes any occurrences of such characters in the
55 * middle. The escape character itself is also escaped if it appears in the
56 * text of the field. If there is no enclosing character, then any
57 * delimiters present in the field body are escaped instead.
58 *
59 * The field is enclosed only if:
60 * enclose != '\000', and:
61 * encloseRequired is true, or
62 * one of the fields-terminated-by or lines-terminated-by characters is
63 * present in the string.
64 *
65 * Escaping is not performed if the escape char is '\000'.
66 *
67 * @param str - The user's string to escape and enclose
68 * @param delimiters - The DelimiterSet to use identifying the escape and
69 * enclose semantics. If the specified escape or enclose characters are
70 * '\000', those operations are not performed.
71 * @return the escaped, enclosed version of 'str'.
72 */
73 public static String escapeAndEnclose(String str, DelimiterSet delimiters) {
74
75 char escape = delimiters.getEscapedBy();
76 char enclose = delimiters.getEnclosedBy();
77 boolean encloseRequired = delimiters.isEncloseRequired();
78
79 // true if we can use an escape character.
80 boolean escapingLegal = DelimiterSet.NULL_CHAR != escape;
81 String withEscapes;
82
83 if (null == str) {
84 return null;
85 }
86
87 if (escapingLegal) {
88 // escaping is legal. Escape any instances of the escape char itself.
89 withEscapes = str.replace("" + escape, "" + escape + escape);
90 } else {
91 // no need to double-escape
92 withEscapes = str;
93 }
94
95 if (DelimiterSet.NULL_CHAR == enclose) {
96 // The enclose-with character was left unset, so we can't enclose items.
97
98 if (escapingLegal) {
99 // If the user has used the fields-terminated-by or
100 // lines-terminated-by characters in the string, escape them if we
101 // have an escape character.
102 String fields = "" + delimiters.getFieldsTerminatedBy();
103 String lines = "" + delimiters.getLinesTerminatedBy();
104 withEscapes = withEscapes.replace(fields, "" + escape + fields);
105 withEscapes = withEscapes.replace(lines, "" + escape + lines);
106 }
107
108 // No enclosing possible, so now return this.
109 return withEscapes;
110 }
111
112 // if we have an enclosing character, and escaping is legal, then the
113 // encloser must always be escaped.
114 if (escapingLegal) {
115 withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
116 }
117
118 boolean actuallyDoEnclose = encloseRequired;
119 if (!actuallyDoEnclose) {
120 // check if the string requires enclosing.
121 char [] mustEncloseFor = new char[2];
122 mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
123 mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
124 for (char reason : mustEncloseFor) {
125 if (str.indexOf(reason) != -1) {
126 actuallyDoEnclose = true;
127 break;
128 }
129 }
130 }
131
132 if (actuallyDoEnclose) {
133 return "" + enclose + withEscapes + enclose;
134 } else {
135 return withEscapes;
136 }
137 }
138 }