Skip to content

Commit 1ef9226

Browse files
committed
Fix dotted column names in Hive
Side effect: this changes the return value of ResultProxy.keys() and RowProxy.keys() unless the hive_raw_colnames execution option is enabled. After this change, those methods return undotted names (e.g. 'number_of_rows' rather than 'one_row.number_of_rows').
1 parent 75a8337 commit 1ef9226

File tree

5 files changed

+64
-34
lines changed

5 files changed

+64
-34
lines changed

pyhive/sqlalchemy_hive.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,33 @@ def visit_DATETIME(self, type_):
154154
return 'TIMESTAMP'
155155

156156

157+
class HiveExecutionContext(default.DefaultExecutionContext):
158+
"""This is pretty much the same as SQLiteExecutionContext to work around the same issue.
159+
160+
http://docs.sqlalchemy.org/en/latest/dialects/sqlite.html#dotted-column-names
161+
162+
engine = create_engine('hive://...', execution_options={'hive_raw_colnames': True})
163+
"""
164+
165+
@util.memoized_property
166+
def _preserve_raw_colnames(self):
167+
# Ideally, this would also gate on hive.resultset.use.unique.column.names
168+
return self.execution_options.get('hive_raw_colnames', False)
169+
170+
def _translate_colname(self, colname):
171+
# Adjust for dotted column names.
172+
# When hive.resultset.use.unique.column.names is true (the default), Hive returns column
173+
# names as "tablename.colname" in cursor.description.
174+
if not self._preserve_raw_colnames and '.' in colname:
175+
return colname.split('.')[-1], colname
176+
else:
177+
return colname, None
178+
179+
157180
class HiveDialect(default.DefaultDialect):
158181
name = b'hive'
159182
driver = b'thrift'
183+
execution_ctx_cls = HiveExecutionContext
160184
preparer = HiveIdentifierPreparer
161185
statement_compiler = HiveCompiler
162186
supports_views = True

pyhive/tests/test_hive.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,28 @@ def connect(self):
3838
def test_description(self, cursor):
3939
cursor.execute('SELECT * FROM one_row')
4040

41-
desc = [('number_of_rows', 'INT_TYPE', None, None, None, None, True)]
41+
desc = [('one_row.number_of_rows', 'INT_TYPE', None, None, None, None, True)]
4242
self.assertEqual(cursor.description, desc)
4343

4444
@with_cursor
4545
def test_complex(self, cursor):
4646
cursor.execute('SELECT * FROM one_row_complex')
4747
self.assertEqual(cursor.description, [
48-
('boolean', 'BOOLEAN_TYPE', None, None, None, None, True),
49-
('tinyint', 'TINYINT_TYPE', None, None, None, None, True),
50-
('smallint', 'SMALLINT_TYPE', None, None, None, None, True),
51-
('int', 'INT_TYPE', None, None, None, None, True),
52-
('bigint', 'BIGINT_TYPE', None, None, None, None, True),
53-
('float', 'FLOAT_TYPE', None, None, None, None, True),
54-
('double', 'DOUBLE_TYPE', None, None, None, None, True),
55-
('string', 'STRING_TYPE', None, None, None, None, True),
56-
('timestamp', 'TIMESTAMP_TYPE', None, None, None, None, True),
57-
('binary', 'BINARY_TYPE', None, None, None, None, True),
58-
('array', 'ARRAY_TYPE', None, None, None, None, True),
59-
('map', 'MAP_TYPE', None, None, None, None, True),
60-
('struct', 'STRUCT_TYPE', None, None, None, None, True),
61-
('union', 'UNION_TYPE', None, None, None, None, True),
62-
('decimal', 'DECIMAL_TYPE', None, None, None, None, True),
48+
('one_row_complex.boolean', 'BOOLEAN_TYPE', None, None, None, None, True),
49+
('one_row_complex.tinyint', 'TINYINT_TYPE', None, None, None, None, True),
50+
('one_row_complex.smallint', 'SMALLINT_TYPE', None, None, None, None, True),
51+
('one_row_complex.int', 'INT_TYPE', None, None, None, None, True),
52+
('one_row_complex.bigint', 'BIGINT_TYPE', None, None, None, None, True),
53+
('one_row_complex.float', 'FLOAT_TYPE', None, None, None, None, True),
54+
('one_row_complex.double', 'DOUBLE_TYPE', None, None, None, None, True),
55+
('one_row_complex.string', 'STRING_TYPE', None, None, None, None, True),
56+
('one_row_complex.timestamp', 'TIMESTAMP_TYPE', None, None, None, None, True),
57+
('one_row_complex.binary', 'BINARY_TYPE', None, None, None, None, True),
58+
('one_row_complex.array', 'ARRAY_TYPE', None, None, None, None, True),
59+
('one_row_complex.map', 'MAP_TYPE', None, None, None, None, True),
60+
('one_row_complex.struct', 'STRUCT_TYPE', None, None, None, None, True),
61+
('one_row_complex.union', 'UNION_TYPE', None, None, None, None, True),
62+
('one_row_complex.decimal', 'DECIMAL_TYPE', None, None, None, None, True),
6363
])
6464
rows = cursor.fetchall()
6565
expected = [(

pyhive/tests/test_sqlalchemy_hive.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,30 @@ class TestSqlAlchemyHive(unittest.TestCase, SqlAlchemyTestCase):
4141
def create_engine(self):
4242
return create_engine('hive://localhost:10000/default')
4343

44+
@with_engine_connection
45+
def test_dotted_column_names(self, engine, connection):
46+
"""When Hive returns a dotted column name, both the non-dotted version should be available
47+
as an attribute, and the dotted version should remain available as a key.
48+
"""
49+
row = connection.execute('SELECT * FROM one_row').fetchone()
50+
assert row.keys() == ['number_of_rows']
51+
assert 'number_of_rows' in row
52+
assert row.number_of_rows == 1
53+
assert row['number_of_rows'] == 1
54+
assert getattr(row, 'one_row.number_of_rows') == 1
55+
assert row['one_row.number_of_rows'] == 1
56+
57+
@with_engine_connection
58+
def test_dotted_column_names_raw(self, engine, connection):
59+
"""When Hive returns a dotted column name, and raw mode is on, nothing should be modified.
60+
"""
61+
row = connection.execution_options(hive_raw_colnames=True)\
62+
.execute('SELECT * FROM one_row').fetchone()
63+
assert row.keys() == ['one_row.number_of_rows']
64+
assert 'number_of_rows' not in row
65+
assert getattr(row, 'one_row.number_of_rows') == 1
66+
assert row['one_row.number_of_rows'] == 1
67+
4468
@with_engine_connection
4569
def test_reflect_select(self, engine, connection):
4670
"""reflecttable should be able to fill in a table from the name"""

scripts/travis-conf/hive/hive-site-ldap.xml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,6 @@
1313
<name>fs.defaultFS</name>
1414
<value>file:///</value>
1515
</property>
16-
<!--
17-
TODO tests rely having result set column names unprefixed
18-
This could be improved by having an option to strip out prefixes when it would not result in
19-
ambiguity.
20-
-->
21-
<property>
22-
<name>hive.resultset.use.unique.column.names</name>
23-
<value>false</value>
24-
</property>
2516
<property>
2617
<name>hive.server2.authentication</name>
2718
<value>LDAP</value>

scripts/travis-conf/hive/hive-site.xml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,4 @@
1313
<name>fs.defaultFS</name>
1414
<value>file:///</value>
1515
</property>
16-
<!--
17-
TODO tests rely having result set column names unprefixed
18-
This could be improved by having an option to strip out prefixes when it would not result in
19-
ambiguity.
20-
-->
21-
<property>
22-
<name>hive.resultset.use.unique.column.names</name>
23-
<value>false</value>
24-
</property>
2516
</configuration>

0 commit comments

Comments
 (0)