From 8485b9e2630a0619ad59ffb29739477b46d6a527 Mon Sep 17 00:00:00 2001
From: Balaji Varadarajan
Date: Thu, 11 Oct 2018 11:35:32 -0700
Subject: [PATCH] Fix regression which broke HoodieInputFormat handling of
 non-hoodie datasets

---
 .../uber/hoodie/hadoop/HoodieInputFormat.java |  3 +-
 .../uber/hoodie/integ/ITTestHoodieSanity.java | 30 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java
index 4c0d548d7d0d..c7931b5ce7e1 100644
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java
@@ -23,6 +23,7 @@
 import com.uber.hoodie.common.table.TableFileSystemView;
 import com.uber.hoodie.common.table.timeline.HoodieInstant;
 import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
+import com.uber.hoodie.exception.DatasetNotFoundException;
 import com.uber.hoodie.exception.HoodieIOException;
 import com.uber.hoodie.exception.InvalidDatasetException;
 import java.io.IOException;
@@ -161,7 +162,7 @@ private Map<HoodieTableMetaClient, List<FileStatus>> groupFileStatus(FileStatus[
           metadata = getTableMetaClient(status.getPath().getFileSystem(conf),
               status.getPath().getParent());
           nonHoodieBasePath = null;
-        } catch (InvalidDatasetException e) {
+        } catch (DatasetNotFoundException | InvalidDatasetException e) {
           LOG.info("Handling a non-hoodie path " + status.getPath());
           metadata = null;
           nonHoodieBasePath = status.getPath().getParent().toString();
diff --git a/hoodie-integ-test/src/test/java/com/uber/hoodie/integ/ITTestHoodieSanity.java b/hoodie-integ-test/src/test/java/com/uber/hoodie/integ/ITTestHoodieSanity.java
index 7cc87469669d..9a1b694ff33a 100644
--- a/hoodie-integ-test/src/test/java/com/uber/hoodie/integ/ITTestHoodieSanity.java
+++ b/hoodie-integ-test/src/test/java/com/uber/hoodie/integ/ITTestHoodieSanity.java
@@ -66,6 +66,9 @@ public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception
    */
   public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey)
       throws Exception {
+    String hdfsPath = "/" + hiveTableName;
+    String hdfsUrl = "hdfs://namenode" + hdfsPath;
+
     // Drop Table if it exists
     {
       String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
@@ -91,6 +94,7 @@ public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singleP
       cmd = new String[]{
           HOODIE_JAVA_APP,
           "--hive-sync",
+          "--table-path", hdfsUrl,
           "--hive-url", HIVE_SERVER_JDBC_URL,
           "--hive-table", hiveTableName
       };
@@ -98,6 +102,7 @@
       cmd = new String[]{
           HOODIE_JAVA_APP,
           "--hive-sync",
+          "--table-path", hdfsUrl,
           "--hive-url", HIVE_SERVER_JDBC_URL,
           "--use-multi-partition-keys",
           "--hive-table", hiveTableName
@@ -135,5 +140,30 @@
       Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
           Integer.parseInt(stdout.trim()));
     }
+
+    // Make the HDFS dataset non-hoodie and run the same query
+    // Checks for interoperability with non-hoodie tables
+    {
+      // Delete Hoodie directory to make it non-hoodie dataset
+      String[] cmd = new String[]{
+          "hadoop", "fs", "-rm", "-r", hdfsPath + "/.hoodie"
+      };
+      TestExecStartResultCallback callback =
+          executeCommandInDocker(ADHOC_1_CONTAINER, cmd, true);
+      String stderr = callback.getStderr().toString().trim();
+      String stdout = callback.getStdout().toString().trim();
+      LOG.info("Got output for (" + Arrays.toString(cmd) + ") : (" + stdout + ")");
+      LOG.info("Got error output for (" + Arrays.toString(cmd) + ") : (" + stderr + ")");
+
+      // Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
+      String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
+      callback = executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
+      stderr = callback.getStderr().toString().trim();
+      stdout = callback.getStdout().toString().trim();
+      LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
+      LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
+      Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
+          Integer.parseInt(stdout.trim()));
+    }
   }
 }
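
Note for reviewers: a minimal, self-contained sketch of the control flow this patch restores. Every class and method below is a hypothetical stand-in (not the real com.uber.hoodie API); the real getTableMetaClient looks for hoodie metadata above each file's parent path and throws DatasetNotFoundException when no such directory exists, which groupFileStatus must treat as "plain non-hoodie path" rather than letting the query fail.

    // NonHoodiePathGroupingSketch.java -- illustrative only; compiles and runs
    // standalone on Java 9+.
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class NonHoodiePathGroupingSketch {

      // Stand-ins for the two com.uber.hoodie.exception types named in the diff.
      static class DatasetNotFoundException extends RuntimeException {
        DatasetNotFoundException(String path) {
          super("No hoodie metadata found above " + path);
        }
      }

      static class InvalidDatasetException extends RuntimeException {
      }

      // Stand-in for getTableMetaClient(fs, partitionPath): succeeds only when
      // the path falls under a (simulated) hoodie base path.
      static String getTableMetaClient(String partitionPath, List<String> hoodieBasePaths) {
        for (String base : hoodieBasePaths) {
          if (partitionPath.startsWith(base)) {
            return base;
          }
        }
        throw new DatasetNotFoundException(partitionPath);
      }

      // Mirrors the patched groupFileStatus(): a metadata lookup that fails with
      // EITHER exception demotes the path to a non-hoodie bucket instead of
      // aborting split computation -- the multi-catch the regression had narrowed
      // to InvalidDatasetException alone.
      static Map<String, List<String>> groupFileStatus(List<String> paths,
          List<String> hoodieBasePaths) {
        Map<String, List<String>> grouped = new HashMap<>();
        for (String path : paths) {
          String bucket;
          try {
            bucket = "hoodie:" + getTableMetaClient(path, hoodieBasePaths);
          } catch (DatasetNotFoundException | InvalidDatasetException e) {
            bucket = "non-hoodie:" + path;
          }
          grouped.computeIfAbsent(bucket, k -> new ArrayList<>()).add(path);
        }
        return grouped;
      }

      public static void main(String[] args) {
        List<String> hoodieBasePaths = List.of("/warehouse/hoodie_tbl");
        List<String> inputs = List.of(
            "/warehouse/hoodie_tbl/2018/10/11/f1.parquet",
            "/warehouse/plain_tbl/part-00000.parquet"); // no hoodie metadata above
        // Expected: the plain path lands in a non-hoodie bucket; no exception escapes.
        System.out.println(groupFileStatus(inputs, hoodieBasePaths));
      }
    }

The new ITTestHoodieSanity block exercises exactly this path end to end: it deletes the .hoodie directory to turn the dataset into a plain one, then re-runs the count query, which must now succeed (returning the unfiltered 200 rows) instead of throwing.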