
Commit

Fix regression which broke HudiInputFormat handling of non-hoodie datasets
bvaradar authored and vinothchandar committed Oct 16, 2018
1 parent 1fca9b2 commit 8485b9e
Showing 2 changed files with 32 additions and 1 deletion.
File 1 of 2
@@ -23,6 +23,7 @@
 import com.uber.hoodie.common.table.TableFileSystemView;
 import com.uber.hoodie.common.table.timeline.HoodieInstant;
 import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
+import com.uber.hoodie.exception.DatasetNotFoundException;
 import com.uber.hoodie.exception.HoodieIOException;
 import com.uber.hoodie.exception.InvalidDatasetException;
 import java.io.IOException;
@@ -161,7 +162,7 @@ private Map<HoodieTableMetaClient, List<FileStatus>> groupFileStatus(FileStatus[
         metadata = getTableMetaClient(status.getPath().getFileSystem(conf),
             status.getPath().getParent());
         nonHoodieBasePath = null;
-      } catch (InvalidDatasetException e) {
+      } catch (DatasetNotFoundException | InvalidDatasetException e) {
         LOG.info("Handling a non-hoodie path " + status.getPath());
         metadata = null;
         nonHoodieBasePath = status.getPath().getParent().toString();
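Why this hunk fixes the regression: judging from the imports added above and the behavior this commit restores, getTableMetaClient throws DatasetNotFoundException when a path has no .hoodie metadata folder at all, but the old catch clause only handled InvalidDatasetException, so a query over a plain (non-hoodie) directory aborted instead of taking the pass-through branch. A minimal sketch of the grouping pattern this restores, assembled from the context lines of the hunk (the map bookkeeping around the hunk is assumed, not shown in the diff):

    // Sketch only: group each input file by the Hoodie dataset it belongs to,
    // treating any path whose metadata lookup fails as a plain non-hoodie file.
    // The surrounding grouping/bookkeeping is an assumption, not the project's code.
    Map<HoodieTableMetaClient, List<FileStatus>> grouped = new HashMap<>();
    List<FileStatus> nonHoodieFiles = new ArrayList<>();
    for (FileStatus status : fileStatuses) {
      try {
        HoodieTableMetaClient metadata = getTableMetaClient(
            status.getPath().getFileSystem(conf), status.getPath().getParent());
        grouped.computeIfAbsent(metadata, k -> new ArrayList<>()).add(status);
      } catch (DatasetNotFoundException | InvalidDatasetException e) {
        // No .hoodie folder (or an invalid one): pass the file through untouched.
        LOG.info("Handling a non-hoodie path " + status.getPath());
        nonHoodieFiles.add(status);
      }
    }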
File 2 of 2
@@ -66,6 +66,9 @@ public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception
    */
   public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey) throws Exception {
 
+    String hdfsPath = "/" + hiveTableName;
+    String hdfsUrl = "hdfs://namenode" + hdfsPath;
+
     // Drop Table if it exists
     {
       String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
@@ -91,13 +94,15 @@ public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singleP
       cmd = new String[]{
           HOODIE_JAVA_APP,
           "--hive-sync",
+          "--table-path", hdfsUrl,
           "--hive-url", HIVE_SERVER_JDBC_URL,
           "--hive-table", hiveTableName
       };
     } else {
       cmd = new String[]{
           HOODIE_JAVA_APP,
           "--hive-sync",
+          "--table-path", hdfsUrl,
           "--hive-url", HIVE_SERVER_JDBC_URL,
           "--use-multi-partition-keys",
           "--hive-table", hiveTableName
@@ -135,5 +140,30 @@ public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singleP
       Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
           Integer.parseInt(stdout.trim()));
     }
+
+    // Make the HDFS dataset non-hoodie and run the same query,
+    // checking interoperability with non-hoodie tables.
+    {
+      // Delete the .hoodie directory to turn this into a non-hoodie dataset
+      String[] cmd = new String[]{
+          "hadoop", "fs", "-rm", "-r", hdfsPath + "/.hoodie"
+      };
+      TestExecStartResultCallback callback =
+          executeCommandInDocker(ADHOC_1_CONTAINER, cmd, true);
+      String stderr = callback.getStderr().toString().trim();
+      String stdout = callback.getStdout().toString().trim();
+      LOG.info("Got output for (" + Arrays.toString(cmd) + ") : (" + stdout + ")");
+      LOG.info("Got error output for (" + Arrays.toString(cmd) + ") : (" + stderr + ")");
+
+      // Run the count query again. Without Hoodie metadata, all file versions are included, so the count is wrong.
+      String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
+      callback = executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
+      stderr = callback.getStderr().toString().trim();
+      stdout = callback.getStdout().toString().trim();
+      LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
+      LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
+      Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
+          Integer.parseInt(stdout.trim()));
+    }
   }
 }
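The arithmetic behind the new assertion: the table holds 100 logical rows, and the doubled count of 200 implies a second Parquet version of each file remains on HDFS (presumably left by an earlier upsert in this test); once .hoodie is deleted, nothing filters file versions down to the latest commit, so Hive counts both copies. The before/after checks reduce to one Hive count run twice, sketched here with a hypothetical helper (countRowsViaHive is not in the patch; the other names come from the diff):

    // Hypothetical helper: run "select count(1)" through the Hive console in
    // the adhoc container and parse the single-number result.
    private long countRowsViaHive(String hiveTableName) throws Exception {
      String[] query = getHiveConsoleCommand("select count(1) from " + hiveTableName);
      TestExecStartResultCallback callback =
          executeCommandInDocker(ADHOC_1_CONTAINER, query, true);
      return Long.parseLong(callback.getStdout().toString().trim());
    }

    // Usage: expect 100 while Hoodie filters to the latest file version,
    // 200 once .hoodie is deleted and every version is visible to Hive.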
