Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Fixes #258 #268

Merged
merged 1 commit into from
Sep 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ public long getTotalRecords() {
return totalRecords;
}

/**
* Returns the current value of the {@code totalErrorRecords} field.
*
* @return the value held in {@code totalErrorRecords}
*/
public long getTotalErrorRecords() {
return totalErrorRecords;
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fyi @ovj


@Override
public String toString() {
final StringBuilder sb = new StringBuilder("WriteStatus {");
Expand Down
29 changes: 29 additions & 0 deletions hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,35 @@ public abstract JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeSt
*/
public abstract boolean rollbackCommit(String commitTime);

/**
* An index is "global" if its {@link HoodieKey} to fileID mapping does not depend on the
* {@code partitionPath}. Such an implementation obtains the same mapping for two hoodie keys
* that have the same {@code recordKey} but different {@code partitionPath}.
*
* @return {@code true} if the index implementation is global in nature, {@code false} otherwise
*/
public abstract boolean isGlobal();

/**
* Used by storage to determine whether it is safe to send inserts straight to the log,
* i.e. to have a {@link com.uber.hoodie.common.model.FileSlice} with no data file.
*
* @return {@code true} if the implementation has this capability, {@code false} otherwise
*/
public abstract boolean canIndexLogFiles();


/**
* An index is "implicit" with respect to storage if just writing new data to a file slice
* updates the index as well. Storage uses this to save memory footprint in certain cases.
*
* @return {@code true} if writing new data to a file slice also updates the index,
* {@code false} otherwise
*/
public abstract boolean isImplicitWithStorage();


public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException {
switch (config.getIndexType()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,36 @@ public WriteStatus call(WriteStatus writeStatus) {

/**
* Reports success for the rollback request without performing any work; the actual
* rollback handling is still pending (see the TODO in the body).
*
* @param commitTime the commit time supplied by the caller; not read by this implementation
* @return always {@code true}
*/
@Override
public boolean rollbackCommit(String commitTime) {
// TODO (weiy)
return true;
}

/**
* This index is global: it only looks up by recordKey.
*
* @return always {@code true}
*/
@Override
public boolean isGlobal() {
return true;
}

/**
* Log files can be indexed, since the mapping is available in HBase already.
*
* NOTE(review): the enclosing class is not visible in this hunk; confirm that the HBase
* reference applies to this index implementation.
*
* @return always {@code true}
*/
@Override
public boolean canIndexLogFiles() {
return true;
}

/**
* This index is not implicit with storage: it needs to be explicitly updated after a
* storage write.
*
* @return always {@code false}
*/
@Override
public boolean isImplicitWithStorage() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,36 @@ public boolean rollbackCommit(String commitTime) {
return true;
}

/**
* This index is not global, since the lookup depends on the partitionPath.
*
* @return always {@code false}
*/
@Override
public boolean isGlobal() {
return false;
}

/**
* Log files cannot be indexed: there are no indexes into log files yet.
*
* @return always {@code false}
*/
@Override
public boolean canIndexLogFiles() {
return false;
}

/**
* This index is implicit with storage: the bloom filters are stored in the same data files.
*
* @return always {@code true}
*/
@Override
public boolean isImplicitWithStorage() {
return true;
}

/**
* If we don't have key ranges, we have no choice but to compare against the file.
* If we do, then only compare against the file if the record key falls in range.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
* - Could increase write amplification on copy-on-write storage since inserts always rewrite files
* - Not global.
*
*
*
*/
public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {

Expand Down Expand Up @@ -88,4 +90,35 @@ public boolean rollbackCommit(String commitTime) {
// nothing to rollback in the index.
return true;
}

/**
* This index is not global: bucketing is still done within each partition.
*
* @return always {@code false}
*/
@Override
public boolean isGlobal() {
return false;
}

/**
* Log files can be handled: since indexing is just a deterministic hash, the file group can
* be identified correctly even without an index on the actual log file.
*
* @return always {@code true}
*/
@Override
public boolean canIndexLogFiles() {
return true;
}

/**
* This index is implicit with storage: indexing is just a hash function.
*
* @return always {@code true}
*/
@Override
public boolean isImplicitWithStorage() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,34 @@ public boolean rollbackCommit(String commitTime) {
// not the other way around
return true;
}

/**
* This index is global: it only looks up by recordKey.
*
* @return always {@code true}
*/
@Override
public boolean isGlobal() {
return true;
}

/**
* Log files can be indexed, since the mapping is available in HBase already.
*
* NOTE(review): the enclosing class is not visible in this hunk; confirm that the HBase
* reference applies to this index implementation.
*
* @return always {@code true}
*/
@Override
public boolean canIndexLogFiles() {
return true;
}

/**
* This index is not implicit with storage: it needs to be explicitly updated after a
* storage write.
*
* @return always {@code false}
*/
@Override
public boolean isImplicitWithStorage() {
return false;
}
}