forked from twitter/elephant-bird
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathGenericWritableConverter.java
46 lines (39 loc) · 1.59 KB
/
GenericWritableConverter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
package com.twitter.elephantbird.pig.util;
import java.io.IOException;
import com.google.common.base.Preconditions;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.Writable;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
/**
* Supports conversion between Pig bytearray ({@link DataByteArray}) and an arbitrary
* {@link Writable} implementation type. Useful for loading data from a SequenceFile when the key or
* value must be passed through to output, but otherwise goes untouched by Pig.
*
* @author Andy Schlaikjer
*/
public class GenericWritableConverter extends AbstractWritableConverter<Writable> {
  /**
   * Reusable input buffer. It is re-pointed at the incoming byte array on every call to
   * {@link #toWritable(DataByteArray)}; no synchronization is performed on it here.
   */
  private final DataInputBuffer ibuf = new DataInputBuffer();

  /**
   * Describes loaded values as Pig bytearrays.
   *
   * @return a newly created field schema whose type is {@link DataType#BYTEARRAY}
   * @throws IOException declared by the overridden method; not thrown directly by this code
   */
  @Override
  public ResourceFieldSchema getLoadSchema() throws IOException {
    ResourceFieldSchema schema = new ResourceFieldSchema();
    schema.setType(DataType.BYTEARRAY);
    return schema;
  }

  /**
   * Verifies that the field about to be stored is a Pig bytearray.
   *
   * @param schema schema of the field being stored; must not be null
   * @throws NullPointerException if {@code schema} is null
   * @throws IOException if the schema's type is anything other than {@link DataType#BYTEARRAY}
   */
  @Override
  public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    Preconditions.checkNotNull(schema, "Schema is null");
    if (schema.getType() != DataType.BYTEARRAY) {
      throw new IOException("Expected Pig type '" + DataType.findTypeName(DataType.BYTEARRAY)
          + "' but found '" + DataType.findTypeName(schema.getType()) + "'");
    }
  }

  /**
   * Deserializes the bytes held by {@code value} into the {@code writable} instance and returns
   * that same instance.
   *
   * <p>NOTE(review): {@code writable} is not declared in this class; it is presumably a field
   * inherited from {@code AbstractWritableConverter} that must be initialized before this method
   * is called. Because the same instance is populated and returned on every call, a previously
   * returned reference will observe the data read by a later call.
   *
   * @param value Pig bytearray containing the serialized form; must not be null
   * @return the {@code writable} instance after {@code readFields} has been applied to it
   * @throws NullPointerException if {@code writable} or {@code value} is null
   * @throws IOException if {@code readFields} fails while reading the bytes
   */
  @Override
  protected Writable toWritable(DataByteArray value) throws IOException {
    // Check the target first so a missing writable is reported before a bad argument.
    Preconditions.checkNotNull(writable, "Writable is null");
    // Fail with a descriptive message instead of a bare NPE from value.get().
    Preconditions.checkNotNull(value, "Value is null");
    byte[] bytes = value.get();
    // Point the shared buffer at the full array; no copy of the bytes is made here.
    ibuf.reset(bytes, bytes.length);
    writable.readFields(ibuf);
    return writable;
  }
}