package mobvista.dmp.format;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;
import java.util.ArrayList;

/**
 * A new-API (mapreduce) {@link FileInputFormat} for reading RCFile data as
 * (LongWritable, BytesRefArrayWritable) records, with optional column
 * projection configured through Hive's {@link ColumnProjectionUtils}.
 *
 * @param <K> key type emitted by the record reader
 * @param <V> value type holding one row's column bytes
 */
public class RCFileInputFormat<K extends LongWritable, V extends BytesRefArrayWritable>
        extends FileInputFormat<K, V> {

    /**
     * RCFile uses the same sync-marker scheme as SequenceFile, so a split must
     * be at least one sync interval long for the reader to find a sync point.
     */
    @Override
    protected long getFormatMinSplitSize() {
        return SequenceFile.SYNC_INTERVAL;
    }

    /**
     * Appends the given column IDs to the read-column projection recorded in
     * the configuration, so the reader skips all other columns.
     */
    public static void setColumnID(Configuration conf, ArrayList<Integer> list) {
        ColumnProjectionUtils.appendReadColumnIDs(conf, list);
    }

    /**
     * Returns the column IDs currently projected in the configuration.
     */
    public static ArrayList<Integer> getReadColumnIDs(Configuration conf) {
        return (ArrayList<Integer>) ColumnProjectionUtils.getReadColumnIDs(conf);
    }

    @SuppressWarnings("unchecked")
    @Override
    public RecordReader<K, V> createRecordReader(InputSplit split,
                                                 TaskAttemptContext context) throws IOException {
        return new RCFileRecordReader(context.getConfiguration(), (FileSplit) split);
    }
}
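
// Minimal usage sketch (not part of the original class): a driver that projects
// two columns and plugs this format into a mapreduce Job. The job name, input
// path, and column IDs below are hypothetical; fully qualified names stand in
// for the extra imports a real driver would declare.
//
//   org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
//   java.util.ArrayList<Integer> cols = new java.util.ArrayList<>();
//   cols.add(0);                                // read column 0 ...
//   cols.add(2);                                // ... and column 2 only
//   RCFileInputFormat.setColumnID(conf, cols);  // record the projection in the conf
//
//   org.apache.hadoop.mapreduce.Job job =
//       org.apache.hadoop.mapreduce.Job.getInstance(conf, "rcfile-scan");
//   job.setInputFormatClass(RCFileInputFormat.class);
//   org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(
//       job, new org.apache.hadoop.fs.Path("/hypothetical/rcfile/input"));
//
//   // The mapper then receives (LongWritable, BytesRefArrayWritable) pairs,
//   // one BytesRefWritable entry per projected column of each row.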