Commit 5506e778 by arpitgupta

Merge branch 'master' into dal

parents cd0e3950 74c54aa9
......@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
......@@ -263,17 +264,22 @@ public class HiveMetaStoreBridge {
tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
tableRef.set("name", hiveTable.getTableName());
tableRef.set("owner", hiveTable.getOwner());
//todo fix
tableRef.set("createTime", hiveTable.getLastAccessTime());
tableRef.set("createTime", hiveTable.getMetadata().getProperty(hive_metastoreConstants.DDL_TIME));
tableRef.set("lastAccessTime", hiveTable.getLastAccessTime());
tableRef.set("retention", hiveTable.getRetention());
tableRef.set(HiveDataModelGenerator.COMMENT, hiveTable.getParameters().get(HiveDataModelGenerator.COMMENT));
// add reference to the database
tableRef.set("dbName", dbReference);
List<Referenceable> colList = getColumns(hiveTable.getCols());
tableRef.set("columns", colList);
// add reference to the StorageDescriptor
StorageDescriptor storageDesc = hiveTable.getSd();
Referenceable sdReferenceable = fillStorageDescStruct(storageDesc);
Referenceable sdReferenceable = fillStorageDescStruct(storageDesc, colList);
tableRef.set("sd", sdReferenceable);
// add reference to the Partition Keys
......@@ -293,8 +299,6 @@ public class HiveMetaStoreBridge {
tableRef.set("tableType", hiveTable.getTableType().name());
tableRef.set("temporary", hiveTable.isTemporary());
List<Referenceable> colList = getColumns(hiveTable.getAllCols());
tableRef.set("columns", colList);
tableRef = createInstance(tableRef);
} else {
......@@ -388,7 +392,7 @@ public class HiveMetaStoreBridge {
indexRef.set("origTableName", index.getOrigTableName());
indexRef.set("indexTableName", index.getIndexTableName());
Referenceable sdReferenceable = fillStorageDescStruct(index.getSd());
Referenceable sdReferenceable = fillStorageDescStruct(index.getSd(), null);
indexRef.set("sd", sdReferenceable);
indexRef.set("parameters", index.getParameters());
......@@ -398,7 +402,7 @@ public class HiveMetaStoreBridge {
createInstance(indexRef);
}
private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc) throws Exception {
private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc, List<Referenceable> colList) throws Exception {
LOG.debug("Filling storage descriptor information for " + storageDesc);
Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
......@@ -415,6 +419,8 @@ public class HiveMetaStoreBridge {
serdeInfoStruct.set("parameters", serdeInfo.getParameters());
sdReferenceable.set("serdeInfo", serdeInfoStruct);
sdReferenceable.set(HiveDataModelGenerator.STORAGE_NUM_BUCKETS, storageDesc.getNumBuckets());
sdReferenceable.set(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS, storageDesc.isStoredAsSubDirectories());
// Will need to revisit this after we fix typesystem.
/*
......@@ -433,8 +439,15 @@ public class HiveMetaStoreBridge {
}
*/
List<Referenceable> fieldsList = getColumns(storageDesc.getCols());
sdReferenceable.set("cols", fieldsList);
//Use the passed column list if not null, ex: use same references for table and SD
List<FieldSchema> columns = storageDesc.getCols();
if (columns != null && !columns.isEmpty()) {
if (colList != null) {
sdReferenceable.set("cols", colList);
} else {
sdReferenceable.set("cols", getColumns(columns));
}
}
List<Struct> sortColsStruct = new ArrayList<>();
for (Order sortcol : storageDesc.getSortCols()) {
......@@ -472,7 +485,7 @@ public class HiveMetaStoreBridge {
Referenceable colReferenceable = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
colReferenceable.set("name", fs.getName());
colReferenceable.set("type", fs.getType());
colReferenceable.set("comment", fs.getComment());
colReferenceable.set(HiveDataModelGenerator.COMMENT, fs.getComment());
colList.add(createInstance(colReferenceable));
}
......
......@@ -55,6 +55,10 @@ public class HiveDataModelGenerator {
private final Map<String, EnumTypeDefinition> enumTypeDefinitionMap;
private final Map<String, StructTypeDefinition> structTypeDefinitionMap;
public static final String COMMENT = "comment";
public static final String STORAGE_NUM_BUCKETS = "numBuckets";
public static final String STORAGE_IS_STORED_AS_SUB_DIRS = "storedAsSubDirectories";
public HiveDataModelGenerator() {
classTypeDefinitions = new HashMap<>();
enumTypeDefinitionMap = new HashMap<>();
......@@ -237,7 +241,7 @@ public class HiveDataModelGenerator {
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("compressed", DataTypes.BOOLEAN_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("numBuckets", DataTypes.INT_TYPE.getName(),
new AttributeDefinition(STORAGE_NUM_BUCKETS, DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("serdeInfo", HiveDataTypes.HIVE_SERDE.getName(),
Multiplicity.OPTIONAL, false, null),
......@@ -251,7 +255,7 @@ public class HiveDataModelGenerator {
Multiplicity.OPTIONAL, false, null),
//new AttributeDefinition("skewedInfo", DefinedTypes.HIVE_SKEWEDINFO.getName(),
// Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("storedAsSubDirectories", DataTypes.BOOLEAN_TYPE.getName(),
new AttributeDefinition(STORAGE_IS_STORED_AS_SUB_DIRS, DataTypes.BOOLEAN_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
......@@ -326,7 +330,7 @@ public class HiveDataModelGenerator {
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("type", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("comment", DataTypes.STRING_TYPE.getName(),
new AttributeDefinition(COMMENT, DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
......@@ -377,6 +381,8 @@ public class HiveDataModelGenerator {
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition(COMMENT, DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("retention", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", HiveDataTypes.HIVE_STORAGEDESC.getName(),
......
......@@ -21,12 +21,15 @@ package org.apache.hadoop.metadata.hive.hook;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.apache.hadoop.metadata.typesystem.persistence.Id;
import org.apache.log4j.spi.LoggerFactory;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
......@@ -118,7 +121,7 @@ public class HiveHookIT {
private String createTable(boolean partition) throws Exception {
String tableName = tableName();
runCommand("create table " + tableName + "(id int, name string)" + (partition ? " partitioned by(dt string)"
runCommand("create table " + tableName + "(id int, name string) comment 'table comment' " + (partition ? " partitioned by(dt string)"
: ""));
return tableName;
}
......@@ -127,18 +130,32 @@ public class HiveHookIT {
public void testCreateTable() throws Exception {
String tableName = tableName();
String dbName = createDatabase();
runCommand("create table " + dbName + "." + tableName + "(id int, name string)");
String colName = "col" + random();
runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)");
assertTableIsRegistered(dbName, tableName);
//there is only one instance of column registered
assertColumnIsRegistered(colName);
tableName = createTable();
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
Referenceable tableRef = dgiCLient.getEntity(tableId);
Assert.assertEquals(tableRef.get("tableType"), TableType.MANAGED_TABLE.name());
Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment");
final Id sdId = (Id) tableRef.get("sd");
Referenceable sdRef = dgiCLient.getEntity(sdId.id);
Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS),false);
//Create table where database doesn't exist, will create database instance as well
assertDatabaseIsRegistered(DEFAULT_DB);
}
private String assertColumnIsRegistered(String colName) throws Exception {
LOG.debug("Searching for column {}", colName);
String query = String.format("%s where name = '%s'", HiveDataTypes.HIVE_COLUMN.getName(), colName.toLowerCase());
return assertEntityIsRegistered(query, true);
}
@Test
public void testCTAS() throws Exception {
String tableName = createTable();
......
......@@ -48,28 +48,28 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
ThrowingProviderBinder.create(binder())
.bind(GraphProvider.class, TitanGraph.class)
.to(TitanGraphProvider.class)
.in(Scopes.SINGLETON);
.asEagerSingleton();
// allow for dynamic binding of the metadata repo & graph service
// bind the MetadataRepositoryService interface to an implementation
bind(MetadataRepository.class).to(GraphBackedMetadataRepository.class);
bind(MetadataRepository.class).to(GraphBackedMetadataRepository.class).asEagerSingleton();
// bind the ITypeStore interface to an implementation
bind(ITypeStore.class).to(GraphBackedTypeStore.class);
bind(ITypeStore.class).to(GraphBackedTypeStore.class).asEagerSingleton();
// bind the GraphService interface to an implementation
// bind(GraphService.class).to(graphServiceClass);
// bind the MetadataService interface to an implementation
bind(MetadataService.class).to(DefaultMetadataService.class);
bind(MetadataService.class).to(DefaultMetadataService.class).asEagerSingleton();
// bind the DiscoveryService interface to an implementation
bind(DiscoveryService.class).to(GraphBackedDiscoveryService.class);
bind(DiscoveryService.class).to(GraphBackedDiscoveryService.class).asEagerSingleton();
bind(SearchIndexer.class).to(GraphBackedSearchIndexer.class);
bind(SearchIndexer.class).to(GraphBackedSearchIndexer.class).asEagerSingleton();
bind(LineageService.class).to(HiveLineageService.class);
bind(LineageService.class).to(HiveLineageService.class).asEagerSingleton();
MethodInterceptor interceptor = new GraphTransactionInterceptor();
requestInjection(interceptor);
......
......@@ -89,10 +89,11 @@ public class DefaultMetadataService implements MetadataService {
private void restoreTypeSystem() {
LOG.info("Restoring type system from the store");
try {
createSuperTypes();
TypesDef typesDef = typeStore.restore();
typeSystem.defineTypes(typesDef);
createSuperTypes();
} catch (MetadataException e) {
throw new RuntimeException(e);
}
......
......@@ -57,11 +57,6 @@ import java.util.List;
@Guice(modules = RepositoryMetadataModule.class)
public class HiveLineageServiceTest {
static {
// this would override super types creation if not first thing
TypeSystem.getInstance().reset();
}
@Inject
private DefaultMetadataService metadataService;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment