Commit 5506e778 by arpitgupta

Merge branch 'master' into dal

parents cd0e3950 74c54aa9
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -228,8 +229,8 @@ public class HiveMetaStoreBridge {
         String dbType = HiveDataTypes.HIVE_DB.getName();
         String tableType = HiveDataTypes.HIVE_TABLE.getName();
         String gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.values', %s).as('p')."
                 + "out('__%s.tableName').has('%s.name', '%s').out('__%s.dbName').has('%s.name', '%s')"
                 + ".has('%s.clusterName', '%s').back('p').toList()", typeName, typeName, valuesStr, typeName,
                 tableType, tableName.toLowerCase(), tableType, dbType, dbName.toLowerCase(), dbType, clusterName);
         return getEntityReferenceFromGremlin(typeName, gremlinQuery);
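
For readers untangling the format string above, here is a minimal sketch of how it expands. All concrete values below are made up for illustration; only the format string itself comes from the diff.

```java
// Sketch only: expanding the Gremlin format string with hypothetical values
// to show the shape of the generated query.
public class GremlinQuerySketch {
    public static void main(String[] args) {
        String typeName = "hive_partition";    // assumed partition type name
        String valuesStr = "['2015-01-01']";   // assumed partition-values literal
        String tableType = "hive_table";
        String dbType = "hive_db";
        String tableName = "Sales_Fact";       // hypothetical
        String dbName = "Default";             // hypothetical
        String clusterName = "cluster1";       // hypothetical

        String gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.values', %s).as('p')."
                + "out('__%s.tableName').has('%s.name', '%s').out('__%s.dbName').has('%s.name', '%s')"
                + ".has('%s.clusterName', '%s').back('p').toList()", typeName, typeName, valuesStr, typeName,
                tableType, tableName.toLowerCase(), tableType, dbType, dbName.toLowerCase(), dbType, clusterName);

        System.out.println(gremlinQuery);
        // g.V.has('__typeName', 'hive_partition').has('hive_partition.values', ['2015-01-01']).as('p').
        // out('__hive_partition.tableName').has('hive_table.name', 'sales_fact').
        // out('__hive_table.dbName').has('hive_db.name', 'default')
        // .has('hive_db.clusterName', 'cluster1').back('p').toList()
    }
}
```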
@@ -263,17 +264,22 @@ public class HiveMetaStoreBridge {
             tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
             tableRef.set("name", hiveTable.getTableName());
             tableRef.set("owner", hiveTable.getOwner());
-            //todo fix
-            tableRef.set("createTime", hiveTable.getLastAccessTime());
+            tableRef.set("createTime", hiveTable.getMetadata().getProperty(hive_metastoreConstants.DDL_TIME));
             tableRef.set("lastAccessTime", hiveTable.getLastAccessTime());
             tableRef.set("retention", hiveTable.getRetention());
+            tableRef.set(HiveDataModelGenerator.COMMENT, hiveTable.getParameters().get(HiveDataModelGenerator.COMMENT));

             // add reference to the database
             tableRef.set("dbName", dbReference);

+            List<Referenceable> colList = getColumns(hiveTable.getCols());
+            tableRef.set("columns", colList);
+
             // add reference to the StorageDescriptor
             StorageDescriptor storageDesc = hiveTable.getSd();
-            Referenceable sdReferenceable = fillStorageDescStruct(storageDesc);
+            Referenceable sdReferenceable = fillStorageDescStruct(storageDesc, colList);
             tableRef.set("sd", sdReferenceable);

             // add reference to the Partition Keys
@@ -293,8 +299,6 @@ public class HiveMetaStoreBridge {
             tableRef.set("tableType", hiveTable.getTableType().name());
             tableRef.set("temporary", hiveTable.isTemporary());
-            List<Referenceable> colList = getColumns(hiveTable.getAllCols());
-            tableRef.set("columns", colList);

             tableRef = createInstance(tableRef);
         } else {
@@ -388,7 +392,7 @@ public class HiveMetaStoreBridge {
         indexRef.set("origTableName", index.getOrigTableName());
         indexRef.set("indexTableName", index.getIndexTableName());

-        Referenceable sdReferenceable = fillStorageDescStruct(index.getSd());
+        Referenceable sdReferenceable = fillStorageDescStruct(index.getSd(), null);
         indexRef.set("sd", sdReferenceable);
         indexRef.set("parameters", index.getParameters());
@@ -398,7 +402,7 @@ public class HiveMetaStoreBridge {
         createInstance(indexRef);
     }

-    private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc) throws Exception {
+    private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc, List<Referenceable> colList) throws Exception {
         LOG.debug("Filling storage descriptor information for " + storageDesc);

         Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
@@ -415,6 +419,8 @@ public class HiveMetaStoreBridge {
         serdeInfoStruct.set("parameters", serdeInfo.getParameters());
         sdReferenceable.set("serdeInfo", serdeInfoStruct);
+        sdReferenceable.set(HiveDataModelGenerator.STORAGE_NUM_BUCKETS, storageDesc.getNumBuckets());
+        sdReferenceable.set(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS, storageDesc.isStoredAsSubDirectories());

         // Will need to revisit this after we fix typesystem.
         /*
@@ -433,8 +439,15 @@ public class HiveMetaStoreBridge {
         }
         */

-        List<Referenceable> fieldsList = getColumns(storageDesc.getCols());
-        sdReferenceable.set("cols", fieldsList);
+        //Use the passed column list if not null, ex: use same references for table and SD
+        List<FieldSchema> columns = storageDesc.getCols();
+        if (columns != null && !columns.isEmpty()) {
+            if (colList != null) {
+                sdReferenceable.set("cols", colList);
+            } else {
+                sdReferenceable.set("cols", getColumns(columns));
+            }
+        }
         List<Struct> sortColsStruct = new ArrayList<>();
         for (Order sortcol : storageDesc.getSortCols()) {

@@ -472,7 +485,7 @@ public class HiveMetaStoreBridge {
             Referenceable colReferenceable = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
             colReferenceable.set("name", fs.getName());
             colReferenceable.set("type", fs.getType());
-            colReferenceable.set("comment", fs.getComment());
+            colReferenceable.set(HiveDataModelGenerator.COMMENT, fs.getComment());
             colList.add(createInstance(colReferenceable));
         }
...
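
The colList parameter threaded through fillStorageDescStruct exists so the table's "columns" attribute and the storage descriptor's "cols" attribute point at the same column entities, instead of each column being registered twice. A toy sketch of that sharing, using made-up classes rather than the project's Referenceable API:

```java
import java.util.*;

// Toy model: the table and its storage descriptor share one list of column
// references, so each column instance is created exactly once.
public class SharedColumnsSketch {
    static class Ref {
        final String type;
        final Map<String, Object> attrs = new HashMap<>();
        Ref(String type) { this.type = type; }
    }

    static List<Ref> getColumns(List<String> names) {
        List<Ref> cols = new ArrayList<>();
        for (String n : names) {
            Ref c = new Ref("hive_column");
            c.attrs.put("name", n);
            cols.add(c);                     // one instance per column
        }
        return cols;
    }

    public static void main(String[] args) {
        List<Ref> colList = getColumns(Arrays.asList("id", "name"));

        Ref table = new Ref("hive_table");
        table.attrs.put("columns", colList);

        Ref sd = new Ref("hive_storagedesc");
        sd.attrs.put("cols", colList);       // same references, not a second copy

        System.out.println(table.attrs.get("columns") == sd.attrs.get("cols")); // true
    }
}
```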
@@ -55,6 +55,10 @@ public class HiveDataModelGenerator {
     private final Map<String, EnumTypeDefinition> enumTypeDefinitionMap;
     private final Map<String, StructTypeDefinition> structTypeDefinitionMap;

+    public static final String COMMENT = "comment";
+    public static final String STORAGE_NUM_BUCKETS = "numBuckets";
+    public static final String STORAGE_IS_STORED_AS_SUB_DIRS = "storedAsSubDirectories";
+
     public HiveDataModelGenerator() {
         classTypeDefinitions = new HashMap<>();
         enumTypeDefinitionMap = new HashMap<>();
@@ -237,7 +241,7 @@ public class HiveDataModelGenerator {
                         Multiplicity.OPTIONAL, false, null),
                 new AttributeDefinition("compressed", DataTypes.BOOLEAN_TYPE.getName(),
                         Multiplicity.REQUIRED, false, null),
-                new AttributeDefinition("numBuckets", DataTypes.INT_TYPE.getName(),
+                new AttributeDefinition(STORAGE_NUM_BUCKETS, DataTypes.INT_TYPE.getName(),
                         Multiplicity.OPTIONAL, false, null),
                 new AttributeDefinition("serdeInfo", HiveDataTypes.HIVE_SERDE.getName(),
                         Multiplicity.OPTIONAL, false, null),
@@ -251,7 +255,7 @@ public class HiveDataModelGenerator {
                         Multiplicity.OPTIONAL, false, null),
                 //new AttributeDefinition("skewedInfo", DefinedTypes.HIVE_SKEWEDINFO.getName(),
                 //        Multiplicity.OPTIONAL, false, null),
-                new AttributeDefinition("storedAsSubDirectories", DataTypes.BOOLEAN_TYPE.getName(),
+                new AttributeDefinition(STORAGE_IS_STORED_AS_SUB_DIRS, DataTypes.BOOLEAN_TYPE.getName(),
                         Multiplicity.OPTIONAL, false, null),
         };
@@ -326,7 +330,7 @@ public class HiveDataModelGenerator {
                         Multiplicity.REQUIRED, false, null),
                 new AttributeDefinition("type", DataTypes.STRING_TYPE.getName(),
                         Multiplicity.REQUIRED, false, null),
-                new AttributeDefinition("comment", DataTypes.STRING_TYPE.getName(),
+                new AttributeDefinition(COMMENT, DataTypes.STRING_TYPE.getName(),
                         Multiplicity.OPTIONAL, false, null),
         };
         HierarchicalTypeDefinition<ClassType> definition =
@@ -377,6 +381,8 @@ public class HiveDataModelGenerator {
                         Multiplicity.OPTIONAL, false, null),
                 new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(),
                         Multiplicity.OPTIONAL, false, null),
+                new AttributeDefinition(COMMENT, DataTypes.STRING_TYPE.getName(),
+                        Multiplicity.OPTIONAL, false, null),
                 new AttributeDefinition("retention", DataTypes.INT_TYPE.getName(),
                         Multiplicity.OPTIONAL, false, null),
                 new AttributeDefinition("sd", HiveDataTypes.HIVE_STORAGEDESC.getName(),
...
@@ -21,12 +21,15 @@ package org.apache.hadoop.metadata.hive.hook;
 import org.apache.commons.lang.RandomStringUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.metadata.MetadataServiceClient;
 import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
+import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
 import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
 import org.apache.hadoop.metadata.typesystem.Referenceable;
+import org.apache.hadoop.metadata.typesystem.persistence.Id;
 import org.apache.log4j.spi.LoggerFactory;
 import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONObject;
@@ -118,7 +121,7 @@ public class HiveHookIT {
     private String createTable(boolean partition) throws Exception {
         String tableName = tableName();
-        runCommand("create table " + tableName + "(id int, name string)" + (partition ? " partitioned by(dt string)"
+        runCommand("create table " + tableName + "(id int, name string) comment 'table comment' " + (partition ? " partitioned by(dt string)"
                 : ""));
         return tableName;
     }
@@ -127,18 +130,32 @@ public class HiveHookIT {
     public void testCreateTable() throws Exception {
         String tableName = tableName();
         String dbName = createDatabase();
-        runCommand("create table " + dbName + "." + tableName + "(id int, name string)");
+        String colName = "col" + random();
+        runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)");
         assertTableIsRegistered(dbName, tableName);

+        //there is only one instance of column registered
+        assertColumnIsRegistered(colName);
+
         tableName = createTable();
         String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
         Referenceable tableRef = dgiCLient.getEntity(tableId);
         Assert.assertEquals(tableRef.get("tableType"), TableType.MANAGED_TABLE.name());
+        Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment");
+
+        final Id sdId = (Id) tableRef.get("sd");
+        Referenceable sdRef = dgiCLient.getEntity(sdId.id);
+        Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false);
+
         //Create table where database doesn't exist, will create database instance as well
         assertDatabaseIsRegistered(DEFAULT_DB);
     }

+    private String assertColumnIsRegistered(String colName) throws Exception {
+        LOG.debug("Searching for column {}", colName);
+        String query = String.format("%s where name = '%s'", HiveDataTypes.HIVE_COLUMN.getName(), colName.toLowerCase());
+        return assertEntityIsRegistered(query, true);
+    }
+
     @Test
     public void testCTAS() throws Exception {
         String tableName = createTable();
...
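
As an aside, the lookup in assertColumnIsRegistered is a plain DSL string. For a hypothetical column name it expands roughly as below; the literal type name is assumed to be what HiveDataTypes.HIVE_COLUMN.getName() returns.

```java
public class ColumnQuerySketch {
    public static void main(String[] args) {
        // "col1234" is a made-up column name; "hive_column" is the assumed
        // value of HiveDataTypes.HIVE_COLUMN.getName().
        String query = String.format("%s where name = '%s'", "hive_column", "col1234");
        System.out.println(query);  // hive_column where name = 'col1234'
    }
}
```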
@@ -48,28 +48,28 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
         ThrowingProviderBinder.create(binder())
                 .bind(GraphProvider.class, TitanGraph.class)
                 .to(TitanGraphProvider.class)
-                .in(Scopes.SINGLETON);
+                .asEagerSingleton();

         // allow for dynamic binding of the metadata repo & graph service
         // bind the MetadataRepositoryService interface to an implementation
-        bind(MetadataRepository.class).to(GraphBackedMetadataRepository.class);
+        bind(MetadataRepository.class).to(GraphBackedMetadataRepository.class).asEagerSingleton();

         // bind the ITypeStore interface to an implementation
-        bind(ITypeStore.class).to(GraphBackedTypeStore.class);
+        bind(ITypeStore.class).to(GraphBackedTypeStore.class).asEagerSingleton();

         // bind the GraphService interface to an implementation
         // bind(GraphService.class).to(graphServiceClass);

         // bind the MetadataService interface to an implementation
-        bind(MetadataService.class).to(DefaultMetadataService.class);
+        bind(MetadataService.class).to(DefaultMetadataService.class).asEagerSingleton();

         // bind the DiscoveryService interface to an implementation
-        bind(DiscoveryService.class).to(GraphBackedDiscoveryService.class);
+        bind(DiscoveryService.class).to(GraphBackedDiscoveryService.class).asEagerSingleton();

-        bind(SearchIndexer.class).to(GraphBackedSearchIndexer.class);
+        bind(SearchIndexer.class).to(GraphBackedSearchIndexer.class).asEagerSingleton();

-        bind(LineageService.class).to(HiveLineageService.class);
+        bind(LineageService.class).to(HiveLineageService.class).asEagerSingleton();

         MethodInterceptor interceptor = new GraphTransactionInterceptor();
         requestInjection(interceptor);
...
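
The switch from in(Scopes.SINGLETON) to asEagerSingleton() changes when the singletons are constructed: a scoped singleton is built lazily on first injection (in Guice's default DEVELOPMENT stage), while an eager singleton is built as soon as the injector is created, so wiring errors surface at startup. A minimal sketch with toy service names, not the module above:

```java
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Scopes;

public class EagerVsLazyDemo {
    interface Service {}

    static class ServiceImpl implements Service {
        ServiceImpl() { System.out.println("ServiceImpl constructed"); }
    }

    public static void main(String[] args) {
        System.out.println("building lazy injector");
        Guice.createInjector(new AbstractModule() {
            @Override protected void configure() {
                // Lazy: constructed on the first getInstance()/injection.
                bind(Service.class).to(ServiceImpl.class).in(Scopes.SINGLETON);
            }
        });
        // nothing printed yet

        System.out.println("building eager injector");
        Guice.createInjector(new AbstractModule() {
            @Override protected void configure() {
                // Eager: constructed while the injector is being built.
                bind(Service.class).to(ServiceImpl.class).asEagerSingleton();
            }
        });
        // "ServiceImpl constructed" prints during injector creation
    }
}
```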
@@ -89,10 +89,11 @@ public class DefaultMetadataService implements MetadataService {
     private void restoreTypeSystem() {
         LOG.info("Restoring type system from the store");
         try {
+            createSuperTypes();
+
             TypesDef typesDef = typeStore.restore();
             typeSystem.defineTypes(typesDef);
-            createSuperTypes();
         } catch (MetadataException e) {
             throw new RuntimeException(e);
         }
...
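
The reordering matters because types restored from the store may extend the built-in super types, which therefore must already be defined when defineTypes runs. A toy illustration of that dependency, not the project's type-system API:

```java
import java.util.HashSet;
import java.util.Set;

// Toy type registry: a type whose super type is not yet defined cannot be
// registered, which is why createSuperTypes() now runs before the restore.
public class TypeOrderingSketch {
    private final Set<String> defined = new HashSet<>();

    void define(String name, String superType) {
        if (superType != null && !defined.contains(superType)) {
            throw new IllegalStateException(name + " extends unknown type " + superType);
        }
        defined.add(name);
    }

    public static void main(String[] args) {
        TypeOrderingSketch ts = new TypeOrderingSketch();
        ts.define("DataSet", null);          // createSuperTypes()
        ts.define("hive_table", "DataSet");  // types restored from the store
        // Reversing the two calls above throws IllegalStateException.
    }
}
```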
@@ -57,11 +57,6 @@ import java.util.List;
 @Guice(modules = RepositoryMetadataModule.class)
 public class HiveLineageServiceTest {

-    static {
-        // this would override super types creation if not first thing
-        TypeSystem.getInstance().reset();
-    }
-
     @Inject
     private DefaultMetadataService metadataService;
...