Commit 4aef164c by Shwetha GS

ATLAS-242 The qualified name for hive entities should be backward compatible (shwethags)

parent fd468f45
......@@ -194,7 +194,7 @@ public class HiveMetaStoreBridge {
}
public static String getDBQualifiedName(String clusterName, String dbName) {
return String.format("%s.%s", clusterName, dbName.toLowerCase());
return String.format("%s@%s", dbName.toLowerCase(), clusterName);
}
/**
......@@ -234,7 +234,7 @@ public class HiveMetaStoreBridge {
}
public static String getTableQualifiedName(String clusterName, String dbName, String tableName) {
return String.format("%s.%s.%s", clusterName, dbName.toLowerCase(), tableName.toLowerCase());
return String.format("%s.%s@%s", dbName.toLowerCase(), tableName.toLowerCase(), clusterName);
}
public Referenceable createTableInstance(Referenceable dbReference, Table hiveTable)
......@@ -392,8 +392,8 @@ public class HiveMetaStoreBridge {
}
private String getPartitionQualifiedName(Partition partition) {
return String.format("%s.%s.%s.%s", clusterName, partition.getTable().getDbName(),
partition.getTable().getTableName(), StringUtils.join(partition.getValues(), "/"));
return String.format("%s.%s.%s@%s", partition.getTable().getDbName(),
partition.getTable().getTableName(), StringUtils.join(partition.getValues(), "-"), clusterName);
}
private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc, String tableQualifiedName,
......@@ -454,7 +454,9 @@ public class HiveMetaStoreBridge {
}
private String getColumnQualifiedName(String tableQualifiedName, String colName) {
return String.format("%s.%s", tableQualifiedName, colName);
String[] parts = tableQualifiedName.split("@");
String tableName = parts[0];
return String.format("%s.%s@%s", tableName, colName, clusterName);
}
private List<Referenceable> getColumns(List<FieldSchema> schemaList, String tableQualifiedName) throws Exception {
......
......@@ -86,7 +86,12 @@ public class HiveHookIT {
//There should be just one entity per dbname
runCommand("drop database " + dbName);
runCommand("create database " + dbName);
assertDatabaseIsRegistered(dbName);
String dbid = assertDatabaseIsRegistered(dbName);
//assert on qualified name
Referenceable dbEntity = dgiCLient.getEntity(dbid);
Assert.assertEquals(dbEntity.get("qualifiedName"), dbName.toLowerCase() + "@" + CLUSTER_NAME);
}
private String dbName() {
......@@ -121,8 +126,12 @@ public class HiveHookIT {
String colName = "col" + random();
runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)");
assertTableIsRegistered(dbName, tableName);
//there is only one instance of column registered
assertColumnIsRegistered(colName);
String colId = assertColumnIsRegistered(colName);
Referenceable colEntity = dgiCLient.getEntity(colId);
Assert.assertEquals(colEntity.get("qualifiedName"), String.format("%s.%s.%s@%s", dbName.toLowerCase(),
tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME));
tableName = createTable();
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
......@@ -131,6 +140,7 @@ public class HiveHookIT {
Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment");
String entityName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
Assert.assertEquals(tableRef.get(HiveDataModelGenerator.NAME), entityName);
Assert.assertEquals(tableRef.get("name"), "default." + tableName.toLowerCase() + "@" + CLUSTER_NAME);
final Referenceable sdRef = (Referenceable) tableRef.get("sd");
Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false);
......@@ -189,7 +199,10 @@ public class HiveHookIT {
runCommand(query);
assertProcessIsRegistered(query);
assertPartitionIsRegistered(DEFAULT_DB, insertTableName, "2015-01-01");
String partId = assertPartitionIsRegistered(DEFAULT_DB, insertTableName, "2015-01-01");
Referenceable partitionEntity = dgiCLient.getEntity(partId);
Assert.assertEquals(partitionEntity.get("qualifiedName"),
String.format("%s.%s.%s@%s", "default", insertTableName.toLowerCase(), "2015-01-01", CLUSTER_NAME));
}
private String random() {
......@@ -231,7 +244,9 @@ public class HiveHookIT {
String tableName = createTable();
String query = "select * from " + tableName;
runCommand(query);
assertProcessIsRegistered(query);
String pid = assertProcessIsRegistered(query);
Referenceable processEntity = dgiCLient.getEntity(pid);
Assert.assertEquals(processEntity.get("name"), query.toLowerCase());
//single entity per query
query = "SELECT * from " + tableName.toUpperCase();
......@@ -265,7 +280,7 @@ public class HiveHookIT {
assertTableIsNotRegistered(DEFAULT_DB, viewName);
}
private void assertProcessIsRegistered(String queryStr) throws Exception {
private String assertProcessIsRegistered(String queryStr) throws Exception {
// String dslQuery = String.format("%s where queryText = \"%s\"", HiveDataTypes.HIVE_PROCESS.getName(),
// normalize(queryStr));
// assertEntityIsRegistered(dslQuery, true);
......@@ -274,7 +289,7 @@ public class HiveHookIT {
String gremlinQuery =
String.format("g.V.has('__typeName', '%s').has('%s.queryText', \"%s\").toList()", typeName, typeName,
normalize(queryStr));
assertEntityIsRegistered(gremlinQuery);
return assertEntityIsRegistered(gremlinQuery);
}
private String normalize(String str) {
......@@ -307,22 +322,15 @@ public class HiveHookIT {
return assertEntityIsRegistered(query);
}
private void assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception {
private String assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception {
String typeName = HiveDataTypes.HIVE_PARTITION.getName();
String dbType = HiveDataTypes.HIVE_DB.getName();
String tableType = HiveDataTypes.HIVE_TABLE.getName();
LOG.debug("Searching for partition of {}.{} with values {}", dbName, tableName, value);
/* gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.values', ['%s']).as('p')."
+ "out('__%s.table').has('%s.tableName', '%s').out('__%s.db').has('%s.name', '%s')"
+ ".has('%s.clusterName', '%s').back('p').toList()", typeName, typeName, value, typeName,
tableType, tableName.toLowerCase(), tableType, dbType, dbName.toLowerCase(), dbType, CLUSTER_NAME);
*/
String dslQuery = String.format("%s as p where values = ['%s'], table where tableName = '%s', "
+ "db where name = '%s' and clusterName = '%s' select p", typeName, value,
tableName.toLowerCase(), dbName.toLowerCase(), CLUSTER_NAME);
assertEntityIsRegistered(dslQuery, "p");
return assertEntityIsRegistered(dslQuery, "p");
}
private String assertEntityIsRegistered(final String query, String... arg) throws Exception {
......
......@@ -19,12 +19,12 @@ hive_partition(ClassType) - super types [Referenceable] - attributes [values, ta
hive_process(ClassType) - super types [Process] - attributes [startTime, endTime, userName, operationType, queryText, queryPlan, queryId, queryGraph]
</verbatim>
The entities are created and de-duped using unique qualified name. They provide namespace and can be used for querying as well:
hive_db - attribute qualifiedName - clustername.dbname
hive_table - attribute name - clustername.dbname.tablename
hive_column - attribute qualifiedName - clustername.dbname.tablename.columnname
hive_partition - attribute qualifiedName - clustername.dbname.tablename.partitionvalues
hive_process - attribute qualifiedName - queryText
The entities are created and de-duped using unique qualified name. They provide namespace and can be used for querying/lineage as well. Note that dbName and tableName should be in lower case. clusterName is explained below:
hive_db - attribute qualifiedName - <dbName>@<clusterName>
hive_table - attribute name - <dbName>.<tableName>@<clusterName>
hive_column - attribute qualifiedName - <dbName>.<tableName>.<columnName>@<clusterName>
hive_partition - attribute qualifiedName - <dbName>.<tableName>.<partitionValues('-' separated)>@<clusterName>
hive_process - attribute name - <queryString> - trimmed query string in lower case
---++ Importing Hive Metadata
......
......@@ -9,6 +9,7 @@ ATLAS-54 Rename configs in hive hook (shwethags)
ATLAS-3 Mixed Index creation fails with Date types (sumasai via shwethags)
ALL CHANGES:
ATLAS-242 The qualified name for hive entities should be backward compatible (shwethags)
ATLAS-361 Add validation when index backends are switched in ATLAS configuration (sumasai via shwethags)
ATLAS-171 Ability to update type definition(shwethags via sumasai)
ATLAS-352 Improve write performance on type and entity creation with Hbase (sumasai)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment