Commit 4aef164c by Shwetha GS

ATLAS-242 The qualified name for hive entities should be backward compatible (shwethags)

parent fd468f45
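For quick reference, here is a minimal standalone sketch (illustrative class and method names, not the actual HiveMetaStoreBridge code) of the naming change this commit makes for databases and tables: the cluster name moves from a dot-separated prefix to an '@' suffix.

import java.util.Locale;

public class QualifiedNameSketch {
    // Old format: <clusterName>.<dbName>; new format: <dbName>@<clusterName>
    static String dbQualifiedName(String clusterName, String dbName) {
        return String.format("%s@%s", dbName.toLowerCase(Locale.ROOT), clusterName);
    }

    // Old format: <clusterName>.<dbName>.<tableName>
    // New format: <dbName>.<tableName>@<clusterName>
    static String tableQualifiedName(String clusterName, String dbName, String tableName) {
        return String.format("%s.%s@%s", dbName.toLowerCase(Locale.ROOT),
                tableName.toLowerCase(Locale.ROOT), clusterName);
    }

    public static void main(String[] args) {
        System.out.println(dbQualifiedName("primary", "Default"));             // default@primary
        System.out.println(tableQualifiedName("primary", "Default", "Sales")); // default.sales@primary
    }
}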
...@@ -194,7 +194,7 @@ public class HiveMetaStoreBridge {
    }

    public static String getDBQualifiedName(String clusterName, String dbName) {
-       return String.format("%s.%s", clusterName, dbName.toLowerCase());
+       return String.format("%s@%s", dbName.toLowerCase(), clusterName);
    }

    /**
...@@ -234,7 +234,7 @@ public class HiveMetaStoreBridge {
    }

    public static String getTableQualifiedName(String clusterName, String dbName, String tableName) {
-       return String.format("%s.%s.%s", clusterName, dbName.toLowerCase(), tableName.toLowerCase());
+       return String.format("%s.%s@%s", dbName.toLowerCase(), tableName.toLowerCase(), clusterName);
    }

    public Referenceable createTableInstance(Referenceable dbReference, Table hiveTable)
...@@ -392,8 +392,8 @@ public class HiveMetaStoreBridge {
    }

    private String getPartitionQualifiedName(Partition partition) {
-       return String.format("%s.%s.%s.%s", clusterName, partition.getTable().getDbName(),
-               partition.getTable().getTableName(), StringUtils.join(partition.getValues(), "/"));
+       return String.format("%s.%s.%s@%s", partition.getTable().getDbName(),
+               partition.getTable().getTableName(), StringUtils.join(partition.getValues(), "-"), clusterName);
    }

    private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc, String tableQualifiedName,
...@@ -454,7 +454,9 @@ public class HiveMetaStoreBridge {
    }

    private String getColumnQualifiedName(String tableQualifiedName, String colName) {
-       return String.format("%s.%s", tableQualifiedName, colName);
+       String[] parts = tableQualifiedName.split("@");
+       String tableName = parts[0];
+       return String.format("%s.%s@%s", tableName, colName, clusterName);
    }

    private List<Referenceable> getColumns(List<FieldSchema> schemaList, String tableQualifiedName) throws Exception {
...
...@@ -86,7 +86,12 @@ public class HiveHookIT {
        //There should be just one entity per dbname
        runCommand("drop database " + dbName);
        runCommand("create database " + dbName);
-       assertDatabaseIsRegistered(dbName);
+       String dbid = assertDatabaseIsRegistered(dbName);
+
+       //assert on qualified name
+       Referenceable dbEntity = dgiCLient.getEntity(dbid);
+       Assert.assertEquals(dbEntity.get("qualifiedName"), dbName.toLowerCase() + "@" + CLUSTER_NAME);
    }

    private String dbName() {
...@@ -121,8 +126,12 @@ public class HiveHookIT {
        String colName = "col" + random();
        runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)");
        assertTableIsRegistered(dbName, tableName);

        //there is only one instance of column registered
-       assertColumnIsRegistered(colName);
+       String colId = assertColumnIsRegistered(colName);
+       Referenceable colEntity = dgiCLient.getEntity(colId);
+       Assert.assertEquals(colEntity.get("qualifiedName"), String.format("%s.%s.%s@%s", dbName.toLowerCase(),
+               tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME));

        tableName = createTable();
        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
...@@ -131,6 +140,7 @@ public class HiveHookIT {
        Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment");
        String entityName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
        Assert.assertEquals(tableRef.get(HiveDataModelGenerator.NAME), entityName);
+       Assert.assertEquals(tableRef.get("name"), "default." + tableName.toLowerCase() + "@" + CLUSTER_NAME);
        final Referenceable sdRef = (Referenceable) tableRef.get("sd");
        Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false);
...@@ -189,7 +199,10 @@ public class HiveHookIT {
        runCommand(query);
        assertProcessIsRegistered(query);
-       assertPartitionIsRegistered(DEFAULT_DB, insertTableName, "2015-01-01");
+       String partId = assertPartitionIsRegistered(DEFAULT_DB, insertTableName, "2015-01-01");
+       Referenceable partitionEntity = dgiCLient.getEntity(partId);
+       Assert.assertEquals(partitionEntity.get("qualifiedName"),
+               String.format("%s.%s.%s@%s", "default", insertTableName.toLowerCase(), "2015-01-01", CLUSTER_NAME));
    }

    private String random() {
...@@ -231,7 +244,9 @@ public class HiveHookIT {
        String tableName = createTable();
        String query = "select * from " + tableName;
        runCommand(query);
-       assertProcessIsRegistered(query);
+       String pid = assertProcessIsRegistered(query);
+       Referenceable processEntity = dgiCLient.getEntity(pid);
+       Assert.assertEquals(processEntity.get("name"), query.toLowerCase());

        //single entity per query
        query = "SELECT * from " + tableName.toUpperCase();
...@@ -265,7 +280,7 @@ public class HiveHookIT {
        assertTableIsNotRegistered(DEFAULT_DB, viewName);
    }

-   private void assertProcessIsRegistered(String queryStr) throws Exception {
+   private String assertProcessIsRegistered(String queryStr) throws Exception {
//        String dslQuery = String.format("%s where queryText = \"%s\"", HiveDataTypes.HIVE_PROCESS.getName(),
//                normalize(queryStr));
//        assertEntityIsRegistered(dslQuery, true);
...@@ -274,7 +289,7 @@ public class HiveHookIT {
        String gremlinQuery =
                String.format("g.V.has('__typeName', '%s').has('%s.queryText', \"%s\").toList()", typeName, typeName,
                        normalize(queryStr));
-       assertEntityIsRegistered(gremlinQuery);
+       return assertEntityIsRegistered(gremlinQuery);
    }

    private String normalize(String str) {
...@@ -307,22 +322,15 @@ public class HiveHookIT {
        return assertEntityIsRegistered(query);
    }

-   private void assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception {
+   private String assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception {
        String typeName = HiveDataTypes.HIVE_PARTITION.getName();
-       String dbType = HiveDataTypes.HIVE_DB.getName();
-       String tableType = HiveDataTypes.HIVE_TABLE.getName();

        LOG.debug("Searching for partition of {}.{} with values {}", dbName, tableName, value);
-       /* gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.values', ['%s']).as('p')."
-               + "out('__%s.table').has('%s.tableName', '%s').out('__%s.db').has('%s.name', '%s')"
-               + ".has('%s.clusterName', '%s').back('p').toList()", typeName, typeName, value, typeName,
-               tableType, tableName.toLowerCase(), tableType, dbType, dbName.toLowerCase(), dbType, CLUSTER_NAME);
-       */
        String dslQuery = String.format("%s as p where values = ['%s'], table where tableName = '%s', "
                + "db where name = '%s' and clusterName = '%s' select p", typeName, value,
                tableName.toLowerCase(), dbName.toLowerCase(), CLUSTER_NAME);
-       assertEntityIsRegistered(dslQuery, "p");
+       return assertEntityIsRegistered(dslQuery, "p");
    }

    private String assertEntityIsRegistered(final String query, String... arg) throws Exception {
...
...@@ -19,12 +19,12 @@ hive_partition(ClassType) - super types [Referenceable] - attributes [values, ta
   hive_process(ClassType) - super types [Process] - attributes [startTime, endTime, userName, operationType, queryText, queryPlan, queryId, queryGraph]
</verbatim>

-The entities are created and de-duped using unique qualified name. They provide namespace and can be used for querying as well:
-   hive_db - attribute qualifiedName - clustername.dbname
-   hive_table - attribute name - clustername.dbname.tablename
-   hive_column - attribute qualifiedName - clustername.dbname.tablename.columnname
-   hive_partition - attribute qualifiedName - clustername.dbname.tablename.partitionvalues
-   hive_process - attribute qualifiedName - queryText
+The entities are created and de-duped using a unique qualified name, which provides a namespace and can be used for querying/lineage as well. Note that dbName and tableName are in lower case; clusterName is explained below. (A sketch of these formats follows this hunk.)
+   hive_db - attribute qualifiedName - <dbName>@<clusterName>
+   hive_table - attribute name - <dbName>.<tableName>@<clusterName>
+   hive_column - attribute qualifiedName - <dbName>.<tableName>.<columnName>@<clusterName>
+   hive_partition - attribute qualifiedName - <dbName>.<tableName>.<partitionValues ('-' separated)>@<clusterName>
+   hive_process - attribute name - <queryString> - trimmed query string in lower case

---++ Importing Hive Metadata
...
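To make the column and partition formats documented above concrete, here is a small self-contained sketch (class and method names are illustrative, not the actual bridge code). Note that the bridge itself uses StringUtils.join for the partition values; this sketch uses String.join to stay dependency-free.

import java.util.Arrays;
import java.util.List;
import java.util.Locale;

public class PartitionColumnNameSketch {
    // <dbName>.<tableName>.<partitionValues '-' separated>@<clusterName>
    static String partitionQualifiedName(String clusterName, String dbName, String tableName,
                                         List<String> values) {
        return String.format("%s.%s.%s@%s", dbName.toLowerCase(Locale.ROOT),
                tableName.toLowerCase(Locale.ROOT), String.join("-", values), clusterName);
    }

    // The column name is spliced in before the '@<clusterName>' suffix of the table's qualified name.
    static String columnQualifiedName(String tableQualifiedName, String colName, String clusterName) {
        String tablePart = tableQualifiedName.split("@")[0]; // e.g. "default.sales"
        return String.format("%s.%s@%s", tablePart, colName, clusterName);
    }

    public static void main(String[] args) {
        // prints: default.sales.2015-01-01@primary
        System.out.println(partitionQualifiedName("primary", "default", "sales",
                Arrays.asList("2015-01-01")));
        // prints: default.sales.id@primary
        System.out.println(columnQualifiedName("default.sales@primary", "id", "primary"));
    }
}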
...@@ -9,6 +9,7 @@ ATLAS-54 Rename configs in hive hook (shwethags)
ATLAS-3 Mixed Index creation fails with Date types (sumasai via shwethags)

ALL CHANGES:
+ATLAS-242 The qualified name for hive entities should be backward compatible (shwethags)
ATLAS-361 Add validation when index backends are switched in ATLAS configuration (sumasai via shwethags)
ATLAS-171 Ability to update type definition(shwethags via sumasai)
ATLAS-352 Improve write performance on type and entity creation with Hbase (sumasai)
...