Commit 25c2adf3 by Suma Shivaprasad

ATLAS-530 Add table information to column class (sumasai)

parent f147d3ff
......@@ -31,6 +31,7 @@ import org.apache.atlas.typesystem.Referenceable;
import org.apache.atlas.typesystem.Struct;
import org.apache.atlas.typesystem.json.InstanceSerialization;
import org.apache.atlas.typesystem.json.TypesSerialization;
import org.apache.atlas.typesystem.persistence.Id;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Database;
......@@ -62,7 +63,6 @@ public class HiveMetaStoreBridge {
public static final String HIVE_CLUSTER_NAME = "atlas.cluster.name";
public static final String DEFAULT_CLUSTER_NAME = "primary";
public static final String DESCRIPTION_ATTR = "description";
public static final String TABLE_TYPE_ATTR = "tableType";
public static final String SEARCH_ENTRY_GUID_ATTR = "__guid";
private final String clusterName;
......@@ -303,7 +303,7 @@ public class HiveMetaStoreBridge {
String tableQualifiedName = getTableQualifiedName(clusterName, hiveTable.getDbName(), hiveTable.getTableName());
tableReference.set(HiveDataModelGenerator.NAME, tableQualifiedName);
tableReference.set(HiveDataModelGenerator.TABLE_NAME, hiveTable.getTableName().toLowerCase());
tableReference.set("owner", hiveTable.getOwner());
tableReference.set(HiveDataModelGenerator.OWNER, hiveTable.getOwner());
Date createDate = new Date();
if (hiveTable.getMetadata().getProperty(hive_metastoreConstants.DDL_TIME) != null){
......@@ -327,15 +327,9 @@ public class HiveMetaStoreBridge {
// add reference to the database
tableReference.set(HiveDataModelGenerator.DB, dbReference);
tableReference.set(HiveDataModelGenerator.COLUMNS, getColumns(hiveTable.getCols(), tableQualifiedName));
// add reference to the StorageDescriptor
Referenceable sdReferenceable = fillStorageDesc(hiveTable.getSd(), tableQualifiedName, getStorageDescQFName(tableQualifiedName));
tableReference.set("sd", sdReferenceable);
// add reference to the Partition Keys
List<Referenceable> partKeys = getColumns(hiveTable.getPartitionKeys(), tableQualifiedName);
tableReference.set("partitionKeys", partKeys);
Referenceable sdReferenceable = fillStorageDesc(hiveTable.getSd(), tableQualifiedName, getStorageDescQFName(tableQualifiedName), tableReference.getId());
tableReference.set(HiveDataModelGenerator.STORAGE_DESC, sdReferenceable);
tableReference.set(HiveDataModelGenerator.PARAMETERS, hiveTable.getParameters());
......@@ -347,9 +341,15 @@ public class HiveMetaStoreBridge {
tableReference.set("viewExpandedText", hiveTable.getViewExpandedText());
}
tableReference.set(TABLE_TYPE_ATTR, hiveTable.getTableType().name());
tableReference.set(HiveDataModelGenerator.TABLE_TYPE_ATTR, hiveTable.getTableType().name());
tableReference.set("temporary", hiveTable.isTemporary());
// add reference to the Partition Keys
List<Referenceable> partKeys = getColumns(hiveTable.getPartitionKeys(), tableQualifiedName, tableReference.getId());
tableReference.set("partitionKeys", partKeys);
tableReference.set(HiveDataModelGenerator.COLUMNS, getColumns(hiveTable.getCols(), tableQualifiedName, tableReference.getId()));
return tableReference;
}
......@@ -384,7 +384,6 @@ public class HiveMetaStoreBridge {
atlasClient.updateEntity(referenceable.getId().id, referenceable);
}
private Referenceable getEntityReferenceFromGremlin(String typeName, String gremlinQuery)
throws AtlasServiceException, JSONException {
AtlasClient client = getAtlasClient();
......@@ -404,12 +403,12 @@ public class HiveMetaStoreBridge {
AtlasClient dgiClient = getAtlasClient();
Referenceable tableInstance = dgiClient.getEntity(tableRef.getId().id);
Referenceable sd = (Referenceable) tableInstance.get("sd");
Referenceable sd = (Referenceable) tableInstance.get(HiveDataModelGenerator.STORAGE_DESC);
return new Referenceable(sd.getId().id, sd.getTypeName(), null);
}
public Referenceable fillStorageDesc(StorageDescriptor storageDesc, String tableQualifiedName,
String sdQualifiedName) throws Exception {
String sdQualifiedName, Id tableId) throws Exception {
LOG.debug("Filling storage descriptor information for " + storageDesc);
Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
......@@ -455,6 +454,7 @@ public class HiveMetaStoreBridge {
sdReferenceable.set(HiveDataModelGenerator.PARAMETERS, storageDesc.getParameters());
sdReferenceable.set("storedAsSubDirectories", storageDesc.isStoredAsSubDirectories());
sdReferenceable.set(HiveDataModelGenerator.TABLE, tableId);
return sdReferenceable;
}
......@@ -465,7 +465,7 @@ public class HiveMetaStoreBridge {
// Path path = new Path(pathUri);
// ref.set("name", path.getName());
//TODO - Fix after ATLAS-542 to shorter Name
ref.set("name", pathUri);
ref.set(HiveDataModelGenerator.NAME, pathUri);
ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
return ref;
}
......@@ -477,8 +477,9 @@ public class HiveMetaStoreBridge {
return String.format("%s.%s@%s", tableName, colName.toLowerCase(), clusterName);
}
public List<Referenceable> getColumns(List<FieldSchema> schemaList, String tableQualifiedName) throws Exception {
public List<Referenceable> getColumns(List<FieldSchema> schemaList, String tableQualifiedName, Id tableReference) throws Exception {
List<Referenceable> colList = new ArrayList<>();
for (FieldSchema fs : schemaList) {
LOG.debug("Processing field " + fs);
Referenceable colReferenceable = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
......@@ -487,6 +488,7 @@ public class HiveMetaStoreBridge {
colReferenceable.set(HiveDataModelGenerator.NAME, fs.getName());
colReferenceable.set("type", fs.getType());
colReferenceable.set(HiveDataModelGenerator.COMMENT, fs.getComment());
colReferenceable.set(HiveDataModelGenerator.TABLE, tableReference);
colList.add(colReferenceable);
}
......
......@@ -79,6 +79,8 @@ public class HiveDataModelGenerator {
public static final String STORAGE_DESC_OUTPUT_FMT = "outputFormat";
public static final String OWNER = "owner";
public static final String TABLE_TYPE_ATTR = "tableType";
public static final String CREATE_TIME = "createTime";
public static final String LAST_ACCESS_TIME = "lastAccessTime";
......@@ -166,6 +168,9 @@ public class HiveDataModelGenerator {
private void createStorageDescClass() throws AtlasException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
//Optional to keep it backward-compatible
new AttributeDefinition(TABLE, HiveDataTypes.HIVE_TABLE.getName(), Multiplicity.OPTIONAL, false,
STORAGE_DESC),
new AttributeDefinition("location", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false,
null),
new AttributeDefinition("inputFormat", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false,
......@@ -223,7 +228,11 @@ public class HiveDataModelGenerator {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition(NAME, DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("type", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition(COMMENT, DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),};
new AttributeDefinition(COMMENT, DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
//Making this optional since this is an incompatible change
//Reverse attribute to 'columns' in Table
new AttributeDefinition(TABLE, HiveDataTypes.HIVE_TABLE.getName(), Multiplicity.OPTIONAL, false, COLUMNS),};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, HiveDataTypes.HIVE_COLUMN.getName(), null,
ImmutableSet.of(AtlasClient.REFERENCEABLE_SUPER_TYPE), attributeDefinitions);
......@@ -254,7 +263,7 @@ public class HiveDataModelGenerator {
false, null),
new AttributeDefinition("viewExpandedText", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL,
false, null),
new AttributeDefinition("tableType", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false,
new AttributeDefinition(HiveDataModelGenerator.TABLE_TYPE_ATTR, DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false,
null),
new AttributeDefinition("temporary", DataTypes.BOOLEAN_TYPE.getName(), Multiplicity.OPTIONAL, false,
null),};
......
......@@ -20,6 +20,7 @@ package org.apache.atlas.hive.bridge;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.hive.model.HiveDataModelGenerator;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.hadoop.hive.metastore.TableType;
......@@ -104,7 +105,7 @@ public class HiveMetaStoreBridgeTest {
// verify update is called on table
verify(atlasClient).updateEntity(eq("82e06b34-9151-4023-aa9d-b82103a50e77"),
(Referenceable) argThat(new MatchesReferenceableProperty(HiveMetaStoreBridge.TABLE_TYPE_ATTR,
(Referenceable) argThat(new MatchesReferenceableProperty(HiveDataModelGenerator.TABLE_TYPE_ATTR,
TableType.EXTERNAL_TABLE.name())));
}
......@@ -163,7 +164,7 @@ public class HiveMetaStoreBridgeTest {
private Referenceable createTableReference() {
Referenceable tableReference = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
Referenceable sdReference = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
tableReference.set("sd", sdReference);
tableReference.set(HiveDataModelGenerator.STORAGE_DESC, sdReference);
return tableReference;
}
......
......@@ -137,7 +137,7 @@ public class HiveHookIT {
//assert on qualified name
Referenceable dbEntity = atlasClient.getEntity(dbid);
Assert.assertEquals(dbEntity.get("qualifiedName"), dbName.toLowerCase() + "@" + CLUSTER_NAME);
Assert.assertEquals(dbEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), dbName.toLowerCase() + "@" + CLUSTER_NAME);
}
......@@ -188,19 +188,21 @@ public class HiveHookIT {
String dbName = createDatabase();
String colName = columnName();
runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)");
assertTableIsRegistered(dbName, tableName);
String tableId = assertTableIsRegistered(dbName, tableName);
//there is only one instance of column registered
String colId = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName), colName));
Referenceable colEntity = atlasClient.getEntity(colId);
Assert.assertEquals(colEntity.get("qualifiedName"), String.format("%s.%s.%s@%s", dbName.toLowerCase(),
Assert.assertEquals(colEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), String.format("%s.%s.%s@%s", dbName.toLowerCase(),
tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME));
Assert.assertNotNull(colEntity.get(HiveDataModelGenerator.TABLE));
Assert.assertEquals(((Id) colEntity.get(HiveDataModelGenerator.TABLE))._getId(), tableId);
tableName = createTable();
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
Referenceable tableRef = atlasClient.getEntity(tableId);
Assert.assertEquals(tableRef.get("tableType"), TableType.MANAGED_TABLE.name());
Assert.assertEquals(tableRef.get(HiveDataModelGenerator.TABLE_TYPE_ATTR), TableType.MANAGED_TABLE.name());
Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment");
String entityName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
Assert.assertEquals(tableRef.get(HiveDataModelGenerator.NAME), entityName);
......@@ -212,8 +214,10 @@ public class HiveHookIT {
verifyTimestamps(tableRef, HiveDataModelGenerator.CREATE_TIME, createTime);
verifyTimestamps(tableRef, HiveDataModelGenerator.LAST_ACCESS_TIME, createTime);
final Referenceable sdRef = (Referenceable) tableRef.get("sd");
final Referenceable sdRef = (Referenceable) tableRef.get(HiveDataModelGenerator.STORAGE_DESC);
Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false);
Assert.assertNotNull(sdRef.get(HiveDataModelGenerator.TABLE));
Assert.assertEquals(((Id) sdRef.get(HiveDataModelGenerator.TABLE))._getId(), tableId);
//Create table where database doesn't exist, will create database instance as well
assertDatabaseIsRegistered(DEFAULT_DB);
......
......@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES:
ATLAS-530 Add table information to column class (sumasai)
ATLAS-538 Rename table should retain traits/tags assigned to columns/storage descriptors (sumasai)
ATLAS-628 Starting two Atlas instances at the same time causes exceptions in HA mode (yhemanth via sumasai)
ATLAS-594 alter table rename doesn't work across databases (sumasai via shwethags)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment