Commit 6e02ec5b by rmani, committed by Madhan Neethiraj

ATLAS-2491: Hive hook should use v2 notifications

parent dee8a2da
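
For context: the core of this change is moving the Hive bridge and hook from the v1 Referenceable-based entity model to the v2 AtlasEntity model exposed by atlas-client-v2. A minimal sketch of the difference, using only types and attribute names that appear in the diff below; the sample database and cluster values are assumptions, not part of the commit:

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.v1.model.instance.Referenceable;

public class V1VsV2EntitySketch {
    // v1 style: untyped Referenceable populated with set(), sent as v1 notifications
    static Referenceable dbV1(String clusterName, String dbName) {
        Referenceable dbRef = new Referenceable("hive_db");

        dbRef.set("qualifiedName", dbName.toLowerCase() + "@" + clusterName);
        dbRef.set("name", dbName.toLowerCase());
        dbRef.set("clusterName", clusterName);

        return dbRef;
    }

    // v2 style: AtlasEntity populated with setAttribute(), created through AtlasClientV2
    static AtlasEntity dbV2(String clusterName, String dbName) {
        AtlasEntity dbEntity = new AtlasEntity("hive_db");

        dbEntity.setAttribute("qualifiedName", dbName.toLowerCase() + "@" + clusterName);
        dbEntity.setAttribute("name", dbName.toLowerCase());
        dbEntity.setAttribute("clusterName", clusterName);

        return dbEntity;
    }
}
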
......@@ -68,6 +68,7 @@ public class FalconBridge {
public static final String RUNSON = "runs-on";
public static final String STOREDIN = "stored-in";
public static final String FREQUENCY = "frequency";
public static final String ATTRIBUTE_DB = "db";
/**
* Creates cluster entity
......@@ -357,7 +358,7 @@ public class FalconBridge {
tableRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName));
tableRef.set(AtlasClient.NAME, tableName.toLowerCase());
tableRef.set(HiveMetaStoreBridge.DB, dbRef);
tableRef.set(ATTRIBUTE_DB, dbRef);
entities.add(tableRef);
return entities;
......
......@@ -106,6 +106,12 @@
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-client-v2</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-notification</artifactId>
</dependency>
......
......@@ -21,22 +21,28 @@ package org.apache.atlas.hive.bridge;
import com.google.common.annotations.VisibleForTesting;
import com.sun.jersey.api.client.ClientResponse;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasConstants;
import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.hive.hook.HiveHook;
import org.apache.atlas.hive.hook.events.BaseHiveEvent;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.hook.AtlasHookException;
import org.apache.atlas.type.AtlasType;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.model.instance.EntityMutations;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.atlas.utils.HdfsNameServiceResolver;
import org.apache.atlas.v1.model.instance.Id;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.atlas.v1.model.instance.Struct;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
......@@ -57,639 +63,729 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Date;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static org.apache.atlas.hive.hook.HiveHook.CONF_PREFIX;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*;
/**
* A Bridge Utility that imports metadata from the Hive Meta Store
* and registers it in Atlas.
*/
public class HiveMetaStoreBridge {
private static final String DEFAULT_DGI_URL = "http://localhost:21000/";
private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
public static final String CONF_PREFIX = "atlas.hook.hive.";
public static final String HIVE_CLUSTER_NAME = "atlas.cluster.name";
public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase";
public static final String DEFAULT_CLUSTER_NAME = "primary";
public static final String DESCRIPTION_ATTR = "description";
public static final String TEMP_TABLE_PREFIX = "_temp-";
private final String clusterName;
public static final long MILLIS_CONVERT_FACTOR = 1000;
public static final String ATLAS_ENDPOINT = "atlas.rest.address";
public static final String COMMENT = "comment";
public static final String PARAMETERS = "parameters";
public static final String COLUMNS = "columns";
public static final String POSITION = "position";
public static final String PART_COLS = "partitionKeys";
public static final String TABLE_ALIAS_LIST = "aliases";
public static final String STORAGE_NUM_BUCKETS = "numBuckets";
public static final String STORAGE_IS_STORED_AS_SUB_DIRS = "storedAsSubDirectories";
public static final String TABLE = "table";
public static final String DB = "db";
public static final String STORAGE_DESC = "sd";
public static final String STORAGE_DESC_INPUT_FMT = "inputFormat";
public static final String STORAGE_DESC_OUTPUT_FMT = "outputFormat";
public static final String LOCATION = "location";
public static final String TABLE_TYPE_ATTR = "tableType";
public static final String CREATE_TIME = "createTime";
public static final String LAST_ACCESS_TIME = "lastAccessTime";
public static final String HDFS_PATH = "hdfs_path";
public static final String SEP = ":".intern();
public static final String HDFS_PATH = "hdfs_path";
private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/";
public final Hive hiveClient;
private final AtlasClient atlasClient;
private final HdfsNameServiceResolver hdfsNameServiceResolver = HdfsNameServiceResolver.getInstance();
private final String clusterName;
private final Hive hiveClient;
private final AtlasClientV2 atlasClientV2;
private final boolean convertHdfsPathToLowerCase;
private final HdfsNameServiceResolver hdfsNameServiceResolver = HdfsNameServiceResolver.getInstance();
HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
this(clusterName, hiveClient, atlasClient, true);
public static void main(String[] args) throws AtlasHookException {
try {
Configuration atlasConf = ApplicationProperties.get();
String[] atlasEndpoint = atlasConf.getStringArray(ATLAS_ENDPOINT);
if (atlasEndpoint == null || atlasEndpoint.length == 0){
atlasEndpoint = new String[] { DEFAULT_ATLAS_URL };
}
HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient, boolean convertHdfsPathToLowerCase) {
this.clusterName = clusterName;
this.hiveClient = hiveClient;
this.atlasClient = atlasClient;
this.convertHdfsPathToLowerCase = convertHdfsPathToLowerCase;
AtlasClientV2 atlasClientV2;
if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput();
atlasClientV2 = new AtlasClientV2(atlasEndpoint, basicAuthUsernamePassword);
} else {
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
atlasClientV2 = new AtlasClientV2(ugi, ugi.getShortUserName(), atlasEndpoint);
}
public String getClusterName() {
return clusterName;
Options options = new Options();
CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse(options, args);
boolean failOnError = cmd.hasOption("failOnError");
HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(atlasConf, new HiveConf(), atlasClientV2);
hiveMetaStoreBridge.importHiveMetadata(failOnError);
} catch(Exception e) {
throw new AtlasHookException("HiveMetaStoreBridge.main() failed.", e);
}
}
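
The rewritten main() above wires an AtlasClientV2 into the bridge instead of the v1 AtlasClient. A minimal programmatic sketch of the same flow, with the endpoint and basic-auth credentials assumed for illustration:

import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

public class ImportHiveMetadataSketch {
    public static void main(String[] args) throws Exception {
        Configuration atlasConf = ApplicationProperties.get();

        // Endpoint and credentials are assumed sample values
        AtlasClientV2 atlasClientV2 = new AtlasClientV2(new String[] { "http://localhost:21000/" },
                                                        new String[] { "admin", "admin" });

        // Reads atlas.cluster.name and the hdfs_path lower-casing flag from atlasConf,
        // then walks all Hive databases and tables and registers them through the v2 client
        HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(atlasConf, new HiveConf(), atlasClientV2);

        bridge.importHiveMetadata(true); // failOnError
    }
}
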
/**
* Construct a HiveMetaStoreBridge.
* @param hiveConf {@link HiveConf} for Hive component in the cluster
*/
public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf) throws Exception {
this(atlasProperties, hiveConf, null);
public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClientV2 atlasClientV2) throws Exception {
this(atlasProperties.getString(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME), Hive.get(hiveConf), atlasClientV2, atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, true));
}
/**
* Construct a HiveMetaStoreBridge.
* @param hiveConf {@link HiveConf} for Hive component in the cluster
*/
public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClient atlasClient) throws Exception {
this(atlasProperties.getString(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME), Hive.get(hiveConf), atlasClient, atlasProperties.getBoolean(CONF_PREFIX + "hdfs_path.convert_to_lowercase", true));
}
AtlasClient getAtlasClient() {
return atlasClient;
public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf) throws Exception {
this(atlasProperties, hiveConf, null);
}
public boolean isConvertHdfsPathToLowerCase() {
return convertHdfsPathToLowerCase;
HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClientV2 atlasClientV2) {
this(clusterName, hiveClient, atlasClientV2, true);
}
void importHiveMetadata(boolean failOnError) throws Exception {
LOG.info("Importing hive metadata");
importDatabases(failOnError);
HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClientV2 atlasClientV2, boolean convertHdfsPathToLowerCase) {
this.clusterName = clusterName;
this.hiveClient = hiveClient;
this.atlasClientV2 = atlasClientV2;
this.convertHdfsPathToLowerCase = convertHdfsPathToLowerCase;
}
private void importDatabases(boolean failOnError) throws Exception {
List<String> databases = hiveClient.getAllDatabases();
for (String databaseName : databases) {
Referenceable dbReference = registerDatabase(databaseName);
if (dbReference != null) {
importTables(dbReference, databaseName, failOnError);
}
}
public String getClusterName() {
return clusterName;
}
/**
* Create a Hive Database entity
* @param hiveDB The Hive {@link Database} object from which to map properties
* @return new Hive Database entity
* @throws HiveException
*/
public Referenceable createDBInstance(Database hiveDB) throws HiveException {
return createOrUpdateDBInstance(hiveDB, null);
public Hive getHiveClient() {
return hiveClient;
}
/**
* Checks if db is already registered, else creates and registers db entity
* @param databaseName
* @return
* @throws Exception
*/
private Referenceable registerDatabase(String databaseName) throws Exception {
Referenceable dbRef = getDatabaseReference(clusterName, databaseName);
Database db = hiveClient.getDatabase(databaseName);
if (db != null) {
if (dbRef == null) {
dbRef = createDBInstance(db);
dbRef = registerInstance(dbRef);
} else {
LOG.info("Database {} is already registered with id {}. Updating it.", databaseName, dbRef.getId().getId());
dbRef = createOrUpdateDBInstance(db, dbRef);
updateInstance(dbRef);
}
public AtlasClientV2 getAtlasClient() {
return atlasClientV2;
}
return dbRef;
}
private Referenceable createOrUpdateDBInstance(Database hiveDB, Referenceable dbRef) {
LOG.info("Importing objects from databaseName : {}", hiveDB.getName());
if (dbRef == null) {
dbRef = new Referenceable(HiveDataTypes.HIVE_DB.getName());
}
String dbName = hiveDB.getName().toLowerCase();
dbRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getDBQualifiedName(clusterName, dbName));
dbRef.set(AtlasClient.NAME, dbName);
dbRef.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
dbRef.set(DESCRIPTION_ATTR, hiveDB.getDescription());
dbRef.set(LOCATION, hdfsNameServiceResolver.getPathWithNameServiceID(hiveDB.getLocationUri()));
dbRef.set(PARAMETERS, hiveDB.getParameters());
dbRef.set(AtlasClient.OWNER, hiveDB.getOwnerName());
if (hiveDB.getOwnerType() != null) {
dbRef.set("ownerType", hiveDB.getOwnerType().getValue());
}
return dbRef;
public boolean isConvertHdfsPathToLowerCase() {
return convertHdfsPathToLowerCase;
}
/**
* Registers an entity in atlas
* @param referenceable
* @return
* @throws Exception
*/
private Referenceable registerInstance(Referenceable referenceable) throws Exception {
String typeName = referenceable.getTypeName();
LOG.debug("creating instance of type {}", typeName);
String entityJSON = AtlasType.toV1Json(referenceable);
LOG.debug("Submitting new entity {} = {}", referenceable.getTypeName(), entityJSON);
List<String> guids = getAtlasClient().createEntity(entityJSON);
LOG.debug("created instance for type {}, guid: {}", typeName, guids);
@VisibleForTesting
public void importHiveMetadata(boolean failOnError) throws Exception {
LOG.info("Importing Hive metadata");
return new Referenceable(guids.get(guids.size() - 1), referenceable.getTypeName(), null);
importDatabases(failOnError);
}
/**
* Gets reference to the atlas entity for the database
* @param databaseName database Name
* @param clusterName cluster name
* @return Reference for database if exists, else null
* @throws Exception
*/
private Referenceable getDatabaseReference(String clusterName, String databaseName) throws Exception {
LOG.debug("Getting reference for database {}", databaseName);
String typeName = HiveDataTypes.HIVE_DB.getName();
private void importDatabases(boolean failOnError) throws Exception {
List<String> databases = hiveClient.getAllDatabases();
return getEntityReference(typeName, getDBQualifiedName(clusterName, databaseName));
}
LOG.info("Found {} databases", databases.size());
/**
* Construct the qualified name used to uniquely identify a Database instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param dbName Name of the Hive database
* @return Unique qualified name to identify the Database instance in Atlas.
*/
public static String getDBQualifiedName(String clusterName, String dbName) {
return String.format("%s@%s", dbName.toLowerCase(), clusterName);
}
for (String databaseName : databases) {
AtlasEntityWithExtInfo dbEntity = registerDatabase(databaseName);
private String getCreateTableString(Table table, String location){
String colString = "";
List<FieldSchema> colList = table.getAllCols();
if ( colList != null) {
for (FieldSchema col : colList) {
colString += col.getName() + " " + col.getType() + ",";
if (dbEntity != null) {
importTables(dbEntity.getEntity(), databaseName, failOnError);
}
if (colList.size() > 0) {
colString = colString.substring(0, colString.length() - 1);
colString = "(" + colString + ")";
}
}
String query = "create external table " + table.getTableName() + colString +
" location '" + location + "'";
return query;
}
/**
* Imports all tables for the given db
* @param databaseReferenceable
* @param dbEntity
* @param databaseName
* @param failOnError
* @throws Exception
*/
private int importTables(Referenceable databaseReferenceable, String databaseName, final boolean failOnError) throws Exception {
int tablesImported = 0;
private int importTables(AtlasEntity dbEntity, String databaseName, final boolean failOnError) throws Exception {
List<String> hiveTables = hiveClient.getAllTables(databaseName);
LOG.info("Importing tables {} for db {}", hiveTables.toString(), databaseName);
LOG.info("Found {} tables in database {}", hiveTables.size(), databaseName);
int tablesImported = 0;
try {
for (String tableName : hiveTables) {
int imported = importTable(databaseReferenceable, databaseName, tableName, failOnError);
int imported = importTable(dbEntity, databaseName, tableName, failOnError);
tablesImported += imported;
}
} finally {
if (tablesImported == hiveTables.size()) {
LOG.info("Successfully imported all {} tables from {} ", tablesImported, databaseName);
LOG.info("Successfully imported all {} tables from database {}", tablesImported, databaseName);
} else {
LOG.error("Able to import {} tables out of {} tables from {}. Please check logs for import errors", tablesImported, hiveTables.size(), databaseName);
LOG.error("Imported {} of {} tables from database {}. Please check logs for errors during import", tablesImported, hiveTables.size(), databaseName);
}
}
return tablesImported;
}
@VisibleForTesting
public int importTable(Referenceable databaseReferenceable, String databaseName, String tableName, final boolean failOnError) throws Exception {
public int importTable(AtlasEntity dbEntity, String databaseName, String tableName, final boolean failOnError) throws Exception {
try {
Table table = hiveClient.getTable(databaseName, tableName);
Referenceable tableReferenceable = registerTable(databaseReferenceable, table);
AtlasEntityWithExtInfo tableEntity = registerTable(dbEntity, table);
if (table.getTableType() == TableType.EXTERNAL_TABLE) {
String tableQualifiedName = getTableProcessQualifiedName(clusterName, table);
Referenceable process = getProcessReference(tableQualifiedName);
if (process == null) {
LOG.info("Attempting to register create table process for {}", tableQualifiedName);
Referenceable lineageProcess = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
ArrayList<Referenceable> sourceList = new ArrayList<>();
ArrayList<Referenceable> targetList = new ArrayList<>();
String tableLocation = isConvertHdfsPathToLowerCase() ? HiveHook.lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
Referenceable path = fillHDFSDataSet(tableLocation);
String query = getCreateTableString(table, tableLocation);
sourceList.add(path);
targetList.add(tableReferenceable);
lineageProcess.set("inputs", sourceList);
lineageProcess.set("outputs", targetList);
lineageProcess.set("userName", table.getOwner());
lineageProcess.set("startTime", new Date(System.currentTimeMillis()));
lineageProcess.set("endTime", new Date(System.currentTimeMillis()));
lineageProcess.set("operationType", "CREATETABLE");
lineageProcess.set("queryText", query);
lineageProcess.set("queryId", query);
lineageProcess.set("queryPlan", "{}");
lineageProcess.set("clusterName", clusterName);
List<String> recentQueries = new ArrayList<>(1);
recentQueries.add(query);
lineageProcess.set("recentQueries", recentQueries);
String processQualifiedName = getTableProcessQualifiedName(clusterName, table);
lineageProcess.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQualifiedName);
lineageProcess.set(AtlasClient.NAME, query);
registerInstance(lineageProcess);
AtlasEntityWithExtInfo processEntity = findProcessEntity(processQualifiedName);
if (processEntity == null) {
String tableLocation = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
String query = getCreateTableString(table, tableLocation);
AtlasEntity pathInst = toHdfsPathEntity(tableLocation);
AtlasEntity tableInst = tableEntity.getEntity();
AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
long now = System.currentTimeMillis();
processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName);
processInst.setAttribute(ATTRIBUTE_NAME, query);
processInst.setAttribute(ATTRIBUTE_CLUSTER_NAME, clusterName);
processInst.setAttribute(ATTRIBUTE_INPUTS, Collections.singletonList(BaseHiveEvent.getObjectId(pathInst)));
processInst.setAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(BaseHiveEvent.getObjectId(tableInst)));
processInst.setAttribute(ATTRIBUTE_USER_NAME, table.getOwner());
processInst.setAttribute(ATTRIBUTE_START_TIME, now);
processInst.setAttribute(ATTRIBUTE_END_TIME, now);
processInst.setAttribute(ATTRIBUTE_OPERATION_TYPE, "CREATETABLE");
processInst.setAttribute(ATTRIBUTE_QUERY_TEXT, query);
processInst.setAttribute(ATTRIBUTE_QUERY_ID, query);
processInst.setAttribute(ATTRIBUTE_QUERY_PLAN, "{}");
processInst.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(query));
AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo();
createTableProcess.addEntity(processInst);
createTableProcess.addEntity(pathInst);
registerInstances(createTableProcess);
} else {
LOG.info("Process {} is already registered", process.toString());
LOG.info("Process {} is already registered", processQualifiedName);
}
}
return 1;
} catch (Exception e) {
LOG.error("Import failed for hive_table {} ", tableName, e);
LOG.error("Import failed for hive_table {}", tableName, e);
if (failOnError) {
throw e;
}
return 0;
}
}
/**
* Gets reference for the table
*
* @param hiveTable
* @return table reference if exists, else null
* Checks if db is already registered, else creates and registers db entity
* @param databaseName
* @return
* @throws Exception
*/
private Referenceable getTableReference(Table hiveTable) throws Exception {
LOG.debug("Getting reference for table {}.{}", hiveTable.getDbName(), hiveTable.getTableName());
private AtlasEntityWithExtInfo registerDatabase(String databaseName) throws Exception {
AtlasEntityWithExtInfo ret = null;
Database db = hiveClient.getDatabase(databaseName);
String typeName = HiveDataTypes.HIVE_TABLE.getName();
String tblQualifiedName = getTableQualifiedName(getClusterName(), hiveTable.getDbName(), hiveTable.getTableName());
return getEntityReference(typeName, tblQualifiedName);
}
if (db != null) {
ret = findDatabase(clusterName, databaseName);
private Referenceable getEntityReference(final String typeName, final String tblQualifiedName) throws AtlasServiceException {
AtlasClient dgiClient = getAtlasClient();
try {
return dgiClient.getEntity(typeName, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tblQualifiedName);
} catch (AtlasServiceException e) {
if(e.getStatus() == ClientResponse.Status.NOT_FOUND) {
return null;
}
throw e;
if (ret == null) {
ret = registerInstance(new AtlasEntityWithExtInfo(toDbEntity(db)));
} else {
LOG.info("Database {} is already registered - id={}. Updating it.", databaseName, ret.getEntity().getGuid());
ret.setEntity(toDbEntity(db, ret.getEntity()));
updateInstance(ret);
}
}
private Referenceable getProcessReference(String qualifiedName) throws Exception{
LOG.debug("Getting reference for process {}", qualifiedName);
String typeName = HiveDataTypes.HIVE_PROCESS.getName();
return getEntityReference(typeName, qualifiedName);
return ret;
}
/**
* Construct the qualified name used to uniquely identify a Table instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param dbName Name of the Hive database to which the Table belongs
* @param tableName Name of the Hive table
* @return Unique qualified name to identify the Table instance in Atlas.
*/
public static String getTableQualifiedName(String clusterName, String dbName, String tableName, boolean isTemporaryTable) {
String tableTempName = tableName;
if (isTemporaryTable) {
if (SessionState.get() != null && SessionState.get().getSessionId() != null) {
tableTempName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId();
private AtlasEntityWithExtInfo registerTable(AtlasEntity dbEntity, Table table) throws AtlasHookException {
try {
AtlasEntityWithExtInfo ret;
AtlasEntityWithExtInfo tableEntity = findTableEntity(table);
if (tableEntity == null) {
tableEntity = toTableEntity(dbEntity, table);
ret = registerInstance(tableEntity);
} else {
tableTempName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10);
LOG.info("Table {}.{} is already registered with id {}. Updating entity.", table.getDbName(), table.getTableName(), tableEntity.getEntity().getGuid());
ret = toTableEntity(dbEntity, table, tableEntity);
updateInstance(ret);
}
return ret;
} catch (Exception e) {
throw new AtlasHookException("HiveMetaStoreBridge.registerTable() failed.", e);
}
return String.format("%s.%s@%s", dbName.toLowerCase(), tableTempName.toLowerCase(), clusterName);
}
/**
* Construct the qualified name used to uniquely identify a Table instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param table hive table for which the qualified name is needed
* @return Unique qualified name to identify the Table instance in Atlas.
* Registers an entity in atlas
* @param entity
* @return
* @throws Exception
*/
public static String getTableQualifiedName(String clusterName, Table table) {
return getTableQualifiedName(clusterName, table.getDbName(), table.getTableName(), table.isTemporary());
private AtlasEntityWithExtInfo registerInstance(AtlasEntityWithExtInfo entity) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("creating {} entity: {}", entity.getEntity().getTypeName(), entity);
}
public static String getTableProcessQualifiedName(String clusterName, Table table) {
String tableQualifiedName = getTableQualifiedName(clusterName, table);
Date createdTime = getTableCreatedTime(table);
return tableQualifiedName + SEP + createdTime.getTime();
AtlasEntityWithExtInfo ret = null;
EntityMutationResponse response = atlasClientV2.createEntity(entity);
List<AtlasEntityHeader> createdEntities = response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE);
if (CollectionUtils.isNotEmpty(createdEntities)) {
for (AtlasEntityHeader createdEntity : createdEntities) {
if (ret == null) {
ret = atlasClientV2.getEntityByGuid(createdEntity.getGuid());
LOG.info("Created {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid());
} else if (ret.getEntity(createdEntity.getGuid()) == null) {
AtlasEntityWithExtInfo newEntity = atlasClientV2.getEntityByGuid(createdEntity.getGuid());
ret.addReferredEntity(newEntity.getEntity());
if (MapUtils.isNotEmpty(newEntity.getReferredEntities())) {
for (Map.Entry<String, AtlasEntity> entry : newEntity.getReferredEntities().entrySet()) {
ret.addReferredEntity(entry.getKey(), entry.getValue());
}
}
/**
* Construct the qualified name used to uniquely identify a Table instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param dbName Name of the Hive database to which the Table belongs
* @param tableName Name of the Hive table
* @return Unique qualified name to identify the Table instance in Atlas.
*/
public static String getTableQualifiedName(String clusterName, String dbName, String tableName) {
return getTableQualifiedName(clusterName, dbName, tableName, false);
LOG.info("Created {} entity: name={}, guid={}", newEntity.getEntity().getTypeName(), newEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), newEntity.getEntity().getGuid());
}
}
}
return ret;
}
/**
* Create a new table instance in Atlas
* @param dbReference reference to a created Hive database {@link Referenceable} to which this table belongs
* @param hiveTable reference to the Hive {@link Table} from which to map properties
* @return Newly created Hive reference
* Registers entities in Atlas
* @param entities
* @return
* @throws Exception
*/
public Referenceable createTableInstance(Referenceable dbReference, Table hiveTable)
throws AtlasHookException {
return createOrUpdateTableInstance(dbReference, null, hiveTable);
private AtlasEntitiesWithExtInfo registerInstances(AtlasEntitiesWithExtInfo entities) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("creating {} entities: {}", entities.getEntities().size(), entities);
}
AtlasEntitiesWithExtInfo ret = null;
EntityMutationResponse response = atlasClientV2.createEntities(entities);
List<AtlasEntityHeader> createdEntities = response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE);
if (CollectionUtils.isNotEmpty(createdEntities)) {
ret = new AtlasEntitiesWithExtInfo();
for (AtlasEntityHeader createdEntity : createdEntities) {
AtlasEntityWithExtInfo entity = atlasClientV2.getEntityByGuid(createdEntity.getGuid());
ret.addEntity(entity.getEntity());
if (MapUtils.isNotEmpty(entity.getReferredEntities())) {
for (Map.Entry<String, AtlasEntity> entry : entity.getReferredEntities().entrySet()) {
ret.addReferredEntity(entry.getKey(), entry.getValue());
}
}
public static Date getTableCreatedTime(Table table) {
return new Date(table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR);
LOG.info("Created {} entity: name={}, guid={}", entity.getEntity().getTypeName(), entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getEntity().getGuid());
}
}
private Referenceable createOrUpdateTableInstance(Referenceable dbReference, Referenceable tableReference,
final Table hiveTable) throws AtlasHookException {
LOG.info("Importing objects from {}.{}", hiveTable.getDbName(), hiveTable.getTableName());
return ret;
}
if (tableReference == null) {
tableReference = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
private void updateInstance(AtlasEntityWithExtInfo entity) throws AtlasServiceException {
if (LOG.isDebugEnabled()) {
LOG.debug("updating {} entity: {}", entity.getEntity().getTypeName(), entity);
}
String tableQualifiedName = getTableQualifiedName(clusterName, hiveTable);
tableReference.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
tableReference.set(AtlasClient.NAME, hiveTable.getTableName().toLowerCase());
tableReference.set(AtlasClient.OWNER, hiveTable.getOwner());
atlasClientV2.updateEntity(entity);
Date createDate = new Date();
if (hiveTable.getTTable() != null){
try {
createDate = getTableCreatedTime(hiveTable);
LOG.debug("Setting create time to {} ", createDate);
tableReference.set(CREATE_TIME, createDate);
} catch(Exception ne) {
LOG.error("Error while setting createTime for the table {} ", hiveTable.getCompleteName(), ne);
LOG.info("Updated {} entity: name={}, guid={}", entity.getEntity().getTypeName(), entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getEntity().getGuid());
}
/**
* Create a Hive Database entity
* @param hiveDB The Hive {@link Database} object from which to map properties
* @return new Hive Database AtlasEntity
* @throws HiveException
*/
private AtlasEntity toDbEntity(Database hiveDB) throws HiveException {
return toDbEntity(hiveDB, null);
}
Date lastAccessTime = createDate;
if ( hiveTable.getLastAccessTime() > 0) {
lastAccessTime = new Date(hiveTable.getLastAccessTime() * MILLIS_CONVERT_FACTOR);
private AtlasEntity toDbEntity(Database hiveDB, AtlasEntity dbEntity) {
if (dbEntity == null) {
dbEntity = new AtlasEntity(HiveDataTypes.HIVE_DB.getName());
}
tableReference.set(LAST_ACCESS_TIME, lastAccessTime);
tableReference.set("retention", hiveTable.getRetention());
tableReference.set(COMMENT, hiveTable.getParameters().get(COMMENT));
String dbName = hiveDB.getName().toLowerCase();
dbEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getDBQualifiedName(clusterName, dbName));
dbEntity.setAttribute(ATTRIBUTE_NAME, dbName);
dbEntity.setAttribute(ATTRIBUTE_DESCRIPTION, hiveDB.getDescription());
dbEntity.setAttribute(ATTRIBUTE_OWNER, hiveDB.getOwnerName());
// add reference to the database
tableReference.set(DB, dbReference);
dbEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, clusterName);
dbEntity.setAttribute(ATTRIBUTE_LOCATION, hdfsNameServiceResolver.getPathWithNameServiceID(hiveDB.getLocationUri()));
dbEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveDB.getParameters());
// add reference to the StorageDescriptor
Referenceable sdReferenceable = fillStorageDesc(hiveTable.getSd(), tableQualifiedName, getStorageDescQFName(tableQualifiedName), tableReference.getId());
tableReference.set(STORAGE_DESC, sdReferenceable);
if (hiveDB.getOwnerType() != null) {
dbEntity.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(hiveDB.getOwnerType().getValue()));
}
tableReference.set(PARAMETERS, hiveTable.getParameters());
return dbEntity;
}
/**
* Create a new table instance in Atlas
* @param database the Hive database {@link AtlasEntity} to which this table belongs
* @param hiveTable reference to the Hive {@link Table} from which to map properties
* @return Newly created Hive AtlasEntity
* @throws Exception
*/
private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, Table hiveTable) throws AtlasHookException {
return toTableEntity(database, hiveTable, null);
}
private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, final Table hiveTable, AtlasEntityWithExtInfo table) throws AtlasHookException {
if (table == null) {
table = new AtlasEntityWithExtInfo(new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName()));
}
AtlasEntity tableEntity = table.getEntity();
String tableQualifiedName = getTableQualifiedName(clusterName, hiveTable);
long createTime = BaseHiveEvent.getTableCreateTime(hiveTable);
long lastAccessTime = hiveTable.getLastAccessTime() > 0 ? hiveTable.getLastAccessTime() : createTime;
tableEntity.setAttribute(ATTRIBUTE_DB, BaseHiveEvent.getObjectId(database));
tableEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName);
tableEntity.setAttribute(ATTRIBUTE_NAME, hiveTable.getTableName().toLowerCase());
tableEntity.setAttribute(ATTRIBUTE_OWNER, hiveTable.getOwner());
tableEntity.setAttribute(ATTRIBUTE_CREATE_TIME, createTime);
tableEntity.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime);
tableEntity.setAttribute(ATTRIBUTE_RETENTION, hiveTable.getRetention());
tableEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveTable.getParameters());
tableEntity.setAttribute(ATTRIBUTE_COMMENT, hiveTable.getParameters().get(ATTRIBUTE_COMMENT));
tableEntity.setAttribute(ATTRIBUTE_TABLE_TYPE, hiveTable.getTableType().name());
tableEntity.setAttribute(ATTRIBUTE_TEMPORARY, hiveTable.isTemporary());
if (hiveTable.getViewOriginalText() != null) {
tableReference.set("viewOriginalText", hiveTable.getViewOriginalText());
tableEntity.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, hiveTable.getViewOriginalText());
}
if (hiveTable.getViewExpandedText() != null) {
tableReference.set("viewExpandedText", hiveTable.getViewExpandedText());
tableEntity.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, hiveTable.getViewExpandedText());
}
tableReference.set(TABLE_TYPE_ATTR, hiveTable.getTableType().name());
tableReference.set("temporary", hiveTable.isTemporary());
// add reference to the Partition Keys
List<Referenceable> partKeys = getColumns(hiveTable.getPartitionKeys(), tableReference);
tableReference.set("partitionKeys", partKeys);
AtlasEntity sdEntity = toStroageDescEntity(hiveTable.getSd(), tableQualifiedName, getStorageDescQFName(tableQualifiedName), BaseHiveEvent.getObjectId(tableEntity));
List<AtlasEntity> partKeys = toColumns(hiveTable.getPartitionKeys(), tableEntity);
List<AtlasEntity> columns = toColumns(hiveTable.getCols(), tableEntity);
tableReference.set(COLUMNS, getColumns(hiveTable.getCols(), tableReference));
tableEntity.setAttribute(ATTRIBUTE_STORAGEDESC, BaseHiveEvent.getObjectId(sdEntity));
tableEntity.setAttribute(ATTRIBUTE_PARTITION_KEYS, BaseHiveEvent.getObjectIds(partKeys));
tableEntity.setAttribute(ATTRIBUTE_COLUMNS, BaseHiveEvent.getObjectIds(columns));
return tableReference;
if (MapUtils.isNotEmpty(table.getReferredEntities())) {
table.getReferredEntities().clear();
}
public static String getStorageDescQFName(String entityQualifiedName) {
return entityQualifiedName + "_storage";
table.addReferredEntity(database);
table.addReferredEntity(sdEntity);
if (partKeys != null) {
for (AtlasEntity partKey : partKeys) {
table.addReferredEntity(partKey);
}
}
private Referenceable registerTable(Referenceable dbReference, Table table) throws AtlasHookException {
try {
String dbName = table.getDbName();
String tableName = table.getTableName();
LOG.info("Attempting to register table [{}]", tableName);
Referenceable tableReference = getTableReference(table);
LOG.info("Found result {}", tableReference);
if (tableReference == null) {
tableReference = createTableInstance(dbReference, table);
tableReference = registerInstance(tableReference);
} else {
LOG.info("Table {}.{} is already registered with id {}. Updating entity.", dbName, tableName,
tableReference.getId().getId());
tableReference = createOrUpdateTableInstance(dbReference, tableReference, table);
updateInstance(tableReference);
if (columns != null) {
for (AtlasEntity column : columns) {
table.addReferredEntity(column);
}
return tableReference;
} catch (Exception e) {
throw new AtlasHookException("HiveMetaStoreBridge.getStorageDescQFName() failed.", e);
}
return table;
}
private void updateInstance(Referenceable referenceable) throws AtlasServiceException {
String typeName = referenceable.getTypeName();
LOG.debug("updating instance of type {}", typeName);
private AtlasEntity toStroageDescEntity(StorageDescriptor storageDesc, String tableQualifiedName, String sdQualifiedName, AtlasObjectId tableId ) throws AtlasHookException {
AtlasEntity ret = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName());
String entityJSON = AtlasType.toV1Json(referenceable);
LOG.debug("Updating entity {} = {}", referenceable.getTypeName(), entityJSON);
ret.setAttribute(ATTRIBUTE_TABLE, tableId);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
ret.setAttribute(ATTRIBUTE_PARAMETERS, storageDesc.getParameters());
ret.setAttribute(ATTRIBUTE_LOCATION, hdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation()));
ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, storageDesc.getInputFormat());
ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, storageDesc.getOutputFormat());
ret.setAttribute(ATTRIBUTE_COMPRESSED, storageDesc.isCompressed());
ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, storageDesc.getNumBuckets());
ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, storageDesc.isStoredAsSubDirectories());
atlasClient.updateEntity(referenceable.getId().getId(), referenceable);
if (storageDesc.getBucketCols().size() > 0) {
ret.setAttribute(ATTRIBUTE_BUCKET_COLS, storageDesc.getBucketCols());
}
public Referenceable fillStorageDesc(StorageDescriptor storageDesc, String tableQualifiedName,
String sdQualifiedName, Id tableId) throws AtlasHookException {
LOG.debug("Filling storage descriptor information for {}", storageDesc);
Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
sdReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sdQualifiedName);
if (storageDesc.getSerdeInfo() != null) {
SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
LOG.debug("serdeInfo = {}", serdeInfo);
// SkewedInfo skewedInfo = storageDesc.getSkewedInfo();
String serdeInfoName = HiveDataTypes.HIVE_SERDE.getName();
Struct serdeInfoStruct = new Struct(serdeInfoName);
AtlasStruct serdeInfoStruct = new AtlasStruct(HiveDataTypes.HIVE_SERDE.getName());
serdeInfoStruct.setAttribute(ATTRIBUTE_NAME, serdeInfo.getName());
serdeInfoStruct.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, serdeInfo.getSerializationLib());
serdeInfoStruct.setAttribute(ATTRIBUTE_PARAMETERS, serdeInfo.getParameters());
serdeInfoStruct.set(AtlasClient.NAME, serdeInfo.getName());
serdeInfoStruct.set("serializationLib", serdeInfo.getSerializationLib());
serdeInfoStruct.set(PARAMETERS, serdeInfo.getParameters());
ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfoStruct);
}
sdReferenceable.set("serdeInfo", serdeInfoStruct);
sdReferenceable.set(STORAGE_NUM_BUCKETS, storageDesc.getNumBuckets());
sdReferenceable
.set(STORAGE_IS_STORED_AS_SUB_DIRS, storageDesc.isStoredAsSubDirectories());
if (CollectionUtils.isNotEmpty(storageDesc.getSortCols())) {
List<AtlasStruct> sortColsStruct = new ArrayList<>();
List<Struct> sortColsStruct = new ArrayList<>();
for (Order sortcol : storageDesc.getSortCols()) {
String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
Struct colStruct = new Struct(hiveOrderName);
colStruct.set("col", sortcol.getCol());
colStruct.set("order", sortcol.getOrder());
AtlasStruct colStruct = new AtlasStruct(hiveOrderName);
colStruct.setAttribute("col", sortcol.getCol());
colStruct.setAttribute("order", sortcol.getOrder());
sortColsStruct.add(colStruct);
}
if (sortColsStruct.size() > 0) {
sdReferenceable.set("sortCols", sortColsStruct);
}
sdReferenceable.set(LOCATION, hdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation()));
sdReferenceable.set("inputFormat", storageDesc.getInputFormat());
sdReferenceable.set("outputFormat", storageDesc.getOutputFormat());
sdReferenceable.set("compressed", storageDesc.isCompressed());
ret.setAttribute(ATTRIBUTE_SORT_COLS, sortColsStruct);
}
if (storageDesc.getBucketCols().size() > 0) {
sdReferenceable.set("bucketCols", storageDesc.getBucketCols());
return ret;
}
sdReferenceable.set(PARAMETERS, storageDesc.getParameters());
sdReferenceable.set("storedAsSubDirectories", storageDesc.isStoredAsSubDirectories());
sdReferenceable.set(TABLE, tableId);
private List<AtlasEntity> toColumns(List<FieldSchema> schemaList, AtlasEntity table) throws AtlasHookException {
List<AtlasEntity> ret = new ArrayList<>();
return sdReferenceable;
}
int columnPosition = 0;
for (FieldSchema fs : schemaList) {
LOG.debug("Processing field {}", fs);
public Referenceable fillHDFSDataSet(String pathUri) {
Referenceable ref = new Referenceable(HDFS_PATH);
AtlasEntity column = new AtlasEntity(HiveDataTypes.HIVE_COLUMN.getName());
// Get the name service ID for the given HDFS path
String nameServiceID = hdfsNameServiceResolver.getNameServiceIDForPath(pathUri);
column.setAttribute(ATTRIBUTE_TABLE, BaseHiveEvent.getObjectId(table));
column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), fs.getName()));
column.setAttribute(ATTRIBUTE_NAME, fs.getName());
column.setAttribute(ATTRIBUTE_OWNER, table.getAttribute(ATTRIBUTE_OWNER));
column.setAttribute(ATTRIBUTE_COL_TYPE, fs.getType());
column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++);
column.setAttribute(ATTRIBUTE_COMMENT, fs.getComment());
ret.add(column);
}
return ret;
}
private AtlasEntity toHdfsPathEntity(String pathUri) {
AtlasEntity ret = new AtlasEntity(HDFS_PATH);
String nameServiceID = hdfsNameServiceResolver.getNameServiceIDForPath(pathUri);
Path path = new Path(pathUri);
ref.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
ret.setAttribute(ATTRIBUTE_NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, clusterName);
if (StringUtils.isNotEmpty(nameServiceID)) {
// Name service resolution is successful, now get updated HDFS path where the host port info is replaced by
// resolved name service
// Name service resolution is successful, now get updated HDFS path where the host port info is replaced by resolved name service
String updatedHdfsPath = hdfsNameServiceResolver.getPathWithNameServiceID(pathUri);
ref.set("path", updatedHdfsPath);
ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHdfsPathQualifiedName(clusterName, updatedHdfsPath));
// Only set name service if it was resolved
ref.set("nameServiceId", nameServiceID);
ret.setAttribute(ATTRIBUTE_PATH, updatedHdfsPath);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHdfsPathQualifiedName(updatedHdfsPath));
ret.setAttribute(ATTRIBUTE_NAMESERVICE_ID, nameServiceID);
} else {
ref.set("path", pathUri);
ret.setAttribute(ATTRIBUTE_PATH, pathUri);
// Only append clusterName for the HDFS path
if (pathUri.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) {
ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHdfsPathQualifiedName(clusterName, pathUri));
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHdfsPathQualifiedName(pathUri));
} else {
ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, pathUri);
}
}
ref.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
return ref;
return ret;
}
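
For illustration, the new toHdfsPathEntity() above roughly amounts to the following when no HDFS name service is resolved; the literal attribute names stand in for the BaseHiveEvent ATTRIBUTE_* constants and are assumptions, as are the sample values in the comments:

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.hadoop.fs.Path;

public class HdfsPathEntitySketch {
    // Hypothetical simplification of the no-name-service branch of toHdfsPathEntity()
    static AtlasEntity hdfsPathEntity(String pathUri, String clusterName) {
        AtlasEntity ret  = new AtlasEntity("hdfs_path");
        Path        path = new Path(pathUri);

        ret.setAttribute("name", Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
        ret.setAttribute("clusterName", clusterName);
        ret.setAttribute("path", pathUri);
        // hdfs:// locations get the cluster name appended, e.g.
        // "hdfs://namenode:8020/warehouse/sales.db/orders@primary"
        ret.setAttribute("qualifiedName", pathUri + "@" + clusterName);

        return ret;
    }
}
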
public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) {
final String[] parts = tableQualifiedName.split("@");
final String tableName = parts[0];
final String clusterName = parts[1];
return String.format("%s.%s@%s", tableName, colName.toLowerCase(), clusterName);
/**
* Gets the atlas entity for the database
* @param databaseName database Name
* @param clusterName cluster name
* @return AtlasEntity for database if exists, else null
* @throws Exception
*/
private AtlasEntityWithExtInfo findDatabase(String clusterName, String databaseName) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("Searching Atlas for database {}", databaseName);
}
public List<Referenceable> getColumns(List<FieldSchema> schemaList, Referenceable tableReference) throws AtlasHookException {
List<Referenceable> colList = new ArrayList<>();
int columnPosition = 0;
for (FieldSchema fs : schemaList) {
LOG.debug("Processing field {}", fs);
Referenceable colReferenceable = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
colReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getColumnQualifiedName((String) tableReference.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), fs.getName()));
colReferenceable.set(AtlasClient.NAME, fs.getName());
colReferenceable.set(AtlasClient.OWNER, tableReference.get(AtlasClient.OWNER));
colReferenceable.set("type", fs.getType());
colReferenceable.set(POSITION, columnPosition++);
colReferenceable.set(COMMENT, fs.getComment());
colReferenceable.set(TABLE, tableReference.getId());
String typeName = HiveDataTypes.HIVE_DB.getName();
return findEntity(typeName, getDBQualifiedName(clusterName, databaseName));
}
colList.add(colReferenceable);
/**
* Gets Atlas Entity for the table
*
* @param hiveTable
* @return table entity from Atlas if exists, else null
* @throws Exception
*/
private AtlasEntityWithExtInfo findTableEntity(Table hiveTable) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("Searching Atlas for table {}.{}", hiveTable.getDbName(), hiveTable.getTableName());
}
return colList;
String typeName = HiveDataTypes.HIVE_TABLE.getName();
String tblQualifiedName = getTableQualifiedName(getClusterName(), hiveTable.getDbName(), hiveTable.getTableName());
return findEntity(typeName, tblQualifiedName);
}
public static String getHdfsPathQualifiedName(String clusterName, String hdfsPath) {
return String.format("%s@%s", hdfsPath, clusterName);
private AtlasEntityWithExtInfo findProcessEntity(String qualifiedName) throws Exception{
if (LOG.isDebugEnabled()) {
LOG.debug("Searching Atlas for process {}", qualifiedName);
}
String typeName = HiveDataTypes.HIVE_PROCESS.getName();
return findEntity(typeName, qualifiedName);
}
private AtlasEntityWithExtInfo findEntity(final String typeName, final String qualifiedName) throws AtlasServiceException {
AtlasClientV2 atlasClientV2 = getAtlasClient();
public static void main(String[] args) throws AtlasHookException {
try {
Configuration atlasConf = ApplicationProperties.get();
String[] atlasEndpoint = atlasConf.getStringArray(ATLAS_ENDPOINT);
if (atlasEndpoint == null || atlasEndpoint.length == 0){
atlasEndpoint = new String[] { DEFAULT_DGI_URL };
return atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName));
} catch (AtlasServiceException e) {
if(e.getStatus() == ClientResponse.Status.NOT_FOUND) {
return null;
}
AtlasClient atlasClient;
if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput();
atlasClient = new AtlasClient(atlasEndpoint, basicAuthUsernamePassword);
throw e;
}
}
private String getCreateTableString(Table table, String location){
String colString = "";
List<FieldSchema> colList = table.getAllCols();
if (colList != null) {
for (FieldSchema col : colList) {
colString += col.getName() + " " + col.getType() + ",";
}
if (colList.size() > 0) {
colString = colString.substring(0, colString.length() - 1);
colString = "(" + colString + ")";
}
}
String query = "create external table " + table.getTableName() + colString + " location '" + location + "'";
return query;
}
private String lower(String str) {
if (StringUtils.isEmpty(str)) {
return "";
}
return str.toLowerCase().trim();
}
/**
* Construct the qualified name used to uniquely identify a Table instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param table hive table for which the qualified name is needed
* @return Unique qualified name to identify the Table instance in Atlas.
*/
private static String getTableQualifiedName(String clusterName, Table table) {
return getTableQualifiedName(clusterName, table.getDbName(), table.getTableName(), table.isTemporary());
}
private String getHdfsPathQualifiedName(String hdfsPath) {
return String.format("%s@%s", hdfsPath, clusterName);
}
/**
* Construct the qualified name used to uniquely identify a Database instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param dbName Name of the Hive database
* @return Unique qualified name to identify the Database instance in Atlas.
*/
public static String getDBQualifiedName(String clusterName, String dbName) {
return String.format("%s@%s", dbName.toLowerCase(), clusterName);
}
/**
* Construct the qualified name used to uniquely identify a Table instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param dbName Name of the Hive database to which the Table belongs
* @param tableName Name of the Hive table
* @param isTemporaryTable is this a temporary table
* @return Unique qualified name to identify the Table instance in Atlas.
*/
public static String getTableQualifiedName(String clusterName, String dbName, String tableName, boolean isTemporaryTable) {
String tableTempName = tableName;
if (isTemporaryTable) {
if (SessionState.get() != null && SessionState.get().getSessionId() != null) {
tableTempName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId();
} else {
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
atlasClient = new AtlasClient(ugi, ugi.getShortUserName(), atlasEndpoint);
tableTempName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10);
}
}
Options options = new Options();
CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse( options, args);
return String.format("%s.%s@%s", dbName.toLowerCase(), tableTempName.toLowerCase(), clusterName);
}
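
A few worked examples of the qualified-name helpers in this class; the cluster, database, table and column names are made up:

import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;

public class QualifiedNameExamples {
    public static void main(String[] args) {
        System.out.println(HiveMetaStoreBridge.getDBQualifiedName("primary", "Sales"));
        // sales@primary

        System.out.println(HiveMetaStoreBridge.getTableQualifiedName("primary", "Sales", "Orders"));
        // sales.orders@primary

        System.out.println(HiveMetaStoreBridge.getColumnQualifiedName("sales.orders@primary", "Amount"));
        // sales.orders.amount@primary
    }
}
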
public static String getTableProcessQualifiedName(String clusterName, Table table) {
String tableQualifiedName = getTableQualifiedName(clusterName, table);
long createdTime = getTableCreatedTime(table);
boolean failOnError = false;
if (cmd.hasOption("failOnError")) {
failOnError = true;
return tableQualifiedName + SEP + createdTime;
}
HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(atlasConf, new HiveConf(), atlasClient);
hiveMetaStoreBridge.importHiveMetadata(failOnError);
/**
* Construct the qualified name used to uniquely identify a Table instance in Atlas.
* @param clusterName Name of the cluster to which the Hive component belongs
* @param dbName Name of the Hive database to which the Table belongs
* @param tableName Name of the Hive table
* @return Unique qualified name to identify the Table instance in Atlas.
*/
public static String getTableQualifiedName(String clusterName, String dbName, String tableName) {
return getTableQualifiedName(clusterName, dbName, tableName, false);
}
catch(Exception e) {
throw new AtlasHookException("HiveMetaStoreBridge.main() failed.", e);
public static String getStorageDescQFName(String tableQualifiedName) {
return tableQualifiedName + "_storage";
}
public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) {
final String[] parts = tableQualifiedName.split("@");
final String tableName = parts[0];
final String clusterName = parts[1];
return String.format("%s.%s@%s", tableName, colName.toLowerCase(), clusterName);
}
public static long getTableCreatedTime(Table table) {
return table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR;
}
}
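
As a usage sketch of the v2 client calls introduced above (createEntities, getEntitiesByOperation, getEntityByAttribute); the endpoint, credentials and hive_db attribute values are assumptions:

import java.util.Collections;

import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.model.instance.EntityMutations;

public class AtlasClientV2Sketch {
    public static void main(String[] args) throws Exception {
        // Endpoint and credentials are assumed sample values
        AtlasClientV2 client = new AtlasClientV2(new String[] { "http://localhost:21000/" },
                                                 new String[] { "admin", "admin" });

        AtlasEntity db = new AtlasEntity("hive_db");
        db.setAttribute("qualifiedName", "sales@primary");
        db.setAttribute("name", "sales");
        db.setAttribute("clusterName", "primary");

        AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo();
        entities.addEntity(db);

        // Create the entity and read back the GUIDs assigned by the server
        EntityMutationResponse response = client.createEntities(entities);
        for (AtlasEntityHeader header : response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE)) {
            System.out.println("created guid=" + header.getGuid());
        }

        // Look the entity up again by its unique attribute
        AtlasEntityWithExtInfo found = client.getEntityByAttribute("hive_db",
                Collections.singletonMap("qualifiedName", "sales@primary"));

        System.out.println("found guid=" + found.getEntity().getGuid());
    }
}
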
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
public class AtlasHiveHookContext {
private final HiveHook hook;
private final HiveOperation hiveOperation;
private final HookContext hiveContext;
private final Hive hive;
private final Map<String, AtlasEntity> qNameEntityMap = new HashMap<>();
public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HookContext hiveContext) throws Exception {
this.hook = hook;
this.hiveOperation = hiveOperation;
this.hiveContext = hiveContext;
this.hive = Hive.get(hiveContext.getConf());
}
public HookContext getHiveContext() {
return hiveContext;
}
public Hive getHive() {
return hive;
}
public HiveOperation getHiveOperation() {
return hiveOperation;
}
public void putEntity(String qualifiedName, AtlasEntity entity) {
qNameEntityMap.put(qualifiedName, entity);
}
public AtlasEntity getEntity(String qualifiedName) {
return qNameEntityMap.get(qualifiedName);
}
public Collection<AtlasEntity> getEntities() { return qNameEntityMap.values(); }
public String getClusterName() {
return hook.getClusterName();
}
public boolean isKnownDatabase(String dbQualifiedName) {
return hook.isKnownDatabase(dbQualifiedName);
}
public boolean isKnownTable(String tblQualifiedName) {
return hook.isKnownTable(tblQualifiedName);
}
public void addToKnownEntities(Collection<AtlasEntity> entities) {
hook.addToKnownEntities(entities);
}
public void removeFromKnownDatabase(String dbQualifiedName) {
hook.removeFromKnownDatabase(dbQualifiedName);
}
public void removeFromKnownTable(String tblQualifiedName) {
hook.removeFromKnownTable(tblQualifiedName);
}
}
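
A hypothetical illustration of how an event implementation can use this context to build each entity only once per hook invocation; the getOrCreateDbEntity helper and the literal attribute names are assumptions, not part of the commit:

import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;

public class HookContextUsageSketch {
    static AtlasEntity getOrCreateDbEntity(AtlasHiveHookContext context, String dbName) {
        String qualifiedName = dbName.toLowerCase() + "@" + context.getClusterName();

        // Reuse the entity if an earlier step of this hook invocation already built it
        AtlasEntity dbEntity = context.getEntity(qualifiedName);

        if (dbEntity == null) {
            dbEntity = new AtlasEntity("hive_db");

            dbEntity.setAttribute("qualifiedName", qualifiedName);
            dbEntity.setAttribute("name", dbName.toLowerCase());
            dbEntity.setAttribute("clusterName", context.getClusterName());

            context.putEntity(qualifiedName, dbEntity);
        }

        return dbEntity;
    }
}
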
......@@ -18,151 +18,57 @@
package org.apache.atlas.hive.hook;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasConstants;
import org.apache.atlas.hive.bridge.ColumnLineageUtils;
import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.hive.hook.events.*;
import org.apache.atlas.hook.AtlasHook;
import org.apache.atlas.hook.AtlasHookException;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.atlas.v1.model.notification.HookNotificationV1.EntityDeleteRequest;
import org.apache.atlas.v1.model.notification.HookNotificationV1.EntityPartialUpdateRequest;
import org.apache.atlas.v1.model.notification.HookNotificationV1.EntityUpdateRequest;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.utils.LruCache;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.Entity.Type;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ShutdownHookManager;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.MalformedURLException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
* AtlasHook sends lineage information to the Atlas server.
*/
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);
public static final String CONF_PREFIX = "atlas.hook.hive.";
private static final String MIN_THREADS = CONF_PREFIX + "minThreads";
private static final String MAX_THREADS = CONF_PREFIX + "maxThreads";
private static final String KEEP_ALIVE_TIME = CONF_PREFIX + "keepAliveTime";
public static final String CONF_SYNC = CONF_PREFIX + "synchronous";
public static final String QUEUE_SIZE = CONF_PREFIX + "queueSize";
public static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";
public static final String SEP = ":".intern();
static final String IO_SEP = "->".intern();
public static final String HOOK_DATABASE_NAME_CACHE_COUNT = CONF_PREFIX + "database.name.cache.count";
public static final String HOOK_TABLE_NAME_CACHE_COUNT = CONF_PREFIX + "table.name.cache.count";
public static final String CONF_CLUSTER_NAME = "atlas.cluster.name";
private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
// wait time determines how long we wait before we exit the jvm on
// shutdown. Pending requests after that will not be sent.
private static final int WAIT_TIME = 3;
private static ExecutorService executor = null;
private static final int minThreadsDefault = 1;
private static final int maxThreadsDefault = 5;
private static final long keepAliveTimeDefault = 10;
private static final int queueSizeDefault = 10000;
public static final String DEFAULT_CLUSTER_NAME = "primary";
private static final HiveConf hiveConf;
private static final String clusterName;
private static final Map<String, Long> knownDatabases;
private static final Map<String, Long> knownTables;
static {
try {
// initialize the async facility to process hook calls. We don't
// want to do this inline since it adds plenty of overhead for the query.
boolean isSync = atlasProperties.getBoolean(CONF_SYNC, Boolean.FALSE);
if(!isSync) {
int minThreads = atlasProperties.getInt(MIN_THREADS, minThreadsDefault);
int maxThreads = atlasProperties.getInt(MAX_THREADS, maxThreadsDefault);
long keepAliveTime = atlasProperties.getLong(KEEP_ALIVE_TIME, keepAliveTimeDefault);
int queueSize = atlasProperties.getInt(QUEUE_SIZE, queueSizeDefault);
executor = new ThreadPoolExecutor(minThreads, maxThreads, keepAliveTime, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(queueSize),
new ThreadFactoryBuilder().setNameFormat("Atlas Logger %d").build());
ShutdownHookManager.get().addShutdownHook(new Thread() {
@Override
public void run() {
try {
LOG.info("==> Shutdown of Atlas Hive Hook");
executor.shutdown();
executor.awaitTermination(WAIT_TIME, TimeUnit.SECONDS);
executor = null;
} catch (InterruptedException ie) {
LOG.info("Interrupt received in shutdown.");
} finally {
LOG.info("<== Shutdown of Atlas Hive Hook");
}
// shutdown client
}
}, AtlasConstants.ATLAS_SHUTDOWN_HOOK_PRIORITY);
}
setupOperationMap();
} catch (Exception e) {
LOG.info("Attempting to send msg while shutdown in progress.", e);
}
for (HiveOperation hiveOperation : HiveOperation.values()) {
OPERATION_MAP.put(hiveOperation.getOperationName(), hiveOperation);
}
hiveConf = new HiveConf();
int dbNameCacheCount = atlasProperties.getInt(HOOK_DATABASE_NAME_CACHE_COUNT, 10000);
int tblNameCacheCount = atlasProperties.getInt(HOOK_TABLE_NAME_CACHE_COUNT, 10000);
LOG.info("Created Atlas Hook");
clusterName = atlasProperties.getString(CONF_CLUSTER_NAME, DEFAULT_CLUSTER_NAME);
knownDatabases = dbNameCacheCount > 0 ? Collections.synchronizedMap(new LruCache<String, Long>(dbNameCacheCount, 0)) : null;
knownTables = tblNameCacheCount > 0 ? Collections.synchronizedMap(new LruCache<String, Long>(tblNameCacheCount, 0)) : null;
}
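// Illustrative sketch (not part of this commit): knownDatabases and knownTables above act as bounded LRU
// caches of qualifiedNames, letting the hook skip re-notifying entities it has already sent. A minimal
// stand-alone equivalent, assuming only the java.util imports already present in this file; the name
// NameCache is made up for illustration:
static class NameCache {
    private final Map<String, Long> cache;

    NameCache(final int maxSize) {
        // access-ordered LinkedHashMap: evicts the least-recently-used entry once maxSize is exceeded
        this.cache = Collections.synchronizedMap(new LinkedHashMap<String, Long>(maxSize, 0.75f, true) {
            @Override
            protected boolean removeEldestEntry(Map.Entry<String, Long> eldest) {
                return size() > maxSize;
            }
        });
    }

    boolean isKnown(String qualifiedName) { return qualifiedName != null && cache.containsKey(qualifiedName); }

    void add(String qualifiedName)    { if (qualifiedName != null) { cache.put(qualifiedName, System.currentTimeMillis()); } }

    void remove(String qualifiedName) { if (qualifiedName != null) { cache.remove(qualifiedName); } }
}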
private static void setupOperationMap() {
//Populate OPERATION_MAP - string to HiveOperation mapping
for (HiveOperation hiveOperation : HiveOperation.values()) {
OPERATION_MAP.put(hiveOperation.getOperationName(), hiveOperation);
}
}
public HiveHook() {
}
@Override
......@@ -171,97 +77,37 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
}
@Override
public void run(final HookContext hookContext) throws Exception {
// clone to avoid concurrent access
try {
final HiveEventContext event = new HiveEventContext();
event.setInputs(hookContext.getInputs());
event.setOutputs(hookContext.getOutputs());
event.setHookType(hookContext.getHookType());
final UserGroupInformation ugi = hookContext.getUgi() == null ? Utils.getUGI() : hookContext.getUgi();
final QueryPlan queryPlan = hookContext.getQueryPlan();
event.setUgi(ugi);
event.setUser(getUser(hookContext.getUserName(), hookContext.getUgi()));
event.setOperation(OPERATION_MAP.get(hookContext.getOperationName()));
event.setQueryId(queryPlan.getQueryId());
event.setQueryStr(queryPlan.getQueryStr());
event.setQueryStartTime(queryPlan.getQueryStartTime());
event.setLineageInfo(hookContext.getLinfo());
if (executor == null) {
collect(event);
notifyAsPrivilegedAction(event);
} else {
executor.submit(new Runnable() {
@Override
public void run() {
try {
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
collect(event);
return event;
}
});
notifyAsPrivilegedAction(event);
} catch (Throwable e) {
LOG.error("Atlas hook failed due to error ", e);
}
}
});
}
} catch (Throwable t) {
LOG.error("Submitting to thread pool failed due to error ", t);
}
}
void notifyAsPrivilegedAction(final HiveEventContext event) {
try {
PrivilegedExceptionAction<Object> privilegedNotify = new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
notifyEntities(event.getMessages());
return event;
}
};
//Notify as 'hive' service user in doAs mode
UserGroupInformation realUser = event.getUgi().getRealUser();
if (realUser != null) {
LOG.info("Sending notification for event {} as service user {} #messages {} ", event.getOperation(), realUser.getShortUserName(), event.getMessages().size());
realUser.doAs(privilegedNotify);
} else {
LOG.info("Sending notification for event {} as current user {} #messages {} ", event.getOperation(), event.getUgi().getShortUserName(), event.getMessages().size());
event.getUgi().doAs(privilegedNotify);
}
} catch(Throwable e) {
LOG.error("Error during notify {} ", event.getOperation(), e);
}
public void run(HookContext hookContext) throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("==> HiveHook.run({})", hookContext.getOperationName());
}
private void collect(HiveEventContext event) throws Exception {
HiveOperation oper = OPERATION_MAP.get(hookContext.getOperationName());
AtlasHiveHookContext context = new AtlasHiveHookContext(this, oper, hookContext);
assert event.getHookType() == HookContext.HookType.POST_EXEC_HOOK : "Non-POST_EXEC_HOOK not supported!";
BaseHiveEvent event = null;
LOG.info("Entered Atlas hook for hook type {}, operation {} , user {} as {}", event.getHookType(), event.getOperation(), event.getUgi().getRealUser(), event.getUgi().getShortUserName());
switch (oper) {
case CREATEDATABASE:
event = new CreateDatabase(context);
break;
HiveMetaStoreBridge dgiBridge = new HiveMetaStoreBridge(atlasProperties, hiveConf);
case DROPDATABASE:
event = new DropDatabase(context);
break;
switch (event.getOperation()) {
case CREATEDATABASE:
handleEventOutputs(dgiBridge, event, Type.DATABASE);
case ALTERDATABASE:
case ALTERDATABASE_OWNER:
event = new AlterDatabase(context);
break;
case CREATETABLE:
LinkedHashMap<Type, Referenceable> tablesCreated = handleEventOutputs(dgiBridge, event, Type.TABLE);
if (tablesCreated != null && tablesCreated.size() > 0) {
handleExternalTables(dgiBridge, event, tablesCreated);
}
event = new CreateTable(context, true);
break;
case DROPTABLE:
case DROPVIEW:
event = new DropTable(context);
break;
case CREATETABLE_AS_SELECT:
......@@ -272,12 +118,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
case IMPORT:
case QUERY:
case TRUNCATETABLE:
registerProcess(dgiBridge, event);
break;
case ALTERTABLE_RENAME:
case ALTERVIEW_RENAME:
renameTable(dgiBridge, event);
event = new CreateHiveProcess(context);
break;
case ALTERTABLE_FILEFORMAT:
......@@ -290,914 +131,82 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
case ALTERTABLE_ADDCOLS:
case ALTERTABLE_REPLACECOLS:
case ALTERTABLE_PARTCOLTYPE:
handleEventOutputs(dgiBridge, event, Type.TABLE);
break;
case ALTERTABLE_RENAMECOL:
renameColumn(dgiBridge, event);
break;
case ALTERTABLE_LOCATION:
LinkedHashMap<Type, Referenceable> tablesUpdated = handleEventOutputs(dgiBridge, event, Type.TABLE);
if (tablesUpdated != null && tablesUpdated.size() > 0) {
//Track altered lineage in case of external tables
handleExternalTables(dgiBridge, event, tablesUpdated);
}
break;
case ALTERDATABASE:
case ALTERDATABASE_OWNER:
handleEventOutputs(dgiBridge, event, Type.DATABASE);
event = new AlterTable(context);
break;
case DROPTABLE:
case DROPVIEW:
deleteTable(dgiBridge, event);
case ALTERTABLE_RENAME:
case ALTERVIEW_RENAME:
event = new AlterTableRename(context);
break;
case DROPDATABASE:
deleteDatabase(dgiBridge, event);
case ALTERTABLE_RENAMECOL:
event = new AlterTableRenameCol(context);
break;
default:
if (LOG.isDebugEnabled()) {
LOG.debug("HiveHook.run({}): operation ignored", hookContext.getOperationName());
}
}
private void deleteTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) {
for (WriteEntity output : event.getOutputs()) {
if (Type.TABLE.equals(output.getType())) {
deleteTable(dgiBridge, event, output);
}
}
}
private void deleteTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event, WriteEntity output) {
final String tblQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), output.getTable());
LOG.info("Deleting table {} ", tblQualifiedName);
event.addMessage(
new EntityDeleteRequest(event.getUser(),
HiveDataTypes.HIVE_TABLE.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
tblQualifiedName));
}
private void deleteDatabase(HiveMetaStoreBridge dgiBridge, HiveEventContext event) {
if (event.getOutputs().size() > 1) {
LOG.info("Starting deletion of tables and databases with cascade {} ", event.getQueryStr());
} else {
LOG.info("Starting deletion of database {} ", event.getQueryStr());
}
for (WriteEntity output : event.getOutputs()) {
if (Type.TABLE.equals(output.getType())) {
deleteTable(dgiBridge, event, output);
} else if (Type.DATABASE.equals(output.getType())) {
final String dbQualifiedName = HiveMetaStoreBridge.getDBQualifiedName(dgiBridge.getClusterName(), output.getDatabase().getName());
event.addMessage(
new EntityDeleteRequest(event.getUser(),
HiveDataTypes.HIVE_DB.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
dbQualifiedName));
}
}
}
private Pair<String, String> findChangedColNames(List<FieldSchema> oldColList, List<FieldSchema> newColList) {
HashMap<FieldSchema, Integer> oldColHashMap = new HashMap<>();
HashMap<FieldSchema, Integer> newColHashMap = new HashMap<>();
for (int i = 0; i < oldColList.size(); i++) {
oldColHashMap.put(oldColList.get(i), i);
newColHashMap.put(newColList.get(i), i);
}
String changedColStringOldName = oldColList.get(0).getName();
String changedColStringNewName = changedColStringOldName;
for (FieldSchema oldCol : oldColList) {
if (!newColHashMap.containsKey(oldCol)) {
changedColStringOldName = oldCol.getName();
break;
}
}
for (FieldSchema newCol : newColList) {
if (!oldColHashMap.containsKey(newCol)) {
changedColStringNewName = newCol.getName();
break;
}
}
return Pair.of(changedColStringOldName, changedColStringNewName);
}
private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
try {
assert event.getInputs() != null && event.getInputs().size() == 1;
assert event.getOutputs() != null && event.getOutputs().size() > 0;
Table oldTable = event.getInputs().iterator().next().getTable();
List<FieldSchema> oldColList = oldTable.getAllCols();
Table outputTbl = event.getOutputs().iterator().next().getTable();
outputTbl = dgiBridge.hiveClient.getTable(outputTbl.getDbName(), outputTbl.getTableName());
List<FieldSchema> newColList = outputTbl.getAllCols();
assert oldColList.size() == newColList.size();
Pair<String, String> changedColNamePair = findChangedColNames(oldColList, newColList);
String oldColName = changedColNamePair.getLeft();
String newColName = changedColNamePair.getRight();
for (WriteEntity writeEntity : event.getOutputs()) {
if (writeEntity.getType() == Type.TABLE) {
Table newTable = writeEntity.getTable();
createOrUpdateEntities(dgiBridge, event, writeEntity, true, oldTable);
final String newQualifiedTableName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(),
newTable);
String oldColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, oldColName);
String newColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, newColName);
Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName);
event.addMessage(new EntityPartialUpdateRequest(event.getUser(),
HiveDataTypes.HIVE_COLUMN.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
oldColumnQFName, newColEntity));
}
}
handleEventOutputs(dgiBridge, event, Type.TABLE);
}
catch(Exception e) {
throw new AtlasHookException("HiveHook.renameColumn() failed.", e);
}
}
private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
try {
//no easy way of getting the new table name
assert event.getInputs() != null && event.getInputs().size() == 1;
assert event.getOutputs() != null && event.getOutputs().size() > 0;
//Update entity if not exists
ReadEntity oldEntity = event.getInputs().iterator().next();
Table oldTable = oldEntity.getTable();
for (WriteEntity writeEntity : event.getOutputs()) {
if (writeEntity.getType() == Entity.Type.TABLE) {
Table newTable = writeEntity.getTable();
//Hive sends both the old and the new table names in the outputs; skip the unchanged (old) name using the check below
if (!newTable.getDbName().equals(oldTable.getDbName()) || !newTable.getTableName().equals(oldTable.getTableName())) {
final String oldQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(),
oldTable);
final String newQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(),
newTable);
//Create/update old table entity - create entity with oldQFName and old tableName if it doesn't exist; if it exists, it will be updated
//We always use the new entity while creating the table since some flags/attributes of the table are not set in inputEntity, and Hive.getTable(oldTableName) also fails since the table doesn't exist in Hive anymore
final LinkedHashMap<Type, Referenceable> tables = createOrUpdateEntities(dgiBridge, event, writeEntity, true);
Referenceable tableEntity = tables.get(Type.TABLE);
//Reset regular column QF Name to old Name and create a new partial notification request to replace old column QFName to newName to retain any existing traits
replaceColumnQFName(event, (List<Referenceable>) tableEntity.get(HiveMetaStoreBridge.COLUMNS), oldQualifiedName, newQualifiedName);
//Reset partition key column QF Name to old Name and create a new partial notification request to replace old column QFName to newName to retain any existing traits
replaceColumnQFName(event, (List<Referenceable>) tableEntity.get(HiveMetaStoreBridge.PART_COLS), oldQualifiedName, newQualifiedName);
//Reset SD QF Name to old Name and create a new partial notification request to replace old SD QFName to newName to retain any existing traits
replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName);
//Reset Table QF Name to old Name and create a new partial notification request to replace old Table QFName to newName
replaceTableQFName(event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName);
}
}
}
}
catch(Exception e) {
throw new AtlasHookException("HiveHook.renameTable() failed.", e);
}
}
private Referenceable replaceTableQFName(HiveEventContext event, Table oldTable, Table newTable, final Referenceable tableEntity, final String oldTableQFName, final String newTableQFName) throws HiveException {
tableEntity.set(AtlasClient.NAME, oldTable.getTableName().toLowerCase());
tableEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, oldTableQFName);
//Replace table entity with new name
final Referenceable newEntity = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
newEntity.set(AtlasClient.NAME, newTable.getTableName().toLowerCase());
newEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newTableQFName);
ArrayList<String> alias_list = new ArrayList<>();
alias_list.add(oldTable.getTableName().toLowerCase());
newEntity.set(HiveMetaStoreBridge.TABLE_ALIAS_LIST, alias_list);
event.addMessage(new EntityPartialUpdateRequest(event.getUser(),
HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
oldTableQFName, newEntity));
return newEntity;
}
private List<Referenceable> replaceColumnQFName(final HiveEventContext event, final List<Referenceable> cols, final String oldTableQFName, final String newTableQFName) {
List<Referenceable> newColEntities = new ArrayList<>();
for (Referenceable col : cols) {
final String colName = (String) col.get(AtlasClient.NAME);
String oldColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(oldTableQFName, colName);
String newColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newTableQFName, colName);
col.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, oldColumnQFName);
Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
///Only QF Name changes
newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName);
event.addMessage(new EntityPartialUpdateRequest(event.getUser(),
HiveDataTypes.HIVE_COLUMN.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
oldColumnQFName, newColEntity));
newColEntities.add(newColEntity);
}
return newColEntities;
}
private Referenceable replaceSDQFName(final HiveEventContext event, Referenceable tableEntity, final String oldTblQFName, final String newTblQFName) {
//Reset storage desc QF Name to old Name
final Referenceable sdRef = ((Referenceable) tableEntity.get(HiveMetaStoreBridge.STORAGE_DESC));
sdRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getStorageDescQFName(oldTblQFName));
//Replace SD QF name first to retain tags
final String oldSDQFName = HiveMetaStoreBridge.getStorageDescQFName(oldTblQFName);
final String newSDQFName = HiveMetaStoreBridge.getStorageDescQFName(newTblQFName);
final Referenceable newSDEntity = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
newSDEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newSDQFName);
event.addMessage(new EntityPartialUpdateRequest(event.getUser(),
HiveDataTypes.HIVE_STORAGEDESC.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
oldSDQFName, newSDEntity));
return newSDEntity;
}
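// Worked example for replaceSDQFName() above (illustrative names, assuming the usual
// <db>.<table>@<cluster> qualifiedName format and the "_storage" suffix used for storage descriptors):
// renaming "sales.orders@primary" to "sales.orders_v2@primary" emits a partial-update message keyed by
// the old SD qualifiedName "sales.orders@primary_storage" carrying the new value
// "sales.orders_v2@primary_storage", so tags on the storage descriptor survive the rename.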
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, boolean skipTempTables, Table existTable) throws AtlasHookException {
try {
Database db = null;
Table table = null;
Partition partition = null;
switch (entity.getType()) {
case DATABASE:
db = entity.getDatabase();
if (db != null) {
db = dgiBridge.hiveClient.getDatabase(db.getName());
}
break;
case TABLE:
table = entity.getTable();
db = dgiBridge.hiveClient.getDatabase(table.getDbName());
break;
case PARTITION:
partition = entity.getPartition();
table = partition.getTable();
db = dgiBridge.hiveClient.getDatabase(table.getDbName());
break;
default:
LOG.info("{}: entity-type not handled by Atlas hook. Ignored", entity.getType());
}
Referenceable dbEntity = null;
Referenceable tableEntity = null;
if (db != null) {
dbEntity = dgiBridge.createDBInstance(db);
}
if (db != null && table != null) {
if (existTable != null) {
table = existTable;
} else {
table = refreshTable(dgiBridge, table.getDbName(), table.getTableName());
}
if (table != null) {
// If it's an external table, we create the table even when the temp-table skip flag is on, since the HDFS path is needed for temp table lineage.
if (skipTempTables && table.isTemporary() && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
LOG.warn("Skipping temporary table registration {} since it is not an external table {} ", table.getTableName(), table.getTableType().name());
} else {
tableEntity = dgiBridge.createTableInstance(dbEntity, table);
}
}
}
LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>();
List<Referenceable> entities = new ArrayList<>();
if (dbEntity != null) {
result.put(Type.DATABASE, dbEntity);
entities.add(dbEntity);
if (event != null) {
super.notifyEntities(event.getNotificationMessages());
}
if (tableEntity != null) {
result.put(Type.TABLE, tableEntity);
entities.add(tableEntity);
if (LOG.isDebugEnabled()) {
LOG.debug("<== HiveHook.run({})", hookContext.getOperationName());
}
if (!entities.isEmpty()) {
event.addMessage(new EntityUpdateRequest(event.getUser(), entities));
}
return result;
}
catch(Exception e) {
throw new AtlasHookException("HiveHook.createOrUpdateEntities() failed.", e);
}
public String getClusterName() {
return clusterName;
}
private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, boolean skipTempTables) throws AtlasHookException {
try {
return createOrUpdateEntities(dgiBridge, event, entity, skipTempTables, null);
} catch (Exception e) {
throw new AtlasHookException("HiveHook.createOrUpdateEntities() failed.", e);
}
}
public boolean isKnownDatabase(String dbQualifiedName) {
return knownDatabases != null && dbQualifiedName != null ? knownDatabases.containsKey(dbQualifiedName) : false;
}
private LinkedHashMap<Type, Referenceable> handleEventOutputs(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Type entityType) throws AtlasHookException {
try {
for (Entity entity : event.getOutputs()) {
if (entity.getType() == entityType) {
return createOrUpdateEntities(dgiBridge, event, entity, true);
}
}
return null;
}
catch(Exception e) {
throw new AtlasHookException("HiveHook.handleEventOutputs() failed.", e);
}
}
public boolean isKnownTable(String tblQualifiedName) {
return knownTables != null && tblQualifiedName != null ? knownTables.containsKey(tblQualifiedName) : false;
}
private static Entity getEntityByType(Set<? extends Entity> entities, Type entityType) {
for (Entity entity : entities) {
if (entity.getType() == entityType) {
return entity;
}
}
return null;
}
public void addToKnownEntities(Collection<AtlasEntity> entities) {
if (knownDatabases != null || knownTables != null) { // caching should be enabled at least for one
if (entities != null) {
for (AtlasEntity entity : entities) {
if (StringUtils.equalsIgnoreCase(entity.getTypeName(), HIVE_TYPE_DB)) {
addToKnownDatabase((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
} else if (StringUtils.equalsIgnoreCase(entity.getTypeName(), HIVE_TYPE_TABLE)) {
addToKnwnTable((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
}
}
}
}
}
public static String lower(String str) {
if (StringUtils.isEmpty(str)) {
return null;
}
return str.toLowerCase().trim();
}
private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
try {
Set<ReadEntity> inputs = event.getInputs();
Set<WriteEntity> outputs = event.getOutputs();
//Even explain CTAS has operation name as CREATETABLE_AS_SELECT
if (inputs.isEmpty() && outputs.isEmpty()) {
LOG.info("Explain statement. Skipping...");
return;
public void addToKnownDatabase(String dbQualifiedName) {
if (knownDatabases != null && dbQualifiedName != null) {
knownDatabases.put(dbQualifiedName, System.currentTimeMillis());
}
if (event.getQueryId() == null) {
LOG.info("Query id/plan is missing for {}", event.getQueryStr());
}
final SortedMap<ReadEntity, Referenceable> source = new TreeMap<>(entityComparator);
final SortedMap<WriteEntity, Referenceable> target = new TreeMap<>(entityComparator);
final Set<String> dataSets = new HashSet<>();
final Set<Referenceable> entities = new LinkedHashSet<>();
boolean isSelectQuery = isSelectQuery(event);
// filter out select queries which do not modify data
if (!isSelectQuery) {
SortedSet<ReadEntity> sortedHiveInputs = new TreeSet<>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
public void addToKnwnTable(String tblQualifiedName) {
if (knownTables != null && tblQualifiedName != null) {
knownTables.put(tblQualifiedName, System.currentTimeMillis());
}
SortedSet<WriteEntity> sortedHiveOutputs = new TreeSet<>(entityComparator);
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
for (ReadEntity readEntity : sortedHiveInputs) {
processHiveEntity(dgiBridge, event, readEntity, dataSets, source, entities);
public void removeFromKnownDatabase(String dbQualifiedName) {
if (knownDatabases != null && dbQualifiedName != null) {
knownDatabases.remove(dbQualifiedName);
}
for (WriteEntity writeEntity : sortedHiveOutputs) {
processHiveEntity(dgiBridge, event, writeEntity, dataSets, target, entities);
}
if (source.size() > 0 || target.size() > 0) {
Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, sortedHiveInputs, sortedHiveOutputs, source, target);
// setup Column Lineage
List<Referenceable> sourceList = new ArrayList<>(source.values());
List<Referenceable> targetList = new ArrayList<>(target.values());
List<Referenceable> colLineageProcessInstances = new ArrayList<>();
try {
Map<String, Referenceable> columnQNameToRef =
ColumnLineageUtils.buildColumnReferenceableMap(sourceList, targetList);
colLineageProcessInstances = createColumnLineageProcessInstances(processReferenceable,
event.lineageInfo,
columnQNameToRef);
} catch (Exception e) {
LOG.warn("Column lineage process setup failed with exception {}", e);
public void removeFromKnownTable(String tblQualifiedName) {
if (knownTables != null && tblQualifiedName != null) {
knownTables.remove(tblQualifiedName);
}
colLineageProcessInstances.add(0, processReferenceable);
entities.addAll(colLineageProcessInstances);
addEntityUpdateNotificationMessagess(event, entities);
} else {
LOG.info("Skipped query {} since it has no getInputs() or resulting getOutputs()", event.getQueryStr());
}
} else {
LOG.info("Skipped query {} for processing since it is a select query ", event.getQueryStr());
}
}
catch(Exception e) {
throw new AtlasHookException("HiveHook.registerProcess() failed.", e);
}
}
private void addEntityUpdateNotificationMessagess(final HiveEventContext event, final Collection<Referenceable> entities) {
// process each entity as separate message to avoid running into OOM errors
for (Referenceable entity : entities) {
event.addMessage(new EntityUpdateRequest(event.getUser(), entity));
}
}
private <T extends Entity> void processHiveEntity(HiveMetaStoreBridge dgiBridge, HiveEventContext event, T entity, Set<String> dataSetsProcessed,
SortedMap<T, Referenceable> dataSets, Set<Referenceable> entities) throws AtlasHookException {
try {
if (entity.getType() == Type.TABLE || entity.getType() == Type.PARTITION) {
final String tblQFName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable());
if (!dataSetsProcessed.contains(tblQFName)) {
LinkedHashMap<Type, Referenceable> result = createOrUpdateEntities(dgiBridge, event, entity, false);
if (result.get(Type.TABLE) != null) {
dataSets.put(entity, result.get(Type.TABLE));
}
dataSetsProcessed.add(tblQFName);
entities.addAll(result.values());
}
} else if (entity.getType() == Type.DFS_DIR) {
URI location = entity.getLocation();
if (location != null) {
final String pathUri = dgiBridge.isConvertHdfsPathToLowerCase() ? lower(new Path(location).toString()) : new Path(location).toString();
LOG.debug("Registering DFS Path {} ", pathUri);
if (!dataSetsProcessed.contains(pathUri)) {
Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri);
dataSets.put(entity, hdfsPath);
dataSetsProcessed.add(pathUri);
entities.add(hdfsPath);
}
}
}
}
catch(Exception e) {
throw new AtlasHookException("HiveHook.processHiveEntity() failed.", e);
}
}
private boolean isSelectQuery(HiveEventContext event) {
if (event.getOperation() == HiveOperation.QUERY) {
//Select query has only one output
if (event.getOutputs().size() == 1) {
WriteEntity output = event.getOutputs().iterator().next();
/* Strangely, select queries have DFS_DIR as the output type, which seems like a bug in Hive. Filter them out by checking whether the path is a temporary URI.
* Insert into/overwrite queries onto local or DFS paths have DFS_DIR or LOCAL_DIR as the type, with WriteType.PATH_WRITE and isTempURI() = false.
* Insert into a temporary table has isTempURI() = false, so it will not be skipped, as expected.
*/
if (output.getType() == Type.DFS_DIR || output.getType() == Type.LOCAL_DIR) {
if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE &&
output.isTempURI()) {
return true;
}
}
}
}
return false;
}
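// Example of the filter above (illustrative queries): a plain "SELECT ... FROM t" has a single output of
// type DFS_DIR with WriteType.PATH_WRITE and isTempURI() == true, so it is treated as a read-only select
// and skipped; an "INSERT OVERWRITE DIRECTORY '/tmp/out' SELECT ..." writes to a non-temporary path
// (isTempURI() == false), so it falls through and is registered as a lineage process.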
private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
List<Referenceable> entities = new ArrayList<>();
final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);
Table hiveTable = hiveEntity == null ? null : hiveEntity.getTable();
//Refresh to get the correct location
if(hiveTable != null) {
hiveTable = refreshTable(dgiBridge, hiveTable.getDbName(), hiveTable.getTableName());
}
if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
LOG.info("Registering external table process {} ", event.getQueryStr());
final String location = dgiBridge.isConvertHdfsPathToLowerCase() ? lower(hiveTable.getDataLocation().toString()) : hiveTable.getDataLocation().toString();
final ReadEntity dfsEntity = new ReadEntity();
dfsEntity.setTyp(Type.DFS_DIR);
dfsEntity.setD(new Path(location));
SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {{
put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
}};
SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {{
put(hiveEntity, tables.get(Type.TABLE));
}};
SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
sortedIps.addAll(hiveInputsMap.keySet());
SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
sortedOps.addAll(hiveOutputsMap.keySet());
Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event,
sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);
entities.addAll(tables.values());
entities.add(processReferenceable);
addEntityUpdateNotificationMessagess(event, entities);
}
}
private static boolean isCreateOp(HiveEventContext hiveEvent) {
return HiveOperation.CREATETABLE.equals(hiveEvent.getOperation())
|| HiveOperation.CREATEVIEW.equals(hiveEvent.getOperation())
|| HiveOperation.ALTERVIEW_AS.equals(hiveEvent.getOperation())
|| HiveOperation.ALTERTABLE_LOCATION.equals(hiveEvent.getOperation())
|| HiveOperation.CREATETABLE_AS_SELECT.equals(hiveEvent.getOperation());
}
private Referenceable getProcessReferenceable(HiveMetaStoreBridge dgiBridge, HiveEventContext hiveEvent,
final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> source, SortedMap<WriteEntity, Referenceable> target)
throws HiveException {
Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
String queryStr = lower(hiveEvent.getQueryStr());
processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getProcessQualifiedName(dgiBridge, hiveEvent, sortedHiveInputs, sortedHiveOutputs, source, target));
LOG.debug("Registering query: {}", queryStr);
List<Referenceable> sourceList = new ArrayList<>(source.values());
List<Referenceable> targetList = new ArrayList<>(target.values());
//The serialization code expected a list
if (sourceList != null && !sourceList.isEmpty()) {
processReferenceable.set("inputs", sourceList);
}
if (targetList != null && !targetList.isEmpty()) {
processReferenceable.set("outputs", targetList);
}
processReferenceable.set(AtlasClient.NAME, queryStr);
processReferenceable.set("operationType", hiveEvent.getOperation().getOperationName());
processReferenceable.set("startTime", new Date(hiveEvent.getQueryStartTime()));
processReferenceable.set("userName", hiveEvent.getUser());
processReferenceable.set("queryText", queryStr);
processReferenceable.set("queryId", hiveEvent.getQueryId());
processReferenceable.set("queryPlan", "Not Supported");
processReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, dgiBridge.getClusterName());
List<String> recentQueries = new ArrayList<>(1);
recentQueries.add(queryStr);
processReferenceable.set("recentQueries", recentQueries);
processReferenceable.set("endTime", new Date(System.currentTimeMillis()));
//TODO set queryGraph
return processReferenceable;
}
private List<Referenceable> createColumnLineageProcessInstances(
Referenceable processRefObj,
Map<String, List<ColumnLineageUtils.HiveColumnLineageInfo>> lineageInfo,
Map<String, Referenceable> columnQNameToRef
) {
List<Referenceable> l = new ArrayList<>();
for(Map.Entry<String, List<ColumnLineageUtils.HiveColumnLineageInfo>> e :
lineageInfo.entrySet()) {
Referenceable destCol = columnQNameToRef.get(e.getKey());
if (destCol == null ) {
LOG.debug("Couldn't find output Column {}", e.getKey());
continue;
}
List<Referenceable> outRef = new ArrayList<>();
outRef.add(destCol);
List<Referenceable> inputRefs = new ArrayList<>();
for(ColumnLineageUtils.HiveColumnLineageInfo cLI : e.getValue()) {
Referenceable srcCol = columnQNameToRef.get(cLI.inputColumn);
if (srcCol == null ) {
LOG.debug("Couldn't find input Column {}", cLI.inputColumn);
continue;
}
inputRefs.add(srcCol);
}
if (inputRefs.size() > 0 ) {
Referenceable r = new Referenceable(HiveDataTypes.HIVE_COLUMN_LINEAGE.getName());
r.set("name", processRefObj.get(AtlasClient.NAME) + ":" + outRef.get(0).get(AtlasClient.NAME));
r.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processRefObj.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) + ":" + outRef.get(0).get(AtlasClient.NAME));
r.set("inputs", inputRefs);
r.set("outputs", outRef);
r.set("query", processRefObj);
r.set("depenendencyType", e.getValue().get(0).depenendencyType);
r.set("expression", e.getValue().get(0).expr);
l.add(r);
}
else{
LOG.debug("No input references found for lineage of column {}", destCol.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME));
}
}
return l;
}
@VisibleForTesting
static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext,
final SortedSet<ReadEntity> sortedHiveInputs,
final SortedSet<WriteEntity> sortedHiveOutputs,
SortedMap<ReadEntity, Referenceable> hiveInputsMap,
SortedMap<WriteEntity, Referenceable> hiveOutputsMap) throws HiveException {
HiveOperation op = eventContext.getOperation();
if (isCreateOp(eventContext)) {
Entity entity = getEntityByType(sortedHiveOutputs, Type.TABLE);
if (entity != null) {
Table outTable = entity.getTable();
//refresh table
outTable = dgiBridge.hiveClient.getTable(outTable.getDbName(), outTable.getTableName());
return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getClusterName(), outTable);
}
}
StringBuilder buffer = new StringBuilder(op.getOperationName());
boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
if ( ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
}
addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
buffer.append(IO_SEP);
addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
LOG.info("Setting process qualified name to {}", buffer);
return buffer.toString();
}
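// Worked example for getProcessQualifiedName() above (illustrative table names and timestamps): an INSERT
// query reading default.t1 and writing default.t2 on cluster "primary" produces a qualifiedName of the form
//   QUERY:default.t1@primary:1507246376000->:INSERT:default.t2@primary:1507246526000
// i.e. the operation name, ':'-separated inputs with their create time, the "->" input/output separator,
// the write type (added only for QUERY operations), and the ':'-separated outputs with their create time.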
private static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
switch (op) {
case LOAD:
case IMPORT:
return isPartitionBasedQuery(outputs);
case EXPORT:
return isPartitionBasedQuery(inputs);
case QUERY:
return true;
}
return false;
}
private static boolean isPartitionBasedQuery(Set<? extends Entity> entities) {
for (Entity entity : entities) {
if (Type.PARTITION.equals(entity.getType())) {
return true;
}
}
return false;
}
private static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
if (refs != null) {
if (sortedInputs != null) {
Set<String> dataSetsProcessed = new LinkedHashSet<>();
for (Entity input : sortedInputs) {
if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
//HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
if (ignoreHDFSPathsInQFName &&
(Type.DFS_DIR.equals(input.getType()) || Type.LOCAL_DIR.equals(input.getType()))) {
LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
} else if (refs.containsKey(input)) {
if ( input.getType() == Type.PARTITION || input.getType() == Type.TABLE) {
Table inputTable = refreshTable(hiveBridge, input.getTable().getDbName(), input.getTable().getTableName());
if (inputTable != null) {
final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(inputTable);
addDataset(buffer, refs.get(input), createTime.getTime());
}
} else {
addDataset(buffer, refs.get(input));
}
}
dataSetsProcessed.add(input.getName().toLowerCase());
}
}
}
}
}
private static void addDataset(StringBuilder buffer, Referenceable ref, final long createTime) {
addDataset(buffer, ref);
buffer.append(SEP);
buffer.append(createTime);
}
private static void addDataset(StringBuilder buffer, Referenceable ref) {
buffer.append(SEP);
String dataSetQlfdName = (String) ref.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
// '/' breaks query parsing on ATLAS
buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
}
private static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
if (refs != null) {
Set<String> dataSetsProcessed = new LinkedHashSet<>();
if (sortedOutputs != null) {
for (WriteEntity output : sortedOutputs) {
final Entity entity = output;
if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
//HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
if (addQueryType(op, (WriteEntity) entity)) {
buffer.append(SEP);
buffer.append(((WriteEntity) entity).getWriteType().name());
}
if (ignoreHDFSPathsInQFName &&
(Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
} else if (refs.containsKey(output)) {
if ( output.getType() == Type.PARTITION || output.getType() == Type.TABLE) {
Table outputTable = refreshTable(hiveBridge, output.getTable().getDbName(), output.getTable().getTableName());
if (outputTable != null) {
final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(outputTable);
addDataset(buffer, refs.get(output), createTime.getTime());
}
} else {
addDataset(buffer, refs.get(output));
}
}
dataSetsProcessed.add(output.getName().toLowerCase());
}
}
}
}
}
private static Table refreshTable(HiveMetaStoreBridge dgiBridge, String dbName, String tableName) {
try {
return dgiBridge.hiveClient.getTable(dbName, tableName);
} catch (HiveException excp) { // this might be the case for temp tables
LOG.warn("failed to get details for table {}.{}. Ignoring. {}: {}", dbName, tableName, excp.getClass().getCanonicalName(), excp.getMessage());
}
return null;
}
private static boolean addQueryType(HiveOperation op, WriteEntity entity) {
if (entity.getWriteType() != null && HiveOperation.QUERY.equals(op)) {
switch (entity.getWriteType()) {
case INSERT:
case INSERT_OVERWRITE:
case UPDATE:
case DELETE:
return true;
case PATH_WRITE:
//Add query type only for DFS paths and ignore local paths since they are not added as outputs
if ( !Type.LOCAL_DIR.equals(entity.getType())) {
return true;
}
break;
default:
}
}
return false;
}
public static class HiveEventContext {
private Set<ReadEntity> inputs;
private Set<WriteEntity> outputs;
private String user;
private UserGroupInformation ugi;
private HiveOperation operation;
private HookContext.HookType hookType;
private JSONObject jsonPlan;
private String queryId;
private String queryStr;
private Long queryStartTime;
public Map<String, List<ColumnLineageUtils.HiveColumnLineageInfo>> lineageInfo;
private List<HookNotification> messages = new ArrayList<>();
public void setInputs(Set<ReadEntity> inputs) {
this.inputs = inputs;
}
public void setOutputs(Set<WriteEntity> outputs) {
this.outputs = outputs;
}
public void setUser(String user) {
this.user = user;
}
public void setUgi(UserGroupInformation ugi) {
this.ugi = ugi;
}
public void setOperation(HiveOperation operation) {
this.operation = operation;
}
public void setHookType(HookContext.HookType hookType) {
this.hookType = hookType;
}
public void setQueryId(String queryId) {
this.queryId = queryId;
}
public void setQueryStr(String queryStr) {
this.queryStr = queryStr;
}
public void setQueryStartTime(Long queryStartTime) {
this.queryStartTime = queryStartTime;
}
public void setLineageInfo(LineageInfo lineageInfo){
try {
this.lineageInfo = ColumnLineageUtils.buildLineageMap(lineageInfo);
LOG.debug("Column Lineage Map => {} ", this.lineageInfo.entrySet());
}catch (Throwable e){
LOG.warn("Column Lineage Map build failed with exception {}", e);
}
}
public Set<ReadEntity> getInputs() {
return inputs;
}
public Set<WriteEntity> getOutputs() {
return outputs;
}
public String getUser() {
return user;
}
public UserGroupInformation getUgi() {
return ugi;
}
public HiveOperation getOperation() {
return operation;
}
public HookContext.HookType getHookType() {
return hookType;
}
public String getQueryId() {
return queryId;
}
public String getQueryStr() {
return queryStr;
}
public Long getQueryStartTime() {
return queryStartTime;
}
public void addMessage(HookNotification message) {
messages.add(message);
}
public List<HookNotification> getMessages() {
return messages;
}
}
@VisibleForTesting
static final class EntityComparator implements Comparator<Entity> {
@Override
public int compare(Entity o1, Entity o2) {
String s1 = o1.getName();
String s2 = o2.getName();
if (s1 == null || s2 == null){
s1 = o1.getD().toString();
s2 = o2.getD().toString();
}
return s1.toLowerCase().compareTo(s2.toLowerCase());
}
}
@VisibleForTesting
static final Comparator<Entity> entityComparator = new EntityComparator();
}
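// Illustrative sketch (not part of this commit): the v1 path above wraps Referenceable instances in
// EntityUpdateRequest messages, while the event classes below build AtlasEntity objects and wrap them in
// the v2 request types. A minimal v2 update notification for a hive_db entity, using the classes and
// BaseHiveEvent constants that appear in this change (attribute values are made-up examples):
//
//     AtlasEntity db = new AtlasEntity(HIVE_TYPE_DB);
//     db.setAttribute(ATTRIBUTE_QUALIFIED_NAME, "sales@primary");
//     db.setAttribute(ATTRIBUTE_NAME, "sales");
//
//     AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo();
//     entities.addEntity(db);
//
//     HookNotification notification = new HookNotification.EntityUpdateRequestV2("hive", entities);
//     // AtlasHook.notifyEntities(...) then delivers the message to the Atlas notification topic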
......@@ -6,19 +6,36 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
public class RewriteException extends Exception {
public RewriteException(final String message, final Exception exception) {
super(message, exception);
}
}
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import java.util.Collections;
import java.util.List;
public class AlterDatabase extends CreateDatabase {
public AlterDatabase(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new HookNotification.EntityUpdateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
}
......@@ -6,21 +6,37 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.apache.hadoop.hive.ql.parse.ASTNode;
public interface ASTRewriter {
void rewrite(RewriteContext ctx, ASTNode node) throws RewriteException;
}
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
import java.util.Collections;
import java.util.List;
public class AlterTable extends CreateTable {
public AlterTable(AtlasHiveHookContext context) {
super(context, true);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new EntityUpdateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2;
import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
public class AlterTableRename extends BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(AlterTableRename.class);
public AlterTableRename(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
List<HookNotification> ret = new ArrayList<>();
if (CollectionUtils.isEmpty(getHiveContext().getInputs())) {
LOG.error("AlterTableRename: old-table not found in inputs list");
return ret;
}
Table oldTable = getHiveContext().getInputs().iterator().next().getTable();
Table newTable = null;
if (CollectionUtils.isNotEmpty(getHiveContext().getOutputs())) {
for (WriteEntity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.TABLE) {
newTable = entity.getTable();
//Hive sends both the old and the new table names in the outputs; skip the unchanged (old) name using the check below
if (StringUtils.equalsIgnoreCase(newTable.getDbName(), oldTable.getDbName()) && StringUtils.equalsIgnoreCase(newTable.getTableName(), oldTable.getTableName())) {
newTable = null;
continue;
}
break;
}
}
}
if (newTable == null) {
LOG.error("AlterTableRename: renamed table not found in outputs list");
return ret;
}
AtlasEntityWithExtInfo oldTableEntity = toTableEntity(oldTable);
// first update with oldTable info, so that the table will be created if it is not present in Atlas
ret.add(new EntityUpdateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(oldTableEntity)));
// update qualifiedName for all columns, partitionKeys, storageDesc
String newTableQualifiedName = getQualifiedName(newTable);
renameColumns((List<AtlasObjectId>) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_COLUMNS), oldTableEntity, newTableQualifiedName, ret);
renameColumns((List<AtlasObjectId>) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_PARTITION_KEYS), oldTableEntity, newTableQualifiedName, ret);
renameStorageDesc((AtlasObjectId) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_STORAGEDESC), oldTableEntity, newTableQualifiedName, ret);
// update qualifiedName and other attributes (like params - which include lastModifiedTime, lastModifiedBy) of the table
AtlasEntityWithExtInfo newTableEntity = toTableEntity(newTable);
// set previous name as the alias
newTableEntity.getEntity().setAttribute(ATTRIBUTE_ALIASES, Collections.singletonList(oldTable.getTableName()));
// remove columns, partitionKeys and storageDesc - as they have already been updated above
removeAttribute(newTableEntity, ATTRIBUTE_COLUMNS);
removeAttribute(newTableEntity, ATTRIBUTE_PARTITION_KEYS);
removeAttribute(newTableEntity, ATTRIBUTE_STORAGEDESC);
AtlasObjectId oldTableId = new AtlasObjectId(oldTableEntity.getEntity().getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
ret.add(new EntityPartialUpdateRequestV2(getUserName(), oldTableId, newTableEntity));
context.removeFromKnownTable((String) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
return ret;
}
private void renameColumns(List<AtlasObjectId> columns, AtlasEntityExtInfo oldEntityExtInfo, String newTableQualifiedName, List<HookNotification> notifications) {
if (CollectionUtils.isNotEmpty(columns)) {
for (AtlasObjectId columnId : columns) {
AtlasEntity oldColumn = oldEntityExtInfo.getEntity(columnId.getGuid());
AtlasObjectId oldColumnId = new AtlasObjectId(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldColumn.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
AtlasEntity newColumn = new AtlasEntity(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName(newTableQualifiedName, (String) oldColumn.getAttribute(ATTRIBUTE_NAME)));
notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn)));
}
}
}
private void renameStorageDesc(AtlasObjectId sdId, AtlasEntityExtInfo oldEntityExtInfo, String newTableQualifiedName, List<HookNotification> notifications) {
if (sdId != null) {
AtlasEntity oldSd = oldEntityExtInfo.getEntity(sdId.getGuid());
AtlasObjectId oldSdId = new AtlasObjectId(oldSd.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldSd.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
AtlasEntity newSd = new AtlasEntity(oldSd.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, getStorageDescQualifiedName(newTableQualifiedName));
notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldSdId, new AtlasEntityWithExtInfo(newSd)));
}
}
private String getColumnQualifiedName(String tblQualifiedName, String columnName) {
int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_CLUSTER_NAME);
if (sepPos == -1) {
return tblQualifiedName + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase();
} else {
return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase() + tblQualifiedName.substring(sepPos);
}
}
private void removeAttribute(AtlasEntityWithExtInfo entity, String attributeName) {
Object attributeValue = entity.getEntity().getAttribute(attributeName);
entity.getEntity().getAttributes().remove(attributeName);
if (attributeValue instanceof AtlasObjectId) {
AtlasObjectId objectId = (AtlasObjectId) attributeValue;
entity.removeReferredEntity(objectId.getGuid());
} else if (attributeValue instanceof Collection) {
for (Object item : (Collection) attributeValue)
if (item instanceof AtlasObjectId) {
AtlasObjectId objectId = (AtlasObjectId) item;
entity.removeReferredEntity(objectId.getGuid());
}
}
}
private String getStorageDescQualifiedName(String tblQualifiedName) {
return tblQualifiedName + "_storage";
}
}
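// Worked example for getColumnQualifiedName() above (illustrative names, assuming the usual
// <db>.<table>.<column>@<cluster> format): when "finance.tax_2017" is renamed to "finance.tax_2018" on
// cluster "primary", a column whose old qualifiedName is "finance.tax_2017.amount@primary" receives a
// partial update to "finance.tax_2018.amount@primary"; the column name is spliced in just before the
// '@<cluster>' suffix of the new table qualifiedName.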
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
public class AlterTableRenameCol extends AlterTable {
private static final Logger LOG = LoggerFactory.getLogger(AlterTableRenameCol.class);
public AlterTableRenameCol(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
if (CollectionUtils.isEmpty(getHiveContext().getInputs())) {
LOG.error("AlterTableRenameCol: old-table not found in inputs list");
return null;
}
if (CollectionUtils.isEmpty(getHiveContext().getOutputs())) {
LOG.error("AlterTableRenameCol: new-table not found in outputs list");
return null;
}
List<HookNotification> ret = new ArrayList<>(super.getNotificationMessages());
Table oldTable = getHiveContext().getInputs().iterator().next().getTable();
Table newTable = getHiveContext().getOutputs().iterator().next().getTable();
newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName());
List<FieldSchema> oldColumns = oldTable.getCols();
List<FieldSchema> newColumns = newTable.getCols();
FieldSchema changedColumnOld = null;
FieldSchema changedColumnNew = null;
for (FieldSchema oldColumn : oldColumns) {
if (!newColumns.contains(oldColumn)) {
changedColumnOld = oldColumn;
break;
}
}
for (FieldSchema newColumn : newColumns) {
if (!oldColumns.contains(newColumn)) {
changedColumnNew = newColumn;
break;
}
}
if (changedColumnOld != null && changedColumnNew != null) {
AtlasObjectId oldColumnId = new AtlasObjectId(HIVE_TYPE_COLUMN, ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(oldTable, changedColumnOld));
AtlasEntity newColumn = new AtlasEntity(HIVE_TYPE_COLUMN);
newColumn.setAttribute(ATTRIBUTE_NAME, changedColumnNew.getName());
newColumn.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(newTable, changedColumnNew));
ret.add(0, new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn)));
} else {
LOG.error("AlterTableRenameCol: no renamed column detected");
}
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.utils.HdfsNameServiceResolver;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
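/**
 * Base class for the v2 Hive hook events. Defines the Atlas type and attribute names used to
 * map Hive metastore objects (database, table, storage descriptor, columns, HDFS paths) to
 * Atlas entities, and provides helpers to build those entities and their qualified names.
 */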
public abstract class BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(BaseHiveEvent.class);
public static final String HIVE_TYPE_DB = "hive_db";
public static final String HIVE_TYPE_TABLE = "hive_table";
public static final String HIVE_TYPE_STORAGEDESC = "hive_storagedesc";
public static final String HIVE_TYPE_COLUMN = "hive_column";
public static final String HIVE_TYPE_PROCESS = "hive_process";
public static final String HIVE_TYPE_COLUMN_LINEAGE = "hive_column_lineage";
public static final String HIVE_TYPE_SERDE = "hive_serde";
public static final String HIVE_TYPE_ORDER = "hive_order";
public static final String HDFS_TYPE_PATH = "hdfs_path";
public static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName";
public static final String ATTRIBUTE_NAME = "name";
public static final String ATTRIBUTE_DESCRIPTION = "description";
public static final String ATTRIBUTE_OWNER = "owner";
public static final String ATTRIBUTE_CLUSTER_NAME = "clusterName";
public static final String ATTRIBUTE_LOCATION = "location";
public static final String ATTRIBUTE_PARAMETERS = "parameters";
public static final String ATTRIBUTE_OWNER_TYPE = "ownerType";
public static final String ATTRIBUTE_COMMENT = "comment";
public static final String ATTRIBUTE_CREATE_TIME = "createTime";
public static final String ATTRIBUTE_LAST_ACCESS_TIME = "lastAccessTime";
public static final String ATTRIBUTE_VIEW_ORIGINAL_TEXT = "viewOriginalText";
public static final String ATTRIBUTE_VIEW_EXPANDED_TEXT = "viewExpandedText";
public static final String ATTRIBUTE_TABLE_TYPE = "tableType";
public static final String ATTRIBUTE_TEMPORARY = "temporary";
public static final String ATTRIBUTE_RETENTION = "retention";
public static final String ATTRIBUTE_DB = "db";
public static final String ATTRIBUTE_STORAGEDESC = "sd";
public static final String ATTRIBUTE_PARTITION_KEYS = "partitionKeys";
public static final String ATTRIBUTE_COLUMNS = "columns";
public static final String ATTRIBUTE_INPUT_FORMAT = "inputFormat";
public static final String ATTRIBUTE_OUTPUT_FORMAT = "outputFormat";
public static final String ATTRIBUTE_COMPRESSED = "compressed";
public static final String ATTRIBUTE_BUCKET_COLS = "bucketCols";
public static final String ATTRIBUTE_NUM_BUCKETS = "numBuckets";
public static final String ATTRIBUTE_STORED_AS_SUB_DIRECTORIES = "storedAsSubDirectories";
public static final String ATTRIBUTE_TABLE = "table";
public static final String ATTRIBUTE_SERDE_INFO = "serdeInfo";
public static final String ATTRIBUTE_SERIALIZATION_LIB = "serializationLib";
public static final String ATTRIBUTE_SORT_COLS = "sortCols";
public static final String ATTRIBUTE_COL_TYPE = "type";
public static final String ATTRIBUTE_COL_POSITION = "position";
public static final String ATTRIBUTE_PATH = "path";
public static final String ATTRIBUTE_NAMESERVICE_ID = "nameServiceId";
public static final String ATTRIBUTE_INPUTS = "inputs";
public static final String ATTRIBUTE_OUTPUTS = "outputs";
public static final String ATTRIBUTE_OPERATION_TYPE = "operationType";
public static final String ATTRIBUTE_START_TIME = "startTime";
public static final String ATTRIBUTE_USER_NAME = "userName";
public static final String ATTRIBUTE_QUERY_TEXT = "queryText";
public static final String ATTRIBUTE_QUERY_ID = "queryId";
public static final String ATTRIBUTE_QUERY_PLAN = "queryPlan";
public static final String ATTRIBUTE_END_TIME = "endTime";
public static final String ATTRIBUTE_RECENT_QUERIES = "recentQueries";
public static final String ATTRIBUTE_QUERY = "query";
public static final String ATTRIBUTE_DEPENDENCY_TYPE = "depenendencyType";
public static final String ATTRIBUTE_EXPRESSION = "expression";
public static final String ATTRIBUTE_ALIASES = "aliases";
public static final char QNAME_SEP_CLUSTER_NAME = '@';
public static final char QNAME_SEP_ENTITY_NAME = '.';
public static final char QNAME_SEP_PROCESS = ':';
public static final String TEMP_TABLE_PREFIX = "_temp-";
public static final long MILLIS_CONVERT_FACTOR = 1000;
public static final Map<Integer, String> OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>();
static {
OWNER_TYPE_TO_ENUM_VALUE.put(1, "USER");
OWNER_TYPE_TO_ENUM_VALUE.put(2, "ROLE");
OWNER_TYPE_TO_ENUM_VALUE.put(3, "GROUP");
}
protected final AtlasHiveHookContext context;
protected BaseHiveEvent(AtlasHiveHookContext context) {
this.context = context;
}
public AtlasHiveHookContext getContext() {
return context;
}
public List<HookNotification> getNotificationMessages() throws Exception {
return null;
}
public static long getTableCreateTime(Table table) {
return table.getTTable() != null ? (table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR) : System.currentTimeMillis();
}
public static AtlasObjectId getObjectId(AtlasEntity entity) {
String qualifiedName = (String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);
AtlasObjectId ret = new AtlasObjectId(entity.getGuid(), entity.getTypeName(), Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName));
return ret;
}
public static List<AtlasObjectId> getObjectIds(List<AtlasEntity> entities) {
final List<AtlasObjectId> ret;
if (CollectionUtils.isNotEmpty(entities)) {
ret = new ArrayList<>(entities.size());
for (AtlasEntity entity : entities) {
ret.add(getObjectId(entity));
}
} else {
ret = Collections.emptyList();
}
return ret;
}
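// Adds all entities built so far in this hook invocation as referred entities, compacts the
// result, and records them as known so that later notifications can refer to them by qualifiedName.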
protected void addProcessedEntities(AtlasEntitiesWithExtInfo entitiesWithExtInfo) {
for (AtlasEntity entity : context.getEntities()) {
entitiesWithExtInfo.addReferredEntity(entity);
}
entitiesWithExtInfo.compact();
context.addToKnownEntities(entitiesWithExtInfo.getEntities());
if (entitiesWithExtInfo.getReferredEntities() != null) {
context.addToKnownEntities(entitiesWithExtInfo.getReferredEntities().values());
}
}
protected AtlasEntity getInputOutputEntity(Entity entity, AtlasEntityExtInfo entityExtInfo) throws Exception {
AtlasEntity ret = null;
switch(entity.getType()) {
case TABLE:
case PARTITION:
case DFS_DIR: {
ret = toAtlasEntity(entity, entityExtInfo);
}
break;
}
return ret;
}
protected AtlasEntity toAtlasEntity(Entity entity, AtlasEntityExtInfo entityExtInfo) throws Exception {
AtlasEntity ret = null;
switch (entity.getType()) {
case DATABASE: {
Database db = getHive().getDatabase(entity.getDatabase().getName());
ret = toDbEntity(db);
}
break;
case TABLE:
case PARTITION: {
Table table = getHive().getTable(entity.getTable().getDbName(), entity.getTable().getTableName());
ret = toTableEntity(table, entityExtInfo);
}
break;
case DFS_DIR: {
URI location = entity.getLocation();
if (location != null) {
ret = getHDFSPathEntity(new Path(entity.getLocation()));
}
}
break;
default:
break;
}
return ret;
}
protected AtlasEntity toDbEntity(Database db) throws Exception {
String dbQualifiedName = getQualifiedName(db);
boolean isKnownDatabase = context.isKnownDatabase(dbQualifiedName);
AtlasEntity ret = context.getEntity(dbQualifiedName);
if (ret == null) {
ret = new AtlasEntity(HIVE_TYPE_DB);
// if this DB was sent in an earlier notification, set 'guid' to null - which will:
// - result in this entity not being included in 'referredEntities'
// - cause Atlas server to resolve the entity by its qualifiedName
if (isKnownDatabase) {
ret.setGuid(null);
}
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, dbQualifiedName);
ret.setAttribute(ATTRIBUTE_NAME, db.getName().toLowerCase());
ret.setAttribute(ATTRIBUTE_DESCRIPTION, db.getDescription());
ret.setAttribute(ATTRIBUTE_OWNER, db.getOwnerName());
ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, getClusterName());
ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getInstance().getPathWithNameServiceID(db.getLocationUri()));
ret.setAttribute(ATTRIBUTE_PARAMETERS, db.getParameters());
if (db.getOwnerType() != null) {
ret.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(db.getOwnerType().getValue()));
}
context.putEntity(dbQualifiedName, ret);
}
return ret;
}
protected AtlasEntityWithExtInfo toTableEntity(Table table) throws Exception {
AtlasEntityWithExtInfo ret = new AtlasEntityWithExtInfo();
AtlasEntity entity = toTableEntity(table, ret);
ret.setEntity(entity);
return ret;
}
protected AtlasEntity toTableEntity(Table table, AtlasEntitiesWithExtInfo entities) throws Exception {
AtlasEntity ret = toTableEntity(table, (AtlasEntityExtInfo) entities);
entities.addEntity(ret);
return ret;
}
protected AtlasEntity toTableEntity(Table table, AtlasEntityExtInfo entityExtInfo) throws Exception {
AtlasEntity dbEntity = toDbEntity(getHive().getDatabase(table.getDbName()));
if (entityExtInfo != null) {
if (dbEntity != null) {
entityExtInfo.addReferredEntity(dbEntity);
}
}
AtlasEntity ret = toTableEntity(getObjectId(dbEntity), table, entityExtInfo);
return ret;
}
protected AtlasEntity toTableEntity(AtlasObjectId dbId, Table table, AtlasEntityExtInfo entityExtInfo) throws Exception {
String tblQualifiedName = getQualifiedName(table);
boolean isKnownTable = context.isKnownTable(tblQualifiedName);
AtlasEntity ret = context.getEntity(tblQualifiedName);
if (ret == null) {
ret = new AtlasEntity(HIVE_TYPE_TABLE);
// if this table was sent in an earlier notification, set 'guid' to null - which will:
// - result in this entity not being included in 'referredEntities'
// - cause Atlas server to resolve the entity by its qualifiedName
if (isKnownTable && !isAlterTableOperation()) {
ret.setGuid(null);
}
long createTime = getTableCreateTime(table);
long lastAccessTime = table.getLastAccessTime() > 0 ? (table.getLastAccessTime() * MILLIS_CONVERT_FACTOR) : createTime;
ret.setAttribute(ATTRIBUTE_DB, dbId);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tblQualifiedName);
ret.setAttribute(ATTRIBUTE_NAME, table.getTableName().toLowerCase());
ret.setAttribute(ATTRIBUTE_OWNER, table.getOwner());
ret.setAttribute(ATTRIBUTE_CREATE_TIME, createTime);
ret.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime);
ret.setAttribute(ATTRIBUTE_RETENTION, table.getRetention());
ret.setAttribute(ATTRIBUTE_PARAMETERS, table.getParameters());
ret.setAttribute(ATTRIBUTE_COMMENT, table.getParameters().get(ATTRIBUTE_COMMENT));
ret.setAttribute(ATTRIBUTE_TABLE_TYPE, table.getTableType().name());
ret.setAttribute(ATTRIBUTE_TEMPORARY, table.isTemporary());
if (table.getViewOriginalText() != null) {
ret.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, table.getViewOriginalText());
}
if (table.getViewExpandedText() != null) {
ret.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, table.getViewExpandedText());
}
AtlasObjectId tableId = getObjectId(ret);
AtlasEntity sd = getStorageDescEntity(tableId, table);
List<AtlasEntity> partitionKeys = getColumnEntities(tableId, table, table.getPartitionKeys());
List<AtlasEntity> columns = getColumnEntities(tableId, table, table.getCols());
if (entityExtInfo != null) {
entityExtInfo.addReferredEntity(sd);
if (partitionKeys != null) {
for (AtlasEntity partitionKey : partitionKeys) {
entityExtInfo.addReferredEntity(partitionKey);
}
}
if (columns != null) {
for (AtlasEntity column : columns) {
entityExtInfo.addReferredEntity(column);
}
}
}
ret.setAttribute(ATTRIBUTE_STORAGEDESC, getObjectId(sd));
ret.setAttribute(ATTRIBUTE_PARTITION_KEYS, getObjectIds(partitionKeys));
ret.setAttribute(ATTRIBUTE_COLUMNS, getObjectIds(columns));
context.putEntity(tblQualifiedName, ret);
}
return ret;
}
protected AtlasEntity getStorageDescEntity(AtlasObjectId tableId, Table table) {
String sdQualifiedName = getQualifiedName(table, table.getSd());
boolean isKnownTable = tableId.getGuid() == null;
AtlasEntity ret = context.getEntity(sdQualifiedName);
if (ret == null) {
ret = new AtlasEntity(HIVE_TYPE_STORAGEDESC);
// if sd's table was sent in an earlier notification, set 'guid' to null - which will:
// - result in this entity not being included in 'referredEntities'
// - cause Atlas server to resolve the entity by its qualifiedName
if (isKnownTable) {
ret.setGuid(null);
}
StorageDescriptor sd = table.getSd();
ret.setAttribute(ATTRIBUTE_TABLE, tableId);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
ret.setAttribute(ATTRIBUTE_PARAMETERS, sd.getParameters());
ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getInstance().getPathWithNameServiceID(sd.getLocation()));
ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, sd.getInputFormat());
ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, sd.getOutputFormat());
ret.setAttribute(ATTRIBUTE_COMPRESSED, sd.isCompressed());
ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, sd.getNumBuckets());
ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, sd.isStoredAsSubDirectories());
if (sd.getBucketCols().size() > 0) {
ret.setAttribute(ATTRIBUTE_BUCKET_COLS, sd.getBucketCols());
}
if (sd.getSerdeInfo() != null) {
AtlasStruct serdeInfo = new AtlasStruct(HIVE_TYPE_SERDE);
SerDeInfo sdSerDeInfo = sd.getSerdeInfo();
serdeInfo.setAttribute(ATTRIBUTE_NAME, sdSerDeInfo.getName());
serdeInfo.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, sdSerDeInfo.getSerializationLib());
serdeInfo.setAttribute(ATTRIBUTE_PARAMETERS, sdSerDeInfo.getParameters());
ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfo);
}
if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
List<AtlasStruct> sortCols = new ArrayList<>(sd.getSortCols().size());
for (Order sdSortCol : sd.getSortCols()) {
AtlasStruct sortcol = new AtlasStruct(HIVE_TYPE_ORDER);
sortcol.setAttribute("col", sdSortCol.getCol());
sortcol.setAttribute("order", sdSortCol.getOrder());
sortCols.add(sortcol);
}
ret.setAttribute(ATTRIBUTE_SORT_COLS, sortCols);
}
context.putEntity(sdQualifiedName, ret);
}
return ret;
}
protected List<AtlasEntity> getColumnEntities(AtlasObjectId tableId, Table table, List<FieldSchema> fieldSchemas) {
List<AtlasEntity> ret = new ArrayList<>();
boolean isKnownTable = tableId.getGuid() == null;
int columnPosition = 0;
for (FieldSchema fieldSchema : fieldSchemas) {
String colQualifiedName = getQualifiedName(table, fieldSchema);
AtlasEntity column = context.getEntity(colQualifiedName);
if (column == null) {
column = new AtlasEntity(HIVE_TYPE_COLUMN);
// if column's table was sent in an earlier notification, set 'guid' to null - which will:
// - result in this entity not being included in 'referredEntities'
// - cause Atlas server to resolve the entity by its qualifiedName
if (isKnownTable) {
column.setGuid(null);
}
column.setAttribute(ATTRIBUTE_TABLE, tableId);
column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, colQualifiedName);
column.setAttribute(ATTRIBUTE_NAME, fieldSchema.getName());
column.setAttribute(ATTRIBUTE_OWNER, table.getOwner());
column.setAttribute(ATTRIBUTE_COL_TYPE, fieldSchema.getType());
column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++);
column.setAttribute(ATTRIBUTE_COMMENT, fieldSchema.getComment());
context.putEntity(colQualifiedName, column);
}
ret.add(column);
}
return ret;
}
protected AtlasEntity getHDFSPathEntity(Path path) {
String strPath = path.toString().toLowerCase();
String nameServiceID = HdfsNameServiceResolver.getInstance().getNameServiceIDForPath(strPath);
String attrPath = StringUtils.isEmpty(nameServiceID) ? strPath : HdfsNameServiceResolver.getInstance().getPathWithNameServiceID(strPath);
String pathQualifiedName = getQualifiedName(attrPath);
AtlasEntity ret = context.getEntity(pathQualifiedName);
if (ret == null) {
ret = new AtlasEntity(HDFS_TYPE_PATH);
if (StringUtils.isNotEmpty(nameServiceID)) {
ret.setAttribute(ATTRIBUTE_NAMESERVICE_ID, nameServiceID);
}
ret.setAttribute(ATTRIBUTE_PATH, attrPath);
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, pathQualifiedName);
ret.setAttribute(ATTRIBUTE_NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, getClusterName());
context.putEntity(pathQualifiedName, ret);
}
return ret;
}
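// Builds the hive_process entity for the current query: inputs/outputs as object ids, the
// lower-cased query text as name/queryText, and operation, timing and user details taken from
// the hook context.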
protected AtlasEntity getHiveProcessEntity(List<AtlasEntity> inputs, List<AtlasEntity> outputs) throws Exception {
AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS);
HookContext hookContext = getHiveContext();
String queryStr = hookContext.getQueryPlan().getQueryStr();
if (queryStr != null) {
queryStr = queryStr.toLowerCase();
}
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(inputs, outputs));
ret.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(inputs));
ret.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputs));
ret.setAttribute(ATTRIBUTE_NAME, queryStr);
ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, hookContext.getOperationName());
ret.setAttribute(ATTRIBUTE_START_TIME, hookContext.getQueryPlan().getQueryStartTime());
ret.setAttribute(ATTRIBUTE_END_TIME, System.currentTimeMillis());
ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
ret.setAttribute(ATTRIBUTE_QUERY_ID, hookContext.getQueryPlan().getQuery().getQueryId());
ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
return ret;
}
protected String getClusterName() {
return context.getClusterName();
}
protected Hive getHive() {
return context.getHive();
}
protected HookContext getHiveContext() {
return context.getHiveContext();
}
protected String getUserName() {
String ret = getHiveContext().getUserName();
if (StringUtils.isEmpty(ret)) {
UserGroupInformation ugi = getHiveContext().getUgi();
if (ugi != null) {
ret = ugi.getShortUserName();
}
if (StringUtils.isEmpty(ret)) {
try {
ret = UserGroupInformation.getCurrentUser().getShortUserName();
} catch (IOException e) {
LOG.warn("Failed for UserGroupInformation.getCurrentUser() ", e);
ret = System.getProperty("user.name");
}
}
}
return ret;
}
protected String getQualifiedName(Entity entity) throws Exception {
switch (entity.getType()) {
case DATABASE:
return getQualifiedName(entity.getDatabase());
case TABLE:
case PARTITION:
return getQualifiedName(entity.getTable());
case DFS_DIR:
return getQualifiedName(entity.getLocation());
}
return null;
}
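// Qualified-name formats produced by the helpers below (the cluster name follows the '@'):
//   database : <dbName>@<clusterName>                       e.g. default@primary
//   table    : <dbName>.<tableName>@<clusterName>           e.g. default.sales@primary
//   column   : <dbName>.<tableName>.<colName>@<clusterName>
//   storage  : <table qualifiedName>_storage
// Temporary tables get a "_temp-<sessionId>" suffix appended to the table name.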
protected String getQualifiedName(Database db) {
return (db.getName() + QNAME_SEP_CLUSTER_NAME).toLowerCase() + getClusterName();
}
protected String getQualifiedName(Table table) {
String tableName = table.getTableName();
if (table.isTemporary()) {
if (SessionState.get() != null && SessionState.get().getSessionId() != null) {
tableName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId();
} else {
tableName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10);
}
}
return (table.getDbName() + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_CLUSTER_NAME).toLowerCase() + getClusterName();
}
protected String getQualifiedName(Table table, StorageDescriptor sd) {
return getQualifiedName(table) + "_storage";
}
protected String getQualifiedName(Table table, FieldSchema column) {
String tblQualifiedName = getQualifiedName(table);
int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_CLUSTER_NAME);
if (sepPos == -1) {
return tblQualifiedName + QNAME_SEP_ENTITY_NAME + column.getName().toLowerCase();
} else {
return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + column.getName().toLowerCase() + tblQualifiedName.substring(sepPos);
}
}
protected String getQualifiedName(DependencyKey column) {
String dbName = column.getDataContainer().getTable().getDbName();
String tableName = column.getDataContainer().getTable().getTableName();
String colName = column.getFieldSchema().getName();
return getQualifiedName(dbName, tableName, colName);
}
protected String getQualifiedName(BaseColumnInfo column) {
String dbName = column.getTabAlias().getTable().getDbName();
String tableName = column.getTabAlias().getTable().getTableName();
String colName = column.getColumn().getName();
return getQualifiedName(dbName, tableName, colName);
}
protected String getQualifiedName(String dbName, String tableName, String colName) {
return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_ENTITY_NAME + colName + QNAME_SEP_CLUSTER_NAME).toLowerCase() + getClusterName();
}
protected String getQualifiedName(URI location) {
String strPath = new Path(location).toString().toLowerCase();
String nameServiceID = HdfsNameServiceResolver.getInstance().getNameServiceIDForPath(strPath);
String attrPath = StringUtils.isEmpty(nameServiceID) ? strPath : HdfsNameServiceResolver.getInstance().getPathWithNameServiceID(strPath);
return getQualifiedName(attrPath);
}
protected String getQualifiedName(String path) {
if (path.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) {
return (path + QNAME_SEP_CLUSTER_NAME).toLowerCase() + getClusterName();
}
return path.toLowerCase();
}
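// Qualified name of a hive_process entity: for create-table/view style operations it is the
// output table's qualifiedName followed by ':' and the table's create time; for other
// operations it is the operation name followed by the sorted inputs, "->", and the sorted
// outputs, each appended as ":<qualifiedName>[:<createTime>]" (with the write type included
// for QUERY outputs and HDFS paths optionally skipped), for example
// "QUERY:default.t1@primary:<createTime>->:INSERT:default.t2@primary:<createTime>".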
protected String getQualifiedName(List<AtlasEntity> inputs, List<AtlasEntity> outputs) throws Exception {
HiveOperation operation = context.getHiveOperation();
if (operation == HiveOperation.CREATETABLE ||
operation == HiveOperation.CREATETABLE_AS_SELECT ||
operation == HiveOperation.CREATEVIEW ||
operation == HiveOperation.ALTERVIEW_AS ||
operation == HiveOperation.ALTERTABLE_LOCATION) {
List<? extends Entity> sortedEntities = new ArrayList<>(getHiveContext().getOutputs());
Collections.sort(sortedEntities, entityComparator);
for (Entity entity : sortedEntities) {
if (entity.getType() == Entity.Type.TABLE) {
Table table = entity.getTable();
table = getHive().getTable(table.getDbName(), table.getTableName());
long createTime = getTableCreateTime(table);
return getQualifiedName(table) + QNAME_SEP_PROCESS + createTime;
}
}
}
StringBuilder sb = new StringBuilder(getHiveContext().getOperationName());
boolean ignoreHDFSPaths = ignoreHDFSPathsinProcessQualifiedName();
addToProcessQualifiedName(sb, getHiveContext().getInputs(), ignoreHDFSPaths);
sb.append("->");
addToProcessQualifiedName(sb, getHiveContext().getOutputs(), ignoreHDFSPaths);
return sb.toString();
}
private boolean ignoreHDFSPathsinProcessQualifiedName() {
switch (context.getHiveOperation()) {
case LOAD:
case IMPORT:
return hasPartitionEntity(getHiveContext().getOutputs());
case EXPORT:
return hasPartitionEntity(getHiveContext().getInputs());
case QUERY:
return true;
}
return false;
}
private boolean hasPartitionEntity(Collection<? extends Entity> entities) {
if (entities != null) {
for (Entity entity : entities) {
if (entity.getType() == Entity.Type.PARTITION) {
return true;
}
}
}
return false;
}
private void addToProcessQualifiedName(StringBuilder processQualifiedName, Collection<? extends Entity> entities, boolean ignoreHDFSPaths) {
if (entities == null) {
return;
}
List<? extends Entity> sortedEntities = new ArrayList<>(entities);
Collections.sort(sortedEntities, entityComparator);
Set<String> dataSetsProcessed = new HashSet<>();
for (Entity entity : sortedEntities) {
if (ignoreHDFSPaths && (Entity.Type.DFS_DIR.equals(entity.getType()) || Entity.Type.LOCAL_DIR.equals(entity.getType()))) {
continue;
}
String qualifiedName = null;
long createTime = 0;
try {
if (entity.getType() == Entity.Type.PARTITION || entity.getType() == Entity.Type.TABLE) {
Table table = getHive().getTable(entity.getTable().getDbName(), entity.getTable().getTableName());
if (table != null) {
createTime = getTableCreateTime(table);
qualifiedName = getQualifiedName(table);
}
} else {
qualifiedName = getQualifiedName(entity);
}
} catch (Exception excp) {
LOG.error("error while computing qualifiedName for process", excp);
}
if (qualifiedName == null || !dataSetsProcessed.add(qualifiedName)) {
continue;
}
if (entity instanceof WriteEntity) { // output entity
WriteEntity writeEntity = (WriteEntity) entity;
if (writeEntity.getWriteType() != null && HiveOperation.QUERY.equals(context.getHiveOperation())) {
boolean addWriteType = false;
switch (((WriteEntity) entity).getWriteType()) {
case INSERT:
case INSERT_OVERWRITE:
case UPDATE:
case DELETE:
addWriteType = true;
break;
case PATH_WRITE:
addWriteType = !Entity.Type.LOCAL_DIR.equals(entity.getType());
break;
}
if (addWriteType) {
processQualifiedName.append(QNAME_SEP_PROCESS).append(writeEntity.getWriteType().name());
}
}
}
processQualifiedName.append(QNAME_SEP_PROCESS).append(qualifiedName.toLowerCase().replaceAll("/", ""));
if (createTime != 0) {
processQualifiedName.append(QNAME_SEP_PROCESS).append(createTime);
}
}
}
private boolean isAlterTableOperation() {
switch (context.getHiveOperation()) {
case ALTERTABLE_FILEFORMAT:
case ALTERTABLE_CLUSTER_SORT:
case ALTERTABLE_BUCKETNUM:
case ALTERTABLE_PROPERTIES:
case ALTERTABLE_SERDEPROPERTIES:
case ALTERTABLE_SERIALIZER:
case ALTERTABLE_ADDCOLS:
case ALTERTABLE_REPLACECOLS:
case ALTERTABLE_PARTCOLTYPE:
case ALTERTABLE_LOCATION:
case ALTERTABLE_RENAME:
case ALTERTABLE_RENAMECOL:
case ALTERVIEW_PROPERTIES:
case ALTERVIEW_RENAME:
return true;
}
return false;
}
static final class EntityComparator implements Comparator<Entity> {
@Override
public int compare(Entity entity1, Entity entity2) {
String name1 = entity1.getName();
String name2 = entity2.getName();
if (name1 == null || name2 == null) {
name1 = entity1.getD().toString();
name2 = entity2.getD().toString();
}
return name1.toLowerCase().compareTo(name2.toLowerCase());
}
}
static final Comparator<Entity> entityComparator = new EntityComparator();
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collections;
import java.util.List;
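/**
 * Generates an entity-create notification for CREATE DATABASE: each database found in the hook
 * outputs is refreshed from the metastore and sent as a hive_db entity.
 */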
public class CreateDatabase extends BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(CreateDatabase.class);
public CreateDatabase(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new EntityCreateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public AtlasEntitiesWithExtInfo getEntities() throws Exception {
AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.DATABASE) {
Database db = entity.getDatabase();
if (db != null) {
db = getHive().getDatabase(db.getName());
}
if (db != null) {
AtlasEntity dbEntity = toDbEntity(db);
ret.addEntity(dbEntity);
} else {
LOG.error("CreateDatabase.getEntities(): failed to retrieve db");
}
}
}
addProcessedEntities(ret);
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
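/**
 * Generates entity-create notifications for queries that produce lineage: builds Atlas entities
 * for the query's inputs and outputs, a hive_process entity linking them, and, when Hive
 * provides column lineage, a hive_column_lineage entity for each output column.
 */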
public class CreateHiveProcess extends BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(CreateHiveProcess.class);
public CreateHiveProcess(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
List<HookNotification> ret = entities != null ? Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities)) : null;
return ret;
}
public AtlasEntitiesWithExtInfo getEntities() throws Exception {
AtlasEntitiesWithExtInfo ret = null;
if (!skipProcess()) {
List<AtlasEntity> inputs = new ArrayList<>();
List<AtlasEntity> outputs = new ArrayList<>();
HookContext hiveContext = getHiveContext();
Set<String> processedNames = new HashSet<>();
ret = new AtlasEntitiesWithExtInfo();
if (hiveContext.getInputs() != null) {
for (ReadEntity input : hiveContext.getInputs()) {
String qualifiedName = getQualifiedName(input);
if (qualifiedName == null || !processedNames.add(qualifiedName)) {
continue;
}
AtlasEntity entity = getInputOutputEntity(input, ret);
if (entity != null) {
inputs.add(entity);
}
}
}
if (hiveContext.getOutputs() != null) {
for (WriteEntity output : hiveContext.getOutputs()) {
String qualifiedName = getQualifiedName(output);
if (qualifiedName == null || !processedNames.add(qualifiedName)) {
continue;
}
AtlasEntity entity = getInputOutputEntity(output, ret);
if (entity != null) {
outputs.add(entity);
}
}
}
if (!inputs.isEmpty() || !outputs.isEmpty()) {
AtlasEntity process = getHiveProcessEntity(inputs, outputs);
ret.addEntity(process);
processColumnLineage(process, ret);
addProcessedEntities(ret);
} else {
ret = null;
}
}
return ret;
}
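// For each output column in the query's LineageInfo, create a hive_column_lineage entity that
// links the known input-column entities to the output column and to the hive_process entity.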
private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) {
LineageInfo lineageInfo = getHiveContext().getLinfo();
if (lineageInfo == null || CollectionUtils.isEmpty(lineageInfo.entrySet())) {
return;
}
for (Map.Entry<DependencyKey, Dependency> entry : lineageInfo.entrySet()) {
String outputColName = getQualifiedName(entry.getKey());
AtlasEntity outputColumn = context.getEntity(outputColName);
if (outputColumn == null) {
LOG.warn("column-lineage: non-existing output-column {}", outputColName);
continue;
}
List<AtlasEntity> inputColumns = new ArrayList<>();
for (BaseColumnInfo baseColumn : entry.getValue().getBaseCols()) {
String inputColName = getQualifiedName(baseColumn);
AtlasEntity inputColumn = context.getEntity(inputColName);
if (inputColumn == null) {
LOG.warn("column-lineage: non-existing input-column {} for output-column={}", inputColName, outputColName);
continue;
}
inputColumns.add(inputColumn);
}
if (inputColumns.isEmpty()) {
continue;
}
AtlasEntity columnLineageProcess = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE);
columnLineageProcess.setAttribute(ATTRIBUTE_NAME, hiveProcess.getAttribute(ATTRIBUTE_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
columnLineageProcess.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
columnLineageProcess.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(inputColumns));
columnLineageProcess.setAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(getObjectId(outputColumn)));
columnLineageProcess.setAttribute(ATTRIBUTE_QUERY, getObjectId(hiveProcess));
columnLineageProcess.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, entry.getValue().getType());
columnLineageProcess.setAttribute(ATTRIBUTE_EXPRESSION, entry.getValue().getExpr());
entities.addEntity(columnLineageProcess);
}
}
private boolean skipProcess() {
Set<ReadEntity> inputs = getHiveContext().getInputs();
Set<WriteEntity> outputs = getHiveContext().getOutputs();
boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs);
if (!ret) {
if (getContext().getHiveOperation() == HiveOperation.QUERY) {
// Select query has only one output
if (outputs.size() == 1) {
WriteEntity output = outputs.iterator().next();
if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) {
if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) {
ret = true;
}
}
}
}
}
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.metadata.Table;
import java.util.Collections;
import java.util.List;
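/**
 * Generates an entity-create notification for CREATE TABLE / CTAS: the created table, refreshed
 * from the metastore, is sent along with its database, storage descriptor and columns; for
 * external tables a hive_process entity from the table's HDFS path to the table is added as well.
 */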
public class CreateTable extends BaseHiveEvent {
private final boolean skipTempTables;
public CreateTable(AtlasHiveHookContext context, boolean skipTempTables) {
super(context);
this.skipTempTables = skipTempTables;
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new EntityCreateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public AtlasEntitiesWithExtInfo getEntities() throws Exception {
AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
Database db = null;
Table table = null;
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.TABLE) {
table = entity.getTable();
if (table != null) {
db = getHive().getDatabase(table.getDbName());
table = getHive().getTable(table.getDbName(), table.getTableName());
if (table != null) {
// If it's an external table, create it even when the temp-table skip flag is on, since the HDFS path is needed for lineage.
if (skipTempTables && table.isTemporary() && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
table = null;
} else {
break;
}
}
}
}
}
if (table != null) {
AtlasEntity tblEntity = toTableEntity(table, ret);
if (TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
AtlasEntity hdfsPathEntity = getHDFSPathEntity(table.getDataLocation());
AtlasEntity processEntity = getHiveProcessEntity(Collections.singletonList(hdfsPathEntity), Collections.singletonList(tblEntity));
ret.addEntity(processEntity);
ret.addReferredEntity(hdfsPathEntity);
}
}
addProcessedEntities(ret);
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2;
import org.apache.hadoop.hive.ql.hooks.Entity;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
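/**
 * Generates an entity-delete notification for DROP DATABASE: object ids, identified by
 * qualifiedName, of the dropped database and of any tables listed in the outputs are sent for
 * deletion, and the corresponding entries are removed from the known-entity cache.
 */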
public class DropDatabase extends BaseHiveEvent {
public DropDatabase(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
List<AtlasObjectId> entities = getEntities();
HookNotification notification = new EntityDeleteRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public List<AtlasObjectId> getEntities() throws Exception {
List<AtlasObjectId> ret = new ArrayList<>();
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.DATABASE) {
String dbQName = getQualifiedName(entity.getDatabase());
AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_DB, ATTRIBUTE_QUALIFIED_NAME, dbQName);
context.removeFromKnownDatabase(dbQName);
ret.add(dbId);
} else if (entity.getType() == Entity.Type.TABLE) {
String tblQName = getQualifiedName(entity.getTable());
AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName);
context.removeFromKnownTable(tblQName);
ret.add(dbId);
}
}
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2;
import org.apache.hadoop.hive.ql.hooks.Entity;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
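/**
 * Generates an entity-delete notification for DROP TABLE: object ids, identified by
 * qualifiedName, of the dropped tables are sent for deletion and removed from the known-table cache.
 */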
public class DropTable extends BaseHiveEvent {
public DropTable(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
List<AtlasObjectId> entities = getEntities();
HookNotification notification = new EntityDeleteRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public List<AtlasObjectId> getEntities() throws Exception {
List<AtlasObjectId> ret = new ArrayList<>();
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.TABLE) {
String tblQName = getQualifiedName(entity.getTable());
AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName);
context.removeFromKnownTable(tblQName);
ret.add(dbId);
}
}
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
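/**
 * Rewrites a Hive query string by parsing it into an AST, walking the tree and applying the
 * configured {@link ASTRewriter}s (currently only {@link LiteralRewriter}), then re-emitting
 * SQL from the rewritten token stream.
 */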
public class HiveASTRewriter {
private Context queryContext;
private RewriteContext rwCtx;
private List<ASTRewriter> rewriters = new ArrayList<>();
private static final Logger LOG = LoggerFactory.getLogger(HiveASTRewriter.class);
public HiveASTRewriter(HiveConf conf) throws RewriteException {
try {
queryContext = new Context(conf);
setUpRewriters();
} catch (IOException e) {
throw new RewriteException("Exception while rewriting query : " , e);
}
}
private void setUpRewriters() throws RewriteException {
ASTRewriter rewriter = new LiteralRewriter();
rewriters.add(rewriter);
}
public String rewrite(String sourceQry) throws RewriteException {
String result = sourceQry;
ASTNode tree = null;
try {
ParseDriver pd = new ParseDriver();
tree = pd.parse(sourceQry, queryContext, true);
tree = ParseUtils.findRootNonNullToken(tree);
this.rwCtx = new RewriteContext(sourceQry, tree, queryContext.getTokenRewriteStream());
rewrite(tree);
result = toSQL();
} catch (ParseException e) {
LOG.error("Could not parse the query {} ", sourceQry, e);
throw new RewriteException("Could not parse query : " , e);
}
return result;
}
private void rewrite(ASTNode origin) throws RewriteException {
ASTNode node = origin;
if (node != null) {
for(ASTRewriter rewriter : rewriters) {
rewriter.rewrite(rwCtx, node);
}
if (node.getChildren() != null) {
for (int i = 0; i < node.getChildren().size(); i++) {
rewrite((ASTNode) node.getChild(i));
}
}
}
}
public String toSQL() {
return rwCtx.getTokenRewriteStream().toString();
}
public String printAST() {
return rwCtx.getOriginNode().dump();
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import java.util.HashMap;
import java.util.Map;
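/**
 * Normalizes a query by replacing literal tokens (numbers, strings, booleans, etc.) in the AST
 * with fixed placeholders; e.g. "select * from t where id = 10" becomes
 * "select * from t where id = NUMBER_LITERAL", so queries differing only in literal values
 * rewrite to the same text.
 */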
public class LiteralRewriter implements ASTRewriter {
public static Map<Integer, String> LITERAL_TOKENS = new HashMap<Integer, String>() {{
put(HiveParser.Number, "NUMBER_LITERAL");
put(HiveParser.Digit, "DIGIT_LITERAL");
put(HiveParser.HexDigit, "HEX_LITERAL");
put(HiveParser.Exponent, "EXPONENT_LITERAL");
put(HiveParser.StringLiteral, "'STRING_LITERAL'");
put(HiveParser.BigintLiteral, "BIGINT_LITERAL");
put(HiveParser.SmallintLiteral, "SMALLINT_LITERAL");
put(HiveParser.TinyintLiteral, "TINYINT_LITERAL");
put(HiveParser.DecimalLiteral, "DECIMAL_LITERAL");
put(HiveParser.ByteLengthLiteral, "BYTE_LENGTH_LITERAL");
put(HiveParser.TOK_STRINGLITERALSEQUENCE, "'STRING_LITERAL_SEQ'");
put(HiveParser.TOK_CHARSETLITERAL, "'CHARSET_LITERAL'");
put(HiveParser.KW_TRUE, "BOOLEAN_LITERAL");
put(HiveParser.KW_FALSE, "BOOLEAN_LITERAL");
}};
@Override
public void rewrite(RewriteContext ctx, final ASTNode node) throws RewriteException {
try {
processLiterals(ctx, node);
} catch(Exception e) {
throw new RewriteException("Could not normalize query", e);
}
}
private void processLiterals(final RewriteContext ctx, final ASTNode node) {
// Replace literal tokens in this node with their placeholder text
if (isLiteral(node)) {
replaceLiteral(ctx, node);
}
}
private boolean isLiteral(ASTNode node) {
if (LITERAL_TOKENS.containsKey(node.getType())) {
return true;
}
return false;
}
void replaceLiteral(RewriteContext ctx, ASTNode valueNode) {
//Reset the token stream
String literalVal = LITERAL_TOKENS.get(valueNode.getType());
ctx.getTokenRewriteStream().replace(valueNode.getTokenStartIndex(),
valueNode.getTokenStopIndex(), literalVal);
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.hive.ql.parse.ASTNode;
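/**
 * Carries the state needed during a rewrite: the original query text, the root of the parsed
 * AST, and the token-rewrite stream that accumulates replacements.
 */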
public class RewriteContext {
private String origQuery;
private TokenRewriteStream rewriteStream;
private ASTNode origin;
RewriteContext(String origQuery, ASTNode origin, TokenRewriteStream rewriteStream) {
this.origQuery = origQuery;
this.origin = origin;
this.rewriteStream = rewriteStream;
}
public TokenRewriteStream getTokenRewriteStream() {
return rewriteStream;
}
public ASTNode getOriginNode() {
return origin;
}
public String getOriginalQuery() {
return origQuery;
}
}
......@@ -18,32 +18,57 @@
package org.apache.atlas.hive;
import com.google.common.annotations.VisibleForTesting;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.hive.bridge.ColumnLineageUtils;
import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.atlas.hive.hook.HiveHookIT;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.atlas.v1.model.instance.Id;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.atlas.utils.ParamChecker;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.security.UserGroupInformation;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import static org.apache.atlas.AtlasClient.NAME;
import static org.apache.atlas.hive.hook.HiveHook.lower;
import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.HDFS_PATH;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.fail;
......@@ -51,22 +76,28 @@ import static org.testng.Assert.fail;
public class HiveITBase {
private static final Logger LOG = LoggerFactory.getLogger(HiveITBase.class);
public static final String DEFAULT_DB = "default";
public static final String SEP = ":".intern();
public static final String IO_SEP = "->".intern();
protected static final String DGI_URL = "http://localhost:21000/";
protected static final String CLUSTER_NAME = "primary";
public static final String DEFAULT_DB = "default";
protected static final String PART_FILE = "2015-01-01";
protected static final String INPUTS = "inputs";
protected static final String OUTPUTS = "outputs";
protected Driver driver;
protected AtlasClient atlasClient;
protected AtlasClientV2 atlasClientV2;
protected HiveMetaStoreBridge hiveMetaStoreBridge;
protected SessionState ss;
protected HiveConf conf;
protected static final String INPUTS = AtlasClient.PROCESS_ATTRIBUTE_INPUTS;
protected static final String OUTPUTS = AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS;
protected Driver driverWithoutContext;
private static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName";
private static final String ATTR_NAME = "name";
@BeforeClass
public void setUp() throws Exception {
//Set-up hive session
......@@ -86,12 +117,15 @@ public class HiveITBase {
}
if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
atlasClientV2 = new AtlasClientV2(atlasEndPoint, new String[]{"admin", "admin"});
atlasClient = new AtlasClient(atlasEndPoint, new String[]{"admin", "admin"});
} else {
atlasClientV2 = new AtlasClientV2(atlasEndPoint);
atlasClient = new AtlasClient(atlasEndPoint);
}
hiveMetaStoreBridge = new HiveMetaStoreBridge(configuration, conf, atlasClient);
hiveMetaStoreBridge = new HiveMetaStoreBridge(configuration, conf, atlasClientV2);
HiveConf conf = new HiveConf();
conf.set("hive.exec.post.hooks", "");
......@@ -141,6 +175,13 @@ public class HiveITBase {
return file.getAbsolutePath();
}
public static String lower(String str) {
if (StringUtils.isEmpty(str)) {
return null;
}
return str.toLowerCase().trim();
}
protected String random() {
return RandomStringUtils.randomAlphanumeric(10);
}
......@@ -156,7 +197,7 @@ public class HiveITBase {
protected String assertTableIsRegistered(String dbName, String tableName, HiveHookIT.AssertPredicate assertPredicate, boolean isTemporary) throws Exception {
LOG.debug("Searching for table {}.{}", dbName, tableName);
String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName, isTemporary);
return assertEntityIsRegistered(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName,
return assertEntityIsRegistered(HiveDataTypes.HIVE_TABLE.getName(), REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName,
assertPredicate);
}
......@@ -165,19 +206,39 @@ public class HiveITBase {
waitFor(80000, new HiveHookIT.Predicate() {
@Override
public void evaluate() throws Exception {
Referenceable entity = atlasClient.getEntity(typeName, property, value);
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value));
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
assertNotNull(entity);
if (assertPredicate != null) {
assertPredicate.assertOnEntity(entity);
}
}
});
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value));
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
return (String) entity.getGuid();
}
protected AtlasEntity assertEntityIsRegistedViaEntity(final String typeName, final String property, final String value,
final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
waitFor(80000, new HiveHookIT.Predicate() {
@Override
public void evaluate() throws Exception {
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value));
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
assertNotNull(entity);
if (assertPredicate != null) {
assertPredicate.assertOnEntity(entity);
}
}
});
Referenceable entity = atlasClient.getEntity(typeName, property, value);
return entity.getId()._getId();
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value));
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
return entity;
}
public interface AssertPredicate {
void assertOnEntity(Referenceable entity) throws Exception;
void assertOnEntity(AtlasEntity entity) throws Exception;
}
public interface Predicate {
......@@ -216,35 +277,29 @@ public class HiveITBase {
protected String getTableProcessQualifiedName(String dbName, String tableName) throws Exception {
return HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME,
hiveMetaStoreBridge.hiveClient.getTable(dbName, tableName));
hiveMetaStoreBridge.getHiveClient().getTable(dbName, tableName));
}
protected void validateHDFSPaths(Referenceable processReference, String attributeName, String... testPaths) throws Exception {
List<Id> hdfsPathRefs = (List<Id>) processReference.get(attributeName);
protected void validateHDFSPaths(AtlasEntity processEntity, String attributeName, String... testPaths) throws Exception {
List<AtlasObjectId> hdfsPathIds = toAtlasObjectIdList(processEntity.getAttribute(attributeName));
for (String testPath : testPaths) {
final Path path = new Path(testPath);
final String testPathNormed = lower(path.toString());
Path path = new Path(testPath);
String testPathNormed = lower(path.toString());
String hdfsPathId = assertHDFSPathIsRegistered(testPathNormed);
Assert.assertEquals(hdfsPathRefs.get(0)._getId(), hdfsPathId);
Referenceable hdfsPathRef = atlasClient.getEntity(hdfsPathId);
Assert.assertEquals(hdfsPathRef.get("path"), testPathNormed);
Assert.assertEquals(hdfsPathRef.get(NAME), Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
if (testPathNormed != null) {
Assert.assertTrue(((String)hdfsPathRef.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME)).startsWith(testPathNormed));
}
Assert.assertEquals(hdfsPathIds.get(0).getGuid(), hdfsPathId);
}
}
private String assertHDFSPathIsRegistered(String path) throws Exception {
protected String assertHDFSPathIsRegistered(String path) throws Exception {
LOG.debug("Searching for hdfs path {}", path);
// ATLAS-2444 HDFS name node federation adds the cluster name to the qualifiedName
if (path.startsWith("hdfs://")) {
String pathWithCluster = path + "@" + CLUSTER_NAME;
return assertEntityIsRegistered(HiveMetaStoreBridge.HDFS_PATH, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathWithCluster, null);
return assertEntityIsRegistered(HDFS_PATH, REFERENCEABLE_ATTRIBUTE_NAME, pathWithCluster, null);
} else {
return assertEntityIsRegistered(HiveMetaStoreBridge.HDFS_PATH, AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, path, null);
return assertEntityIsRegistered(HDFS_PATH, REFERENCEABLE_ATTRIBUTE_NAME, path, null);
}
}
......@@ -255,7 +310,415 @@ public class HiveITBase {
protected String assertDatabaseIsRegistered(String dbName, AssertPredicate assertPredicate) throws Exception {
LOG.debug("Searching for database {}", dbName);
String dbQualifiedName = HiveMetaStoreBridge.getDBQualifiedName(CLUSTER_NAME, dbName);
return assertEntityIsRegistered(HiveDataTypes.HIVE_DB.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
return assertEntityIsRegistered(HiveDataTypes.HIVE_DB.getName(), REFERENCEABLE_ATTRIBUTE_NAME,
dbQualifiedName, assertPredicate);
}
protected AtlasEntity getAtlasEntityByType(String type, String id) throws Exception {
AtlasEntity atlasEntity = null;
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfoForProcess = atlasClientV2.getEntityByAttribute(type,
Collections.singletonMap(AtlasClient.GUID, id));
atlasEntity = atlasEntityWithExtInfoForProcess.getEntity();
return atlasEntity;
}
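// Captures the context of a single Hive hook invocation: the query's read/write entities,
// the user/UGI, the Hive operation and query metadata, plus the hook notifications
// accumulated for that invocation.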
public static class HiveEventContext {
private Set<ReadEntity> inputs;
private Set<WriteEntity> outputs;
private String user;
private UserGroupInformation ugi;
private HiveOperation operation;
private HookContext.HookType hookType;
private JSONObject jsonPlan;
private String queryId;
private String queryStr;
private Long queryStartTime;
public Map<String, List<ColumnLineageUtils.HiveColumnLineageInfo>> lineageInfo;
private List<HookNotification> messages = new ArrayList<>();
public void setInputs(Set<ReadEntity> inputs) {
this.inputs = inputs;
}
public void setOutputs(Set<WriteEntity> outputs) {
this.outputs = outputs;
}
public void setUser(String user) {
this.user = user;
}
public void setUgi(UserGroupInformation ugi) {
this.ugi = ugi;
}
public void setOperation(HiveOperation operation) {
this.operation = operation;
}
public void setHookType(HookContext.HookType hookType) {
this.hookType = hookType;
}
public void setQueryId(String queryId) {
this.queryId = queryId;
}
public void setQueryStr(String queryStr) {
this.queryStr = queryStr;
}
public void setQueryStartTime(Long queryStartTime) {
this.queryStartTime = queryStartTime;
}
public void setLineageInfo(LineageInfo lineageInfo){
try {
this.lineageInfo = ColumnLineageUtils.buildLineageMap(lineageInfo);
LOG.debug("Column Lineage Map => {} ", this.lineageInfo.entrySet());
}catch (Throwable e){
LOG.warn("Column Lineage Map build failed with exception {}", e);
}
}
public Set<ReadEntity> getInputs() {
return inputs;
}
public Set<WriteEntity> getOutputs() {
return outputs;
}
public String getUser() {
return user;
}
public UserGroupInformation getUgi() {
return ugi;
}
public HiveOperation getOperation() {
return operation;
}
public HookContext.HookType getHookType() {
return hookType;
}
public String getQueryId() {
return queryId;
}
public String getQueryStr() {
return queryStr;
}
public Long getQueryStartTime() {
return queryStartTime;
}
public void addMessage(HookNotification message) {
messages.add(message);
}
public List<HookNotification> getMessages() {
return messages;
}
}
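// Builds the qualifiedName of the hive_process entity. For create-style operations it is the
// table-process qualified name of the output table; otherwise it is the operation name followed
// by the sorted input and output dataset qualified names (IO_SEP separates the two groups, SEP
// separates individual datasets), with table create times appended where available.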
@VisibleForTesting
protected static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext,
final SortedSet<ReadEntity> sortedHiveInputs,
final SortedSet<WriteEntity> sortedHiveOutputs,
SortedMap<ReadEntity, AtlasEntity> hiveInputsMap,
SortedMap<WriteEntity, AtlasEntity> hiveOutputsMap) throws HiveException {
HiveOperation op = eventContext.getOperation();
if (isCreateOp(eventContext)) {
Entity entity = getEntityByType(sortedHiveOutputs, Entity.Type.TABLE);
if (entity != null) {
Table outTable = entity.getTable();
//refresh table
outTable = dgiBridge.getHiveClient().getTable(outTable.getDbName(), outTable.getTableName());
return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getClusterName(), outTable);
}
}
StringBuilder buffer = new StringBuilder(op.getOperationName());
boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
if ( ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
}
addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
buffer.append(IO_SEP);
addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
LOG.info("Setting process qualified name to {}", buffer);
return buffer.toString();
}
protected static Entity getEntityByType(Set<? extends Entity> entities, Entity.Type entityType) {
for (Entity entity : entities) {
if (entity.getType() == entityType) {
return entity;
}
}
return null;
}
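// HDFS paths are excluded from the process qualifiedName for partition-based LOAD/IMPORT/EXPORT
// operations and for all QUERY operations.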
protected static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
switch (op) {
case LOAD:
case IMPORT:
return isPartitionBasedQuery(outputs);
case EXPORT:
return isPartitionBasedQuery(inputs);
case QUERY:
return true;
}
return false;
}
protected static boolean isPartitionBasedQuery(Set<? extends Entity> entities) {
for (Entity entity : entities) {
if (Entity.Type.PARTITION.equals(entity.getType())) {
return true;
}
}
return false;
}
protected static boolean isCreateOp(HiveEventContext hiveEvent) {
return HiveOperation.CREATETABLE.equals(hiveEvent.getOperation())
|| HiveOperation.CREATEVIEW.equals(hiveEvent.getOperation())
|| HiveOperation.ALTERVIEW_AS.equals(hiveEvent.getOperation())
|| HiveOperation.ALTERTABLE_LOCATION.equals(hiveEvent.getOperation())
|| HiveOperation.CREATETABLE_AS_SELECT.equals(hiveEvent.getOperation());
}
protected static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, AtlasEntity> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
if (refs != null) {
if (sortedInputs != null) {
Set<String> dataSetsProcessed = new LinkedHashSet<>();
for (Entity input : sortedInputs) {
if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
//HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
if (ignoreHDFSPathsInQFName &&
(Entity.Type.DFS_DIR.equals(input.getType()) || Entity.Type.LOCAL_DIR.equals(input.getType()))) {
LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
} else if (refs.containsKey(input)) {
if ( input.getType() == Entity.Type.PARTITION || input.getType() == Entity.Type.TABLE) {
Table inputTable = refreshTable(hiveBridge, input.getTable().getDbName(), input.getTable().getTableName());
if (inputTable != null) {
addDataset(buffer, refs.get(input), HiveMetaStoreBridge.getTableCreatedTime(inputTable));
}
} else {
addDataset(buffer, refs.get(input));
}
}
dataSetsProcessed.add(input.getName().toLowerCase());
}
}
}
}
}
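// addDataset appends SEP plus the dataset's lowercased qualifiedName to the buffer; the overload
// that takes a create time additionally appends that timestamp.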
protected static void addDataset(StringBuilder buffer, AtlasEntity ref, final long createTime) {
addDataset(buffer, ref);
buffer.append(SEP);
buffer.append(createTime);
}
protected static void addDataset(StringBuilder buffer, AtlasEntity ref) {
buffer.append(SEP);
String dataSetQlfdName = (String) ref.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
// '/' breaks query parsing on ATLAS
buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
}
protected static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, AtlasEntity> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
if (refs != null) {
Set<String> dataSetsProcessed = new LinkedHashSet<>();
if (sortedOutputs != null) {
for (WriteEntity output : sortedOutputs) {
final Entity entity = output;
if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
if (ignoreHDFSPathsInQFName &&
(Entity.Type.DFS_DIR.equals(output.getType()) || Entity.Type.LOCAL_DIR.equals(output.getType()))) {
LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
} else if (refs.containsKey(output)) {
//HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
if (addQueryType(op, (WriteEntity) entity)) {
buffer.append(SEP);
buffer.append(((WriteEntity) entity).getWriteType().name());
}
if ( output.getType() == Entity.Type.PARTITION || output.getType() == Entity.Type.TABLE) {
Table outputTable = refreshTable(hiveBridge, output.getTable().getDbName(), output.getTable().getTableName());
if (outputTable != null) {
addDataset(buffer, refs.get(output), HiveMetaStoreBridge.getTableCreatedTime(outputTable));
}
} else {
addDataset(buffer, refs.get(output));
}
}
dataSetsProcessed.add(output.getName().toLowerCase());
}
}
}
}
}
protected static Table refreshTable(HiveMetaStoreBridge dgiBridge, String dbName, String tableName) {
try {
return dgiBridge.getHiveClient().getTable(dbName, tableName);
} catch (HiveException excp) { // this might be the case for temp tables
LOG.warn("failed to get details for table {}.{}. Ignoring. {}: {}", dbName, tableName, excp.getClass().getCanonicalName(), excp.getMessage());
}
return null;
}
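// The write type (INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE) is appended to the
// process qualifiedName only for HiveOperation.QUERY; PATH_WRITE to a local directory is
// skipped, since local paths are not registered as outputs.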
protected static boolean addQueryType(HiveOperation op, WriteEntity entity) {
if (entity.getWriteType() != null && HiveOperation.QUERY.equals(op)) {
switch (entity.getWriteType()) {
case INSERT:
case INSERT_OVERWRITE:
case UPDATE:
case DELETE:
return true;
case PATH_WRITE:
//Add query type only for DFS paths and ignore local paths since they are not added as outputs
if ( !Entity.Type.LOCAL_DIR.equals(entity.getType())) {
return true;
}
break;
default:
}
}
return false;
}
@VisibleForTesting
protected static final class EntityComparator implements Comparator<Entity> {
@Override
public int compare(Entity o1, Entity o2) {
String s1 = o1.getName();
String s2 = o2.getName();
if (s1 == null || s2 == null){
s1 = o1.getD().toString();
s2 = o2.getD().toString();
}
return s1.toLowerCase().compareTo(s2.toLowerCase());
}
}
@VisibleForTesting
protected static final Comparator<Entity> entityComparator = new EntityComparator();
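// The helpers below coerce attribute values returned by the v2 client (AtlasObjectId/AtlasStruct
// instances, Maps, or plain guid Strings) into typed object-id and struct representations.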
protected AtlasObjectId toAtlasObjectId(Object obj) {
final AtlasObjectId ret;
if (obj instanceof AtlasObjectId) {
ret = (AtlasObjectId) obj;
} else if (obj instanceof Map) {
ret = new AtlasObjectId((Map) obj);
} else if (obj != null) {
ret = new AtlasObjectId(obj.toString()); // guid
} else {
ret = null;
}
return ret;
}
protected List<AtlasObjectId> toAtlasObjectIdList(Object obj) {
final List<AtlasObjectId> ret;
if (obj instanceof Collection) {
Collection coll = (Collection) obj;
ret = new ArrayList<>(coll.size());
for (Object item : coll) {
AtlasObjectId objId = toAtlasObjectId(item);
if (objId != null) {
ret.add(objId);
}
}
} else {
AtlasObjectId objId = toAtlasObjectId(obj);
if (objId != null) {
ret = new ArrayList<>(1);
ret.add(objId);
} else {
ret = null;
}
}
return ret;
}
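// Example usage appears in HiveMetastoreBridgeIT below:
// toAtlasObjectIdList(processEntity.getAttribute(OUTPUTS)) returns the referenced ids whether
// the attribute holds a single reference or a collection of them.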
protected AtlasStruct toAtlasStruct(Object obj) {
final AtlasStruct ret;
if (obj instanceof AtlasStruct) {
ret = (AtlasStruct) obj;
} else if (obj instanceof Map) {
ret = new AtlasStruct((Map) obj);
} else {
ret = null;
}
return ret;
}
protected List<AtlasStruct> toAtlasStructList(Object obj) {
final List<AtlasStruct> ret;
if (obj instanceof Collection) {
Collection coll = (Collection) obj;
ret = new ArrayList<>(coll.size());
for (Object item : coll) {
AtlasStruct struct = toAtlasStruct(item);
if (struct != null) {
ret.add(struct);
}
}
} else {
AtlasStruct struct = toAtlasStruct(obj);
if (struct != null) {
ret = new ArrayList<>(1);
ret.add(struct);
} else {
ret = null;
}
}
return ret;
}
}
......@@ -18,7 +18,6 @@
package org.apache.atlas.hive.bridge;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
......@@ -33,6 +32,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_COLUMNS;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
public class ColumnLineageUtils {
public static final Logger LOG = LoggerFactory.getLogger(ColumnLineageUtils.class);
public static class HiveColumnLineageInfo {
......@@ -127,10 +130,10 @@ public class ColumnLineageUtils {
static void populateColumnReferenceableMap(Map<String, Referenceable> m,
Referenceable r) {
if (r.getTypeName().equals(HiveDataTypes.HIVE_TABLE.getName())) {
String qName = (String) r.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
String qName = (String) r.get(ATTRIBUTE_QUALIFIED_NAME);
String[] qNameComps = extractComponents(qName);
for (Referenceable col : (List<Referenceable>) r.get(HiveMetaStoreBridge.COLUMNS)) {
String cName = (String) col.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
for (Referenceable col : (List<Referenceable>) r.get(ATTRIBUTE_COLUMNS)) {
String cName = (String) col.get(ATTRIBUTE_QUALIFIED_NAME);
String[] colQNameComps = extractComponents(cName);
String colQName = colQNameComps[0] + "." + colQNameComps[1] + "." + colQNameComps[2];
m.put(colQName, col);
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.bridge;
import org.apache.atlas.hive.hook.HiveHook;
import org.apache.atlas.hive.rewrite.HiveASTRewriter;
import org.apache.atlas.hive.rewrite.RewriteException;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@Test(enabled = false)
public class HiveLiteralRewriterTest {
private HiveConf conf;
@BeforeClass(enabled = false)
public void setup() {
conf = new HiveConf();
conf.addResource("/hive-site.xml");
SessionState ss = new SessionState(conf, "testuser");
SessionState.start(ss);
conf.set("hive.lock.manager", "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
}
@Test(enabled=false)
public void testLiteralRewrite() throws RewriteException {
HiveHook.HiveEventContext ctx = new HiveHook.HiveEventContext();
ctx.setQueryStr("insert into table testTable partition(dt='2014-01-01') select * from test1 where dt = '2014-01-01'" +
" and intColumn = 10" +
" and decimalColumn = 1.10" +
" and charColumn = 'a'" +
" and hexColumn = unhex('\\0xFF')" +
" and expColumn = cast('-1.5e2' as int)" +
" and boolCol = true");
HiveASTRewriter queryRewriter = new HiveASTRewriter(conf);
String result = queryRewriter.rewrite(ctx.getQueryStr());
System.out.println("normalized sql : " + result);
final String normalizedSQL = "insert into table testTable partition(dt='STRING_LITERAL') " +
"select * from test1 where dt = 'STRING_LITERAL' " +
"and intColumn = NUMBER_LITERAL " +
"and decimalColumn = NUMBER_LITERAL and " +
"charColumn = 'STRING_LITERAL' and " +
"hexColumn = unhex('STRING_LITERAL') and " +
"expColumn = cast('STRING_LITERAL' as int) and " +
"boolCol = BOOLEAN_LITERAL";
Assert.assertEquals(result, normalizedSQL);
}
}
......@@ -19,9 +19,11 @@
package org.apache.atlas.hive.bridge;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
......@@ -40,10 +42,12 @@ import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import static org.mockito.Mockito.argThat;
import static org.mockito.Mockito.eq;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
......@@ -60,6 +64,18 @@ public class HiveMetaStoreBridgeTest {
@Mock
private AtlasClient atlasClient;
@Mock
private AtlasClientV2 atlasClientV2;
@Mock
private AtlasEntity atlasEntity;
@Mock
private AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo;
@Mock
EntityMutationResponse entityMutationResponse;
@BeforeMethod
public void initializeMocks() {
MockitoAnnotations.initMocks(this);
......@@ -70,19 +86,21 @@ public class HiveMetaStoreBridgeTest {
// setup database
when(hiveClient.getAllDatabases()).thenReturn(Arrays.asList(new String[]{TEST_DB_NAME}));
String description = "This is a default database";
when(hiveClient.getDatabase(TEST_DB_NAME)).thenReturn(
new Database(TEST_DB_NAME, description, "/user/hive/default", null));
Database db = new Database(TEST_DB_NAME, description, "/user/hive/default", null);
when(hiveClient.getDatabase(TEST_DB_NAME)).thenReturn(db);
when(hiveClient.getAllTables(TEST_DB_NAME)).thenReturn(Arrays.asList(new String[]{}));
returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
returnExistingDatabase(TEST_DB_NAME, atlasClientV2, CLUSTER_NAME);
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
when(atlasEntityWithExtInfo.getEntity("72e06b34-9151-4023-aa9d-b82103a50e76"))
.thenReturn((new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_DB.getName(), AtlasClient.GUID, "72e06b34-9151-4023-aa9d-b82103a50e76"))).getEntity());
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClientV2);
bridge.importHiveMetadata(true);
// verify update is called
verify(atlasClient).updateEntity(eq("72e06b34-9151-4023-aa9d-b82103a50e76"),
(Referenceable) argThat(
new MatchesReferenceableProperty(HiveMetaStoreBridge.DESCRIPTION_ATTR, description)));
verify(atlasClientV2).updateEntity(anyObject());
}
@Test
......@@ -91,32 +109,50 @@ public class HiveMetaStoreBridgeTest {
List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
returnExistingDatabase(TEST_DB_NAME, atlasClientV2, CLUSTER_NAME);
// return existing table
when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME)))
.thenReturn(getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());
when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77"))
.thenReturn((new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))).getEntity());
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77"))
.thenReturn(createTableReference());
String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTables.get(0));
when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQualifiedName)).thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClientV2);
bridge.importHiveMetadata(true);
// verify update is called on table
verify(atlasClient).updateEntity(eq("82e06b34-9151-4023-aa9d-b82103a50e77"),
(Referenceable) argThat(new MatchesReferenceableProperty(HiveMetaStoreBridge.TABLE_TYPE_ATTR,
TableType.EXTERNAL_TABLE.name())));
verify(atlasClientV2, times(2)).updateEntity(anyObject());
}
private void returnExistingDatabase(String databaseName, AtlasClient atlasClient, String clusterName)
private void returnExistingDatabase(String databaseName, AtlasClientV2 atlasClientV2, String clusterName)
throws AtlasServiceException {
when(atlasClient.getEntity(
HiveDataTypes.HIVE_DB.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getDBQualifiedName(clusterName, databaseName))).thenReturn(
getEntityReference(HiveDataTypes.HIVE_DB.getName(), "72e06b34-9151-4023-aa9d-b82103a50e76"));
//getEntity(HiveDataTypes.HIVE_DB.getName(), AtlasClient.GUID, "72e06b34-9151-4023-aa9d-b82103a50e76");
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_DB.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getDBQualifiedName(CLUSTER_NAME, TEST_DB_NAME))))
.thenReturn((new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_DB.getName(), AtlasClient.GUID, "72e06b34-9151-4023-aa9d-b82103a50e76"))));
}
private List<Table> setupTables(Hive hiveClient, String databaseName, String... tableNames) throws HiveException {
......@@ -142,15 +178,25 @@ public class HiveMetaStoreBridgeTest {
List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
Table hiveTable = hiveTables.get(0);
returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
returnExistingDatabase(TEST_DB_NAME, atlasClientV2, CLUSTER_NAME);
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))).thenReturn(
getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTable);
when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)).thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77"))
.thenReturn(createTableReference());
Partition partition = mock(Partition.class);
when(partition.getTable()).thenReturn(hiveTable);
......@@ -159,7 +205,7 @@ public class HiveMetaStoreBridgeTest {
when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(new Partition[]{partition}));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClientV2);
try {
bridge.importHiveMetadata(true);
} catch (Exception e) {
......@@ -173,18 +219,27 @@ public class HiveMetaStoreBridgeTest {
final String table2Name = TEST_TABLE_NAME + "_1";
List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME, table2Name);
returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
returnExistingDatabase(TEST_DB_NAME, atlasClientV2, CLUSTER_NAME);
when(hiveClient.getTable(TEST_DB_NAME, TEST_TABLE_NAME)).thenThrow(new RuntimeException("Timeout while reading data from hive metastore"));
when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME,
table2Name))).thenReturn(
getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77"))
.thenReturn(createTableReference());
String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTables.get(1));
when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)).thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClientV2);
try {
bridge.importHiveMetadata(false);
} catch (Exception e) {
......@@ -198,18 +253,29 @@ public class HiveMetaStoreBridgeTest {
final String table2Name = TEST_TABLE_NAME + "_1";
List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME, table2Name);
returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
returnExistingDatabase(TEST_DB_NAME, atlasClientV2, CLUSTER_NAME);
when(hiveClient.getTable(TEST_DB_NAME, TEST_TABLE_NAME)).thenThrow(new RuntimeException("Timeout while reading data from hive metastore"));
when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME,
table2Name))).thenReturn(
getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());
String processQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, hiveTables.get(1));
when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)).thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), "82e06b34-9151-4023-aa9d-b82103a50e77"));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77"))
.thenReturn(createTableReference());
String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTables.get(1));
when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
processQualifiedName)))
.thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));
HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClientV2);
try {
bridge.importHiveMetadata(true);
Assert.fail("Table registration is supposed to fail");
......@@ -218,15 +284,15 @@ public class HiveMetaStoreBridgeTest {
}
}
private Referenceable getEntityReference(String typeName, String id) {
return new Referenceable(id, typeName, null);
private AtlasEntity getEntity(String typeName, String attr, String value) {
return new AtlasEntity(typeName, attr, value);
}
private Referenceable createTableReference() {
Referenceable tableReference = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
Referenceable sdReference = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
tableReference.set(HiveMetaStoreBridge.STORAGE_DESC, sdReference);
return tableReference;
private AtlasEntity createTableReference() {
AtlasEntity tableEntity = new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName());
AtlasEntity sdEntity = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName());
tableEntity.setAttribute(ATTRIBUTE_STORAGEDESC, getObjectId(sdEntity));
return tableEntity;
}
private Table createTestTable(String databaseName, String tableName) throws HiveException {
......@@ -252,7 +318,7 @@ public class HiveMetaStoreBridgeTest {
@Override
public boolean matches(Object o) {
return attrValue.equals(((Referenceable) o).get(attrName));
return attrValue.equals(((AtlasEntity) o).getAttribute(attrName));
}
}
}
......@@ -21,8 +21,8 @@ package org.apache.atlas.hive.bridge;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.hive.HiveITBase;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.atlas.v1.model.instance.Id;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.testng.annotations.Test;
import java.util.List;
......@@ -34,34 +34,36 @@ public class HiveMetastoreBridgeIT extends HiveITBase {
@Test
public void testCreateTableAndImport() throws Exception {
String tableName = tableName();
String pFile = createTestDFSPath("parentPath");
final String query = String.format("create EXTERNAL table %s(id string, cnt int) location '%s'", tableName, pFile);
String query = String.format("create EXTERNAL table %s(id string, cnt int) location '%s'", tableName, pFile);
runCommand(query);
String dbId = assertDatabaseIsRegistered(DEFAULT_DB);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
//verify lineage is created
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
Referenceable processReference = atlasClient.getEntity(processId);
validateHDFSPaths(processReference, INPUTS, pFile);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
validateHDFSPaths(processEntity, INPUTS, pFile);
List<AtlasObjectId> outputs = toAtlasObjectIdList(processEntity.getAttribute(OUTPUTS));
List<Id> outputs = (List<Id>) processReference.get(OUTPUTS);
assertEquals(outputs.size(), 1);
assertEquals(outputs.get(0).getId(), tableId);
assertEquals(outputs.get(0).getGuid(), tableId);
int tableCount = atlasClient.listEntities(HiveDataTypes.HIVE_TABLE.getName()).size();
//Now import using import tool - should be no-op. This also tests update since table exists
hiveMetaStoreBridge.importTable(atlasClient.getEntity(dbId), DEFAULT_DB, tableName, true);
AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity();
hiveMetaStoreBridge.importTable(dbEntity, DEFAULT_DB, tableName, true);
String tableId2 = assertTableIsRegistered(DEFAULT_DB, tableName);
assertEquals(tableId2, tableId);
String processId2 = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
String processId2 = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
assertEquals(processId2, processId);
//assert that table is de-duped and no new entity is created
......@@ -73,17 +75,22 @@ public class HiveMetastoreBridgeIT extends HiveITBase {
public void testImportCreatedTable() throws Exception {
String tableName = tableName();
String pFile = createTestDFSPath("parentPath");
runCommand(driverWithoutContext, String.format("create EXTERNAL table %s(id string) location '%s'", tableName, pFile));
String dbId = assertDatabaseIsRegistered(DEFAULT_DB);
hiveMetaStoreBridge.importTable(atlasClient.getEntity(dbId), DEFAULT_DB, tableName, true);
AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity();
hiveMetaStoreBridge.importTable(dbEntity, DEFAULT_DB, tableName, true);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
List<Id> outputs = (List<Id>) atlasClient.getEntity(processId).get(OUTPUTS);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
List<AtlasObjectId> outputs = toAtlasObjectIdList(processEntity.getAttribute(OUTPUTS));
assertEquals(outputs.size(), 1);
assertEquals(outputs.get(0).getId(), tableId);
assertEquals(outputs.get(0).getGuid(), tableId);
}
}
This source diff could not be displayed because it is too large.
......@@ -68,6 +68,7 @@ public class SqoopHook extends SqoopJobDataPublisher {
public static final String CMD_LINE_OPTS = "commandlineOpts";
public static final String INPUTS = "inputs";
public static final String OUTPUTS = "outputs";
public static final String ATTRIBUTE_DB = "db";
static {
org.apache.hadoop.conf.Configuration.addDefaultResource("sqoop-site.xml");
......@@ -114,7 +115,7 @@ public class SqoopHook extends SqoopJobDataPublisher {
entHiveTable.setAttribute(AtlasClient.NAME, tableName.toLowerCase());
entHiveTable.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName);
entHiveTable.setAttribute(HiveMetaStoreBridge.DB, AtlasTypeUtil.getAtlasObjectId(entHiveDb));
entHiveTable.setAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasObjectId(entHiveDb));
return entHiveTable;
}
......
......@@ -64,6 +64,7 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
public static final String ANONYMOUS_OWNER = "anonymous";
public static final String HBASE_NAMESPACE_DEFAULT = "default";
public static final String ATTRIBUTE_DB = "db";
private final HdfsNameServiceResolver hdfsNameServiceResolver = HdfsNameServiceResolver.getInstance();
......@@ -271,7 +272,7 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
final String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(clusterName,
databaseName, hiveTableName);
dataSetReferenceable.set(AtlasClient.NAME, hiveTableName);
dataSetReferenceable.set(HiveMetaStoreBridge.DB, dbReferenceable);
dataSetReferenceable.set(ATTRIBUTE_DB, dbReferenceable);
dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
break;
......