Commit 5c9a699a by Aadarsh Jajodia, committed by Sarath Subramanian

ATLAS-3133: Add support for Process Executions in Atlas

parent 0a81c250
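At a high level, this change introduces a new hive_process_execution entity type that captures the per-run details (start/end time, user name, query text, query id, query plan) that hive_process used to carry, plus the host name, and ties each run back to its reusable hive_process lineage node through a new processExecutions relationship. The sketch below is illustrative only: the type, attribute, and relationship names are taken from the model and hook changes in this commit, while the qualified names, query strings, and host are placeholder values (the real qualified-name format is built by the hook code further down).

// Minimal sketch, not part of the commit: how a process and one of its executions relate.
// Placeholder values are marked; only the type/attribute/relationship names come from the diff.
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.type.AtlasTypeUtil;

public class ProcessExecutionSketch {
    public static void main(String[] args) {
        AtlasEntity process = new AtlasEntity("hive_process");
        process.setAttribute("qualifiedName", "QUERY:db.src@cluster->db.dst@cluster");    // placeholder
        process.setAttribute("name", "insert into dst select * from src");                // placeholder

        long start = System.currentTimeMillis() - 1000L, end = System.currentTimeMillis();

        AtlasEntity execution = new AtlasEntity("hive_process_execution");
        execution.setAttribute("qualifiedName", process.getAttribute("qualifiedName") + ":" + start + ":" + end);
        execution.setAttribute("startTime", start);
        execution.setAttribute("endTime", end);
        execution.setAttribute("userName", "hive");                                       // placeholder
        execution.setAttribute("queryText", "insert into dst select * from src");         // placeholder
        execution.setAttribute("queryId", "hive_20190101000000_example");                 // placeholder
        execution.setAttribute("queryPlan", "Not Supported");
        execution.setAttribute("hostName", "example-host");                               // placeholder

        // One execution points at its process; the hive_process side accumulates all of its
        // runs under the processExecutions relationship attribute (a COMPOSITION in the model).
        execution.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(process));
    }
}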
@@ -133,6 +133,8 @@ public class AtlasHiveHookContext {
return hook.getClusterName();
}
public String getHostName() { return hook.getHostName(); }
public boolean isConvertHdfsPathToLowerCase() {
return hook.isConvertHdfsPathToLowerCase();
}
......
@@ -32,6 +32,8 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -46,7 +48,6 @@ import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIE
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);
@@ -66,6 +67,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size";
public static final String DEFAULT_CLUSTER_NAME = "primary";
public static final String DEFAULT_HOST_NAME = "localhost";
private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
@@ -83,6 +85,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final Map<String, PreprocessAction> hiveTablesCache;
private static HiveHookObjectNamesCache knownObjects = null;
private static String hostName;
static {
for (HiveOperation hiveOperation : HiveOperation.values()) {
@@ -134,6 +137,13 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
}
knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null;
try {
hostName = InetAddress.getLocalHost().getHostName();
} catch (UnknownHostException e) {
LOG.warn("No hostname found. Setting the hostname to default value {}", DEFAULT_HOST_NAME, e);
hostName = DEFAULT_HOST_NAME;
}
}
@@ -292,6 +302,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
return knownObjects;
}
public String getHostName() {
return hostName;
}
public static class HiveHookObjectNamesCache {
private final int dbMaxCacheCount;
private final int tblMaxCacheCount;
......
@@ -18,6 +18,7 @@
package org.apache.atlas.hive.hook.events;
import com.google.common.collect.ImmutableMap;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.hive.hook.HiveHook.PreprocessAction;
import org.apache.atlas.model.instance.AtlasEntity;
@@ -27,6 +28,8 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.utils.HdfsNameServiceResolver;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
@@ -49,6 +52,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
@@ -67,24 +71,25 @@ import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_PROCESS;
public abstract class BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(BaseHiveEvent.class);
public static final String HIVE_TYPE_DB = "hive_db";
public static final String HIVE_TYPE_TABLE = "hive_table";
public static final String HIVE_TYPE_STORAGEDESC = "hive_storagedesc";
public static final String HIVE_TYPE_COLUMN = "hive_column";
public static final String HIVE_TYPE_PROCESS = "hive_process";
public static final String HIVE_TYPE_COLUMN_LINEAGE = "hive_column_lineage";
public static final String HIVE_TYPE_SERDE = "hive_serde";
public static final String HIVE_TYPE_ORDER = "hive_order";
public static final String HIVE_TYPE_PROCESS_EXECUTION = "hive_process_execution";
public static final String HDFS_TYPE_PATH = "hdfs_path";
public static final String HBASE_TYPE_TABLE = "hbase_table";
public static final String HBASE_TYPE_NAMESPACE = "hbase_namespace";
public static final String AWS_S3_BUCKET = "aws_s3_bucket";
public static final String AWS_S3_PSEUDO_DIR = "aws_s3_pseudo_dir";
public static final String AWS_S3_OBJECT = "aws_s3_object";
public static final String SCHEME_SEPARATOR = "://";
public static final String S3_SCHEME = "s3" + SCHEME_SEPARATOR;
public static final String S3A_SCHEME = "s3a" + SCHEME_SEPARATOR;
public static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName";
public static final String ATTRIBUTE_NAME = "name";
@@ -126,6 +131,8 @@ public abstract class BaseHiveEvent {
public static final String ATTRIBUTE_START_TIME = "startTime";
public static final String ATTRIBUTE_USER_NAME = "userName";
public static final String ATTRIBUTE_QUERY_TEXT = "queryText";
public static final String ATTRIBUTE_PROCESS = "process";
public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions";
public static final String ATTRIBUTE_QUERY_ID = "queryId";
public static final String ATTRIBUTE_QUERY_PLAN = "queryPlan";
public static final String ATTRIBUTE_END_TIME = "endTime";
@@ -139,6 +146,7 @@ public abstract class BaseHiveEvent {
public static final String ATTRIBUTE_NAMESPACE = "namespace";
public static final String ATTRIBUTE_OBJECT_PREFIX = "objectPrefix";
public static final String ATTRIBUTE_BUCKET = "bucket";
public static final String ATTRIBUTE_HOSTNAME = "hostName";
public static final String HBASE_STORAGE_HANDLER_CLASS = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
public static final String HBASE_DEFAULT_NAMESPACE = "default";
@@ -146,6 +154,7 @@ public abstract class BaseHiveEvent {
public static final String HBASE_PARAM_TABLE_NAME = "hbase.table.name";
public static final long MILLIS_CONVERT_FACTOR = 1000;
public static final String HDFS_PATH_PREFIX = "hdfs://";
public static final String EMPTY_ATTRIBUTE_VALUE = "";
public static final Map<Integer, String> OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>();
@@ -605,14 +614,41 @@ public abstract class BaseHiveEvent {
ret.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputs));
ret.setAttribute(ATTRIBUTE_NAME, queryStr);
ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, getOperationName());
// We are setting an empty value to these attributes, since now we have a new entity type called hive process
// execution which captures these values. We have to set empty values here because these attributes are
// mandatory attributes for hive process entity type.
ret.setAttribute(ATTRIBUTE_START_TIME, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_END_TIME, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
return ret;
}
protected AtlasEntity getHiveProcessExecutionEntity(AtlasEntity hiveProcess) throws Exception {
AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS_EXECUTION);
String queryStr = getQueryString();
if (queryStr != null) {
queryStr = queryStr.toLowerCase().trim();
}
Long endTime = System.currentTimeMillis();
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() +
QNAME_SEP_PROCESS + getQueryStartTime().toString() +
QNAME_SEP_PROCESS + endTime.toString());
ret.setAttribute(ATTRIBUTE_NAME, queryStr + QNAME_SEP_PROCESS + getQueryStartTime().toString());
ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime());
ret.setAttribute(ATTRIBUTE_END_TIME, endTime);
ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
ret.setAttribute(ATTRIBUTE_HOSTNAME, getContext().getHostName());
ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
return ret;
}
......
@@ -117,6 +117,9 @@ public class CreateHiveProcess extends BaseHiveEvent {
ret.addEntity(process);
AtlasEntity processExecution = getHiveProcessExecutionEntity(process);
ret.addEntity(processExecution);
processColumnLineage(process, ret);
addProcessedEntities(ret);
......
@@ -130,8 +130,11 @@ public class CreateTable extends BaseHiveEvent {
} else {
processEntity = getHiveProcessEntity(Collections.singletonList(tblEntity), Collections.singletonList(hbaseTableEntity));
}
ret.addEntity(processEntity);
AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
ret.addEntity(processExecution);
}
} else {
if (EXTERNAL_TABLE.equals(table.getTableType())) {
@@ -140,6 +143,9 @@ public class CreateTable extends BaseHiveEvent {
ret.addEntity(processEntity);
ret.addReferredEntity(hdfsPathEntity);
AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
ret.addEntity(processExecution);
}
}
}
......
@@ -43,7 +43,8 @@ public enum HiveDataTypes {
HIVE_ROLE,
HIVE_TYPE,
HIVE_PROCESS,
HIVE_COLUMN_LINEAGE,
HIVE_PROCESS_EXECUTION,
// HIVE_VIEW,
;
......
@@ -218,6 +218,25 @@ public class HiveITBase {
return (String) entity.getGuid();
}
protected String assertEntityIsRegisteredViaGuid(String guid,
final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
waitFor(80000, new HiveHookIT.Predicate() {
@Override
public void evaluate() throws Exception {
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
assertNotNull(entity);
if (assertPredicate != null) {
assertPredicate.assertOnEntity(entity);
}
}
});
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
return (String) entity.getGuid();
}
protected AtlasEntity assertEntityIsRegistedViaEntity(final String typeName, final String property, final String value,
final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
waitFor(80000, new HiveHookIT.Predicate() {
@@ -473,7 +492,6 @@ public class HiveITBase {
return buffer.toString();
}
protected static Entity getEntityByType(Set<? extends Entity> entities, Entity.Type entityType) {
for (Entity entity : entities) {
if (entity.getType() == entityType) {
......
@@ -29,13 +29,13 @@ import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.atlas.hive.hook.events.BaseHiveEvent;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.model.instance.AtlasClassification;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.model.lineage.AtlasLineageInfo;
import org.apache.atlas.model.typedef.AtlasClassificationDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.commons.collections.CollectionUtils;
@@ -49,15 +49,12 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.text.ParseException;
import java.util.*;
@@ -308,7 +305,15 @@ public class HiveHookIT extends HiveITBase {
final Set<ReadEntity> readEntities = getInputs(tableName, Entity.Type.TABLE);
final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities,
writeEntities);
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
assertTableIsRegistered(DEFAULT_DB, ctasTableName);
}
@@ -347,7 +352,12 @@ public class HiveHookIT extends HiveITBase {
Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
String drpquery = String.format("drop table %s ", ctasTableName);
runCommandWithDelay(drpquery, 100);
@@ -360,13 +370,17 @@ public class HiveHookIT extends HiveITBase {
outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
AtlasEntity processEntity2 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext);
AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
validateOutputTables(processEntity1, outputs);
}
@Test
@@ -377,7 +391,14 @@ public class HiveHookIT extends HiveITBase {
runCommand(query);
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName,
Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
assertTableIsRegistered(DEFAULT_DB, viewName);
}
@@ -392,7 +413,15 @@ public class HiveHookIT extends HiveITBase {
String table1Id = assertTableIsRegistered(DEFAULT_DB, table1Name);
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name,
Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
String processId1 = assertProcessIsRegistered(hiveEventContext);
AtlasEntity processEntity1 = atlasClientV2.getEntityByGuid(processId1).getEntity();
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
String viewId = assertTableIsRegistered(DEFAULT_DB, viewName);
@@ -412,8 +441,16 @@ public class HiveHookIT extends HiveITBase {
runCommand(query);
HiveEventContext hiveEventContext2 = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name,
Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
String processId2 = assertProcessIsRegistered(hiveEventContext2);
AtlasEntity processEntity2 = atlasClientV2.getEntityByGuid(processId2).getEntity();
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext2);
AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2);
Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
String table2Id = assertTableIsRegistered(DEFAULT_DB, table2Name);
@@ -526,6 +563,12 @@ public class HiveHookIT extends HiveITBase {
return validateProcess(event, event.getInputs(), event.getOutputs());
}
private AtlasEntity validateProcessExecution(AtlasEntity hiveProcess, HiveEventContext event) throws Exception {
String processExecutionId = assertProcessExecutionIsRegistered(hiveProcess, event);
AtlasEntity processExecutionEntity = atlasClientV2.getEntityByGuid(processExecutionId).getEntity();
return processExecutionEntity;
}
@Test
public void testInsertIntoTable() throws Exception {
String inputTable1Name = createTable();
@@ -581,8 +624,85 @@ public class HiveHookIT extends HiveITBase {
runCommandWithDelay(query, 1000);
AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs);
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2);
Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
}
@Test
public void testInsertIntoTableProcessExecution() throws Exception {
String inputTable1Name = createTable();
String inputTable2Name = createTable();
String insertTableName = createTable();
assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
assertTableIsRegistered(DEFAULT_DB, insertTableName);
String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
runCommand(query);
Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
(outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {{
addAll(inputs);
}};
assertTableIsRegistered(DEFAULT_DB, insertTableName);
AtlasEntity processEntity1 = validateProcess(event, expectedInputs, outputs);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, event);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
//Test sorting of tbl names
SortedSet<String> sortedTblNames = new TreeSet<>();
sortedTblNames.add(inputTable1Name.toLowerCase());
sortedTblNames.add(inputTable2Name.toLowerCase());
//Verify sorted order of inputs in qualified name
Assert.assertEquals(processEntity1.getAttribute(ATTRIBUTE_QUALIFIED_NAME),
Joiner.on(SEP).join("QUERY",
getQualifiedTblName(sortedTblNames.first()),
HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.first())),
getQualifiedTblName(sortedTblNames.last()),
HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.last())))
+ IO_SEP + SEP
+ Joiner.on(SEP).
join(WriteEntity.WriteType.INSERT.name(),
getQualifiedTblName(insertTableName),
HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, insertTableName)))
);
//Rerun same query. Should result in same process
runCommandWithDelay(query, 1000);
AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs);
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, event);
process = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity2.getGuid());
Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
String queryWithDifferentPredicate = "insert into " + insertTableName + " select t1.id, t1.name from " +
inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=100";
runCommandWithDelay(queryWithDifferentPredicate, 1000);
HiveEventContext event3 = constructEvent(queryWithDifferentPredicate, HiveOperation.QUERY, inputs, outputs);
AtlasEntity processEntity3 = validateProcess(event3, expectedInputs, outputs);
AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity3, event3);
process = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity3.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity3), 3);
Assert.assertEquals(processEntity2.getGuid(), processEntity3.getGuid());
}
@Test
@@ -593,7 +713,14 @@ public class HiveHookIT extends HiveITBase {
runCommand(query);
HiveEventContext event = constructEvent(query, HiveOperation.QUERY,
getInputs(tableName, Entity.Type.TABLE), null);
AtlasEntity hiveProcess = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
}
@@ -613,6 +740,10 @@ public class HiveHookIT extends HiveITBase {
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
AtlasEntity processEntity = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity.getGuid());
validateHDFSPaths(processEntity, OUTPUTS, pFile1);
@@ -626,6 +757,11 @@ public class HiveHookIT extends HiveITBase {
assertTableIsRegistered(DEFAULT_DB, tableName);
AtlasEntity process2Entity = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity, hiveEventContext);
AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), process2Entity.getGuid());
validateHDFSPaths(process2Entity, OUTPUTS, pFile1);
@@ -646,9 +782,13 @@ public class HiveHookIT extends HiveITBase {
}};
AtlasEntity process3Entity = validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, p3Outputs));
AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity, hiveEventContext);
AtlasObjectId process3 = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process3.getGuid(), process3Entity.getGuid());
validateHDFSPaths(process3Entity, OUTPUTS, pFile2);
Assert.assertEquals(numberOfProcessExecutions(process3Entity), 3);
Assert.assertEquals(process3Entity.getGuid(), processEntity.getGuid());
}
@@ -714,7 +854,13 @@ public class HiveHookIT extends HiveITBase {
outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
AtlasEntity hiveProcess = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
@@ -748,8 +894,13 @@ public class HiveHookIT extends HiveITBase {
}
};
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps);
AtlasEntity hiveProcess = validateProcess(event, inputs, outputs);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName);
@@ -769,8 +920,14 @@ public class HiveHookIT extends HiveITBase {
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
HiveEventContext event = constructEvent(query, HiveOperation.EXPORT, inputs, outputs);
AtlasEntity processEntity = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(processEntity, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
validateHDFSPaths(processEntity, OUTPUTS, filename);
validateInputTables(processEntity, inputs);
@@ -785,7 +942,16 @@ public class HiveHookIT extends HiveITBase {
outputs = getOutputs(importTableName, Entity.Type.TABLE);
HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT,
getInputs(filename, Entity.Type.DFS_DIR), outputs);
AtlasEntity processEntity2 = validateProcess(event2);
AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2);
AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
Assert.assertNotEquals(processEntity.getGuid(), processEntity2.getGuid());
//Should create another process
filename = "pfile://" + mkdir("export2UnPartitioned");
@@ -796,7 +962,18 @@ public class HiveHookIT extends HiveITBase {
inputs = getInputs(tableName, Entity.Type.TABLE);
outputs = getOutputs(filename, Entity.Type.DFS_DIR);
HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, inputs, outputs);
AtlasEntity processEntity3 = validateProcess(event3);
AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3);
AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process3.getGuid(), processEntity3.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity3), 1);
// Should be a different process compared to the previous ones
Assert.assertNotEquals(processEntity.getGuid(), processEntity3.getGuid());
Assert.assertNotEquals(processEntity2.getGuid(), processEntity3.getGuid());
//import again should create another process
query = "import table " + importTableName + " from '" + filename + "'";
@@ -805,7 +982,20 @@ public class HiveHookIT extends HiveITBase {
outputs = getOutputs(importTableName, Entity.Type.TABLE);
HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, getInputs(filename,
Entity.Type.DFS_DIR), outputs);
AtlasEntity processEntity4 = validateProcess(event4);
AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4);
AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process4.getGuid(), processEntity4.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity4), 1);
// Should be a different process compared to the previous ones
Assert.assertNotEquals(processEntity.getGuid(), processEntity4.getGuid());
Assert.assertNotEquals(processEntity2.getGuid(), processEntity4.getGuid());
Assert.assertNotEquals(processEntity3.getGuid(), processEntity4.getGuid());
}
@Test
@@ -833,9 +1023,15 @@ public class HiveHookIT extends HiveITBase {
partitionIps.addAll(expectedExportInputs);
HiveEventContext event1 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs);
AtlasEntity processEntity1 = validateProcess(event1, expectedExportInputs, outputs);
AtlasEntity hiveProcessExecution1 = validateProcessExecution(processEntity1, event1);
AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
validateHDFSPaths(processEntity1, OUTPUTS, filename);
//Import
String importTableName = createTable(true);
@@ -852,7 +1048,14 @@ public class HiveHookIT extends HiveITBase {
partitionOps.addAll(importOutputs);
HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT, expectedImportInputs , partitionOps);
AtlasEntity processEntity2 = validateProcess(event2, expectedImportInputs, importOutputs);
AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2);
AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
Assert.assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid());
//Export should update same process
filename = "pfile://" + mkdir("export2");
@@ -866,7 +1069,17 @@ public class HiveHookIT extends HiveITBase {
addAll(outputs);
}};
HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2);
// this process entity should return same as the processEntity1 since the inputs and outputs are the same,
// hence the qualifiedName will be the same
AtlasEntity processEntity3 = validateProcess(event3, expectedExportInputs, p3Outputs);
AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3);
AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process3.getGuid(), processEntity3.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity3), 2);
Assert.assertEquals(processEntity1.getGuid(), processEntity3.getGuid());
query = "alter table " + importTableName + " drop partition (dt='"+ PART_FILE + "')"; query = "alter table " + importTableName + " drop partition (dt='"+ PART_FILE + "')";
...@@ -883,7 +1096,17 @@ public class HiveHookIT extends HiveITBase { ...@@ -883,7 +1096,17 @@ public class HiveHookIT extends HiveITBase {
addAll(expectedImportInputs); addAll(expectedImportInputs);
}}; }};
validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs); HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps);
// This process is going to be same as processEntity2
AtlasEntity processEntity4 = validateProcess(event4, expectedImport2Inputs, importOutputs);
AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4);
AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process4.getGuid(), processEntity4.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity4), 2);
Assert.assertEquals(processEntity2.getGuid(), processEntity4.getGuid());
Assert.assertNotEquals(processEntity1.getGuid(), processEntity4.getGuid());
}
@Test
@@ -1176,15 +1399,12 @@ public class HiveHookIT extends HiveITBase {
);
}
/**
* Reenabling this test since HIVE-14706 is fixed now and the hive version we are using now sends
* us the column lineage information
* @throws Exception
*/
@Test
public void testColumnLevelLineage() throws Exception {
String sourceTable = "table" + random();
@@ -1204,8 +1424,14 @@
Set<ReadEntity> inputs = getInputs(sourceTable, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
HiveEventContext event = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
AtlasEntity processEntity1 = validateProcess(event);
AtlasEntity hiveProcessExecution1 = validateProcessExecution(processEntity1, event);
AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
Assert.assertEquals(processEntity1.getGuid(), processEntity1.getGuid());
assertTableIsRegistered(DEFAULT_DB, ctasTableName);
String processQName = sortEventsAndGetProcessQualifiedName(event);
@@ -1281,14 +1507,20 @@
Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
HiveEventContext event = constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs);
AtlasEntity processEntity = validateProcess(event);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, event);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity.getGuid());
//Check lineage
String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
AtlasLineageInfo atlasLineageInfoInput = atlasClientV2.getLineageInfo(tableId, AtlasLineageInfo.LineageDirection.INPUT,0);
Map<String, AtlasEntityHeader> entityMap = atlasLineageInfoInput.getGuidEntityMap();
Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
//Below should be assertTrue - Fix https://issues.apache.org/jira/browse/ATLAS-653
Assert.assertFalse(entityMap.containsKey(tableId));
}
@@ -1402,7 +1634,8 @@ public class HiveHookIT extends HiveITBase {
String processQualifiedName = getTableProcessQualifiedName(DEFAULT_DB, tableName);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQualifiedName, null);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
Assert.assertEquals(numberOfProcessExecutions(processEntity), 2);
//validateProcessExecution(processEntity, event);
validateHDFSPaths(processEntity, INPUTS, testPath);
}
@@ -1890,6 +2123,34 @@ public class HiveHookIT extends HiveITBase {
}
}
private String assertProcessExecutionIsRegistered(AtlasEntity hiveProcess, final HiveEventContext event) throws Exception {
try {
String guid = "";
List<AtlasObjectId> processExecutions = toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS));
for (AtlasObjectId processExecution : processExecutions) {
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.
getEntityByGuid(processExecution.getGuid());
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
if (String.valueOf(entity.getAttribute(ATTRIBUTE_QUERY_TEXT)).equals(event.getQueryStr().toLowerCase().trim())) {
guid = entity.getGuid();
}
}
return assertEntityIsRegisteredViaGuid(guid, new AssertPredicate() {
@Override
public void assertOnEntity(final AtlasEntity entity) throws Exception {
String queryText = (String) entity.getAttribute(ATTRIBUTE_QUERY_TEXT);
Assert.assertEquals(queryText, event.getQueryStr().toLowerCase().trim());
}
});
} catch(Exception e) {
LOG.error("Exception : ", e);
throw e;
}
}
private String getDSTypeName(Entity entity) {
return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : HiveMetaStoreBridge.HDFS_PATH;
}
@@ -2080,4 +2341,9 @@ public class HiveHookIT extends HiveITBase {
@Override
public Entity.Type getType() { return type; }
}
private int numberOfProcessExecutions(AtlasEntity hiveProcess) {
return toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size();
}
}
@@ -324,6 +324,15 @@
"isUnique": false
}
]
},
{
"name": "ProcessExecution",
"superTypes": [
"Asset"
],
"serviceType": "atlas_core",
"typeVersion": "1.0",
"attributeDefs": []
}
],
"relationshipDefs": [
......
@@ -457,6 +457,80 @@
"isUnique": false
}
]
},
{
"name" : "hive_process_execution",
"superTypes" : [
"ProcessExecution"
],
"serviceType": "hive",
"typeVersion" : "1.0",
"attributeDefs" : [
{
"name": "startTime",
"typeName": "date",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "endTime",
"typeName": "date",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "userName",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": true,
"isOptional": false,
"isUnique": false
},
{
"name": "queryText",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "queryGraph",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": true,
"isUnique": false
},
{
"name": "queryId",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "queryPlan",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "hostName",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": true,
"isOptional": false,
"isUnique": false
}
]
}
],
"relationshipDefs": [
@@ -567,6 +641,24 @@
"cardinality": "SET"
},
"propagateTags": "NONE"
},
{
"name": "hive_process_process_executions",
"serviceType": "hive",
"typeVersion": "1.0",
"relationshipCategory": "COMPOSITION",
"endDef1": {
"type": "hive_process",
"name": "processExecutions",
"cardinality": "SET",
"isContainer": true
},
"endDef2": {
"type": "hive_process_execution",
"name": "process",
"cardinality": "SINGLE"
},
"propagateTags": "NONE"
}
]
}
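For illustration, a minimal client-side sketch of how a hive_process_execution conforming to the definitions above could be created and linked to an existing hive_process. The class name, the AtlasClientV2 instance and all attribute values are assumptions; only the type, attribute and relationship names come from the model files in this commit.

import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.type.AtlasTypeUtil;

public class ProcessExecutionExample {
    // Creates one hive_process_execution entity and ties it to an existing hive_process entity.
    static void recordExecution(AtlasClientV2 atlasClientV2, AtlasEntity hiveProcess) throws AtlasServiceException {
        AtlasEntity execution = new AtlasEntity("hive_process_execution");
        execution.setAttribute("qualifiedName", "sample_query@primary:1554000000000"); // hypothetical naming scheme
        execution.setAttribute("name", "sample_query");
        execution.setAttribute("startTime", 1554000000000L);   // date attributes accept epoch millis
        execution.setAttribute("endTime", 1554000005000L);
        execution.setAttribute("userName", "hive");
        execution.setAttribute("queryText", "insert into t2 select * from t1");
        execution.setAttribute("queryId", "hive_20190401_0001");
        execution.setAttribute("queryPlan", "Not Supported");
        execution.setAttribute("hostName", "localhost");
        // the relationship end named "process" links the execution back to its owning hive_process
        execution.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
        atlasClientV2.createEntity(new AtlasEntity.AtlasEntityWithExtInfo(execution));
    }
}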
...@@ -19,6 +19,7 @@
package org.apache.atlas.examples;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.sun.jersey.core.util.MultivaluedMapImpl;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClientV2;
...@@ -42,6 +43,8 @@ import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasRelationshipDef;
import org.apache.atlas.model.typedef.AtlasRelationshipDef.PropagateTags;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.configuration.Configuration;
...@@ -107,9 +110,15 @@ public class QuickStartV2 {
public static final String JDBC_CLASSIFICATION = "JdbcAccess";
public static final String LOGDATA_CLASSIFICATION = "Log Data";
public static final String LOAD_SALES_DAILY_PROCESS = "loadSalesDaily";
public static final String LOAD_SALES_DAILY_PROCESS_EXEC1 = "loadSalesDailyExec1";
public static final String LOAD_SALES_DAILY_PROCESS_EXEC2 = "loadSalesDailyExec2";
public static final String LOAD_SALES_MONTHLY_PROCESS = "loadSalesMonthly";
public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC1 = "loadSalesMonthlyExec1";
public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC2 = "loadSalesMonthlyExec2";
public static final String LOAD_LOGS_MONTHLY_PROCESS = "loadLogsMonthly";
public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC1 = "loadLogsMonthlyExec1";
public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC2 = "loadLogsMonthlyExec2";
public static final String PRODUCT_DIM_VIEW = "product_dim_view";
public static final String CUSTOMER_DIM_VIEW = "customer_dim_view";
...@@ -119,6 +128,7 @@ public class QuickStartV2 {
public static final String TABLE_TYPE = "Table";
public static final String VIEW_TYPE = "View";
public static final String LOAD_PROCESS_TYPE = "LoadProcess";
public static final String LOAD_PROCESS_EXECUTION_TYPE = "LoadProcessExecution";
public static final String STORAGE_DESC_TYPE = "StorageDesc";
public static final String TABLE_DATABASE_TYPE = "Table_DB";
...@@ -126,13 +136,14 @@ public class QuickStartV2 {
public static final String VIEW_TABLES_TYPE = "View_Tables";
public static final String TABLE_COLUMNS_TYPE = "Table_Columns";
public static final String TABLE_STORAGE_DESC_TYPE = "Table_StorageDesc";
public static final String PROCESS_PROCESS_EXECUTION_DESC_TYPE = "Process_ProcessExecution";
public static final String VERSION_1 = "1.0";
public static final String MANAGED_TABLE = "Managed";
public static final String EXTERNAL_TABLE = "External";
public static final String CLUSTER_SUFFIX = "@cl1";
public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE, LOAD_PROCESS_EXECUTION_TYPE,
VIEW_TYPE, JDBC_CLASSIFICATION, ETL_CLASSIFICATION, METRIC_CLASSIFICATION,
PII_CLASSIFICATION, FACT_CLASSIFICATION, DIMENSION_CLASSIFICATION, LOGDATA_CLASSIFICATION,
TABLE_DATABASE_TYPE, VIEW_DATABASE_TYPE, VIEW_TABLES_TYPE, TABLE_COLUMNS_TYPE, TABLE_STORAGE_DESC_TYPE };
...@@ -256,28 +267,41 @@ public class QuickStartV2 {
createRequiredAttrDef("queryId", "string"),
createRequiredAttrDef("queryGraph", "string"));
AtlasEntityDef processExecutionTypeDef = createClassTypeDef(LOAD_PROCESS_EXECUTION_TYPE, LOAD_PROCESS_EXECUTION_TYPE, VERSION_1, Collections.singleton("ProcessExecution"),
createOptionalAttrDef("userName", "string"),
createOptionalAttrDef("startTime", "long"),
createOptionalAttrDef("endTime", "long"),
createRequiredAttrDef("queryText", "string"),
createRequiredAttrDef("queryPlan", "string"),
createRequiredAttrDef("queryId", "string"),
createRequiredAttrDef("queryGraph", "string"));
AtlasEntityDef viewTypeDef = createClassTypeDef(VIEW_TYPE, VIEW_TYPE, VERSION_1, Collections.singleton("DataSet"));
// Relationship-Definitions
AtlasRelationshipDef tableDatabaseTypeDef = createRelationshipTypeDef(TABLE_DATABASE_TYPE, TABLE_DATABASE_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
createRelationshipEndDef(TABLE_TYPE, "db", SINGLE, false),
createRelationshipEndDef(DATABASE_TYPE, "tables", SET, true));
AtlasRelationshipDef viewDatabaseTypeDef = createRelationshipTypeDef(VIEW_DATABASE_TYPE, VIEW_DATABASE_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
createRelationshipEndDef(VIEW_TYPE, "db", SINGLE, false),
createRelationshipEndDef(DATABASE_TYPE, "views", SET, true));
AtlasRelationshipDef viewTablesTypeDef = createRelationshipTypeDef(VIEW_TABLES_TYPE, VIEW_TABLES_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
createRelationshipEndDef(VIEW_TYPE, "inputTables", SET, true),
createRelationshipEndDef(TABLE_TYPE, "view", SINGLE, false));
AtlasRelationshipDef tableColumnsTypeDef = createRelationshipTypeDef(TABLE_COLUMNS_TYPE, TABLE_COLUMNS_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
createRelationshipEndDef(TABLE_TYPE, "columns", SET, true),
createRelationshipEndDef(COLUMN_TYPE, "table", SINGLE, false));
AtlasRelationshipDef tableStorageDescTypeDef = createRelationshipTypeDef(TABLE_STORAGE_DESC_TYPE, TABLE_STORAGE_DESC_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
createRelationshipEndDef(TABLE_TYPE, "sd", SINGLE, true),
createRelationshipEndDef(STORAGE_DESC_TYPE, "table", SINGLE, false));
AtlasRelationshipDef processProcessExecutionTypeDef = createRelationshipTypeDef(PROCESS_PROCESS_EXECUTION_DESC_TYPE, PROCESS_PROCESS_EXECUTION_DESC_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
createRelationshipEndDef(LOAD_PROCESS_TYPE, "processExecutions", SET, true),
createRelationshipEndDef(LOAD_PROCESS_EXECUTION_TYPE, "process", SINGLE, false));
// Classification-Definitions
AtlasClassificationDef dimClassifDef = createTraitTypeDef(DIMENSION_CLASSIFICATION, "Dimension Classification", VERSION_1, Collections.emptySet());
...@@ -288,8 +312,8 @@ public class QuickStartV2 {
AtlasClassificationDef jdbcClassifDef = createTraitTypeDef(JDBC_CLASSIFICATION, "JdbcAccess Classification", VERSION_1, Collections.emptySet());
AtlasClassificationDef logClassifDef = createTraitTypeDef(LOGDATA_CLASSIFICATION, "LogData Classification", VERSION_1, Collections.emptySet());
List<AtlasEntityDef> entityDefs = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, processExecutionTypeDef, viewTypeDef);
List<AtlasRelationshipDef> relationshipDefs = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef, processProcessExecutionTypeDef);
List<AtlasClassificationDef> classificationDefs = asList(dimClassifDef, factClassifDef, piiClassifDef, metricClassifDef, etlClassifDef, jdbcClassifDef, logClassifDef);
return new AtlasTypesDef(Collections.emptyList(), Collections.emptyList(), classificationDefs, entityDefs, relationshipDefs);
...@@ -362,20 +386,36 @@ public class QuickStartV2 {
createView(CUSTOMER_DIM_VIEW, reportingDB, asList(customerDim), DIMENSION_CLASSIFICATION, JDBC_CLASSIFICATION);
// Process entities
AtlasEntity loadProcess = createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
asList(salesFact, timeDim),
asList(salesFactDaily),
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC1, "hive query execution 1 for daily summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC2, "hive query execution 2 for daily summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
AtlasEntity loadProcess2 = createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
asList(salesFactDaily),
asList(salesFactMonthly),
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
AtlasEntity loadProcess3 = createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
asList(loggingFactDaily),
asList(loggingFactMonthly),
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "Tim ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "Tim ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
}
private AtlasEntity createInstance(AtlasEntity entity) throws Exception {
...@@ -522,6 +562,31 @@ public class QuickStartV2 {
return createInstance(entity);
}
AtlasEntity createProcessExecution(AtlasEntity hiveProcess, String name, String description, String user,
String queryText, String queryPlan, String queryId, String queryGraph, String... classificationNames) throws Exception {
AtlasEntity entity = new AtlasEntity(LOAD_PROCESS_EXECUTION_TYPE);
Long startTime = System.currentTimeMillis();
Long endTime = System.currentTimeMillis() + 10000;
// set attributes
entity.setAttribute("name", name);
entity.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, name + CLUSTER_SUFFIX + startTime.toString() + endTime.toString());
entity.setAttribute("description", description);
entity.setAttribute("user", user);
entity.setAttribute("startTime", startTime);
entity.setAttribute("endTime", endTime);
entity.setAttribute("queryText", queryText);
entity.setAttribute("queryPlan", queryPlan);
entity.setAttribute("queryId", queryId);
entity.setAttribute("queryGraph", queryGraph);
entity.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
// set classifications
entity.setClassifications(toAtlasClassifications(classificationNames));
return createInstance(entity);
}
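// Note (illustration only, not part of this change): the qualifiedName built above combines the
// execution name, the cluster suffix and the start/end timestamps, e.g. something like
// "loadSalesDailyExec1@cl1<startTime><endTime>", so each execution of the same process gets its
// own unique identity while the owning LoadProcess keeps a stable qualifiedName.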
AtlasEntity createView(String name, AtlasEntity database, List<AtlasEntity> inputTables, String... classificationNames) throws Exception {
AtlasEntity entity = new AtlasEntity(VIEW_TYPE);
......