Commit 5c9a699a authored by Aadarsh Jajodia, committed by Sarath Subramanian

ATLAS-3133: Add support for Process Executions in Atlas

parent 0a81c250
......@@ -133,6 +133,8 @@ public class AtlasHiveHookContext {
return hook.getClusterName();
}
public String getHostName() { return hook.getHostName(); }
public boolean isConvertHdfsPathToLowerCase() {
return hook.isConvertHdfsPathToLowerCase();
}
......
......@@ -32,6 +32,8 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
......@@ -46,7 +48,6 @@ import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIE
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);
......@@ -66,6 +67,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size";
public static final String DEFAULT_CLUSTER_NAME = "primary";
public static final String DEFAULT_HOST_NAME = "localhost";
private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
......@@ -83,6 +85,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final Map<String, PreprocessAction> hiveTablesCache;
private static HiveHookObjectNamesCache knownObjects = null;
private static String hostName;
static {
for (HiveOperation hiveOperation : HiveOperation.values()) {
......@@ -134,6 +137,13 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
}
knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null;
try {
hostName = InetAddress.getLocalHost().getHostName();
} catch (UnknownHostException e) {
LOG.warn("No hostname found. Setting the hostname to default value {}", DEFAULT_HOST_NAME, e);
hostName = DEFAULT_HOST_NAME;
}
}
......@@ -292,6 +302,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
return knownObjects;
}
public String getHostName() {
return hostName;
}
public static class HiveHookObjectNamesCache {
private final int dbMaxCacheCount;
private final int tblMaxCacheCount;
......
......@@ -18,6 +18,7 @@
package org.apache.atlas.hive.hook.events;
import com.google.common.collect.ImmutableMap;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.hive.hook.HiveHook.PreprocessAction;
import org.apache.atlas.model.instance.AtlasEntity;
......@@ -27,6 +28,8 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.utils.HdfsNameServiceResolver;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
......@@ -49,6 +52,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
......@@ -75,6 +79,7 @@ public abstract class BaseHiveEvent {
public static final String HIVE_TYPE_COLUMN_LINEAGE = "hive_column_lineage";
public static final String HIVE_TYPE_SERDE = "hive_serde";
public static final String HIVE_TYPE_ORDER = "hive_order";
public static final String HIVE_TYPE_PROCESS_EXECUTION = "hive_process_execution";
public static final String HDFS_TYPE_PATH = "hdfs_path";
public static final String HBASE_TYPE_TABLE = "hbase_table";
public static final String HBASE_TYPE_NAMESPACE = "hbase_namespace";
......@@ -126,6 +131,8 @@ public abstract class BaseHiveEvent {
public static final String ATTRIBUTE_START_TIME = "startTime";
public static final String ATTRIBUTE_USER_NAME = "userName";
public static final String ATTRIBUTE_QUERY_TEXT = "queryText";
public static final String ATTRIBUTE_PROCESS = "process";
public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions";
public static final String ATTRIBUTE_QUERY_ID = "queryId";
public static final String ATTRIBUTE_QUERY_PLAN = "queryPlan";
public static final String ATTRIBUTE_END_TIME = "endTime";
......@@ -139,6 +146,7 @@ public abstract class BaseHiveEvent {
public static final String ATTRIBUTE_NAMESPACE = "namespace";
public static final String ATTRIBUTE_OBJECT_PREFIX = "objectPrefix";
public static final String ATTRIBUTE_BUCKET = "bucket";
public static final String ATTRIBUTE_HOSTNAME = "hostName";
public static final String HBASE_STORAGE_HANDLER_CLASS = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
public static final String HBASE_DEFAULT_NAMESPACE = "default";
......@@ -146,6 +154,7 @@ public abstract class BaseHiveEvent {
public static final String HBASE_PARAM_TABLE_NAME = "hbase.table.name";
public static final long MILLIS_CONVERT_FACTOR = 1000;
public static final String HDFS_PATH_PREFIX = "hdfs://";
public static final String EMPTY_ATTRIBUTE_VALUE = "";
public static final Map<Integer, String> OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>();
......@@ -605,14 +614,41 @@ public abstract class BaseHiveEvent {
ret.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputs));
ret.setAttribute(ATTRIBUTE_NAME, queryStr);
ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, getOperationName());
// These attributes are set to empty values because they are now captured by the new hive process
// execution entity type. They still have to be set here because they are mandatory attributes
// on the hive process entity type.
ret.setAttribute(ATTRIBUTE_START_TIME, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_END_TIME, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE);
ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
return ret;
}
protected AtlasEntity getHiveProcessExecutionEntity(AtlasEntity hiveProcess) throws Exception {
AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS_EXECUTION);
String queryStr = getQueryString();
if (queryStr != null) {
queryStr = queryStr.toLowerCase().trim();
}
Long endTime = System.currentTimeMillis();
ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() +
QNAME_SEP_PROCESS + getQueryStartTime().toString() +
QNAME_SEP_PROCESS + endTime.toString());
ret.setAttribute(ATTRIBUTE_NAME, queryStr + QNAME_SEP_PROCESS + getQueryStartTime().toString());
ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime());
ret.setAttribute(ATTRIBUTE_END_TIME, System.currentTimeMillis());
ret.setAttribute(ATTRIBUTE_END_TIME, endTime);
ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
ret.setAttribute(ATTRIBUTE_HOSTNAME, getContext().getHostName());
ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
return ret;
}
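For orientation, a hedged sketch (not part of the patch) of what the identifying attributes produced above look like for a single run, assuming QNAME_SEP_PROCESS renders as ':' and using invented timestamps and query text:

// Hypothetical run of "create table t2 as select * from t1", started at 1554249293000, ended at 1554249301500:
//   qualifiedName -> "<parent hive_process qualifiedName>:1554249293000:1554249301500"
//   name          -> "create table t2 as select * from t1:1554249293000"
// Each run of the same query therefore creates a new hive_process_execution, while the parent
// hive_process (whose qualifiedName is derived from the operation and its inputs and outputs) is
// reused and accumulates executions through the processExecutions relationship.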
......
......@@ -117,6 +117,9 @@ public class CreateHiveProcess extends BaseHiveEvent {
ret.addEntity(process);
AtlasEntity processExecution = getHiveProcessExecutionEntity(process);
ret.addEntity(processExecution);
processColumnLineage(process, ret);
addProcessedEntities(ret);
......
......@@ -130,8 +130,11 @@ public class CreateTable extends BaseHiveEvent {
} else {
processEntity = getHiveProcessEntity(Collections.singletonList(tblEntity), Collections.singletonList(hbaseTableEntity));
}
ret.addEntity(processEntity);
AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
ret.addEntity(processExecution);
}
} else {
if (EXTERNAL_TABLE.equals(table.getTableType())) {
......@@ -140,6 +143,9 @@ public class CreateTable extends BaseHiveEvent {
ret.addEntity(processEntity);
ret.addReferredEntity(hdfsPathEntity);
AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
ret.addEntity(processExecution);
}
}
}
......
......@@ -43,7 +43,8 @@ public enum HiveDataTypes {
HIVE_ROLE,
HIVE_TYPE,
HIVE_PROCESS,
HIVE_COLUMN_LINEAGE
HIVE_COLUMN_LINEAGE,
HIVE_PROCESS_EXECUTION,
// HIVE_VIEW,
;
......
......@@ -218,6 +218,25 @@ public class HiveITBase {
return (String) entity.getGuid();
}
protected String assertEntityIsRegisteredViaGuid(String guid,
final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
waitFor(80000, new HiveHookIT.Predicate() {
@Override
public void evaluate() throws Exception {
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
assertNotNull(entity);
if (assertPredicate != null) {
assertPredicate.assertOnEntity(entity);
}
}
});
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
return (String) entity.getGuid();
}
protected AtlasEntity assertEntityIsRegistedViaEntity(final String typeName, final String property, final String value,
final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
waitFor(80000, new HiveHookIT.Predicate() {
......@@ -473,7 +492,6 @@ public class HiveITBase {
return buffer.toString();
}
protected static Entity getEntityByType(Set<? extends Entity> entities, Entity.Type entityType) {
for (Entity entity : entities) {
if (entity.getType() == entityType) {
......
......@@ -29,13 +29,13 @@ import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.atlas.hive.hook.events.BaseHiveEvent;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.model.instance.AtlasClassification;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.lineage.AtlasLineageInfo;
import org.apache.atlas.model.typedef.AtlasClassificationDef;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.model.lineage.AtlasLineageInfo;
import org.apache.atlas.model.typedef.AtlasClassificationDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.commons.collections.CollectionUtils;
......@@ -49,15 +49,12 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.nio.file.Files;
import java.text.ParseException;
import java.util.*;
......@@ -308,7 +305,15 @@ public class HiveHookIT extends HiveITBase {
final Set<ReadEntity> readEntities = getInputs(tableName, Entity.Type.TABLE);
final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);
assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities, writeEntities));
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities,
writeEntities);
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
assertTableIsRegistered(DEFAULT_DB, ctasTableName);
}
......@@ -347,7 +352,12 @@ public class HiveHookIT extends HiveITBase {
Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
String processId = assertProcessIsRegistered(hiveEventContext);
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
String drpquery = String.format("drop table %s ", ctasTableName);
runCommandWithDelay(drpquery, 100);
......@@ -360,13 +370,17 @@ public class HiveHookIT extends HiveITBase {
outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
String process2Id = assertProcessIsRegistered(hiveEventContext, inputs, outputs);
assertNotEquals(process2Id, processId);
AtlasEntity processEntity2 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext);
AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
AtlasEntity processsEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
validateOutputTables(processsEntity, outputs);
validateOutputTables(processEntity1, outputs);
}
@Test
......@@ -377,7 +391,14 @@ public class HiveHookIT extends HiveITBase {
runCommand(query);
assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName,
Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
assertTableIsRegistered(DEFAULT_DB, viewName);
}
......@@ -392,7 +413,15 @@ public class HiveHookIT extends HiveITBase {
String table1Id = assertTableIsRegistered(DEFAULT_DB, table1Name);
assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name,
Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
String processId1 = assertProcessIsRegistered(hiveEventContext);
AtlasEntity processEntity1 = atlasClientV2.getEntityByGuid(processId1).getEntity();
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
String viewId = assertTableIsRegistered(DEFAULT_DB, viewName);
......@@ -412,8 +441,16 @@ public class HiveHookIT extends HiveITBase {
runCommand(query);
//Check if alter view process is registered
assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
HiveEventContext hiveEventContext2 = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name,
Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
String processId2 = assertProcessIsRegistered(hiveEventContext2);
AtlasEntity processEntity2 = atlasClientV2.getEntityByGuid(processId2).getEntity();
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext2);
AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2);
Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
String table2Id = assertTableIsRegistered(DEFAULT_DB, table2Name);
......@@ -526,6 +563,12 @@ public class HiveHookIT extends HiveITBase {
return validateProcess(event, event.getInputs(), event.getOutputs());
}
private AtlasEntity validateProcessExecution(AtlasEntity hiveProcess, HiveEventContext event) throws Exception {
String processExecutionId = assertProcessExecutionIsRegistered(hiveProcess, event);
AtlasEntity processExecutionEntity = atlasClientV2.getEntityByGuid(processExecutionId).getEntity();
return processExecutionEntity;
}
@Test
public void testInsertIntoTable() throws Exception {
String inputTable1Name = createTable();
......@@ -581,8 +624,85 @@ public class HiveHookIT extends HiveITBase {
runCommandWithDelay(query, 1000);
AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs);
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2);
Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
}
@Test
public void testInsertIntoTableProcessExecution() throws Exception {
String inputTable1Name = createTable();
String inputTable2Name = createTable();
String insertTableName = createTable();
assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
assertTableIsRegistered(DEFAULT_DB, insertTableName);
String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
runCommand(query);
Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
(outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {{
addAll(inputs);
}};
assertTableIsRegistered(DEFAULT_DB, insertTableName);
AtlasEntity processEntity1 = validateProcess(event, expectedInputs, outputs);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, event);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
//Test sorting of tbl names
SortedSet<String> sortedTblNames = new TreeSet<>();
sortedTblNames.add(inputTable1Name.toLowerCase());
sortedTblNames.add(inputTable2Name.toLowerCase());
//Verify sorted order of inputs in qualified name
Assert.assertEquals(processEntity1.getAttribute(ATTRIBUTE_QUALIFIED_NAME),
Joiner.on(SEP).join("QUERY",
getQualifiedTblName(sortedTblNames.first()),
HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.first())),
getQualifiedTblName(sortedTblNames.last()),
HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.last())))
+ IO_SEP + SEP
+ Joiner.on(SEP).
join(WriteEntity.WriteType.INSERT.name(),
getQualifiedTblName(insertTableName),
HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, insertTableName)))
);
//Rerun same query. Should result in same process
runCommandWithDelay(query, 1000);
AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs);
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, event);
process = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity2.getGuid());
Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
String queryWithDifferentPredicate = "insert into " + insertTableName + " select t1.id, t1.name from " +
inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=100";
runCommandWithDelay(queryWithDifferentPredicate, 1000);
HiveEventContext event3 = constructEvent(queryWithDifferentPredicate, HiveOperation.QUERY, inputs, outputs);
AtlasEntity processEntity3 = validateProcess(event3, expectedInputs, outputs);
AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity3, event3);
process = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity3.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity3), 3);
Assert.assertEquals(processEntity2.getGuid(), processEntity3.getGuid());
}
@Test
......@@ -593,7 +713,14 @@ public class HiveHookIT extends HiveITBase {
runCommand(query);
validateProcess(constructEvent(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null));
HiveEventContext event = constructEvent(query, HiveOperation.QUERY,
getInputs(tableName, Entity.Type.TABLE), null);
AtlasEntity hiveProcess = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
}
......@@ -613,6 +740,10 @@ public class HiveHookIT extends HiveITBase {
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
AtlasEntity processEntity = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity.getGuid());
validateHDFSPaths(processEntity, OUTPUTS, pFile1);
......@@ -626,6 +757,11 @@ public class HiveHookIT extends HiveITBase {
assertTableIsRegistered(DEFAULT_DB, tableName);
AtlasEntity process2Entity = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity, hiveEventContext);
AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), process2Entity.getGuid());
validateHDFSPaths(process2Entity, OUTPUTS, pFile1);
......@@ -646,9 +782,13 @@ public class HiveHookIT extends HiveITBase {
}};
AtlasEntity process3Entity = validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, p3Outputs));
AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity, hiveEventContext);
AtlasObjectId process3 = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process3.getGuid(), process3Entity.getGuid());
validateHDFSPaths(process3Entity, OUTPUTS, pFile2);
Assert.assertEquals(numberOfProcessExecutions(process3Entity), 3);
Assert.assertEquals(process3Entity.getGuid(), processEntity.getGuid());
}
......@@ -714,7 +854,13 @@ public class HiveHookIT extends HiveITBase {
outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);
validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, outputs));
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
AtlasEntity hiveProcess = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
......@@ -748,8 +894,13 @@ public class HiveHookIT extends HiveITBase {
}
};
validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps), inputs, outputs);
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps);
AtlasEntity hiveProcess = validateProcess(event, inputs, outputs);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName);
......@@ -769,8 +920,14 @@ public class HiveHookIT extends HiveITBase {
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
HiveEventContext event = constructEvent(query, HiveOperation.EXPORT, inputs, outputs);
AtlasEntity processEntity = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(processEntity, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
validateHDFSPaths(processEntity, OUTPUTS, filename);
validateInputTables(processEntity, inputs);
......@@ -785,7 +942,16 @@ public class HiveHookIT extends HiveITBase {
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT,
getInputs(filename, Entity.Type.DFS_DIR), outputs);
AtlasEntity processEntity2 = validateProcess(event2);
AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2);
AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
Assert.assertNotEquals(processEntity.getGuid(), processEntity2.getGuid());
//Should create another process
filename = "pfile://" + mkdir("export2UnPartitioned");
......@@ -796,7 +962,18 @@ public class HiveHookIT extends HiveITBase {
inputs = getInputs(tableName, Entity.Type.TABLE);
outputs = getOutputs(filename, Entity.Type.DFS_DIR);
validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, inputs, outputs);
AtlasEntity processEntity3 = validateProcess(event3);
AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3);
AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process3.getGuid(), processEntity3.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity3), 1);
// Should be a different process compared to the previous ones
Assert.assertNotEquals(processEntity.getGuid(), processEntity3.getGuid());
Assert.assertNotEquals(processEntity2.getGuid(), processEntity3.getGuid());
//import again should create another process
query = "import table " + importTableName + " from '" + filename + "'";
......@@ -805,7 +982,20 @@ public class HiveHookIT extends HiveITBase {
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, getInputs(filename,
Entity.Type.DFS_DIR), outputs);
AtlasEntity processEntity4 = validateProcess(event4);
AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4);
AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process4.getGuid(), processEntity4.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity4), 1);
// Should be a different process compared to the previous ones
Assert.assertNotEquals(processEntity.getGuid(), processEntity4.getGuid());
Assert.assertNotEquals(processEntity2.getGuid(), processEntity4.getGuid());
Assert.assertNotEquals(processEntity3.getGuid(), processEntity4.getGuid());
}
@Test
......@@ -833,9 +1023,15 @@ public class HiveHookIT extends HiveITBase {
partitionIps.addAll(expectedExportInputs);
AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
HiveEventContext event1 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs);
AtlasEntity processEntity1 = validateProcess(event1, expectedExportInputs, outputs);
AtlasEntity hiveProcessExecution1 = validateProcessExecution(processEntity1, event1);
AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
validateHDFSPaths(processEntity, OUTPUTS, filename);
validateHDFSPaths(processEntity1, OUTPUTS, filename);
//Import
String importTableName = createTable(true);
......@@ -852,7 +1048,14 @@ public class HiveHookIT extends HiveITBase {
partitionOps.addAll(importOutputs);
validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs , partitionOps), expectedImportInputs, importOutputs);
HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT, expectedImportInputs , partitionOps);
AtlasEntity processEntity2 = validateProcess(event2, expectedImportInputs, importOutputs);
AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2);
AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
Assert.assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid());
//Export should update same process
filename = "pfile://" + mkdir("export2");
......@@ -866,7 +1069,17 @@ public class HiveHookIT extends HiveITBase {
addAll(outputs);
}};
validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);
HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2);
// this process entity should be the same as processEntity1 since the inputs and outputs are the same,
// hence the qualifiedName will be the same
AtlasEntity processEntity3 = validateProcess(event3, expectedExportInputs, p3Outputs);
AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3);
AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process3.getGuid(), processEntity3.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity3), 2);
Assert.assertEquals(processEntity1.getGuid(), processEntity3.getGuid());
query = "alter table " + importTableName + " drop partition (dt='"+ PART_FILE + "')";
......@@ -883,7 +1096,17 @@ public class HiveHookIT extends HiveITBase {
addAll(expectedImportInputs);
}};
validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps);
// This process is going to be the same as processEntity2
AtlasEntity processEntity4 = validateProcess(event4, expectedImport2Inputs, importOutputs);
AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4);
AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process4.getGuid(), processEntity4.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity4), 2);
Assert.assertEquals(processEntity2.getGuid(), processEntity4.getGuid());
Assert.assertNotEquals(processEntity1.getGuid(), processEntity4.getGuid());
}
@Test
......@@ -1176,15 +1399,12 @@ public class HiveHookIT extends HiveITBase {
);
}
/*
The test is disabled by default
Reason : Atlas uses Hive version 1.2.x and the Hive patch HIVE-13112 which enables column level lineage is not
committed in Hive version 1.2.x
This test will fail if the lineage information is not available from Hive
Once the patch for HIVE-13112 is committed to Hive branch 1.2.x, the test can be enabled
Please track HIVE-14706 to know the status of column lineage availability in latest Hive versions i.e 2.1.x
/**
* Re-enabled this test since HIVE-14706 is now fixed and the Hive version we are using sends
* us the column lineage information.
* @throws Exception
*/
@Test(enabled = false)
@Test
public void testColumnLevelLineage() throws Exception {
String sourceTable = "table" + random();
......@@ -1204,8 +1424,14 @@ public class HiveHookIT extends HiveITBase {
Set<ReadEntity> inputs = getInputs(sourceTable, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
HiveEventContext event = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
AtlasEntity processEntity1 = validateProcess(event);
AtlasEntity hiveProcessExecution1 = validateProcessExecution(processEntity1, event);
AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
Assert.assertEquals(processEntity1.getGuid(), processEntity1.getGuid());
assertProcessIsRegistered(event);
assertTableIsRegistered(DEFAULT_DB, ctasTableName);
String processQName = sortEventsAndGetProcessQualifiedName(event);
......@@ -1281,14 +1507,20 @@ public class HiveHookIT extends HiveITBase {
Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
HiveEventContext event = constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs);
validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));
AtlasEntity processEntity = validateProcess(event);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, event);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity.getGuid());
//Check lineage
String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
AtlasLineageInfo atlasLineageInfoInput = atlasClientV2.getLineageInfo(tableId, AtlasLineageInfo.LineageDirection.INPUT,0);
Map<String, AtlasEntityHeader> entityMap = atlasLineageInfoInput.getGuidEntityMap();
Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
//Below should be assertTrue - Fix https://issues.apache.org/jira/browse/ATLAS-653
Assert.assertFalse(entityMap.containsKey(tableId));
}
......@@ -1402,7 +1634,8 @@ public class HiveHookIT extends HiveITBase {
String processQualifiedName = getTableProcessQualifiedName(DEFAULT_DB, tableName);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQualifiedName, null);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
Assert.assertEquals(numberOfProcessExecutions(processEntity), 2);
//validateProcessExecution(processEntity, event);
validateHDFSPaths(processEntity, INPUTS, testPath);
}
......@@ -1890,6 +2123,34 @@ public class HiveHookIT extends HiveITBase {
}
}
private String assertProcessExecutionIsRegistered(AtlasEntity hiveProcess, final HiveEventContext event) throws Exception {
try {
String guid = "";
List<AtlasObjectId> processExecutions = toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS));
for (AtlasObjectId processExecution : processExecutions) {
AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.
getEntityByGuid(processExecution.getGuid());
AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
if (String.valueOf(entity.getAttribute(ATTRIBUTE_QUERY_TEXT)).equals(event.getQueryStr().toLowerCase().trim())) {
guid = entity.getGuid();
}
}
return assertEntityIsRegisteredViaGuid(guid, new AssertPredicate() {
@Override
public void assertOnEntity(final AtlasEntity entity) throws Exception {
String queryText = (String) entity.getAttribute(ATTRIBUTE_QUERY_TEXT);
Assert.assertEquals(queryText, event.getQueryStr().toLowerCase().trim());
}
});
} catch(Exception e) {
LOG.error("Exception : ", e);
throw e;
}
}
private String getDSTypeName(Entity entity) {
return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : HiveMetaStoreBridge.HDFS_PATH;
}
......@@ -2080,4 +2341,9 @@ public class HiveHookIT extends HiveITBase {
@Override
public Entity.Type getType() { return type; }
}
private int numberOfProcessExecutions(AtlasEntity hiveProcess) {
return toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size();
}
}
......@@ -324,6 +324,15 @@
"isUnique": false
}
]
},
{
"name": "ProcessExecution",
"superTypes": [
"Asset"
],
"serviceType": "atlas_core",
"typeVersion": "1.0",
"attributeDefs": []
}
],
"relationshipDefs": [
......
......@@ -457,6 +457,80 @@
"isUnique": false
}
]
},
{
"name" : "hive_process_execution",
"superTypes" : [
"ProcessExecution"
],
"serviceType": "hive",
"typeVersion" : "1.0",
"attributeDefs" : [
{
"name": "startTime",
"typeName": "date",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "endTime",
"typeName": "date",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "userName",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": true,
"isOptional": false,
"isUnique": false
},
{
"name": "queryText",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "queryGraph",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": true,
"isUnique": false
},
{
"name": "queryId",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "queryPlan",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": false,
"isOptional": false,
"isUnique": false
},
{
"name": "hostName",
"typeName": "string",
"cardinality": "SINGLE",
"isIndexable": true,
"isOptional": false,
"isUnique": false
}
]
}
],
"relationshipDefs": [
......@@ -567,6 +641,24 @@
"cardinality": "SET"
},
"propagateTags": "NONE"
},
{
"name": "hive_process_process_executions",
"serviceType": "hive",
"typeVersion": "1.0",
"relationshipCategory": "COMPOSITION",
"endDef1": {
"type": "hive_process",
"name": "processExecutions",
"cardinality": "SET",
"isContainer": true
},
"endDef2": {
"type": "hive_process_execution",
"name": "process",
"cardinality": "SINGLE"
},
"propagateTags": "NONE"
}
]
}
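The relationship above surfaces as a "processExecutions" relationship attribute on hive_process and a "process" attribute on hive_process_execution. Below is a minimal client-side sketch, not part of the patch, for listing a process's executions; the endpoint, credentials, and GUID are placeholders, and the raw list may need conversion to AtlasObjectId (the tests use a toAtlasObjectIdList helper for that):

import java.util.List;

import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.model.instance.AtlasEntity;

public class ListProcessExecutions {
    public static void main(String[] args) throws Exception {
        // Placeholder endpoint, credentials and GUID; replace with real values.
        AtlasClientV2 atlasClient = new AtlasClientV2(new String[] { "http://localhost:21000" },
                                                      new String[] { "admin", "admin" });
        String hiveProcessGuid = "<guid-of-a-hive_process-entity>";

        AtlasEntity hiveProcess = atlasClient.getEntityByGuid(hiveProcessGuid).getEntity();

        // "processExecutions" is the end1 name defined by hive_process_process_executions above;
        // each element references one hive_process_execution entity.
        List<?> executions = (List<?>) hiveProcess.getRelationshipAttribute("processExecutions");

        System.out.println("process executions: " + (executions == null ? 0 : executions.size()));
    }
}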
......@@ -19,6 +19,7 @@
package org.apache.atlas.examples;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.sun.jersey.core.util.MultivaluedMapImpl;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClientV2;
......@@ -42,6 +43,8 @@ import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasRelationshipDef;
import org.apache.atlas.model.typedef.AtlasRelationshipDef.PropagateTags;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.configuration.Configuration;
......@@ -108,8 +111,14 @@ public class QuickStartV2 {
public static final String LOGDATA_CLASSIFICATION = "Log Data";
public static final String LOAD_SALES_DAILY_PROCESS = "loadSalesDaily";
public static final String LOAD_SALES_DAILY_PROCESS_EXEC1 = "loadSalesDailyExec1";
public static final String LOAD_SALES_DAILY_PROCESS_EXEC2 = "loadSalesDailyExec2";
public static final String LOAD_SALES_MONTHLY_PROCESS = "loadSalesMonthly";
public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC1 = "loadSalesMonthlyExec1";
public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC2 = "loadSalesMonthlyExec2";
public static final String LOAD_LOGS_MONTHLY_PROCESS = "loadLogsMonthly";
public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC1 = "loadLogsMonthlyExec1";
public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC2 = "loadLogsMonthlyExec2";
public static final String PRODUCT_DIM_VIEW = "product_dim_view";
public static final String CUSTOMER_DIM_VIEW = "customer_dim_view";
......@@ -119,6 +128,7 @@ public class QuickStartV2 {
public static final String TABLE_TYPE = "Table";
public static final String VIEW_TYPE = "View";
public static final String LOAD_PROCESS_TYPE = "LoadProcess";
public static final String LOAD_PROCESS_EXECUTION_TYPE = "LoadProcessExecution";
public static final String STORAGE_DESC_TYPE = "StorageDesc";
public static final String TABLE_DATABASE_TYPE = "Table_DB";
......@@ -126,13 +136,14 @@ public class QuickStartV2 {
public static final String VIEW_TABLES_TYPE = "View_Tables";
public static final String TABLE_COLUMNS_TYPE = "Table_Columns";
public static final String TABLE_STORAGE_DESC_TYPE = "Table_StorageDesc";
public static final String PROCESS_PROCESS_EXECUTION_DESC_TYPE = "Process_ProcessExecution";
public static final String VERSION_1 = "1.0";
public static final String MANAGED_TABLE = "Managed";
public static final String EXTERNAL_TABLE = "External";
public static final String CLUSTER_SUFFIX = "@cl1";
public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE,
public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE, LOAD_PROCESS_EXECUTION_TYPE,
VIEW_TYPE, JDBC_CLASSIFICATION, ETL_CLASSIFICATION, METRIC_CLASSIFICATION,
PII_CLASSIFICATION, FACT_CLASSIFICATION, DIMENSION_CLASSIFICATION, LOGDATA_CLASSIFICATION,
TABLE_DATABASE_TYPE, VIEW_DATABASE_TYPE, VIEW_TABLES_TYPE, TABLE_COLUMNS_TYPE, TABLE_STORAGE_DESC_TYPE };
......@@ -256,6 +267,15 @@ public class QuickStartV2 {
createRequiredAttrDef("queryId", "string"),
createRequiredAttrDef("queryGraph", "string"));
AtlasEntityDef processExecutionTypeDef = createClassTypeDef(LOAD_PROCESS_EXECUTION_TYPE, LOAD_PROCESS_EXECUTION_TYPE, VERSION_1, Collections.singleton("ProcessExecution"),
createOptionalAttrDef("userName", "string"),
createOptionalAttrDef("startTime", "long"),
createOptionalAttrDef("endTime", "long"),
createRequiredAttrDef("queryText", "string"),
createRequiredAttrDef("queryPlan", "string"),
createRequiredAttrDef("queryId", "string"),
createRequiredAttrDef("queryGraph", "string"));
AtlasEntityDef viewTypeDef = createClassTypeDef(VIEW_TYPE, VIEW_TYPE, VERSION_1, Collections.singleton("DataSet"));
// Relationship-Definitions
......@@ -278,6 +298,10 @@ public class QuickStartV2 {
AtlasRelationshipDef tableStorageDescTypeDef = createRelationshipTypeDef(TABLE_STORAGE_DESC_TYPE, TABLE_STORAGE_DESC_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
createRelationshipEndDef(TABLE_TYPE, "sd", SINGLE, true),
createRelationshipEndDef(STORAGE_DESC_TYPE, "table", SINGLE, false));
AtlasRelationshipDef processProcessExecutionTypeDef = createRelationshipTypeDef(PROCESS_PROCESS_EXECUTION_DESC_TYPE, PROCESS_PROCESS_EXECUTION_DESC_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
createRelationshipEndDef(LOAD_PROCESS_TYPE, "processExecutions", SET, true),
createRelationshipEndDef(LOAD_PROCESS_EXECUTION_TYPE, "process", SINGLE, false));
// Classification-Definitions
AtlasClassificationDef dimClassifDef = createTraitTypeDef(DIMENSION_CLASSIFICATION, "Dimension Classification", VERSION_1, Collections.emptySet());
......@@ -288,8 +312,8 @@ public class QuickStartV2 {
AtlasClassificationDef jdbcClassifDef = createTraitTypeDef(JDBC_CLASSIFICATION, "JdbcAccess Classification", VERSION_1, Collections.emptySet());
AtlasClassificationDef logClassifDef = createTraitTypeDef(LOGDATA_CLASSIFICATION, "LogData Classification", VERSION_1, Collections.emptySet());
List<AtlasEntityDef> entityDefs = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, viewTypeDef);
List<AtlasRelationshipDef> relationshipDefs = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef);
List<AtlasEntityDef> entityDefs = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, processExecutionTypeDef, viewTypeDef);
List<AtlasRelationshipDef> relationshipDefs = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef, processProcessExecutionTypeDef);
List<AtlasClassificationDef> classificationDefs = asList(dimClassifDef, factClassifDef, piiClassifDef, metricClassifDef, etlClassifDef, jdbcClassifDef, logClassifDef);
return new AtlasTypesDef(Collections.emptyList(), Collections.emptyList(), classificationDefs, entityDefs, relationshipDefs);
......@@ -362,20 +386,36 @@ public class QuickStartV2 {
createView(CUSTOMER_DIM_VIEW, reportingDB, asList(customerDim), DIMENSION_CLASSIFICATION, JDBC_CLASSIFICATION);
// Process entities
createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
AtlasEntity loadProcess = createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
asList(salesFact, timeDim),
asList(salesFactDaily),
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC1, "hive query execution 1 for daily summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC2, "hive query execution 2 for daily summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
AtlasEntity loadProcess2 = createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
asList(salesFactDaily),
asList(salesFactMonthly),
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "John ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
AtlasEntity loadProcess3 = createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
asList(loggingFactDaily),
asList(loggingFactMonthly),
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "Tim ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "Tim ETL",
"create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
}
private AtlasEntity createInstance(AtlasEntity entity) throws Exception {
......@@ -522,6 +562,31 @@ public class QuickStartV2 {
return createInstance(entity);
}
AtlasEntity createProcessExecution(AtlasEntity hiveProcess, String name, String description, String user,
String queryText, String queryPlan, String queryId, String queryGraph, String... classificationNames) throws Exception {
AtlasEntity entity = new AtlasEntity(LOAD_PROCESS_EXECUTION_TYPE);
Long startTime = System.currentTimeMillis();
Long endTime = System.currentTimeMillis() + 10000;
// set attributes
entity.setAttribute("name", name);
entity.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, name + CLUSTER_SUFFIX + startTime.toString() + endTime.toString());
entity.setAttribute("description", description);
entity.setAttribute("user", user);
entity.setAttribute("startTime", startTime);
entity.setAttribute("endTime", endTime);
entity.setAttribute("queryText", queryText);
entity.setAttribute("queryPlan", queryPlan);
entity.setAttribute("queryId", queryId);
entity.setAttribute("queryGraph", queryGraph);
entity.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
// set classifications
entity.setClassifications(toAtlasClassifications(classificationNames));
return createInstance(entity);
}
AtlasEntity createView(String name, AtlasEntity database, List<AtlasEntity> inputTables, String... classificationNames) throws Exception {
AtlasEntity entity = new AtlasEntity(VIEW_TYPE);
......