Commit f51c8861 by Suma Shivaprasad

ATLAS-917 Add hdfs paths to process qualified name for non-partition based queries(sumasai)

parent f623bddf
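The commit touches three files: HiveMetaStoreBridge (createTime error handling), HiveHook (how the hive_process qualified name is assembled), and the HiveHookIT integration tests. The core change in HiveHook is that inputs and outputs are sorted and de-duplicated before the qualified name is built, and HDFS/local paths are kept in the name only for non-partition-based LOAD/EXPORT/IMPORT operations (QUERY operations always drop them). The sketch below is an illustration only, not code from this commit: it uses plain strings instead of the Hive Entity types, takes the ignore-paths decision as a flag (computed by ignoreHDFSPathsinQFName in the real hook), and assumes ':' and '->' as separators, matching the SEP and IO_SEP constants in the diff.

    import java.util.Arrays;
    import java.util.List;

    // Illustration only: simplified stand-ins for the Atlas/Hive types used in the real hook.
    public class ProcessNameSketch {
        private static final String SEP = ":";
        private static final String IO_SEP = "->";

        // Builds "<OPERATION>:<input1>:<input2>-><output1>", optionally dropping file-system paths,
        // and stripping '/' because it breaks query parsing on Atlas (see addDataset in the diff).
        static String qualifiedName(String op, List<String> inputs, List<String> outputs, boolean ignorePaths) {
            StringBuilder buffer = new StringBuilder(op);
            append(buffer, inputs, ignorePaths);
            buffer.append(IO_SEP);
            append(buffer, outputs, ignorePaths);
            return buffer.toString();
        }

        private static void append(StringBuilder buffer, List<String> datasets, boolean ignorePaths) {
            if (datasets == null) {
                return;
            }
            for (String name : datasets) {
                boolean isPath = name.startsWith("hdfs://") || name.startsWith("file://");
                if (ignorePaths && isPath) {
                    continue; // partition-based LOAD/IMPORT/EXPORT and every QUERY drop paths from the name
                }
                buffer.append(SEP).append(name.toLowerCase().replaceAll("/", ""));
            }
        }

        public static void main(String[] args) {
            // Non-partition-based LOAD: the HDFS source path stays in the qualified name.
            System.out.println(qualifiedName("LOAD",
                    Arrays.asList("hdfs://namenode:8020/tmp/loadfile"),
                    Arrays.asList("default.t1@primary"), false));
            // LOAD into a partition (or any QUERY): the path is left out, so reruns from a
            // different staging path resolve to the same process instead of creating a new one.
            System.out.println(qualifiedName("LOAD",
                    Arrays.asList("hdfs://namenode:8020/tmp/loadfile"),
                    Arrays.asList("default.t1@primary"), true));
        }
    }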
HiveMetaStoreBridge.java
@@ -426,8 +426,8 @@ public class HiveMetaStoreBridge {
                 createDate = new Date(hiveTable.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR);
                 LOG.debug("Setting create time to {} ", createDate);
                 tableReference.set(HiveDataModelGenerator.CREATE_TIME, createDate);
-            } catch(NumberFormatException ne) {
-                LOG.error("Error while updating createTime for the table {} ", hiveTable.getCompleteName(), ne);
+            } catch(Exception ne) {
+                LOG.error("Error while setting createTime for the table {} ", hiveTable.getCompleteName(), ne);
             }
         }
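A small aside on the hunk above: the catch is widened from NumberFormatException to Exception so that any failure while deriving createTime is logged and skipped rather than aborting table registration. The stand-alone sketch below (not the real HiveMetaStoreBridge; MILLIS_CONVERT_FACTOR is assumed to be 1000, since Hive reports create time in seconds) shows the shape of that logic.

    import java.util.Date;

    // Illustration only; HiveMetaStoreBridge itself is not reproduced here.
    public class CreateTimeSketch {
        private static final long MILLIS_CONVERT_FACTOR = 1000L; // assumed: seconds -> milliseconds

        static Date createDate(int createTimeSeconds, String tableName) {
            try {
                return new Date(createTimeSeconds * MILLIS_CONVERT_FACTOR);
            } catch (Exception e) {
                // Mirrors the widened catch: log and continue rather than fail the whole import.
                System.err.println("Error while setting createTime for the table " + tableName + ": " + e);
                return null;
            }
        }

        public static void main(String[] args) {
            System.out.println(createDate(1435000000, "default.sales"));
        }
    }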
HiveHook.java
@@ -21,6 +21,7 @@ package org.apache.atlas.hive.hook;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import kafka.security.auth.Write;
 import org.apache.atlas.AtlasClient;
 import org.apache.atlas.AtlasConstants;
 import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
@@ -66,7 +67,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
+import java.util.SortedSet;
 import java.util.TreeMap;
+import java.util.TreeSet;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -86,8 +89,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     public static final String QUEUE_SIZE = CONF_PREFIX + "queueSize";
     public static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";

-    private static final String SEP = ":".intern();
-    private static final String IO_SEP = "->".intern();
+    static final String SEP = ":".intern();
+    static final String IO_SEP = "->".intern();

     private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
@@ -291,6 +294,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private void deleteDatabase(HiveMetaStoreBridge dgiBridge, HiveEventContext event) {
         if (event.getOutputs().size() > 1) {
             LOG.info("Starting deletion of tables and databases with cascade {} ", event.getQueryStr());
+        } else {
+            LOG.info("Starting deletion of database {} ", event.getQueryStr());
         }

         for (WriteEntity output : event.getOutputs()) {
@@ -549,10 +554,6 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         return str.toLowerCase().trim();
     }

-    public static String normalize(String queryStr) {
-        return lower(queryStr);
-    }
-
     private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception {
         Set<ReadEntity> inputs = event.getInputs();
         Set<WriteEntity> outputs = event.getOutputs();
@@ -567,8 +568,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
             LOG.info("Query id/plan is missing for {}", event.getQueryStr());
         }

-        final SortedMap<Entity, Referenceable> source = new TreeMap<>(entityComparator);
-        final SortedMap<Entity, Referenceable> target = new TreeMap<>(entityComparator);
+        final SortedMap<ReadEntity, Referenceable> source = new TreeMap<>(entityComparator);
+        final SortedMap<WriteEntity, Referenceable> target = new TreeMap<>(entityComparator);

         final Set<String> dataSets = new HashSet<>();
         final Set<Referenceable> entities = new LinkedHashSet<>();
@@ -577,16 +578,27 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         // filter out select queries which do not modify data
         if (!isSelectQuery) {
-            for (ReadEntity readEntity : event.getInputs()) {
+
+            SortedSet<ReadEntity> sortedHiveInputs = new TreeSet<>(entityComparator);;
+            if ( event.getInputs() != null) {
+                sortedHiveInputs.addAll(event.getInputs());
+            }
+
+            SortedSet<WriteEntity> sortedHiveOutputs = new TreeSet<>(entityComparator);
+            if ( event.getOutputs() != null) {
+                sortedHiveOutputs.addAll(event.getOutputs());
+            }
+
+            for (ReadEntity readEntity : sortedHiveInputs) {
                 processHiveEntity(dgiBridge, event, readEntity, dataSets, source, entities);
             }

-            for (WriteEntity writeEntity : event.getOutputs()) {
+            for (WriteEntity writeEntity : sortedHiveOutputs) {
                 processHiveEntity(dgiBridge, event, writeEntity, dataSets, target, entities);
             }

             if (source.size() > 0 || target.size() > 0) {
-                Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, source, target);
+                Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, sortedHiveInputs, sortedHiveOutputs, source, target);
                 entities.add(processReferenceable);
                 event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), new ArrayList<>(entities)));
             } else {
@@ -597,8 +609,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         }
     }

-    private void processHiveEntity(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, Set<String> dataSetsProcessed,
-        SortedMap<Entity, Referenceable> dataSets, Set<Referenceable> entities) throws Exception {
+    private <T extends Entity> void processHiveEntity(HiveMetaStoreBridge dgiBridge, HiveEventContext event, T entity, Set<String> dataSetsProcessed,
+        SortedMap<T, Referenceable> dataSets, Set<Referenceable> entities) throws Exception {
         if (entity.getType() == Type.TABLE || entity.getType() == Type.PARTITION) {
             final String tblQFName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable());
             if (!dataSetsProcessed.contains(tblQFName)) {
@@ -609,7 +621,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
             }
         } else if (entity.getType() == Type.DFS_DIR) {
             final String pathUri = lower(new Path(entity.getLocation()).toString());
-            LOG.info("Registering DFS Path {} ", pathUri);
+            LOG.debug("Registering DFS Path {} ", pathUri);
             if (!dataSetsProcessed.contains(pathUri)) {
                 Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri);
                 dataSets.put(entity, hdfsPath);
@@ -653,7 +665,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
         List<Referenceable> entities = new ArrayList<>();
-        final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
+        final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);
         Table hiveTable = hiveEntity.getTable();
         //Refresh to get the correct location
         hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());
@@ -665,18 +677,25 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
             dfsEntity.setTyp(Type.DFS_DIR);
             dfsEntity.setName(location);

-            SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator) {{
+            SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {{
                 put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
             }};

-            SortedMap<Entity, Referenceable> outputs = new TreeMap<Entity, Referenceable>(entityComparator) {{
+            SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {{
                 put(hiveEntity, tables.get(Type.TABLE));
             }};

-            Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, inputs, outputs);
+            SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
+            sortedIps.addAll(hiveInputsMap.keySet());
+            SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
+            sortedOps.addAll(hiveOutputsMap.keySet());
+
+            Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event,
+                sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);
             String tableQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);

             if (isCreateOp(event)){
+                LOG.info("Overriding process qualified name to {}", tableQualifiedName);
                 processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
             }
             entities.addAll(tables.values());
@@ -689,6 +708,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         if (HiveOperation.CREATETABLE.equals(hiveEvent.getOperation())
             || HiveOperation.CREATEVIEW.equals(hiveEvent.getOperation())
             || HiveOperation.ALTERVIEW_AS.equals(hiveEvent.getOperation())
+            || HiveOperation.ALTERTABLE_LOCATION.equals(hiveEvent.getOperation())
             || HiveOperation.CREATETABLE_AS_SELECT.equals(hiveEvent.getOperation())) {
             return true;
         }
@@ -696,11 +716,11 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     }

     private Referenceable getProcessReferenceable(HiveMetaStoreBridge dgiBridge, HiveEventContext hiveEvent,
-        SortedMap<Entity, Referenceable> source, SortedMap<Entity, Referenceable> target) {
+        final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> source, SortedMap<WriteEntity, Referenceable> target) {
         Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
         String queryStr = lower(hiveEvent.getQueryStr());
-        processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getProcessQualifiedName(hiveEvent.getOperation(), source, target));
+        processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getProcessQualifiedName(hiveEvent, sortedHiveInputs, sortedHiveOutputs, source, target));

         LOG.debug("Registering query: {}", queryStr);
         List<Referenceable> sourceList = new ArrayList<>(source.values());
@@ -733,51 +753,113 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     }

     @VisibleForTesting
-    static String getProcessQualifiedName(HiveOperation op, SortedMap<Entity, Referenceable> inputs, SortedMap<Entity, Referenceable> outputs) {
+    static String getProcessQualifiedName(HiveEventContext eventContext, final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> hiveInputsMap, SortedMap<WriteEntity, Referenceable> hiveOutputsMap) {
+        HiveOperation op = eventContext.getOperation();
         StringBuilder buffer = new StringBuilder(op.getOperationName());
-        addDatasets(op, buffer, inputs);
+
+        boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
+        if ( ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
+            LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
+        }
+
+        addInputs(op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
         buffer.append(IO_SEP);
-        addDatasets(op, buffer, outputs);
+        addOutputs(op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
         LOG.info("Setting process qualified name to {}", buffer);
         return buffer.toString();
     }

-    private static void addDatasets(HiveOperation op, StringBuilder buffer, final Map<Entity, Referenceable> refs) {
-        if (refs != null) {
-            for (Entity input : refs.keySet()) {
-                final Entity entity = input;
-
-                //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
-                if (addQueryType(op, entity)) {
-                    buffer.append(SEP);
-                    buffer.append(((WriteEntity) entity).getWriteType().name());
-                }
-                if (Type.DFS_DIR.equals(entity.getType()) ||
-                    Type.LOCAL_DIR.equals(entity.getType())) {
-                    LOG.debug("Skipping dfs dir addition into process qualified name {} ", refs.get(input).get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME));
-                } else {
-                    buffer.append(SEP);
-                    String dataSetQlfdName = (String) refs.get(input).get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
-                    // '/' breaks query parsing on ATLAS
-                    buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
-                }
-            }
-        }
-    }
+    private static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
+        switch (op) {
+        case LOAD:
+        case IMPORT:
+            return isPartitionBasedQuery(outputs);
+        case EXPORT:
+            return isPartitionBasedQuery(inputs);
+        case QUERY:
+            return true;
+        }
+        return false;
+    }
+
+    private static boolean isPartitionBasedQuery(Set<? extends Entity> entities) {
+        for (Entity entity : entities) {
+            if (Type.PARTITION.equals(entity.getType())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private static void addInputs(HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) {
+        if (refs != null) {
+            if (sortedInputs != null) {
+                Set<String> dataSetsProcessed = new LinkedHashSet<>();
+                for (Entity input : sortedInputs) {
+                    if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
+                        //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
+                        if (ignoreHDFSPathsInQFName &&
+                            (Type.DFS_DIR.equals(input.getType()) || Type.LOCAL_DIR.equals(input.getType()))) {
+                            LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
+                        } else if (refs.containsKey(input)) {
+                            addDataset(buffer, refs.get(input));
+                        }
+                        dataSetsProcessed.add(input.getName().toLowerCase());
+                    }
+                }
+            }
+        }
+    }
+
+    private static void addDataset(StringBuilder buffer, Referenceable ref) {
+        buffer.append(SEP);
+        String dataSetQlfdName = (String) ref.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
+        // '/' breaks query parsing on ATLAS
+        buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
+    }
+
+    private static void addOutputs(HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) {
+        if (refs != null) {
+            Set<String> dataSetsProcessed = new LinkedHashSet<>();
+            if (sortedOutputs != null) {
+                for (Entity output : sortedOutputs) {
+                    final Entity entity = output;
+                    if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
+                        //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
+                        if (addQueryType(op, (WriteEntity) entity)) {
+                            buffer.append(SEP);
+                            buffer.append(((WriteEntity) entity).getWriteType().name());
+                        }
+                        if (ignoreHDFSPathsInQFName &&
+                            (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
+                            LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
+                        } else if (refs.containsKey(output)) {
+                            addDataset(buffer, refs.get(output));
+                        }
+                        dataSetsProcessed.add(output.getName().toLowerCase());
+                    }
+                }
+            }
+        }
+    }

-    private static boolean addQueryType(HiveOperation op, Entity entity) {
-        if (WriteEntity.class.isAssignableFrom(entity.getClass())) {
-            if (((WriteEntity) entity).getWriteType() != null &&
-                op.equals(HiveOperation.QUERY)) {
-                switch (((WriteEntity) entity).getWriteType()) {
-                case INSERT:
-                case INSERT_OVERWRITE:
-                case UPDATE:
-                case DELETE:
-                case PATH_WRITE:
-                    return true;
-                default:
-                }
-            }
-        }
+    private static boolean addQueryType(HiveOperation op, WriteEntity entity) {
+        if (((WriteEntity) entity).getWriteType() != null && HiveOperation.QUERY.equals(op)) {
+            switch (((WriteEntity) entity).getWriteType()) {
+            case INSERT:
+            case INSERT_OVERWRITE:
+            case UPDATE:
+            case DELETE:
+                return true;
+            case PATH_WRITE:
+                //Add query type only for DFS paths and ignore local paths since they are not added as outputs
+                if ( !Type.LOCAL_DIR.equals(entity.getType())) {
+                    return true;
+                }
+                break;
+            default:
+            }
+        }
         return false;
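Before the test changes, one more note on the hook changes above: reruns of the same statement must resolve to the same hive_process, so entities are first sorted (a TreeSet ordered by entityComparator) and each dataset contributes to the qualified name at most once (the dataSetsProcessed set keyed by lower-cased name). The snippet below illustrates just that sort-and-dedup step; it is not code from this commit, and plain strings stand in for ReadEntity/WriteEntity while case-insensitive ordering stands in for entityComparator.

    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.Set;
    import java.util.TreeSet;

    // Illustration only: plain strings stand in for the Hive entities.
    public class SortDedupSketch {
        public static void main(String[] args) {
            // Hive reports inputs in no particular order; sorting makes the name stable across reruns.
            Set<String> sortedInputs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
            sortedInputs.addAll(Arrays.asList("default.T2@primary", "default.t1@primary", "default.t1@primary"));

            // Mirrors the dataSetsProcessed set in addInputs/addOutputs: one entry per lower-cased name.
            Set<String> dataSetsProcessed = new LinkedHashSet<>();
            StringBuilder buffer = new StringBuilder("QUERY");
            for (String name : sortedInputs) {
                if (dataSetsProcessed.add(name.toLowerCase())) {
                    buffer.append(":").append(name.toLowerCase());
                }
            }
            System.out.println(buffer); // QUERY:default.t1@primary:default.t2@primary
        }
    }

The testInsertIntoTable change below asserts exactly this property: two input tables added in arbitrary order must appear in sorted order in the registered process qualified name.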
HiveHookIT.java
@@ -62,15 +62,22 @@ import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.SortedMap;
+import java.util.SortedSet;
 import java.util.TreeMap;
+import java.util.TreeSet;

 import static org.apache.atlas.AtlasClient.NAME;
 import static org.apache.atlas.hive.hook.HiveHook.entityComparator;
 import static org.apache.atlas.hive.hook.HiveHook.getProcessQualifiedName;
 import static org.apache.atlas.hive.hook.HiveHook.lower;
+import static org.apache.atlas.hive.hook.HiveHook.IO_SEP;
+import static org.apache.atlas.hive.hook.HiveHook.SEP;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertTrue;
@@ -82,6 +89,8 @@ public class HiveHookIT {
     private static final String DGI_URL = "http://localhost:21000/";
     private static final String CLUSTER_NAME = "test";
     public static final String DEFAULT_DB = "default";
+    private static final String PART_FILE = "2015-01-01";
+
     private Driver driver;
     private AtlasClient atlasClient;
     private HiveMetaStoreBridge hiveMetaStoreBridge;
@@ -262,7 +271,7 @@ public class HiveHookIT {
         validateHDFSPaths(processReference, INPUTS, pFile);
     }

-    private List<Entity> getInputs(String inputName, Entity.Type entityType) {
+    private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) {
         final ReadEntity entity = new ReadEntity();

         if ( Entity.Type.DFS_DIR.equals(entityType)) {
@@ -270,14 +279,13 @@ public class HiveHookIT {
             entity.setTyp(Entity.Type.DFS_DIR);
         } else {
             entity.setName(getQualifiedTblName(inputName));
-            entity.setTyp(Entity.Type.TABLE);
+            entity.setTyp(entityType);
         }

-        return new ArrayList<Entity>() {{ add(entity); }};
+        return new LinkedHashSet<ReadEntity>() {{ add(entity); }};
     }

-    private List<Entity> getOutputs(String inputName, Entity.Type entityType) {
+    private Set<WriteEntity> getOutputs(String inputName, Entity.Type entityType) {
         final WriteEntity entity = new WriteEntity();

         if ( Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
@@ -285,27 +293,32 @@ public class HiveHookIT {
             entity.setTyp(entityType);
         } else {
             entity.setName(getQualifiedTblName(inputName));
-            entity.setTyp(Entity.Type.TABLE);
+            entity.setTyp(entityType);
         }

-        return new ArrayList<Entity>() {{ add(entity); }};
+        return new LinkedHashSet<WriteEntity>() {{ add(entity); }};
     }

-    private void validateOutputTables(Referenceable processReference, List<Entity> expectedTables) throws Exception {
+    private void validateOutputTables(Referenceable processReference, Set<WriteEntity> expectedTables) throws Exception {
         validateTables(processReference, OUTPUTS, expectedTables);
     }

-    private void validateInputTables(Referenceable processReference, List<Entity> expectedTables) throws Exception {
+    private void validateInputTables(Referenceable processReference, Set<ReadEntity> expectedTables) throws Exception {
         validateTables(processReference, INPUTS, expectedTables);
     }

-    private void validateTables(Referenceable processReference, String attrName, List<Entity> expectedTables) throws Exception {
+    private void validateTables(Referenceable processReference, String attrName, Set<? extends Entity> expectedTables) throws Exception {
         List<Id> tableRef = (List<Id>) processReference.get(attrName);
+        Iterator<? extends Entity> iterator = expectedTables.iterator();
         for(int i = 0; i < expectedTables.size(); i++) {
-            Referenceable entity = atlasClient.getEntity(tableRef.get(i)._getId());
-            LOG.debug("Validating output {} {} ", i, entity);
-            Assert.assertEquals(entity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), expectedTables.get(i).getName());
+            Entity hiveEntity = iterator.next();
+            if (Entity.Type.TABLE.equals(hiveEntity.getType()) ||
+                Entity.Type.DFS_DIR.equals(hiveEntity.getType())) {
+                Referenceable entity = atlasClient.getEntity(tableRef.get(i)._getId());
+                LOG.debug("Validating output {} {} ", i, entity);
+                Assert.assertEquals(entity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), hiveEntity.getName());
+            }
         }
     }
@@ -338,18 +351,22 @@ public class HiveHookIT {
         String query = "create table " + ctasTableName + " as select * from " + tableName;
         runCommand(query);

-        final ReadEntity entity = new ReadEntity();
-        entity.setName(getQualifiedTblName(tableName));
-        entity.setTyp(Entity.Type.TABLE);
-        final WriteEntity writeEntity = new WriteEntity();
-        writeEntity.setTyp(Entity.Type.TABLE);
-        writeEntity.setName(getQualifiedTblName(ctasTableName));
-        assertProcessIsRegistered(query, HiveOperation.CREATETABLE_AS_SELECT, new ArrayList<Entity>() {{ add(entity); }}, new ArrayList<Entity>() {{ add(writeEntity); }});
+        final Set<ReadEntity> readEntities = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities, writeEntities));
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);
     }

+    private HiveHook.HiveEventContext constructEvent(String query, HiveOperation op, Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
+        HiveHook.HiveEventContext event = new HiveHook.HiveEventContext();
+        event.setQueryStr(query);
+        event.setOperation(op);
+        event.setInputs(inputs);
+        event.setOutputs(outputs);
+        return event;
+    }
+
     @Test
     public void testDropAndRecreateCTASOutput() throws Exception {
         String tableName = createTable();
@@ -359,10 +376,11 @@ public class HiveHookIT {
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);

-        String processId = assertProcessIsRegistered(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
+        final HiveHook.HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
+        String processId = assertProcessIsRegistered(hiveEventContext);

         final String drpquery = String.format("drop table %s ", ctasTableName);
         runCommand(drpquery);
@@ -371,14 +389,13 @@ public class HiveHookIT {
         //Fix after ATLAS-876
         runCommand(query);
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);
-        String process2Id = assertProcessIsRegistered(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
+        String process2Id = assertProcessIsRegistered(hiveEventContext, inputs, outputs);

         Assert.assertEquals(process2Id, processId);

         Referenceable processRef = atlasClient.getEntity(processId);

-        validateInputTables(processRef, inputs);
-        outputs.add(outputs.get(0));
+        outputs.add(outputs.iterator().next());
         validateOutputTables(processRef, outputs);
     }
@@ -389,7 +406,7 @@ public class HiveHookIT {
         String query = "create view " + viewName + " as select * from " + tableName;
         runCommand(query);

-        assertProcessIsRegistered(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
         assertTableIsRegistered(DEFAULT_DB, viewName);
     }
@@ -403,7 +420,7 @@ public class HiveHookIT {
         runCommand(query);

         String table1Id = assertTableIsRegistered(DEFAULT_DB, table1Name);
-        assertProcessIsRegistered(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
         String viewId = assertTableIsRegistered(DEFAULT_DB, viewName);

         //Check lineage which includes table1
@@ -419,7 +436,7 @@ public class HiveHookIT {
         runCommand(query);

         //Check if alter view process is registered
-        assertProcessIsRegistered(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
         String table2Id = assertTableIsRegistered(DEFAULT_DB, table2Name);
         Assert.assertEquals(assertTableIsRegistered(DEFAULT_DB, viewName), viewId);
@@ -456,9 +473,7 @@ public class HiveHookIT {
         String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName;
         runCommand(query);

-        List<Entity> outputs = getOutputs(tableName, Entity.Type.TABLE);
-
-        assertProcessIsRegistered(query, HiveOperation.LOAD, null, outputs);
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE)));
     }

     @Test
@@ -466,41 +481,56 @@ public class HiveHookIT {
         String tableName = createTable(true);
         String loadFile = file("load");
-        String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName + " partition(dt = '2015-01-01')";
+        String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName + " partition(dt = '"+ PART_FILE + "')";
         runCommand(query);

-        validateProcess(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE)));
     }

     @Test
-    public void testLoadDFSPath() throws Exception {
+    public void testLoadDFSPathPartitioned() throws Exception {
         String tableName = createTable(true, true, false);

-        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
+        assertTableIsRegistered(DEFAULT_DB, tableName);

-        String loadFile = createTestDFSFile("loadDFSFile");
-        String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '2015-01-01')";
+        final String loadFile = createTestDFSFile("loadDFSFile");
+        String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '"+ PART_FILE + "')";
         runCommand(query);

-        final List<Entity> outputs = getOutputs(tableName, Entity.Type.TABLE);
-        Referenceable processReference = validateProcess(query, HiveOperation.LOAD, getInputs(loadFile, Entity.Type.DFS_DIR), outputs);
-        validateHDFSPaths(processReference, INPUTS, loadFile);
+        final Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
+        final Set<ReadEntity> inputs = getInputs(loadFile, Entity.Type.DFS_DIR);

+        final Set<WriteEntity> partitionOps = new LinkedHashSet<>(outputs);
+        partitionOps.addAll(getOutputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION));
+
+        Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.LOAD, inputs, partitionOps), inputs, outputs);
+        validateHDFSPaths(processReference, INPUTS, loadFile);
         validateOutputTables(processReference, outputs);

+        final String loadFile2 = createTestDFSFile("loadDFSFile1");
+        query = "load data inpath '" + loadFile2 + "' into table " + tableName + " partition(dt = '"+ PART_FILE + "')";
+        runCommand(query);
+
+        Set<ReadEntity> process2Inputs = getInputs(loadFile2, Entity.Type.DFS_DIR);
+        Set<ReadEntity> expectedInputs = new LinkedHashSet<>();
+        expectedInputs.addAll(process2Inputs);
+        expectedInputs.addAll(inputs);
+
+        validateProcess(constructEvent(query, HiveOperation.LOAD, expectedInputs, partitionOps), expectedInputs, outputs);
     }

     private String getQualifiedTblName(String inputTable) {
         String inputtblQlfdName = inputTable;

-        if (inputTable != null && !inputTable.contains(".")) {
+        if (inputTable != null && !inputTable.contains("@")) {
             inputtblQlfdName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, inputTable);
         }
         return inputtblQlfdName;
     }

-    private Referenceable validateProcess(String query, HiveOperation op, List<Entity> inputTables, List<Entity> outputTables) throws Exception {
-        String processId = assertProcessIsRegistered(query, op, inputTables, outputTables);
+    private Referenceable validateProcess(HiveHook.HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
+        String processId = assertProcessIsRegistered(event, inputTables, outputTables);
         Referenceable process = atlasClient.getEntity(processId);
         if (inputTables == null) {
             Assert.assertNull(process.get(INPUTS));
@@ -519,25 +549,47 @@ public class HiveHookIT {
         return process;
     }

+    private Referenceable validateProcess(HiveHook.HiveEventContext event) throws Exception {
+        return validateProcess(event, event.getInputs(), event.getOutputs());
+    }
+
     @Test
     public void testInsertIntoTable() throws Exception {
-        String tableName = createTable();
+        String inputTable1Name = createTable();
+        String inputTable2Name = createTable();
         String insertTableName = createTable();
-        assertTableIsRegistered(DEFAULT_DB, tableName);
+        assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
         assertTableIsRegistered(DEFAULT_DB, insertTableName);

-        String query = "insert into " + insertTableName + " select id, name from " + tableName;
+        String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
-        ((WriteEntity)outputs.get(0)).setWriteType(WriteEntity.WriteType.INSERT);
+        final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
+        inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
+
+        Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
+        (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
+
+        HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
+
+        Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {{
+            addAll(inputs);
+        }};
+
+        assertTableIsRegistered(DEFAULT_DB, insertTableName);
+        Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
+
+        //Test sorting of tbl names
+        SortedSet<String> sortedTblNames = new TreeSet<>();
+        sortedTblNames.add(getQualifiedTblName(inputTable1Name));
+        sortedTblNames.add(getQualifiedTblName(inputTable2Name));

-        Referenceable processRef1 = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        //Verify sorted order of inputs in qualified name
+        Assert.assertEquals(Joiner.on(SEP).join("QUERY", sortedTblNames.first(), sortedTblNames.last()) + IO_SEP + SEP + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(), getQualifiedTblName(insertTableName))
+            , processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME));

         //Rerun same query. Should result in same process
         runCommandWithDelay(query, 1000);
-        Referenceable processRef2 = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
         Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
     }
@@ -550,7 +602,7 @@ public class HiveHookIT {
             "insert overwrite LOCAL DIRECTORY '" + randomLocalPath.getAbsolutePath() + "' select id, name from " + tableName;
         runCommand(query);

-        validateProcess(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null);
+        validateProcess(constructEvent(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null));

         assertTableIsRegistered(DEFAULT_DB, tableName);
     }
@@ -564,72 +616,78 @@ public class HiveHookIT {
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        final List<Entity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
-        ((WriteEntity)outputs.get(0)).setWriteType(WriteEntity.WriteType.PATH_WRITE);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
+        ((WriteEntity)outputs.iterator().next()).setWriteType(WriteEntity.WriteType.PATH_WRITE);

-        Referenceable processReference = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        final HiveHook.HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
+        Referenceable processReference = validateProcess(hiveEventContext);
         validateHDFSPaths(processReference, OUTPUTS, pFile1);

         String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
         validateInputTables(processReference, inputs);

         //Rerun same query with same HDFS path
-        runCommand(query);
-        Referenceable process2Reference = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        runCommandWithDelay(query, 1000);
+        assertTableIsRegistered(DEFAULT_DB, tableName);
+        Referenceable process2Reference = validateProcess(hiveEventContext);
         validateHDFSPaths(process2Reference, OUTPUTS, pFile1);

         Assert.assertEquals(process2Reference.getId()._getId(), processReference.getId()._getId());

-        //Rerun same query with a new HDFS path. Will result in same process since HDFS paths are not part of qualifiedName.
+        //Rerun same query with a new HDFS path. Will result in same process since HDFS paths are not part of the qualified name for QUERY operations
         final String pFile2 = createTestDFSPath("somedfspath2");
         query = "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName;
-        runCommand(query);
-        List<Entity> p3Outputs = new ArrayList<Entity>() {{
+        runCommandWithDelay(query, 1000);
+        assertTableIsRegistered(DEFAULT_DB, tableName);
+
+        Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {{
             addAll(getOutputs(pFile2, Entity.Type.DFS_DIR));
             addAll(outputs);
         }};

-        Referenceable process3Reference = validateProcess(query, HiveOperation.QUERY, inputs, p3Outputs);
+        Referenceable process3Reference = validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, p3Outputs));
         validateHDFSPaths(process3Reference, OUTPUTS, pFile2);

         Assert.assertEquals(process3Reference.getId()._getId(), processReference.getId()._getId());
     }

     @Test
-    public void testInsertIntoDFSDir() throws Exception {
-        String tableName = createTable();
+    public void testInsertIntoDFSDirPartitioned() throws Exception {
+
+        //Test with partitioned table
+        String tableName = createTable(true);
         String pFile1 = createTestDFSPath("somedfspath1");
         String query =
-            "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName;
+            "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        final List<Entity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
-        ((WriteEntity)outputs.get(0)).setWriteType(WriteEntity.WriteType.PATH_WRITE);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
+        ((WriteEntity)outputs.iterator().next()).setWriteType(WriteEntity.WriteType.PATH_WRITE);

-        Referenceable processReference = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
-        validateHDFSPaths(processReference, OUTPUTS, pFile1);
-
-        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
-        validateInputTables(processReference, inputs);
+        final Set<ReadEntity> partitionIps = new LinkedHashSet<>(inputs);
+        partitionIps.addAll(getInputs(DEFAULT_DB + "@" + tableName + "@dt='" + PART_FILE + "'", Entity.Type.PARTITION));
+
+        Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, outputs), inputs, outputs);

-        //Rerun same query with different HDFS path
+        //Rerun same query with different HDFS path. Should not create another process and should update it.
         final String pFile2 = createTestDFSPath("somedfspath2");
         query =
-            "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName;
+            "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
         runCommand(query);

-        List<Entity> p2Outputs = new ArrayList<Entity>() {{
-            addAll(getOutputs(pFile2, Entity.Type.DFS_DIR));
+        final Set<WriteEntity> pFile2Outputs = getOutputs(pFile2, Entity.Type.DFS_DIR);
+        ((WriteEntity)pFile2Outputs.iterator().next()).setWriteType(WriteEntity.WriteType.PATH_WRITE);
+        //Now the process has 2 paths - one older with deleted reference to partition and another with the latest partition
+        Set<WriteEntity> p2Outputs = new LinkedHashSet<WriteEntity>() {{
+            addAll(pFile2Outputs);
             addAll(outputs);
         }};

-        Referenceable process2Reference = validateProcess(query, HiveOperation.QUERY, inputs, p2Outputs);
+        Referenceable process2Reference = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, pFile2Outputs), inputs, p2Outputs);
         validateHDFSPaths(process2Reference, OUTPUTS, pFile2);

         Assert.assertEquals(process2Reference.getId()._getId(), processReference.getId()._getId());
...@@ -647,12 +705,12 @@ public class HiveHookIT { ...@@ -647,12 +705,12 @@ public class HiveHookIT {
runCommand(query); runCommand(query);
List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE); Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
List<Entity> outputs = getOutputs(insertTableName, Entity.Type.TABLE); Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
outputs.get(0).setName(getQualifiedTblName(insertTableName + HiveMetaStoreBridge.TEMP_TABLE_PREFIX + SessionState.get().getSessionId())); outputs.iterator().next().setName(getQualifiedTblName(insertTableName + HiveMetaStoreBridge.TEMP_TABLE_PREFIX + SessionState.get().getSessionId()));
((WriteEntity)outputs.get(0)).setWriteType(WriteEntity.WriteType.INSERT); ((WriteEntity)outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
validateProcess(query, HiveOperation.QUERY, inputs, outputs); validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, outputs));
assertTableIsRegistered(DEFAULT_DB, tableName); assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true); assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
...@@ -660,21 +718,40 @@ public class HiveHookIT { ...@@ -660,21 +718,40 @@ public class HiveHookIT {
@Test @Test
public void testInsertIntoPartition() throws Exception { public void testInsertIntoPartition() throws Exception {
String tableName = createTable(true); final boolean isPartitionedTable = true;
String insertTableName = createTable(true); String tableName = createTable(isPartitionedTable);
String insertTableName = createTable(isPartitionedTable);
String query = String query =
"insert into " + insertTableName + " partition(dt = '2015-01-01') select id, name from " + tableName "insert into " + insertTableName + " partition(dt = '"+ PART_FILE + "') select id, name from " + tableName
+ " where dt = '2015-01-01'"; + " where dt = '"+ PART_FILE + "'";
runCommand(query); runCommand(query);
List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE); final Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
List<Entity> outputs = getOutputs(insertTableName, Entity.Type.TABLE); final Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
((WriteEntity)outputs.get(0)).setWriteType(WriteEntity.WriteType.INSERT); ((WriteEntity)outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
final Set<ReadEntity> partitionIps = new LinkedHashSet<ReadEntity>() {
{
addAll(inputs);
add(getPartitionInput());
validateProcess(query, HiveOperation.QUERY, inputs, outputs); }
};
final Set<WriteEntity> partitionOps = new LinkedHashSet<WriteEntity>() {
{
addAll(outputs);
add(getPartitionOutput());
}
};
validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps), inputs, outputs);
assertTableIsRegistered(DEFAULT_DB, tableName); assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName); assertTableIsRegistered(DEFAULT_DB, insertTableName);
//TODO - update
} }
private String random() { private String random() {
...@@ -701,65 +778,111 @@ public class HiveHookIT { ...@@ -701,65 +778,111 @@ public class HiveHookIT {
assertTableIsRegistered(DEFAULT_DB, tableName); assertTableIsRegistered(DEFAULT_DB, tableName);
String filename = "pfile://" + mkdir("export"); String filename = "pfile://" + mkdir("exportUnPartitioned");
String query = "export table " + tableName + " to \"" + filename + "\""; String query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query); runCommand(query);
List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE); Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
List<Entity> outputs = getOutputs(filename, Entity.Type.DFS_DIR); Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
Referenceable processReference = validateProcess(query, HiveOperation.EXPORT, inputs, outputs); Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
validateHDFSPaths(processReference, OUTPUTS, filename); validateHDFSPaths(processReference, OUTPUTS, filename);
validateInputTables(processReference, inputs); validateInputTables(processReference, inputs);
//Import //Import
tableName = createTable(false); String importTableName = createTable(false);
assertTableIsRegistered(DEFAULT_DB, tableName); assertTableIsRegistered(DEFAULT_DB, importTableName);
query = "import table " + tableName + " from '" + filename + "'"; query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query); runCommand(query);
outputs = getOutputs(tableName, Entity.Type.TABLE); outputs = getOutputs(importTableName, Entity.Type.TABLE);
processReference = validateProcess(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs); validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
validateHDFSPaths(processReference, INPUTS, filename);
validateOutputTables(processReference, outputs); //Should create another process
filename = "pfile://" + mkdir("export2UnPartitioned");
query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
inputs = getInputs(tableName, Entity.Type.TABLE);
outputs = getOutputs(filename, Entity.Type.DFS_DIR);
validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
//import again shouyld create another process
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
} }
@Test @Test
public void testExportImportPartitionedTable() throws Exception { public void testExportImportPartitionedTable() throws Exception {
String tableName = createTable(true); boolean isPartitionedTable = true;
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); final String tableName = createTable(isPartitionedTable);
assertTableIsRegistered(DEFAULT_DB, tableName);
//Add a partition
String partFile = "pfile://" + mkdir("partition");
String query = "alter table " + tableName + " add partition (dt='" + PART_FILE + "') location '" + partFile + "'";
runCommand(query);
String filename = "pfile://" + mkdir("export");
query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
final Set<ReadEntity> expectedExportInputs = getInputs(tableName, Entity.Type.TABLE);
final Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
//Note that export has only partition as input in this case
final Set<ReadEntity> partitionIps = getInputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
partitionIps.addAll(expectedExportInputs);
Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
validateHDFSPaths(processReference, OUTPUTS, filename);
//Import
String importTableName = createTable(true);
assertTableIsRegistered(DEFAULT_DB, importTableName);
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
final Set<ReadEntity> expectedImportInputs = getInputs(filename, Entity.Type.DFS_DIR);
final Set<WriteEntity> importOutputs = getOutputs(importTableName, Entity.Type.TABLE);
final Set<WriteEntity> partitionOps = getOutputs(DEFAULT_DB + "@" + importTableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
partitionOps.addAll(importOutputs);
validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps), expectedImportInputs, importOutputs);
//Export should update same process
filename = "pfile://" + mkdir("export2");
query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
final Set<WriteEntity> outputs2 = getOutputs(filename, Entity.Type.DFS_DIR);
Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {{
addAll(outputs2);
addAll(outputs);
}};
validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);
query = "alter table " + importTableName + " drop partition (dt='" + PART_FILE + "')";
runCommand(query);
//Import should update same process
query = "import table " + importTableName + " from '" + filename + "'";
runCommandWithDelay(query, 1000);
final Set<ReadEntity> importInputs = getInputs(filename, Entity.Type.DFS_DIR);
final Set<ReadEntity> expectedImport2Inputs = new LinkedHashSet<ReadEntity>() {{
addAll(importInputs);
addAll(expectedImportInputs);
}};
validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
}
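// For context, a minimal sketch of the constructEvent helper used throughout these tests.
// It is an assumption based on how the returned HiveEventContext is consumed here
// (getQueryStr/getInputs/getOutputs); the actual helper in HiveHookIT may set more fields.
private HiveHook.HiveEventContext constructEvent(String query, HiveOperation op,
        Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
    HiveHook.HiveEventContext event = new HiveHook.HiveEventContext();
    event.setQueryStr(query);
    event.setOperation(op);
    event.setInputs(inputs);
    event.setOutputs(outputs);
    return event;
}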
@Test
...@@ -767,13 +890,14 @@ public class HiveHookIT {
String tableName = createTable();
String query = "select * from " + tableName;
runCommand(query);
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
HiveHook.HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, null);
assertProcessIsNotRegistered(hiveEventContext);
//check with uppercase table name
query = "SELECT * from " + tableName.toUpperCase();
runCommand(query);
assertProcessIsNotRegistered(hiveEventContext);
}
@Test
...@@ -1042,10 +1166,10 @@ public class HiveHookIT {
String query = String.format("truncate table %s", tableName);
runCommand(query);
Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));
//Check lineage
String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
...@@ -1144,7 +1268,7 @@ public class HiveHookIT {
String query = "alter table " + tableName + " set location '" + testPath + "'";
runCommand(query);
assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
@Override
public void assertOnEntity(Referenceable tableRef) throws Exception {
Referenceable sdRef = (Referenceable) tableRef.get(HiveDataModelGenerator.STORAGE_DESC);
...@@ -1152,10 +1276,11 @@ public class HiveHookIT {
}
});
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName, false), null);
Referenceable processReference = atlasClient.getEntity(processId);
validateHDFSPaths(processReference, INPUTS, testPath);
}
...@@ -1302,6 +1427,20 @@ public class HiveHookIT {
assertTableIsNotRegistered(DEFAULT_DB, tableName);
}
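// runCommandWithDelay (used in the export/import test above) is assumed here to behave like
// runCommand followed by a short sleep, giving the asynchronous hook time to publish its
// notification before the next assertion; the real helper may differ in detail.
private void runCommandWithDelay(String cmd, int sleepMs) throws Exception {
    runCommand(cmd);
    Thread.sleep(sleepMs);
}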
private WriteEntity getPartitionOutput() {
WriteEntity partEntity = new WriteEntity();
partEntity.setName(PART_FILE);
partEntity.setTyp(Entity.Type.PARTITION);
return partEntity;
}
private ReadEntity getPartitionInput() {
ReadEntity partEntity = new ReadEntity();
partEntity.setName(PART_FILE);
partEntity.setTyp(Entity.Type.PARTITION);
return partEntity;
}
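// Hypothetical sketch of the getInputs/getOutputs helpers referenced in the tests above,
// mirroring the partition helpers just before this point. The entity name passed in
// (table name, DFS path or partition qualified name) is used as-is; the actual helpers
// likely qualify table names and normalize paths before setting them.
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) {
    ReadEntity entity = new ReadEntity();
    entity.setName(inputName);
    entity.setTyp(entityType);
    Set<ReadEntity> inputs = new LinkedHashSet<>();
    inputs.add(entity);
    return inputs;
}

private Set<WriteEntity> getOutputs(String outputName, Entity.Type entityType) {
    WriteEntity entity = new WriteEntity();
    entity.setName(outputName);
    entity.setTyp(entityType);
    Set<WriteEntity> outputs = new LinkedHashSet<>();
    outputs.add(entity);
    return outputs;
}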
@Test
public void testDropDatabaseWithCascade() throws Exception {
//Test Deletion of database and its corresponding tables
...@@ -1550,26 +1689,66 @@ public class HiveHookIT {
}
}
private String assertProcessIsRegistered(final HiveHook.HiveEventContext event) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
LOG.debug("Searching for process with query {}", processQFName);
return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
@Override
public void assertOnEntity(final Referenceable entity) throws Exception {
List<String> recentQueries = (List<String>) entity.get("recentQueries");
Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
}
});
} catch (Exception e) {
LOG.error("Exception : ", e);
throw e;
}
}
private String assertProcessIsRegistered(final HiveHook.HiveEventContext event, final Set<ReadEntity> inputTbls, final Set<WriteEntity> outputTbls) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
LOG.debug("Searching for process with query {}", processQFName); LOG.debug("Searching for process with query {}", processQFName);
return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() { return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
@Override @Override
public void assertOnEntity(final Referenceable entity) throws Exception { public void assertOnEntity(final Referenceable entity) throws Exception {
List<String> recentQueries = (List<String>) entity.get("recentQueries"); List<String> recentQueries = (List<String>) entity.get("recentQueries");
Assert.assertEquals(recentQueries.get(0), lower(queryStr)); Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
} }
}); });
} catch(Exception e) {
LOG.error("Exception : ", e);
throw e;
}
} }
private String getDSTypeName(Entity entity) { private String getDSTypeName(Entity entity) {
return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : FSDataTypes.HDFS_PATH().toString(); return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : FSDataTypes.HDFS_PATH().toString();
} }
private <T extends Entity> SortedMap<T, Referenceable> getSortedProcessDataSets(Set<T> inputTbls) {
SortedMap<T, Referenceable> inputs = new TreeMap<T, Referenceable>(entityComparator);
if (inputTbls != null) {
for (final T tbl : inputTbls) {
Referenceable inputTableRef = new Referenceable(getDSTypeName(tbl), new HashMap<String, Object>() {{
put(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tbl.getName());
}});
...@@ -1579,10 +1758,22 @@ public class HiveHookIT {
return inputs;
}
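// For reference, the entityComparator used for the sorted sets and maps above could be as
// simple as a case-insensitive ordering on entity name. This is a sketch only; the comparator
// actually defined in this test class may use a different ordering.
private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
    @Override
    public int compare(Entity o1, Entity o2) {
        String n1 = o1.getName() == null ? "" : o1.getName().toLowerCase();
        String n2 = o2.getName() == null ? "" : o2.getName().toLowerCase();
        return n1.compareTo(n2);
    }
};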
private void assertProcessIsNotRegistered(HiveHook.HiveEventContext event) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
LOG.debug("Searching for process with query {}", processQFName);
assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName);
} catch (Exception e) {
LOG.error("Exception : ", e);
//Rethrow so that a failing "not registered" assertion is not silently swallowed
throw e;
}
}
private void assertTableIsNotRegistered(String dbName, String tableName, boolean isTemporaryTable) throws Exception {
...
...@@ -128,7 +128,7 @@ public abstract class AtlasHook {
} catch (Exception e) {
numRetries++;
if (numRetries < maxRetries) {
LOG.info("Failed to notify atlas for entity {}. Retrying", message, e);
} else {
if (shouldLogFailedMessages && e instanceof NotificationException) {
List<String> failedMessages = ((NotificationException) e).getFailedMessages();
...
...@@ -7,6 +7,7 @@ INCOMPATIBLE CHANGES:
ALL CHANGES:
ATLAS-966 Exit execution of import_hive.sh if HIVE_HOME is not set (svimal2106 via sumasai)
ATLAS-917 Add hdfs paths to process qualified name for non-partition based queries (sumasai)
--Release 0.7-incubating
...