diff --git a/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java b/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java index b796f37..34e6dcf 100644 --- a/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java +++ b/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java @@ -19,8 +19,8 @@ package org.apache.atlas.impala.hook; import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; -import org.apache.impala.hooks.PostQueryHookContext; -import org.apache.impala.hooks.QueryExecHook; +import org.apache.impala.hooks.QueryCompleteContext; +import org.apache.impala.hooks.QueryEventHook; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,7 +28,7 @@ import org.slf4j.LoggerFactory; * This class is used to convert lineage records from Impala to lineage notifications and * send them to Atlas. */ -public class ImpalaLineageHook implements QueryExecHook { +public class ImpalaLineageHook implements QueryEventHook { private static final Logger LOG = LoggerFactory.getLogger(ImpalaLineageHook.class); private static final String ATLAS_PLUGIN_TYPE_IMPALA = "impala"; @@ -36,20 +36,20 @@ public class ImpalaLineageHook implements QueryExecHook { "org.apache.atlas.impala.hook.ImpalaHook"; private AtlasPluginClassLoader atlasPluginClassLoader = null; - private QueryExecHook impalaLineageHookImpl; + private QueryEventHook impalaLineageHookImpl; public ImpalaLineageHook() { } /** - * Execute Impala post-hook + * Execute Impala hook */ - public void postQueryExecute(PostQueryHookContext context) { - LOG.debug("==> ImpalaLineageHook.postQueryExecute()"); + public void onQueryComplete(QueryCompleteContext context) { + LOG.debug("==> ImpalaLineageHook.onQueryComplete()"); try { activatePluginClassLoader(); - impalaLineageHookImpl.postQueryExecute(context); + impalaLineageHookImpl.onQueryComplete(context); } catch (Exception ex) { String errorMessage = String.format("Error in processing impala lineage: {}", context.getLineageGraph()); LOG.error(errorMessage, ex); @@ -57,33 +57,33 @@ public class ImpalaLineageHook implements QueryExecHook { deactivatePluginClassLoader(); } - LOG.debug("<== ImpalaLineageHook.postQueryExecute()"); + LOG.debug("<== ImpalaLineageHook.onQueryComplete()"); } /** - * Initialization of Impala post-execution hook + * Initialization of Impala hook */ - public void impalaStartup() { - LOG.debug("==> ImpalaLineageHook.impalaStartup()"); + public void onImpalaStartup() { + LOG.debug("==> ImpalaLineageHook.onImpalaStartup()"); try { atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE_IMPALA, this.getClass()); @SuppressWarnings("unchecked") - Class<QueryExecHook> cls = (Class<QueryExecHook>) Class + Class<QueryEventHook> cls = (Class<QueryEventHook>) Class .forName(ATLAS_IMPALA_LINEAGE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); activatePluginClassLoader(); impalaLineageHookImpl = cls.newInstance(); - impalaLineageHookImpl.impalaStartup(); + impalaLineageHookImpl.onImpalaStartup(); } catch (Exception excp) { LOG.error("Error instantiating Atlas hook implementation for Impala lineage", excp); } finally { deactivatePluginClassLoader(); } - LOG.debug("<== ImpalaLineageHook.impalaStartup()"); + LOG.debug("<== ImpalaLineageHook.onImpalaStartup()"); } private void activatePluginClassLoader() { diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java index 2afb4eb..fc04762 100644 --- a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java @@ -18,20 +18,20 @@ package org.apache.atlas.impala.hook; -import org.apache.impala.hooks.PostQueryHookContext; -import org.apache.impala.hooks.QueryExecHook; +import org.apache.impala.hooks.QueryCompleteContext; +import org.apache.impala.hooks.QueryEventHook; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class ImpalaHook implements QueryExecHook { +public class ImpalaHook implements QueryEventHook { private static final Logger LOG = LoggerFactory.getLogger(ImpalaHook.class); private ImpalaLineageHook lineageHook; /** - * Execute Impala post-hook + * Execute Impala hook */ - public void postQueryExecute(PostQueryHookContext context) { + public void onQueryComplete(QueryCompleteContext context) { try { lineageHook.process(context.getLineageGraph()); } catch (Exception ex) { @@ -41,9 +41,9 @@ public class ImpalaHook implements QueryExecHook { } /** - * Initialization of Impala post-execution hook + * Initialization of Impala hook */ - public void impalaStartup() { + public void onImpalaStartup() { lineageHook = new ImpalaLineageHook(); } } diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java index 63c5f87..c7604ba 100644 --- a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java @@ -563,7 +563,7 @@ public abstract class BaseImpalaEvent { LineageVertexMetadata metadata = vertex.getMetadata(); if (metadata != null) { - return metadata.getCreateTime(); + return metadata.getTableCreateTime(); } } diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java index 0dc520c..3071576 100644 --- a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java @@ -270,7 +270,7 @@ public class CreateImpalaProcess extends BaseImpalaEvent { ImpalaNode tableNode = vertexNameMap.get(tableName); if (tableNode == null) { - tableNode = createTableNode(tableName, metadata.getCreateTime()); + tableNode = createTableNode(tableName, metadata.getTableCreateTime()); vertexNameMap.put(tableName, tableNode); } } diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java index 0bd236d..2b3226c 100644 --- a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java @@ -36,13 +36,13 @@ public class LineageVertexMetadata { private String tableName; // the create time of the table. Its unit is in seconds. - private Long createTime; + private Long tableCreateTime; public String getTableName() { return tableName; } - public Long getCreateTime() { return createTime; } + public Long getTableCreateTime() { return tableCreateTime; } public void setTableName(String tableName) { this.tableName = tableName; } - public void setCreateTime(Long createTime) { this.createTime = createTime; } + public void setTableCreateTime(Long createTime) { this.tableCreateTime = createTime; } } diff --git a/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json index f8de7e3..aca2661 100644 --- a/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json +++ b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json @@ -32,7 +32,7 @@ "vertexId":"db_4.view_1.count", "metadata": { "tableName": "db_4.view_1", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -41,7 +41,7 @@ "vertexId":"db_4.table_1.count", "metadata": { "tableName": "db_4.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } }, { @@ -50,7 +50,7 @@ "vertexId":"db_4.view_1.id", "metadata": { "tableName": "db_4.view_1", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -59,7 +59,7 @@ "vertexId":"db_4.table_1.id", "metadata": { "tableName": "db_4.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } } ] diff --git a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json index b6ac516..7bf361c 100644 --- a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json +++ b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json @@ -32,7 +32,7 @@ "vertexId":"db_3.table_2.count", "metadata": { "tableName": "db_3.table_2", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -41,7 +41,7 @@ "vertexId":"db_3.table_1.count", "metadata": { "tableName": "db_3.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } }, { @@ -50,7 +50,7 @@ "vertexId":"db_3.table_2.id", "metadata": { "tableName": "db_3.table_2", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -59,7 +59,7 @@ "vertexId":"db_3.table_1.id", "metadata": { "tableName": "db_3.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } } ] diff --git a/addons/impala-bridge/src/test/resources/impalaCreateView.json b/addons/impala-bridge/src/test/resources/impalaCreateView.json index ffdc658..bf55d9f 100644 --- a/addons/impala-bridge/src/test/resources/impalaCreateView.json +++ b/addons/impala-bridge/src/test/resources/impalaCreateView.json @@ -32,7 +32,7 @@ "vertexId":"db_1.view_1.count", "metadata": { "tableName": "db_1.view_1", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -41,7 +41,7 @@ "vertexId":"db_1.table_1.count", "metadata": { "tableName": "db_1.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } }, { @@ -50,7 +50,7 @@ "vertexId":"db_1.view_1.id", "metadata": { "tableName": "db_1.view_1", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -59,7 +59,7 @@ "vertexId":"db_1.table_1.id", "metadata": { "tableName": "db_1.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } } ] diff --git a/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json b/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json index 140eced..deb1466 100644 --- a/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json +++ b/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json @@ -40,7 +40,7 @@ "vertexId":"db_5.table_2.count", "metadata": { "tableName": "db_5.table_2", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -49,7 +49,7 @@ "vertexId":"db_5.table_1.count", "metadata": { "tableName": "db_5.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } }, { @@ -58,7 +58,7 @@ "vertexId":"db_5.table_2.id", "metadata": { "tableName": "db_5.table_2", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } }, { @@ -67,7 +67,7 @@ "vertexId":"db_5.table_1.id", "metadata": { "tableName": "db_5.table_1", - "createTime": 1554750070 + "tableCreateTime": 1554750070 } }, { @@ -76,7 +76,7 @@ "vertexId":"db_5.table_2.int_col", "metadata": { "tableName": "db_5.table_2", - "createTime": 1554750072 + "tableCreateTime": 1554750072 } } ] diff --git a/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/PostQueryHookContext.java b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryCompleteContext.java similarity index 74% rename from addons/impala-hook-api/src/main/java/org/apache/impala/hooks/PostQueryHookContext.java rename to addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryCompleteContext.java index c8a5b64..dc8e317 100644 --- a/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/PostQueryHookContext.java +++ b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryCompleteContext.java @@ -20,36 +20,37 @@ package org.apache.impala.hooks; import java.util.Objects; /** - * {@link PostQueryHookContext} encapsulates immutable information sent from the + * {@link QueryCompleteContext} encapsulates immutable information sent from the * BE to a post-query hook. */ -public class PostQueryHookContext { - private final String lineageGraph; +public class QueryCompleteContext { + private final String lineageGraph_; - public PostQueryHookContext(String lineageGraph) { - this.lineageGraph = Objects.requireNonNull(lineageGraph); + public QueryCompleteContext(String lineageGraph) { + lineageGraph_ = Objects.requireNonNull(lineageGraph); } /** * Returns the lineage graph sent from the backend during - * {@link QueryExecHook#postQueryExecute(PostQueryHookContext)}. This graph + * {@link QueryEventHook#onQueryComplete(QueryCompleteContext)}. This graph * object will generally contain more information than it did when it was * first constructed in the frontend, because the backend will have filled * in additional information. * <p> - * The returned object is serilized json string of the graph sent from the backend. + * The returned object is a JSON representation of the lineage graph object + * for the query. The details of the JSON translation are not provided here + * as this is meant to be a temporary feature, and the String format will + * be changed to something more strongly-typed in the future. * </p> * * @return lineage graph from the query that executed */ - public String getLineageGraph() { - return lineageGraph; - } + public String getLineageGraph() { return lineageGraph_; } @Override public String toString() { - return "PostQueryHookContext{" + - "lineageGraph='" + lineageGraph + '\'' + + return "QueryCompleteContext{" + + "lineageGraph='" + lineageGraph_ + '\'' + '}'; } -} \ No newline at end of file +} diff --git a/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryExecHook.java b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryEventHook.java similarity index 55% rename from addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryExecHook.java rename to addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryEventHook.java index a7d84d4..cd4d2ec 100644 --- a/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryExecHook.java +++ b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryEventHook.java @@ -17,13 +17,11 @@ */ package org.apache.impala.hooks; - /** - * {@link QueryExecHook} is the interface for implementations that - * can hook into supported places in Impala query execution. + * {@link QueryEventHook} is the interface for implementations that + * can hook into supported events in Impala query execution. */ -public interface QueryExecHook { - +public interface QueryEventHook { /** * Hook method invoked when the Impala daemon starts up. * <p> @@ -38,7 +36,7 @@ public interface QueryExecHook { * throw them. * </p> */ - void impalaStartup(); + void onImpalaStartup(); /** * Hook method invoked asynchronously when a (qualifying) Impala query @@ -49,12 +47,70 @@ public interface QueryExecHook { * </p> * <h3>Error-Handling</h3> * <p> - * Any {@link Exception} thrown from this method will only be caught - * and logged and will not affect the result of any query. + * Any {@link Throwable} thrown from this method will only be caught + * and logged and will not affect the result of any query. Hook implementations + * should make a best-effort to handle their own exceptions. + * </p> + * <h3>Important:</h3> + * <p> + * This hook is actually invoked when the query is <i>unregistered</i>, + * which may happen a long time after the query has executed. + * e.g. the following sequence is possible: + * <ol> + * <li>User executes query from Hue. + * <li>User goes home for weekend, leaving Hue tab open in browser + * <li>If we're lucky, the session timeout expires after some amount of idle time. + * <li>The query gets unregistered, lineage record gets logged + * </ol> + * </p> + * <h3>Service Guarantees</h3> + * + * Impala makes the following guarantees about how this method is executed + * with respect to other implementations that may be registered: + * + * <h4>Hooks are executed asynchronously</h4> + * + * All hook execution happens asynchronously of the query that triggered + * them. Hooks may still be executing after the query response has returned + * to the caller. Additionally, hooks may execute concurrently if the + * hook executor thread size is configured appropriately. + * + * <h4>Hook Invocation is in Configuration Order</h4> + * + * The <i>submission</i> of the hook execution tasks occurs in the order + * that the hooks were defined in configuration. This generally means that + * hooks will <i>start</i> executing in order, but there are no guarantees + * about finishing order. + * <p> + * For example, if configured with {@code query_event_hook_classes=hook1,hook2,hook3}, + * then hook1 will start before hook2, and hook2 will start before hook3. + * If you need to guarantee that hook1 <i>completes</i> before hook2 starts, then + * you should specify {@code query_event_hook_nthreads=1} for serial hook + * execution. * </p> * + * <h4>Hook Execution Blocks</h4> + * + * A hook will block the thread it executes on until it completes. If a hook hangs, + * then the thread also hangs. Impala (currently) will not check for hanging hooks to + * take any action. This means that if you have {@code query_event_hook_nthreads} + * less than the number of hooks, then 1 hook may effectively block others from + * executing. + * + * <h4>Hook Exceptions are non-fatal</h4> + * + * Any exception thrown from this hook method will be logged and ignored. Therefore, + * an exception in 1 hook will not affect another hook (when no shared resources are + * involved). + * + * <h4>Hook Execution may end abruptly at Impala shutdown</h4> + * + * If a hook is still executing when Impala is shutdown, there are no guarantees + * that it will complete execution before being killed. + * + * * @param context object containing the post execution context * of the query */ - void postQueryExecute(PostQueryHookContext context); -} \ No newline at end of file + void onQueryComplete(QueryCompleteContext context); +}