dataplatform / atlas · commit 5c9a699a

ATLAS-3133: Add support for Process Executions in Atlas

Authored Apr 24, 2019 by Aadarsh Jajodia
Committed by Sarath Subramanian, Apr 24, 2019

Signed-off-by: Sarath Subramanian <ssubramanian@hortonworks.com>

Parent: 0a81c250

Showing 11 changed files with 595 additions and 83 deletions (+595, -83)
AtlasHiveHookContext.java   ...java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java   +2   -0
HiveHook.java               ...ge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java   +15  -1
BaseHiveEvent.java          ...java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java   +57  -21
CreateHiveProcess.java      .../org/apache/atlas/hive/hook/events/CreateHiveProcess.java   +3   -0
CreateTable.java            ...n/java/org/apache/atlas/hive/hook/events/CreateTable.java   +7   -1
HiveDataTypes.java          .../main/java/org/apache/atlas/hive/model/HiveDataTypes.java   +2   -1
HiveITBase.java             ...ridge/src/test/java/org/apache/atlas/hive/HiveITBase.java   +19  -1
HiveHookIT.java             .../src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java   +309 -43
0010-base_model.json        addons/models/0000-Area0/0010-base_model.json                  +9   -0
1030-hive_model.json        addons/models/1000-Hadoop/1030-hive_model.json                 +92  -0
QuickStartV2.java           ...src/main/java/org/apache/atlas/examples/QuickStartV2.java   +80  -15
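
For orientation before the per-file diffs: the commit introduces a hive_process_execution entity that captures the per-run details (start/end time, user, query text, host) that previously sat on the reusable hive_process entity. Below is a sketch, not part of the commit, of the entity pair the hook now emits per query, assembled only from type names, attribute names, and calls that appear in the diffs that follow; the literal qualifiedName, timestamps, user, and host are illustrative, and the ':' separator stands in for QNAME_SEP_PROCESS, whose concrete value is not shown on this page.

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.type.AtlasTypeUtil;

public class ProcessExecutionSketch {
    public static void main(String[] args) {
        // Reusable lineage entity: one per unique (inputs, outputs) pair.
        AtlasEntity process = new AtlasEntity("hive_process");
        process.setAttribute("qualifiedName", "QUERY:default.src@primary:default.dst@primary"); // illustrative
        process.setAttribute("name", "insert into dst select * from src");                      // illustrative

        long startTime = 1556121600000L; // illustrative query start
        long endTime   = 1556121605000L; // illustrative query end

        // Per-run entity: a fresh one is emitted for every execution of the query.
        AtlasEntity execution = new AtlasEntity("hive_process_execution");
        execution.setAttribute("qualifiedName",
                process.getAttribute("qualifiedName") + ":" + startTime + ":" + endTime);
        execution.setAttribute("startTime", startTime);
        execution.setAttribute("endTime", endTime);
        execution.setAttribute("userName", "hive");      // illustrative
        execution.setAttribute("hostName", "gateway01"); // illustrative
        execution.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(process));
    }
}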
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java

@@ -133,6 +133,8 @@ public class AtlasHiveHookContext {
         return hook.getClusterName();
     }
 
+    public String getHostName() { return hook.getHostName(); }
+
     public boolean isConvertHdfsPathToLowerCase() {
         return hook.isConvertHdfsPathToLowerCase();
     }
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java

@@ -32,6 +32,8 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.net.InetAddress;
+import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;

@@ -46,7 +48,6 @@ import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
 import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
 import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
-
 public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);

@@ -66,6 +67,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size";
     public static final String DEFAULT_CLUSTER_NAME       = "primary";
+    public static final String DEFAULT_HOST_NAME          = "localhost";
 
     private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();

@@ -83,6 +85,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private static final Map<String, PreprocessAction> hiveTablesCache;
 
     private static HiveHookObjectNamesCache knownObjects = null;
+    private static String                   hostName;
 
     static {
         for (HiveOperation hiveOperation : HiveOperation.values()) {

@@ -134,6 +137,13 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         }
 
         knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null;
+
+        try {
+            hostName = InetAddress.getLocalHost().getHostName();
+        } catch (UnknownHostException e) {
+            LOG.warn("No hostname found. Setting the hostname to default value {}", DEFAULT_HOST_NAME, e);
+            hostName = DEFAULT_HOST_NAME;
+        }
     }

@@ -292,6 +302,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         return knownObjects;
     }
 
+    public String getHostName() {
+        return hostName;
+    }
+
     public static class HiveHookObjectNamesCache {
         private final int dbMaxCacheCount;
         private final int tblMaxCacheCount;
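
The hostname is resolved once per JVM in the static initializer above and then served to every event through getHostName(). A self-contained sketch of the same resolve-once-with-fallback pattern; the class and field names here are illustrative, not part of the commit:

import java.net.InetAddress;
import java.net.UnknownHostException;

public class HostNameSketch {
    public static final String DEFAULT_HOST_NAME = "localhost";

    private static final String HOST_NAME;

    static {
        String name;
        try {
            // Resolved a single time when the class loads, not per query.
            name = InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            name = DEFAULT_HOST_NAME; // same fallback the hook uses
        }
        HOST_NAME = name;
    }

    public static String getHostName() { return HOST_NAME; }
}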
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java

@@ -18,6 +18,7 @@
 package org.apache.atlas.hive.hook.events;
 
+import com.google.common.collect.ImmutableMap;
 import org.apache.atlas.hive.hook.AtlasHiveHookContext;
 import org.apache.atlas.hive.hook.HiveHook.PreprocessAction;
 import org.apache.atlas.model.instance.AtlasEntity;

@@ -27,6 +28,8 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
 import org.apache.atlas.model.instance.AtlasObjectId;
 import org.apache.atlas.model.instance.AtlasStruct;
 import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.repository.Constants;
+import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.utils.HdfsNameServiceResolver;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;

@@ -49,6 +52,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.net.InetAddress;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;

@@ -67,24 +71,25 @@ import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_PROCESS;
 public abstract class BaseHiveEvent {
     private static final Logger LOG = LoggerFactory.getLogger(BaseHiveEvent.class);
 
     public static final String HIVE_TYPE_DB                = "hive_db";
     public static final String HIVE_TYPE_TABLE             = "hive_table";
     public static final String HIVE_TYPE_STORAGEDESC       = "hive_storagedesc";
     public static final String HIVE_TYPE_COLUMN            = "hive_column";
     public static final String HIVE_TYPE_PROCESS           = "hive_process";
     public static final String HIVE_TYPE_COLUMN_LINEAGE    = "hive_column_lineage";
     public static final String HIVE_TYPE_SERDE             = "hive_serde";
     public static final String HIVE_TYPE_ORDER             = "hive_order";
+    public static final String HIVE_TYPE_PROCESS_EXECUTION = "hive_process_execution";
     public static final String HDFS_TYPE_PATH              = "hdfs_path";
     public static final String HBASE_TYPE_TABLE            = "hbase_table";
     public static final String HBASE_TYPE_NAMESPACE        = "hbase_namespace";
     public static final String AWS_S3_BUCKET               = "aws_s3_bucket";
     public static final String AWS_S3_PSEUDO_DIR           = "aws_s3_pseudo_dir";
     public static final String AWS_S3_OBJECT               = "aws_s3_object";
 
     public static final String SCHEME_SEPARATOR            = "://";
     public static final String S3_SCHEME                   = "s3" + SCHEME_SEPARATOR;
     public static final String S3A_SCHEME                  = "s3a" + SCHEME_SEPARATOR;
 
     public static final String ATTRIBUTE_QUALIFIED_NAME    = "qualifiedName";
     public static final String ATTRIBUTE_NAME              = "name";

@@ -126,6 +131,8 @@ public abstract class BaseHiveEvent {
     public static final String ATTRIBUTE_START_TIME         = "startTime";
     public static final String ATTRIBUTE_USER_NAME          = "userName";
     public static final String ATTRIBUTE_QUERY_TEXT         = "queryText";
+    public static final String ATTRIBUTE_PROCESS            = "process";
+    public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions";
     public static final String ATTRIBUTE_QUERY_ID           = "queryId";
     public static final String ATTRIBUTE_QUERY_PLAN         = "queryPlan";
     public static final String ATTRIBUTE_END_TIME           = "endTime";

@@ -139,6 +146,7 @@ public abstract class BaseHiveEvent {
     public static final String ATTRIBUTE_NAMESPACE          = "namespace";
     public static final String ATTRIBUTE_OBJECT_PREFIX      = "objectPrefix";
     public static final String ATTRIBUTE_BUCKET             = "bucket";
+    public static final String ATTRIBUTE_HOSTNAME           = "hostName";
     public static final String HBASE_STORAGE_HANDLER_CLASS  = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
     public static final String HBASE_DEFAULT_NAMESPACE      = "default";

@@ -146,6 +154,7 @@ public abstract class BaseHiveEvent {
     public static final String HBASE_PARAM_TABLE_NAME       = "hbase.table.name";
     public static final long   MILLIS_CONVERT_FACTOR        = 1000;
     public static final String HDFS_PATH_PREFIX             = "hdfs://";
+    public static final String EMPTY_ATTRIBUTE_VALUE        = "";
 
     public static final Map<Integer, String> OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>();

@@ -605,14 +614,41 @@ public abstract class BaseHiveEvent {
         ret.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputs));
         ret.setAttribute(ATTRIBUTE_NAME, queryStr);
         ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, getOperationName());
+
+        // We are setting an empty value to these attributes, since now we have a new entity type called hive process
+        // execution which captures these values. We have to set empty values here because these attributes are
+        // mandatory attributes for hive process entity type.
+        ret.setAttribute(ATTRIBUTE_START_TIME, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_END_TIME, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
+        ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
+
+        return ret;
+    }
+
+    protected AtlasEntity getHiveProcessExecutionEntity(AtlasEntity hiveProcess) throws Exception {
+        AtlasEntity ret      = new AtlasEntity(HIVE_TYPE_PROCESS_EXECUTION);
+        String      queryStr = getQueryString();
+
+        if (queryStr != null) {
+            queryStr = queryStr.toLowerCase().trim();
+        }
+
+        Long endTime = System.currentTimeMillis();
+
+        ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() +
+                QNAME_SEP_PROCESS + getQueryStartTime().toString() + QNAME_SEP_PROCESS + endTime.toString());
+        ret.setAttribute(ATTRIBUTE_NAME, queryStr + QNAME_SEP_PROCESS + getQueryStartTime().toString());
         ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime());
-        ret.setAttribute(ATTRIBUTE_END_TIME, System.currentTimeMillis());
+        ret.setAttribute(ATTRIBUTE_END_TIME, endTime);
         ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
         ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
         ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
         ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
-        ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
+        ret.setAttribute(ATTRIBUTE_HOSTNAME, getContext().getHostName());
+        ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
 
         return ret;
     }
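
The uniqueness scheme in getHiveProcessExecutionEntity() above: the execution's qualifiedName appends the query start and end times to the owning process's qualifiedName, so reruns of the same query map to the same hive_process but always produce a fresh hive_process_execution. A sketch with illustrative values; QNAME_SEP_PROCESS is imported from AtlasHiveHookContext and its concrete character is not visible on this page, so ':' is an assumption:

String processQName   = "QUERY:default.src@primary:default.dst@primary"; // illustrative hive_process qualifiedName
long   queryStartTime = 1556121600000L;                                  // illustrative
long   endTime        = System.currentTimeMillis();

// getHiveProcessExecutionEntity() effectively computes:
String executionQName = processQName + ':' + queryStartTime + ':' + endTime;
// A rerun keeps processQName stable but yields a new executionQName.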
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java

@@ -117,6 +117,9 @@ public class CreateHiveProcess extends BaseHiveEvent {
             ret.addEntity(process);
 
+            AtlasEntity processExecution = getHiveProcessExecutionEntity(process);
+            ret.addEntity(processExecution);
+
             processColumnLineage(process, ret);
 
             addProcessedEntities(ret);
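
The ordering in this hunk matters: getHiveProcessExecutionEntity(process) reads the already-populated process entity (its qualifiedName seeds the execution's qualifiedName, and the relationship points back at it), so the execution is built and added only after the process. A compressed sketch of the pattern shared by CreateHiveProcess and CreateTable below; `ret` is the entity collection the hook returns, and the argument lists are abbreviated:

AtlasEntity process          = getHiveProcessEntity(/* input entity list, output entity list */ inputs, outputs);
AtlasEntity processExecution = getHiveProcessExecutionEntity(process); // per-run entity linked via "process"

ret.addEntity(process);
ret.addEntity(processExecution);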
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java

@@ -130,8 +130,11 @@ public class CreateTable extends BaseHiveEvent {
                 } else {
                     processEntity = getHiveProcessEntity(Collections.singletonList(tblEntity), Collections.singletonList(hbaseTableEntity));
                 }
 
                 ret.addEntity(processEntity);
+
+                AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
+                ret.addEntity(processExecution);
             }
         } else {
             if (EXTERNAL_TABLE.equals(table.getTableType())) {

@@ -140,6 +143,9 @@ public class CreateTable extends BaseHiveEvent {
                 ret.addEntity(processEntity);
                 ret.addReferredEntity(hdfsPathEntity);
+
+                AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
+                ret.addEntity(processExecution);
             }
         }
addons/hive-bridge/src/main/java/org/apache/atlas/hive/model/HiveDataTypes.java

@@ -43,7 +43,8 @@ public enum HiveDataTypes {
     HIVE_ROLE,
     HIVE_TYPE,
     HIVE_PROCESS,
-    HIVE_COLUMN_LINEAGE
+    HIVE_COLUMN_LINEAGE,
+    HIVE_PROCESS_EXECUTION,
     // HIVE_VIEW,
     ;
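
How this enum connects to the rest of the commit: registration code and the tests resolve these constants to Atlas type names via getName(). A one-line sketch, assuming getName() derives the lower-cased type name (consistent with HiveDataTypes.HIVE_PROCESS.getName() as used in HiveHookIT below):

// Presumably resolves to "hive_process_execution", matching the
// HIVE_TYPE_PROCESS_EXECUTION constant in BaseHiveEvent and the new
// entity def added in 1030-hive_model.json.
String typeName = HiveDataTypes.HIVE_PROCESS_EXECUTION.getName();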
addons/hive-bridge/src/test/java/org/apache/atlas/hive/HiveITBase.java

@@ -218,6 +218,25 @@ public class HiveITBase {
         return (String) entity.getGuid();
     }
 
+    protected String assertEntityIsRegisteredViaGuid(String guid, final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
+        waitFor(80000, new HiveHookIT.Predicate() {
+            @Override
+            public void evaluate() throws Exception {
+                AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
+                AtlasEntity                        entity                 = atlasEntityWithExtInfo.getEntity();
+
+                assertNotNull(entity);
+
+                if (assertPredicate != null) {
+                    assertPredicate.assertOnEntity(entity);
+                }
+            }
+        });
+
+        AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
+        AtlasEntity                        entity                 = atlasEntityWithExtInfo.getEntity();
+
+        return (String) entity.getGuid();
+    }
+
     protected AtlasEntity assertEntityIsRegistedViaEntity(final String typeName, final String property, final String value,
                                                           final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
         waitFor(80000, new HiveHookIT.Predicate() {

@@ -473,7 +492,6 @@ public class HiveITBase {
         return buffer.toString();
     }
 
-
     protected static Entity getEntityByType(Set<? extends Entity> entities, Entity.Type entityType) {
         for (Entity entity : entities) {
             if (entity.getType() == entityType) {
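
A hypothetical call of the new helper, mirroring how assertProcessExecutionIsRegistered in HiveHookIT (further down) uses it: poll until the entity with the given GUID is visible in Atlas, then run an extra assertion on it. `executionGuid` is a placeholder, not a value from the commit:

String guid = assertEntityIsRegisteredViaGuid(executionGuid, new HiveHookIT.AssertPredicate() {
    @Override
    public void assertOnEntity(final AtlasEntity entity) throws Exception {
        // e.g. check the per-run attribute is populated
        assertNotNull(entity.getAttribute(BaseHiveEvent.ATTRIBUTE_QUERY_TEXT));
    }
});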
addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
View file @
5c9a699a
...
@@ -29,13 +29,13 @@ import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
...
@@ -29,13 +29,13 @@ import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import
org.apache.atlas.hive.hook.events.BaseHiveEvent
;
import
org.apache.atlas.hive.hook.events.BaseHiveEvent
;
import
org.apache.atlas.hive.model.HiveDataTypes
;
import
org.apache.atlas.hive.model.HiveDataTypes
;
import
org.apache.atlas.model.instance.AtlasClassification
;
import
org.apache.atlas.model.instance.AtlasClassification
;
import
org.apache.atlas.model.instance.AtlasEntityHeader
;
import
org.apache.atlas.model.lineage.AtlasLineageInfo
;
import
org.apache.atlas.model.typedef.AtlasClassificationDef
;
import
org.apache.atlas.model.instance.AtlasEntity
;
import
org.apache.atlas.model.instance.AtlasEntity
;
import
org.apache.atlas.model.instance.AtlasEntityHeader
;
import
org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo
;
import
org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo
;
import
org.apache.atlas.model.instance.AtlasObjectId
;
import
org.apache.atlas.model.instance.AtlasObjectId
;
import
org.apache.atlas.model.instance.AtlasStruct
;
import
org.apache.atlas.model.instance.AtlasStruct
;
import
org.apache.atlas.model.lineage.AtlasLineageInfo
;
import
org.apache.atlas.model.typedef.AtlasClassificationDef
;
import
org.apache.atlas.model.typedef.AtlasTypesDef
;
import
org.apache.atlas.model.typedef.AtlasTypesDef
;
import
org.apache.atlas.type.AtlasTypeUtil
;
import
org.apache.atlas.type.AtlasTypeUtil
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.collections.CollectionUtils
;
...
@@ -49,15 +49,12 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
...
@@ -49,15 +49,12 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import
org.apache.hadoop.hive.ql.metadata.HiveException
;
import
org.apache.hadoop.hive.ql.metadata.HiveException
;
import
org.apache.hadoop.hive.ql.metadata.Table
;
import
org.apache.hadoop.hive.ql.metadata.Table
;
import
org.apache.hadoop.hive.ql.plan.HiveOperation
;
import
org.apache.hadoop.hive.ql.plan.HiveOperation
;
import
org.apache.hadoop.hive.ql.session.SessionState
;
import
org.apache.hadoop.security.UserGroupInformation
;
import
org.apache.hadoop.security.UserGroupInformation
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
org.testng.Assert
;
import
org.testng.Assert
;
import
org.testng.annotations.Test
;
import
org.testng.annotations.Test
;
import
java.io.File
;
import
java.nio.file.Files
;
import
java.text.ParseException
;
import
java.text.ParseException
;
import
java.util.*
;
import
java.util.*
;
...
@@ -308,7 +305,15 @@ public class HiveHookIT extends HiveITBase {
...
@@ -308,7 +305,15 @@ public class HiveHookIT extends HiveITBase {
final
Set
<
ReadEntity
>
readEntities
=
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
);
final
Set
<
ReadEntity
>
readEntities
=
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
);
final
Set
<
WriteEntity
>
writeEntities
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
final
Set
<
WriteEntity
>
writeEntities
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
assertProcessIsRegistered
(
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
readEntities
,
writeEntities
));
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
readEntities
,
writeEntities
);
AtlasEntity
processEntity1
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processExecutionEntity1
=
validateProcessExecution
(
processEntity1
,
hiveEventContext
);
AtlasObjectId
process
=
toAtlasObjectId
(
processExecutionEntity1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity1
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
assertTableIsRegistered
(
DEFAULT_DB
,
ctasTableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
ctasTableName
);
}
}
...
@@ -347,7 +352,12 @@ public class HiveHookIT extends HiveITBase {
...
@@ -347,7 +352,12 @@ public class HiveHookIT extends HiveITBase {
Set
<
WriteEntity
>
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
inputs
,
outputs
);
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
inputs
,
outputs
);
String
processId
=
assertProcessIsRegistered
(
hiveEventContext
);
AtlasEntity
processEntity1
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processExecutionEntity1
=
validateProcessExecution
(
processEntity1
,
hiveEventContext
);
AtlasObjectId
process
=
toAtlasObjectId
(
processExecutionEntity1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity1
.
getGuid
());
String
drpquery
=
String
.
format
(
"drop table %s "
,
ctasTableName
);
String
drpquery
=
String
.
format
(
"drop table %s "
,
ctasTableName
);
runCommandWithDelay
(
drpquery
,
100
);
runCommandWithDelay
(
drpquery
,
100
);
...
@@ -360,13 +370,17 @@ public class HiveHookIT extends HiveITBase {
...
@@ -360,13 +370,17 @@ public class HiveHookIT extends HiveITBase {
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
String
process2Id
=
assertProcessIsRegistered
(
hiveEventContext
,
inputs
,
outputs
);
AtlasEntity
processEntity2
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processExecutionEntity2
=
validateProcessExecution
(
processEntity2
,
hiveEventContext
);
assertNotEquals
(
process2Id
,
processId
);
AtlasObjectId
process2
=
toAtlasObjectId
(
processExecutionEntity2
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process2
.
getGuid
(),
processEntity2
.
getGuid
());
AtlasEntity
processsEntity
=
atlasClientV2
.
getEntityByGuid
(
processId
).
getEntity
();
assertNotEquals
(
processEntity1
.
getGuid
(),
processEntity2
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity2
),
1
);
validateOutputTables
(
process
sEntity
,
outputs
);
validateOutputTables
(
process
Entity1
,
outputs
);
}
}
@Test
@Test
...
@@ -377,7 +391,14 @@ public class HiveHookIT extends HiveITBase {
...
@@ -377,7 +391,14 @@ public class HiveHookIT extends HiveITBase {
runCommand
(
query
);
runCommand
(
query
);
assertProcessIsRegistered
(
constructEvent
(
query
,
HiveOperation
.
CREATEVIEW
,
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
),
getOutputs
(
viewName
,
Entity
.
Type
.
TABLE
)));
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
CREATEVIEW
,
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
),
getOutputs
(
viewName
,
Entity
.
Type
.
TABLE
));
AtlasEntity
processEntity1
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processExecutionEntity1
=
validateProcessExecution
(
processEntity1
,
hiveEventContext
);
AtlasObjectId
process1
=
toAtlasObjectId
(
processExecutionEntity1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process1
.
getGuid
(),
processEntity1
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
assertTableIsRegistered
(
DEFAULT_DB
,
viewName
);
assertTableIsRegistered
(
DEFAULT_DB
,
viewName
);
}
}
...
@@ -392,7 +413,15 @@ public class HiveHookIT extends HiveITBase {
...
@@ -392,7 +413,15 @@ public class HiveHookIT extends HiveITBase {
String
table1Id
=
assertTableIsRegistered
(
DEFAULT_DB
,
table1Name
);
String
table1Id
=
assertTableIsRegistered
(
DEFAULT_DB
,
table1Name
);
assertProcessIsRegistered
(
constructEvent
(
query
,
HiveOperation
.
CREATEVIEW
,
getInputs
(
table1Name
,
Entity
.
Type
.
TABLE
),
getOutputs
(
viewName
,
Entity
.
Type
.
TABLE
)));
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
CREATEVIEW
,
getInputs
(
table1Name
,
Entity
.
Type
.
TABLE
),
getOutputs
(
viewName
,
Entity
.
Type
.
TABLE
));
String
processId1
=
assertProcessIsRegistered
(
hiveEventContext
);
AtlasEntity
processEntity1
=
atlasClientV2
.
getEntityByGuid
(
processId1
).
getEntity
();
AtlasEntity
processExecutionEntity1
=
validateProcessExecution
(
processEntity1
,
hiveEventContext
);
AtlasObjectId
process1
=
toAtlasObjectId
(
processExecutionEntity1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process1
.
getGuid
(),
processEntity1
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
String
viewId
=
assertTableIsRegistered
(
DEFAULT_DB
,
viewName
);
String
viewId
=
assertTableIsRegistered
(
DEFAULT_DB
,
viewName
);
...
@@ -412,8 +441,16 @@ public class HiveHookIT extends HiveITBase {
...
@@ -412,8 +441,16 @@ public class HiveHookIT extends HiveITBase {
runCommand
(
query
);
runCommand
(
query
);
//Check if alter view process is reqistered
HiveEventContext
hiveEventContext2
=
constructEvent
(
query
,
HiveOperation
.
CREATEVIEW
,
getInputs
(
table2Name
,
assertProcessIsRegistered
(
constructEvent
(
query
,
HiveOperation
.
CREATEVIEW
,
getInputs
(
table2Name
,
Entity
.
Type
.
TABLE
),
getOutputs
(
viewName
,
Entity
.
Type
.
TABLE
)));
Entity
.
Type
.
TABLE
),
getOutputs
(
viewName
,
Entity
.
Type
.
TABLE
));
String
processId2
=
assertProcessIsRegistered
(
hiveEventContext2
);
AtlasEntity
processEntity2
=
atlasClientV2
.
getEntityByGuid
(
processId2
).
getEntity
();
AtlasEntity
processExecutionEntity2
=
validateProcessExecution
(
processEntity2
,
hiveEventContext2
);
AtlasObjectId
process2
=
toAtlasObjectId
(
processExecutionEntity2
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process2
.
getGuid
(),
processEntity2
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity2
),
2
);
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity2
.
getGuid
());
String
table2Id
=
assertTableIsRegistered
(
DEFAULT_DB
,
table2Name
);
String
table2Id
=
assertTableIsRegistered
(
DEFAULT_DB
,
table2Name
);
...
@@ -526,6 +563,12 @@ public class HiveHookIT extends HiveITBase {
...
@@ -526,6 +563,12 @@ public class HiveHookIT extends HiveITBase {
return
validateProcess
(
event
,
event
.
getInputs
(),
event
.
getOutputs
());
return
validateProcess
(
event
,
event
.
getInputs
(),
event
.
getOutputs
());
}
}
private
AtlasEntity
validateProcessExecution
(
AtlasEntity
hiveProcess
,
HiveEventContext
event
)
throws
Exception
{
String
processExecutionId
=
assertProcessExecutionIsRegistered
(
hiveProcess
,
event
);
AtlasEntity
processExecutionEntity
=
atlasClientV2
.
getEntityByGuid
(
processExecutionId
).
getEntity
();
return
processExecutionEntity
;
}
@Test
@Test
public
void
testInsertIntoTable
()
throws
Exception
{
public
void
testInsertIntoTable
()
throws
Exception
{
String
inputTable1Name
=
createTable
();
String
inputTable1Name
=
createTable
();
...
@@ -581,8 +624,85 @@ public class HiveHookIT extends HiveITBase {
...
@@ -581,8 +624,85 @@ public class HiveHookIT extends HiveITBase {
runCommandWithDelay
(
query
,
1000
);
runCommandWithDelay
(
query
,
1000
);
AtlasEntity
processEntity2
=
validateProcess
(
event
,
expectedInputs
,
outputs
);
AtlasEntity
processEntity2
=
validateProcess
(
event
,
expectedInputs
,
outputs
);
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity2
),
2
);
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity2
.
getGuid
());
}
@Test
public
void
testInsertIntoTableProcessExecution
()
throws
Exception
{
String
inputTable1Name
=
createTable
();
String
inputTable2Name
=
createTable
();
String
insertTableName
=
createTable
();
assertTableIsRegistered
(
DEFAULT_DB
,
inputTable1Name
);
assertTableIsRegistered
(
DEFAULT_DB
,
insertTableName
);
String
query
=
"insert into "
+
insertTableName
+
" select t1.id, t1.name from "
+
inputTable2Name
+
" as t2, "
+
inputTable1Name
+
" as t1 where t1.id=t2.id"
;
runCommand
(
query
);
Set
<
ReadEntity
>
inputs
=
getInputs
(
inputTable1Name
,
Entity
.
Type
.
TABLE
);
inputs
.
addAll
(
getInputs
(
inputTable2Name
,
Entity
.
Type
.
TABLE
));
Set
<
WriteEntity
>
outputs
=
getOutputs
(
insertTableName
,
Entity
.
Type
.
TABLE
);
(
outputs
.
iterator
().
next
()).
setWriteType
(
WriteEntity
.
WriteType
.
INSERT
);
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
outputs
);
Set
<
ReadEntity
>
expectedInputs
=
new
TreeSet
<
ReadEntity
>(
entityComparator
)
{{
addAll
(
inputs
);
}};
assertTableIsRegistered
(
DEFAULT_DB
,
insertTableName
);
AtlasEntity
processEntity1
=
validateProcess
(
event
,
expectedInputs
,
outputs
);
AtlasEntity
processExecutionEntity1
=
validateProcessExecution
(
processEntity1
,
event
);
AtlasObjectId
process
=
toAtlasObjectId
(
processExecutionEntity1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity1
.
getGuid
());
//Test sorting of tbl names
SortedSet
<
String
>
sortedTblNames
=
new
TreeSet
<>();
sortedTblNames
.
add
(
inputTable1Name
.
toLowerCase
());
sortedTblNames
.
add
(
inputTable2Name
.
toLowerCase
());
//Verify sorted order of inputs in qualified name
Assert
.
assertEquals
(
processEntity1
.
getAttribute
(
ATTRIBUTE_QUALIFIED_NAME
),
Joiner
.
on
(
SEP
).
join
(
"QUERY"
,
getQualifiedTblName
(
sortedTblNames
.
first
()),
HiveMetaStoreBridge
.
getTableCreatedTime
(
hiveMetaStoreBridge
.
getHiveClient
().
getTable
(
DEFAULT_DB
,
sortedTblNames
.
first
())),
getQualifiedTblName
(
sortedTblNames
.
last
()),
HiveMetaStoreBridge
.
getTableCreatedTime
(
hiveMetaStoreBridge
.
getHiveClient
().
getTable
(
DEFAULT_DB
,
sortedTblNames
.
last
())))
+
IO_SEP
+
SEP
+
Joiner
.
on
(
SEP
).
join
(
WriteEntity
.
WriteType
.
INSERT
.
name
(),
getQualifiedTblName
(
insertTableName
),
HiveMetaStoreBridge
.
getTableCreatedTime
(
hiveMetaStoreBridge
.
getHiveClient
().
getTable
(
DEFAULT_DB
,
insertTableName
)))
);
//Rerun same query. Should result in same process
runCommandWithDelay
(
query
,
1000
);
AtlasEntity
processEntity2
=
validateProcess
(
event
,
expectedInputs
,
outputs
);
AtlasEntity
processExecutionEntity2
=
validateProcessExecution
(
processEntity2
,
event
);
process
=
toAtlasObjectId
(
processExecutionEntity2
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity2
.
getGuid
());
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity2
.
getGuid
());
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity2
.
getGuid
());
String
queryWithDifferentPredicate
=
"insert into "
+
insertTableName
+
" select t1.id, t1.name from "
+
inputTable2Name
+
" as t2, "
+
inputTable1Name
+
" as t1 where t1.id=100"
;
runCommandWithDelay
(
queryWithDifferentPredicate
,
1000
);
HiveEventContext
event3
=
constructEvent
(
queryWithDifferentPredicate
,
HiveOperation
.
QUERY
,
inputs
,
outputs
);
AtlasEntity
processEntity3
=
validateProcess
(
event3
,
expectedInputs
,
outputs
);
AtlasEntity
processExecutionEntity3
=
validateProcessExecution
(
processEntity3
,
event3
);
process
=
toAtlasObjectId
(
processExecutionEntity3
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity3
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity3
),
3
);
Assert
.
assertEquals
(
processEntity2
.
getGuid
(),
processEntity3
.
getGuid
());
}
}
@Test
@Test
...
@@ -593,7 +713,14 @@ public class HiveHookIT extends HiveITBase {
...
@@ -593,7 +713,14 @@ public class HiveHookIT extends HiveITBase {
runCommand
(
query
);
runCommand
(
query
);
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
QUERY
,
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
),
null
));
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
QUERY
,
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
),
null
);
AtlasEntity
hiveProcess
=
validateProcess
(
event
);
AtlasEntity
hiveProcessExecution
=
validateProcessExecution
(
hiveProcess
,
event
);
AtlasObjectId
process
=
toAtlasObjectId
(
hiveProcessExecution
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
hiveProcess
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
hiveProcess
),
1
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
}
}
...
@@ -613,6 +740,10 @@ public class HiveHookIT extends HiveITBase {
...
@@ -613,6 +740,10 @@ public class HiveHookIT extends HiveITBase {
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
outputs
);
HiveEventContext
hiveEventContext
=
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
outputs
);
AtlasEntity
processEntity
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processEntity
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processExecutionEntity1
=
validateProcessExecution
(
processEntity
,
hiveEventContext
);
AtlasObjectId
process
=
toAtlasObjectId
(
processExecutionEntity1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity
.
getGuid
());
validateHDFSPaths
(
processEntity
,
OUTPUTS
,
pFile1
);
validateHDFSPaths
(
processEntity
,
OUTPUTS
,
pFile1
);
...
@@ -626,6 +757,11 @@ public class HiveHookIT extends HiveITBase {
...
@@ -626,6 +757,11 @@ public class HiveHookIT extends HiveITBase {
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
AtlasEntity
process2Entity
=
validateProcess
(
hiveEventContext
);
AtlasEntity
process2Entity
=
validateProcess
(
hiveEventContext
);
AtlasEntity
processExecutionEntity2
=
validateProcessExecution
(
processEntity
,
hiveEventContext
);
AtlasObjectId
process2
=
toAtlasObjectId
(
processExecutionEntity2
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process2
.
getGuid
(),
process2Entity
.
getGuid
());
validateHDFSPaths
(
process2Entity
,
OUTPUTS
,
pFile1
);
validateHDFSPaths
(
process2Entity
,
OUTPUTS
,
pFile1
);
...
@@ -646,9 +782,13 @@ public class HiveHookIT extends HiveITBase {
...
@@ -646,9 +782,13 @@ public class HiveHookIT extends HiveITBase {
}};
}};
AtlasEntity
process3Entity
=
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
p3Outputs
));
AtlasEntity
process3Entity
=
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
p3Outputs
));
AtlasEntity
processExecutionEntity3
=
validateProcessExecution
(
processEntity
,
hiveEventContext
);
AtlasObjectId
process3
=
toAtlasObjectId
(
processExecutionEntity3
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process3
.
getGuid
(),
process3Entity
.
getGuid
());
validateHDFSPaths
(
process3Entity
,
OUTPUTS
,
pFile2
);
validateHDFSPaths
(
process3Entity
,
OUTPUTS
,
pFile2
);
Assert
.
assertEquals
(
numberOfProcessExecutions
(
process3Entity
),
3
);
Assert
.
assertEquals
(
process3Entity
.
getGuid
(),
processEntity
.
getGuid
());
Assert
.
assertEquals
(
process3Entity
.
getGuid
(),
processEntity
.
getGuid
());
}
}
...
@@ -714,7 +854,13 @@ public class HiveHookIT extends HiveITBase {
...
@@ -714,7 +854,13 @@ public class HiveHookIT extends HiveITBase {
outputs
.
iterator
().
next
().
setWriteType
(
WriteEntity
.
WriteType
.
INSERT
);
outputs
.
iterator
().
next
().
setWriteType
(
WriteEntity
.
WriteType
.
INSERT
);
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
outputs
));
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
QUERY
,
inputs
,
outputs
);
AtlasEntity
hiveProcess
=
validateProcess
(
event
);
AtlasEntity
hiveProcessExecution
=
validateProcessExecution
(
hiveProcess
,
event
);
AtlasObjectId
process
=
toAtlasObjectId
(
hiveProcessExecution
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
hiveProcess
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
hiveProcess
),
1
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
insertTableName
,
null
,
true
);
assertTableIsRegistered
(
DEFAULT_DB
,
insertTableName
,
null
,
true
);
...
@@ -748,8 +894,13 @@ public class HiveHookIT extends HiveITBase {
...
@@ -748,8 +894,13 @@ public class HiveHookIT extends HiveITBase {
}
}
};
};
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
QUERY
,
partitionIps
,
partitionOps
),
inputs
,
outputs
);
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
QUERY
,
partitionIps
,
partitionOps
);
AtlasEntity
hiveProcess
=
validateProcess
(
event
,
inputs
,
outputs
);
AtlasEntity
hiveProcessExecution
=
validateProcessExecution
(
hiveProcess
,
event
);
AtlasObjectId
process
=
toAtlasObjectId
(
hiveProcessExecution
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
hiveProcess
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
hiveProcess
),
1
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
insertTableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
insertTableName
);
...
@@ -769,8 +920,14 @@ public class HiveHookIT extends HiveITBase {
...
@@ -769,8 +920,14 @@ public class HiveHookIT extends HiveITBase {
Set
<
ReadEntity
>
inputs
=
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
);
Set
<
ReadEntity
>
inputs
=
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
filename
,
Entity
.
Type
.
DFS_DIR
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
filename
,
Entity
.
Type
.
DFS_DIR
);
AtlasEntity
processEntity
=
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
inputs
,
outputs
));
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
inputs
,
outputs
);
AtlasEntity
processEntity
=
validateProcess
(
event
);
AtlasEntity
hiveProcessExecution
=
validateProcessExecution
(
processEntity
,
event
);
AtlasObjectId
process
=
toAtlasObjectId
(
hiveProcessExecution
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process
.
getGuid
(),
processEntity
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity
),
1
);
validateHDFSPaths
(
processEntity
,
OUTPUTS
,
filename
);
validateHDFSPaths
(
processEntity
,
OUTPUTS
,
filename
);
validateInputTables
(
processEntity
,
inputs
);
validateInputTables
(
processEntity
,
inputs
);
...
@@ -785,7 +942,16 @@ public class HiveHookIT extends HiveITBase {
...
@@ -785,7 +942,16 @@ public class HiveHookIT extends HiveITBase {
outputs
=
getOutputs
(
importTableName
,
Entity
.
Type
.
TABLE
);
outputs
=
getOutputs
(
importTableName
,
Entity
.
Type
.
TABLE
);
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
getInputs
(
filename
,
Entity
.
Type
.
DFS_DIR
),
outputs
));
HiveEventContext
event2
=
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
getInputs
(
filename
,
Entity
.
Type
.
DFS_DIR
),
outputs
);
AtlasEntity
processEntity2
=
validateProcess
(
event2
);
AtlasEntity
hiveProcessExecution2
=
validateProcessExecution
(
processEntity2
,
event2
);
AtlasObjectId
process2
=
toAtlasObjectId
(
hiveProcessExecution2
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process2
.
getGuid
(),
processEntity2
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity2
),
1
);
Assert
.
assertNotEquals
(
processEntity
.
getGuid
(),
processEntity2
.
getGuid
());
//Should create another process
//Should create another process
filename
=
"pfile://"
+
mkdir
(
"export2UnPartitioned"
);
filename
=
"pfile://"
+
mkdir
(
"export2UnPartitioned"
);
...
@@ -796,7 +962,18 @@ public class HiveHookIT extends HiveITBase {
...
@@ -796,7 +962,18 @@ public class HiveHookIT extends HiveITBase {
inputs
=
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
);
inputs
=
getInputs
(
tableName
,
Entity
.
Type
.
TABLE
);
outputs
=
getOutputs
(
filename
,
Entity
.
Type
.
DFS_DIR
);
outputs
=
getOutputs
(
filename
,
Entity
.
Type
.
DFS_DIR
);
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
inputs
,
outputs
));
HiveEventContext
event3
=
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
inputs
,
outputs
);
AtlasEntity
processEntity3
=
validateProcess
(
event3
);
AtlasEntity
hiveProcessExecution3
=
validateProcessExecution
(
processEntity3
,
event3
);
AtlasObjectId
process3
=
toAtlasObjectId
(
hiveProcessExecution3
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process3
.
getGuid
(),
processEntity3
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity3
),
1
);
// Should be a different process compared to the previous ones
Assert
.
assertNotEquals
(
processEntity
.
getGuid
(),
processEntity3
.
getGuid
());
Assert
.
assertNotEquals
(
processEntity2
.
getGuid
(),
processEntity3
.
getGuid
());
//import again shouyld create another process
//import again shouyld create another process
query
=
"import table "
+
importTableName
+
" from '"
+
filename
+
"'"
;
query
=
"import table "
+
importTableName
+
" from '"
+
filename
+
"'"
;
...
@@ -805,7 +982,20 @@ public class HiveHookIT extends HiveITBase {
...
@@ -805,7 +982,20 @@ public class HiveHookIT extends HiveITBase {
outputs
=
getOutputs
(
importTableName
,
Entity
.
Type
.
TABLE
);
outputs
=
getOutputs
(
importTableName
,
Entity
.
Type
.
TABLE
);
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
getInputs
(
filename
,
Entity
.
Type
.
DFS_DIR
),
outputs
));
HiveEventContext
event4
=
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
getInputs
(
filename
,
Entity
.
Type
.
DFS_DIR
),
outputs
);
AtlasEntity
processEntity4
=
validateProcess
(
event4
);
AtlasEntity
hiveProcessExecution4
=
validateProcessExecution
(
processEntity4
,
event4
);
AtlasObjectId
process4
=
toAtlasObjectId
(
hiveProcessExecution4
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process4
.
getGuid
(),
processEntity4
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity4
),
1
);
// Should be a different process compared to the previous ones
Assert
.
assertNotEquals
(
processEntity
.
getGuid
(),
processEntity4
.
getGuid
());
Assert
.
assertNotEquals
(
processEntity2
.
getGuid
(),
processEntity4
.
getGuid
());
Assert
.
assertNotEquals
(
processEntity3
.
getGuid
(),
processEntity4
.
getGuid
());
}
}
@Test
@Test
...
@@ -833,9 +1023,15 @@ public class HiveHookIT extends HiveITBase {
...
@@ -833,9 +1023,15 @@ public class HiveHookIT extends HiveITBase {
partitionIps
.
addAll
(
expectedExportInputs
);
partitionIps
.
addAll
(
expectedExportInputs
);
AtlasEntity
processEntity
=
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
partitionIps
,
outputs
),
expectedExportInputs
,
outputs
);
HiveEventContext
event1
=
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
partitionIps
,
outputs
);
AtlasEntity
processEntity1
=
validateProcess
(
event1
,
expectedExportInputs
,
outputs
);
AtlasEntity
hiveProcessExecution1
=
validateProcessExecution
(
processEntity1
,
event1
);
AtlasObjectId
process1
=
toAtlasObjectId
(
hiveProcessExecution1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process1
.
getGuid
(),
processEntity1
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
validateHDFSPaths
(
processEntity
,
OUTPUTS
,
filename
);
validateHDFSPaths
(
processEntity
1
,
OUTPUTS
,
filename
);
//Import
//Import
String
importTableName
=
createTable
(
true
);
String
importTableName
=
createTable
(
true
);
...
@@ -852,7 +1048,14 @@ public class HiveHookIT extends HiveITBase {
...
@@ -852,7 +1048,14 @@ public class HiveHookIT extends HiveITBase {
partitionOps
.
addAll
(
importOutputs
);
partitionOps
.
addAll
(
importOutputs
);
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
expectedImportInputs
,
partitionOps
),
expectedImportInputs
,
importOutputs
);
HiveEventContext
event2
=
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
expectedImportInputs
,
partitionOps
);
AtlasEntity
processEntity2
=
validateProcess
(
event2
,
expectedImportInputs
,
importOutputs
);
AtlasEntity
hiveProcessExecution2
=
validateProcessExecution
(
processEntity2
,
event2
);
AtlasObjectId
process2
=
toAtlasObjectId
(
hiveProcessExecution2
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process2
.
getGuid
(),
processEntity2
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity2
),
1
);
Assert
.
assertNotEquals
(
processEntity1
.
getGuid
(),
processEntity2
.
getGuid
());
//Export should update same process
//Export should update same process
filename
=
"pfile://"
+
mkdir
(
"export2"
);
filename
=
"pfile://"
+
mkdir
(
"export2"
);
...
@@ -866,7 +1069,17 @@ public class HiveHookIT extends HiveITBase {
...
@@ -866,7 +1069,17 @@ public class HiveHookIT extends HiveITBase {
addAll
(
outputs
);
addAll
(
outputs
);
}};
}};
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
partitionIps
,
outputs2
),
expectedExportInputs
,
p3Outputs
);
HiveEventContext
event3
=
constructEvent
(
query
,
HiveOperation
.
EXPORT
,
partitionIps
,
outputs2
);
// this process entity should return same as the processEntity1 since the inputs and outputs are the same,
// hence the qualifiedName will be the same
AtlasEntity
processEntity3
=
validateProcess
(
event3
,
expectedExportInputs
,
p3Outputs
);
AtlasEntity
hiveProcessExecution3
=
validateProcessExecution
(
processEntity3
,
event3
);
AtlasObjectId
process3
=
toAtlasObjectId
(
hiveProcessExecution3
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process3
.
getGuid
(),
processEntity3
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity3
),
2
);
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity3
.
getGuid
());
query
=
"alter table "
+
importTableName
+
" drop partition (dt='"
+
PART_FILE
+
"')"
;
query
=
"alter table "
+
importTableName
+
" drop partition (dt='"
+
PART_FILE
+
"')"
;
...
@@ -883,7 +1096,17 @@ public class HiveHookIT extends HiveITBase {
...
@@ -883,7 +1096,17 @@ public class HiveHookIT extends HiveITBase {
addAll
(
expectedImportInputs
);
addAll
(
expectedImportInputs
);
}};
}};
validateProcess
(
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
importInputs
,
partitionOps
),
expectedImport2Inputs
,
importOutputs
);
HiveEventContext
event4
=
constructEvent
(
query
,
HiveOperation
.
IMPORT
,
importInputs
,
partitionOps
);
// This process is going to be same as processEntity2
AtlasEntity
processEntity4
=
validateProcess
(
event4
,
expectedImport2Inputs
,
importOutputs
);
AtlasEntity
hiveProcessExecution4
=
validateProcessExecution
(
processEntity4
,
event4
);
AtlasObjectId
process4
=
toAtlasObjectId
(
hiveProcessExecution4
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process4
.
getGuid
(),
processEntity4
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity4
),
2
);
Assert
.
assertEquals
(
processEntity2
.
getGuid
(),
processEntity4
.
getGuid
());
Assert
.
assertNotEquals
(
processEntity1
.
getGuid
(),
processEntity4
.
getGuid
());
}
}
@Test
@Test
...
@@ -1176,15 +1399,12 @@ public class HiveHookIT extends HiveITBase {
...
@@ -1176,15 +1399,12 @@ public class HiveHookIT extends HiveITBase {
);
);
}
}
/*
/**
The test is disabled by default
* Reenabling this test since HIVE-14706 is fixed now and the hive version we are using now sends
Reason : Atlas uses Hive version 1.2.x and the Hive patch HIVE-13112 which enables column level lineage is not
* us the column lineage information
committed in Hive version 1.2.x
* @throws Exception
This test will fail if the lineage information is not available from Hive
*/
Once the patch for HIVE-13112 is committed to Hive branch 1.2.x, the test can be enabled
@Test
Please track HIVE-14706 to know the status of column lineage availability in latest Hive versions i.e 2.1.x
*/
@Test
(
enabled
=
false
)
public
void
testColumnLevelLineage
()
throws
Exception
{
public
void
testColumnLevelLineage
()
throws
Exception
{
String
sourceTable
=
"table"
+
random
();
String
sourceTable
=
"table"
+
random
();
...
@@ -1204,8 +1424,14 @@ public class HiveHookIT extends HiveITBase {
...
@@ -1204,8 +1424,14 @@ public class HiveHookIT extends HiveITBase {
Set
<
ReadEntity
>
inputs
=
getInputs
(
sourceTable
,
Entity
.
Type
.
TABLE
);
Set
<
ReadEntity
>
inputs
=
getInputs
(
sourceTable
,
Entity
.
Type
.
TABLE
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
inputs
,
outputs
);
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
inputs
,
outputs
);
AtlasEntity
processEntity1
=
validateProcess
(
event
);
AtlasEntity
hiveProcessExecution1
=
validateProcessExecution
(
processEntity1
,
event
);
AtlasObjectId
process1
=
toAtlasObjectId
(
hiveProcessExecution1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process1
.
getGuid
(),
processEntity1
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity1
.
getGuid
());
assertProcessIsRegistered
(
event
);
assertTableIsRegistered
(
DEFAULT_DB
,
ctasTableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
ctasTableName
);
String
processQName
=
sortEventsAndGetProcessQualifiedName
(
event
);
String
processQName
=
sortEventsAndGetProcessQualifiedName
(
event
);
     ...

@@ -1281,14 +1507,20 @@ public class HiveHookIT extends HiveITBase {
         Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
         String           tableId = assertTableIsRegistered(DEFAULT_DB, tableName);

-        validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));
+        HiveEventContext event                   = constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs);
+        AtlasEntity      processEntity           = validateProcess(event);
+        AtlasEntity      processExecutionEntity1 = validateProcessExecution(processEntity, event);
+        AtlasObjectId    process                 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity.getGuid());

         //Check lineage
         String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
         AtlasLineageInfo atlasLineageInfoInput = atlasClientV2.getLineageInfo(tableId, AtlasLineageInfo.LineageDirection.INPUT, 0);
         Map<String, AtlasEntityHeader> entityMap = atlasLineageInfoInput.getGuidEntityMap();

+        Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
         //Below should be assertTrue - Fix https://issues.apache.org/jira/browse/ATLAS-653
         Assert.assertFalse(entityMap.containsKey(tableId));
     }
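Reviewer note: `numberOfProcessExecutions`, used by the new assertions in this file, is a private helper added at the bottom of this diff (see the @@ -2080,4 +2341,9 hunk below); it simply counts the object ids behind the process's `processExecutions` relationship.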
     ...

@@ -1402,7 +1634,8 @@ public class HiveHookIT extends HiveITBase {
         String processQualifiedName = getTableProcessQualifiedName(DEFAULT_DB, tableName);
         String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQualifiedName, null);

         AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
+        Assert.assertEquals(numberOfProcessExecutions(processEntity), 2);
+        //validateProcessExecution(processEntity, event);
         validateHDFSPaths(processEntity, INPUTS, testPath);
     }
     ...

@@ -1890,6 +2123,34 @@ public class HiveHookIT extends HiveITBase {
         }
     }

+    private String assertProcessExecutionIsRegistered(AtlasEntity hiveProcess, final HiveEventContext event) throws Exception {
+        try {
+            String              guid              = "";
+            List<AtlasObjectId> processExecutions = toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS));
+
+            for (AtlasObjectId processExecution : processExecutions) {
+                AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(processExecution.getGuid());
+                AtlasEntity                        entity                 = atlasEntityWithExtInfo.getEntity();
+
+                if (String.valueOf(entity.getAttribute(ATTRIBUTE_QUERY_TEXT)).equals(event.getQueryStr().toLowerCase().trim())) {
+                    guid = entity.getGuid();
+                }
+            }
+
+            return assertEntityIsRegisteredViaGuid(guid, new AssertPredicate() {
+                @Override
+                public void assertOnEntity(final AtlasEntity entity) throws Exception {
+                    String queryText = (String) entity.getAttribute(ATTRIBUTE_QUERY_TEXT);
+
+                    Assert.assertEquals(queryText, event.getQueryStr().toLowerCase().trim());
+                }
+            });
+        } catch (Exception e) {
+            LOG.error("Exception : ", e);
+            throw e;
+        }
+    }
+
     private String getDSTypeName(Entity entity) {
         return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : HiveMetaStoreBridge.HDFS_PATH;
     }
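Reviewer note: `assertProcessExecutionIsRegistered` resolves an execution by comparing its stored `queryText` with the lower-cased, trimmed query string of the event, keeping the last match. When a process accumulates several executions of the same query text, the tests therefore assert on counts via `numberOfProcessExecutions` rather than on one specific execution.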
     ...

@@ -2080,4 +2341,9 @@ public class HiveHookIT extends HiveITBase {
         @Override
         public Entity.Type getType() {
             return type;
         }
     }

+    private int numberOfProcessExecutions(AtlasEntity hiveProcess) {
+        return toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size();
+    }
 }
addons/models/0000-Area0/0010-base_model.json  View file @ 5c9a699a
     ...

@@ -324,6 +324,15 @@
                 "isUnique": false
             }
         ]
+    },
+    {
+        "name":          "ProcessExecution",
+        "superTypes":    [ "Asset" ],
+        "serviceType":   "atlas_core",
+        "typeVersion":   "1.0",
+        "attributeDefs": []
     }
 ],
 "relationshipDefs": [
 ...
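Reviewer note: the core `ProcessExecution` type is an empty marker extending `Asset`; concrete attributes come from subtypes such as `hive_process_execution` in the hive model below. As a hedged illustration (not part of this commit), a client could register its own subtype like this; `my_job_execution` is a made-up name, and the calls mirror the ones QuickStartV2 uses further down:

```java
import java.util.Collections;

import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.type.AtlasTypeUtil;

public class RegisterExecutionSubtype {
    public static void main(String[] args) throws Exception {
        // endpoint and credentials are placeholders for a local Atlas instance
        AtlasClientV2 client = new AtlasClientV2(new String[] { "http://localhost:21000" },
                                                 new String[] { "admin", "admin" });

        // "my_job_execution" is a hypothetical type name; it subtypes the new core ProcessExecution
        AtlasEntityDef execDef = AtlasTypeUtil.createClassTypeDef("my_job_execution", "my_job_execution", "1.0",
                                     Collections.singleton("ProcessExecution"),
                                     AtlasTypeUtil.createRequiredAttrDef("queryText", "string"));

        AtlasTypesDef typesDef = new AtlasTypesDef(Collections.emptyList(), Collections.emptyList(),
                                                   Collections.emptyList(), Collections.singletonList(execDef),
                                                   Collections.emptyList());

        // submit the new entity def to the type registry
        client.createAtlasTypeDefs(typesDef);
    }
}
```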
addons/models/1000-Hadoop/1030-hive_model.json  View file @ 5c9a699a
     ...

@@ -457,6 +457,80 @@
                 "isUnique": false
             }
         ]
+    },
+    {
+        "name":          "hive_process_execution",
+        "superTypes":    [ "ProcessExecution" ],
+        "serviceType":   "hive",
+        "typeVersion":   "1.0",
+        "attributeDefs": [
+            { "name": "startTime",  "typeName": "date",   "cardinality": "SINGLE", "isIndexable": false, "isOptional": false, "isUnique": false },
+            { "name": "endTime",    "typeName": "date",   "cardinality": "SINGLE", "isIndexable": false, "isOptional": false, "isUnique": false },
+            { "name": "userName",   "typeName": "string", "cardinality": "SINGLE", "isIndexable": true,  "isOptional": false, "isUnique": false },
+            { "name": "queryText",  "typeName": "string", "cardinality": "SINGLE", "isIndexable": false, "isOptional": false, "isUnique": false },
+            { "name": "queryGraph", "typeName": "string", "cardinality": "SINGLE", "isIndexable": false, "isOptional": true,  "isUnique": false },
+            { "name": "queryId",    "typeName": "string", "cardinality": "SINGLE", "isIndexable": false, "isOptional": false, "isUnique": false },
+            { "name": "queryPlan",  "typeName": "string", "cardinality": "SINGLE", "isIndexable": false, "isOptional": false, "isUnique": false },
+            { "name": "hostName",   "typeName": "string", "cardinality": "SINGLE", "isIndexable": true,  "isOptional": false, "isUnique": false }
+        ]
     }
 ],
 "relationshipDefs": [
 ...

@@ -567,6 +641,24 @@
             "cardinality": "SET"
         },
         "propagateTags": "NONE"
+    },
+    {
+        "name":                 "hive_process_process_executions",
+        "serviceType":          "hive",
+        "typeVersion":          "1.0",
+        "relationshipCategory": "COMPOSITION",
+        "endDef1": {
+            "type":        "hive_process",
+            "name":        "processExecutions",
+            "cardinality": "SET",
+            "isContainer": true
+        },
+        "endDef2": {
+            "type":        "hive_process_execution",
+            "name":        "process",
+            "cardinality": "SINGLE"
+        },
+        "propagateTags": "NONE"
     }
 ]
}
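Reviewer note: putting the model together, here is a hedged sketch (not from this commit) of what one `hive_process_execution` instance might look like when built with the `AtlasEntity` API used elsewhere in this patch. Every literal value is illustrative only, and the real `qualifiedName` scheme is computed in BaseHiveEvent, which this page elides:

```java
import java.util.Date;

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.type.AtlasTypeUtil;

class HiveExecutionExample {
    // hiveProcess is assumed to be an already-registered hive_process entity
    static AtlasEntity buildExecution(AtlasEntity hiveProcess) {
        AtlasEntity exec = new AtlasEntity("hive_process_execution");

        // illustrative values throughout, including the qualifiedName scheme
        exec.setAttribute("name",          "insert into t select ...");
        exec.setAttribute("qualifiedName", "default.t@cl1:1556123456789:1556123466789");
        exec.setAttribute("startTime",     new Date(1556123456789L));
        exec.setAttribute("endTime",       new Date(1556123466789L));
        exec.setAttribute("userName",      "hive");
        exec.setAttribute("queryText",     "insert into t select ...");
        exec.setAttribute("queryId",       "hive_20190424_0001");
        exec.setAttribute("queryPlan",     "Not Supported");
        exec.setAttribute("hostName",      "gateway-1.example.com");

        // link back to the owning hive_process through the new relationship
        exec.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));

        return exec;
    }
}
```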
webapp/src/main/java/org/apache/atlas/examples/QuickStartV2.java  View file @ 5c9a699a
     ...

@@ -19,6 +19,7 @@
 package org.apache.atlas.examples;

 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
 import com.sun.jersey.core.util.MultivaluedMapImpl;
 import org.apache.atlas.ApplicationProperties;
 import org.apache.atlas.AtlasClientV2;
 ...

@@ -42,6 +43,8 @@ import org.apache.atlas.model.typedef.AtlasEntityDef;
 import org.apache.atlas.model.typedef.AtlasRelationshipDef;
 import org.apache.atlas.model.typedef.AtlasRelationshipDef.PropagateTags;
 import org.apache.atlas.model.typedef.AtlasTypesDef;
+import org.apache.atlas.repository.Constants;
+import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.utils.AuthenticationUtil;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.configuration.Configuration;
 ...
@@ -107,9 +110,15 @@ public class QuickStartV2 {
     public static final String JDBC_CLASSIFICATION              = "JdbcAccess";
     public static final String LOGDATA_CLASSIFICATION           = "Log Data";
     public static final String LOAD_SALES_DAILY_PROCESS         = "loadSalesDaily";
-    public static final String LOAD_SALES_MONTHLY_PROCESS       = "loadSalesMonthly";
-    public static final String LOAD_LOGS_MONTHLY_PROCESS        = "loadLogsMonthly";
+    public static final String LOAD_SALES_DAILY_PROCESS_EXEC1   = "loadSalesDailyExec1";
+    public static final String LOAD_SALES_DAILY_PROCESS_EXEC2   = "loadSalesDailyExec2";
+    public static final String LOAD_SALES_MONTHLY_PROCESS       = "loadSalesMonthly";
+    public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC1 = "loadSalesMonthlyExec1";
+    public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC2 = "loadSalesMonthlyExec2";
+    public static final String LOAD_LOGS_MONTHLY_PROCESS        = "loadLogsMonthly";
+    public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC1  = "loadLogsMonthlyExec1";
+    public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC2  = "loadLogsMonthlyExec2";
     public static final String PRODUCT_DIM_VIEW                 = "product_dim_view";
     public static final String CUSTOMER_DIM_VIEW                = "customer_dim_view";
 ...
@@ -119,6 +128,7 @@ public class QuickStartV2 {
     public static final String TABLE_TYPE                  = "Table";
     public static final String VIEW_TYPE                   = "View";
     public static final String LOAD_PROCESS_TYPE           = "LoadProcess";
+    public static final String LOAD_PROCESS_EXECUTION_TYPE = "LoadProcessExecution";
     public static final String STORAGE_DESC_TYPE           = "StorageDesc";
     public static final String TABLE_DATABASE_TYPE         = "Table_DB";
 ...

@@ -126,13 +136,14 @@ public class QuickStartV2 {
     public static final String VIEW_TABLES_TYPE                    = "View_Tables";
     public static final String TABLE_COLUMNS_TYPE                  = "Table_Columns";
     public static final String TABLE_STORAGE_DESC_TYPE             = "Table_StorageDesc";
+    public static final String PROCESS_PROCESS_EXECUTION_DESC_TYPE = "Process_ProcessExecution";
     public static final String VERSION_1                           = "1.0";
     public static final String MANAGED_TABLE                       = "Managed";
     public static final String EXTERNAL_TABLE                      = "External";
     public static final String CLUSTER_SUFFIX                      = "@cl1";

-    public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE,
+    public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE, LOAD_PROCESS_EXECUTION_TYPE,
                                            VIEW_TYPE, JDBC_CLASSIFICATION, ETL_CLASSIFICATION, METRIC_CLASSIFICATION,
                                            PII_CLASSIFICATION, FACT_CLASSIFICATION, DIMENSION_CLASSIFICATION, LOGDATA_CLASSIFICATION,
                                            TABLE_DATABASE_TYPE, VIEW_DATABASE_TYPE, VIEW_TABLES_TYPE, TABLE_COLUMNS_TYPE, TABLE_STORAGE_DESC_TYPE };
 ...
@@ -256,28 +267,41 @@ public class QuickStartV2 {
                                                   createRequiredAttrDef("queryId", "string"),
                                                   createRequiredAttrDef("queryGraph", "string"));

+        AtlasEntityDef processExecutionTypeDef = createClassTypeDef(LOAD_PROCESS_EXECUTION_TYPE, LOAD_PROCESS_EXECUTION_TYPE, VERSION_1, Collections.singleton("ProcessExecution"),
+                                                  createOptionalAttrDef("userName", "string"),
+                                                  createOptionalAttrDef("startTime", "long"),
+                                                  createOptionalAttrDef("endTime", "long"),
+                                                  createRequiredAttrDef("queryText", "string"),
+                                                  createRequiredAttrDef("queryPlan", "string"),
+                                                  createRequiredAttrDef("queryId", "string"),
+                                                  createRequiredAttrDef("queryGraph", "string"));
+
         AtlasEntityDef viewTypeDef = createClassTypeDef(VIEW_TYPE, VIEW_TYPE, VERSION_1, Collections.singleton("DataSet"));

         // Relationship-Definitions
         AtlasRelationshipDef tableDatabaseTypeDef = createRelationshipTypeDef(TABLE_DATABASE_TYPE, TABLE_DATABASE_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
                                                       createRelationshipEndDef(TABLE_TYPE, "db", SINGLE, false),
                                                       createRelationshipEndDef(DATABASE_TYPE, "tables", SET, true));

         AtlasRelationshipDef viewDatabaseTypeDef = createRelationshipTypeDef(VIEW_DATABASE_TYPE, VIEW_DATABASE_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
                                                       createRelationshipEndDef(VIEW_TYPE, "db", SINGLE, false),
                                                       createRelationshipEndDef(DATABASE_TYPE, "views", SET, true));

         AtlasRelationshipDef viewTablesTypeDef = createRelationshipTypeDef(VIEW_TABLES_TYPE, VIEW_TABLES_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
                                                       createRelationshipEndDef(VIEW_TYPE, "inputTables", SET, true),
                                                       createRelationshipEndDef(TABLE_TYPE, "view", SINGLE, false));

         AtlasRelationshipDef tableColumnsTypeDef = createRelationshipTypeDef(TABLE_COLUMNS_TYPE, TABLE_COLUMNS_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
                                                       createRelationshipEndDef(TABLE_TYPE, "columns", SET, true),
                                                       createRelationshipEndDef(COLUMN_TYPE, "table", SINGLE, false));

         AtlasRelationshipDef tableStorageDescTypeDef = createRelationshipTypeDef(TABLE_STORAGE_DESC_TYPE, TABLE_STORAGE_DESC_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
                                                       createRelationshipEndDef(TABLE_TYPE, "sd", SINGLE, true),
                                                       createRelationshipEndDef(STORAGE_DESC_TYPE, "table", SINGLE, false));

+        AtlasRelationshipDef processProcessExecutionTypeDef = createRelationshipTypeDef(PROCESS_PROCESS_EXECUTION_DESC_TYPE, PROCESS_PROCESS_EXECUTION_DESC_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
+                                                      createRelationshipEndDef(LOAD_PROCESS_TYPE, "processExecutions", SET, true),
+                                                      createRelationshipEndDef(LOAD_PROCESS_EXECUTION_TYPE, "process", SINGLE, false));
+
         // Classification-Definitions
         AtlasClassificationDef dimClassifDef = createTraitTypeDef(DIMENSION_CLASSIFICATION, "Dimension Classification", VERSION_1, Collections.emptySet());
 ...

@@ -288,8 +312,8 @@ public class QuickStartV2 {
         AtlasClassificationDef jdbcClassifDef = createTraitTypeDef(JDBC_CLASSIFICATION, "JdbcAccess Classification", VERSION_1, Collections.emptySet());
         AtlasClassificationDef logClassifDef  = createTraitTypeDef(LOGDATA_CLASSIFICATION, "LogData Classification", VERSION_1, Collections.emptySet());

-        List<AtlasEntityDef>         entityDefs         = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, viewTypeDef);
-        List<AtlasRelationshipDef>   relationshipDefs   = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef);
+        List<AtlasEntityDef>         entityDefs         = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, processExecutionTypeDef, viewTypeDef);
+        List<AtlasRelationshipDef>   relationshipDefs   = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef, processProcessExecutionTypeDef);
         List<AtlasClassificationDef> classificationDefs = asList(dimClassifDef, factClassifDef, piiClassifDef, metricClassifDef, etlClassifDef, jdbcClassifDef, logClassifDef);

         return new AtlasTypesDef(Collections.emptyList(), Collections.emptyList(), classificationDefs, entityDefs, relationshipDefs);
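Reviewer note: the QuickStart example wires Process_ProcessExecution as an AGGREGATION, whereas the hive model above declares hive_process_process_executions as a COMPOSITION; the example only demonstrates the modeling pattern and does not need to mirror the hive hook's ownership semantics.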
     ...

@@ -362,20 +386,36 @@ public class QuickStartV2 {
         createView(CUSTOMER_DIM_VIEW, reportingDB, asList(customerDim), DIMENSION_CLASSIFICATION, JDBC_CLASSIFICATION);

         // Process entities
-        createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
+        AtlasEntity loadProcess = createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
                       asList(salesFact, timeDim),
                       asList(salesFactDaily),
                       "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC1, "hive query execution 1 for daily summary", "John ETL",
+                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC2, "hive query execution 2 for daily summary", "John ETL",
+                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);

-        createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
+        AtlasEntity loadProcess2 = createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
                       asList(salesFactDaily),
                       asList(salesFactMonthly),
                       "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "John ETL",
+                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "John ETL",
+                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);

-        createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
+        AtlasEntity loadProcess3 = createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
                       asList(loggingFactDaily),
                       asList(loggingFactMonthly),
                       "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "Tim ETL",
+                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "Tim ETL",
+                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
     }
     private AtlasEntity createInstance(AtlasEntity entity) throws Exception {
 ...

@@ -522,6 +562,31 @@ public class QuickStartV2 {
         return createInstance(entity);
     }

+    AtlasEntity createProcessExecution(AtlasEntity hiveProcess, String name, String description, String user,
+                                       String queryText, String queryPlan, String queryId, String queryGraph,
+                                       String... classificationNames) throws Exception {
+        AtlasEntity entity    = new AtlasEntity(LOAD_PROCESS_EXECUTION_TYPE);
+        Long        startTime = System.currentTimeMillis();
+        Long        endTime   = System.currentTimeMillis() + 10000;
+
+        // set attributes
+        entity.setAttribute("name", name);
+        entity.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, name + CLUSTER_SUFFIX + startTime.toString() + endTime.toString());
+        entity.setAttribute("description", description);
+        entity.setAttribute("user", user);
+        entity.setAttribute("startTime", startTime);
+        entity.setAttribute("endTime", endTime);
+        entity.setAttribute("queryText", queryText);
+        entity.setAttribute("queryPlan", queryPlan);
+        entity.setAttribute("queryId", queryId);
+        entity.setAttribute("queryGraph", queryGraph);
+        entity.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
+
+        // set classifications
+        entity.setClassifications(toAtlasClassifications(classificationNames));
+
+        return createInstance(entity);
+    }
+
     AtlasEntity createView(String name, AtlasEntity database, List<AtlasEntity> inputTables, String... classificationNames) throws Exception {
         AtlasEntity entity = new AtlasEntity(VIEW_TYPE);
 ...
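Reviewer note: createProcessExecution keeps each example execution's qualifiedName unique by concatenating the display name, the cluster suffix, and the start/end timestamps (i.e. a pattern like loadSalesDailyExec1@cl1&lt;startMillis&gt;&lt;endMillis&gt;), so repeated runs of the same process never collide on the unique-attribute lookup. This mirrors the hive model above, where every query run becomes its own hive_process_execution under a single, stable hive_process.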