dataplatform / atlas — Commits
Commit 5c9a699a — authored 6 years ago by Aadarsh Jajodia, committed 6 years ago by Sarath Subramanian
ATLAS-3133: Add support for Process Executions in Atlas
Signed-off-by: Sarath Subramanian <ssubramanian@hortonworks.com>
Parent: 0a81c250
Showing 11 changed files with 595 additions and 83 deletions (+595 −83):
- addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java (+2 −0)
- addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java (+15 −1)
- addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java (+57 −21)
- addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java (+3 −0)
- addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java (+7 −1)
- addons/hive-bridge/src/main/java/org/apache/atlas/hive/model/HiveDataTypes.java (+2 −1)
- addons/hive-bridge/src/test/java/org/apache/atlas/hive/HiveITBase.java (+19 −1)
- addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java (+309 −43)
- addons/models/0000-Area0/0010-base_model.json (+9 −0)
- addons/models/1000-Hadoop/1030-hive_model.json (+92 −0)
- …/src/main/java/org/apache/atlas/examples/QuickStartV2.java (+80 −15)
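Taken together, these diffs introduce a new entity type, hive_process_execution, which captures the per-run details of a Hive query (start/end time, user, query text, query id, host) and links back to the long-lived hive_process lineage entity, whose qualifiedName stays stable across reruns. The typedef itself lands in 1030-hive_model.json; this page excerpt ends partway through HiveHookIT.java, so the diffs for the two model JSON files and QuickStartV2.java are not shown. As a minimal sketch of the resulting entity pair — every concrete value below is invented for illustration; the real construction is in BaseHiveEvent.getHiveProcessEntity() and getHiveProcessExecutionEntity() further down:

```java
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.type.AtlasTypeUtil;

// Sketch only: values are invented; see the BaseHiveEvent.java diff below
// for the actual construction logic.
public class ProcessExecutionShape {
    public static void main(String[] args) {
        // Long-lived lineage node: qualifiedName is derived from inputs/outputs,
        // so reruns of the same query resolve to the same entity.
        AtlasEntity process = new AtlasEntity("hive_process");
        process.setAttribute("qualifiedName", "QUERY:default.t1@cluster:1555000000:default.t2@cluster:1555000001");
        process.setAttribute("startTime", "");  // blanked by this commit: captured per execution instead
        process.setAttribute("userName", "");

        // Per-run node: one per query execution, linked back to its process.
        AtlasEntity execution = new AtlasEntity("hive_process_execution");
        execution.setAttribute("qualifiedName", process.getAttribute("qualifiedName") + ":1555000002000:1555000004321");
        execution.setAttribute("startTime", 1555000002000L);
        execution.setAttribute("endTime", 1555000004321L);
        execution.setAttribute("userName", "hive");
        execution.setAttribute("queryText", "insert into t2 select * from t1");
        execution.setAttribute("hostName", "worker-01");
        execution.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(process));
    }
}
```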
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java

```diff
@@ -133,6 +133,8 @@ public class AtlasHiveHookContext {
         return hook.getClusterName();
     }
 
+    public String getHostName() { return hook.getHostName(); }
+
     public boolean isConvertHdfsPathToLowerCase() {
         return hook.isConvertHdfsPathToLowerCase();
     }
```
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java

```diff
@@ -32,6 +32,8 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.net.InetAddress;
+import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -46,7 +48,6 @@ import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
 import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
 import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
-
 
 public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);
@@ -66,6 +67,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size";
     public static final String DEFAULT_CLUSTER_NAME       = "primary";
+    public static final String DEFAULT_HOST_NAME           = "localhost";
 
     private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
@@ -83,6 +85,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private static final Map<String, PreprocessAction> hiveTablesCache;
     private static HiveHookObjectNamesCache            knownObjects = null;
+    private static String                              hostName;
 
     static {
         for (HiveOperation hiveOperation : HiveOperation.values()) {
@@ -134,6 +137,13 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         }
 
         knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null;
+
+        try {
+            hostName = InetAddress.getLocalHost().getHostName();
+        } catch (UnknownHostException e) {
+            LOG.warn("No hostname found. Setting the hostname to default value {}", DEFAULT_HOST_NAME, e);
+            hostName = DEFAULT_HOST_NAME;
+        }
     }
@@ -292,6 +302,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         return knownObjects;
     }
 
+    public String getHostName() {
+        return hostName;
+    }
+
     public static class HiveHookObjectNamesCache {
         private final int dbMaxCacheCount;
         private final int tblMaxCacheCount;
```
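The static initializer above resolves the hook host once per JVM and falls back to "localhost" rather than failing the hook when resolution is impossible. A standalone sketch of the same pattern, with only the class name invented:

```java
import java.net.InetAddress;
import java.net.UnknownHostException;

// Sketch of the resolve-once-with-fallback pattern used by HiveHook above.
public final class HostNameSketch {
    private static final String DEFAULT_HOST_NAME = "localhost";
    private static final String HOST_NAME;

    static {
        String name;
        try {
            name = InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            name = DEFAULT_HOST_NAME; // metadata capture should not break query execution
        }
        HOST_NAME = name;
    }

    public static String getHostName() {
        return HOST_NAME;
    }

    private HostNameSketch() { }
}
```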
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java

```diff
@@ -18,6 +18,7 @@
 package org.apache.atlas.hive.hook.events;
 
+import com.google.common.collect.ImmutableMap;
 import org.apache.atlas.hive.hook.AtlasHiveHookContext;
 import org.apache.atlas.hive.hook.HiveHook.PreprocessAction;
 import org.apache.atlas.model.instance.AtlasEntity;
@@ -27,6 +28,8 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
 import org.apache.atlas.model.instance.AtlasObjectId;
 import org.apache.atlas.model.instance.AtlasStruct;
 import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.repository.Constants;
+import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.utils.HdfsNameServiceResolver;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;
@@ -49,6 +52,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.net.InetAddress;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -67,24 +71,25 @@ import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_PROCESS;
 public abstract class BaseHiveEvent {
     private static final Logger LOG = LoggerFactory.getLogger(BaseHiveEvent.class);
 
     public static final String HIVE_TYPE_DB                = "hive_db";
     public static final String HIVE_TYPE_TABLE             = "hive_table";
     public static final String HIVE_TYPE_STORAGEDESC       = "hive_storagedesc";
     public static final String HIVE_TYPE_COLUMN            = "hive_column";
     public static final String HIVE_TYPE_PROCESS           = "hive_process";
     public static final String HIVE_TYPE_COLUMN_LINEAGE    = "hive_column_lineage";
     public static final String HIVE_TYPE_SERDE             = "hive_serde";
     public static final String HIVE_TYPE_ORDER             = "hive_order";
+    public static final String HIVE_TYPE_PROCESS_EXECUTION = "hive_process_execution";
     public static final String HDFS_TYPE_PATH              = "hdfs_path";
     public static final String HBASE_TYPE_TABLE            = "hbase_table";
     public static final String HBASE_TYPE_NAMESPACE        = "hbase_namespace";
     public static final String AWS_S3_BUCKET               = "aws_s3_bucket";
     public static final String AWS_S3_PSEUDO_DIR           = "aws_s3_pseudo_dir";
     public static final String AWS_S3_OBJECT               = "aws_s3_object";
 
     public static final String SCHEME_SEPARATOR            = "://";
     public static final String S3_SCHEME                   = "s3" + SCHEME_SEPARATOR;
     public static final String S3A_SCHEME                  = "s3a" + SCHEME_SEPARATOR;
 
     public static final String ATTRIBUTE_QUALIFIED_NAME    = "qualifiedName";
     public static final String ATTRIBUTE_NAME              = "name";
@@ -126,6 +131,8 @@ public abstract class BaseHiveEvent {
     public static final String ATTRIBUTE_START_TIME         = "startTime";
     public static final String ATTRIBUTE_USER_NAME          = "userName";
     public static final String ATTRIBUTE_QUERY_TEXT         = "queryText";
+    public static final String ATTRIBUTE_PROCESS            = "process";
+    public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions";
     public static final String ATTRIBUTE_QUERY_ID           = "queryId";
     public static final String ATTRIBUTE_QUERY_PLAN         = "queryPlan";
     public static final String ATTRIBUTE_END_TIME           = "endTime";
@@ -139,6 +146,7 @@ public abstract class BaseHiveEvent {
     public static final String ATTRIBUTE_NAMESPACE          = "namespace";
     public static final String ATTRIBUTE_OBJECT_PREFIX      = "objectPrefix";
     public static final String ATTRIBUTE_BUCKET             = "bucket";
+    public static final String ATTRIBUTE_HOSTNAME           = "hostName";
     public static final String HBASE_STORAGE_HANDLER_CLASS  = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
     public static final String HBASE_DEFAULT_NAMESPACE      = "default";
@@ -146,6 +154,7 @@ public abstract class BaseHiveEvent {
     public static final String HBASE_PARAM_TABLE_NAME       = "hbase.table.name";
     public static final long   MILLIS_CONVERT_FACTOR        = 1000;
     public static final String HDFS_PATH_PREFIX             = "hdfs://";
+    public static final String EMPTY_ATTRIBUTE_VALUE        = "";
 
     public static final Map<Integer, String> OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>();
@@ -605,14 +614,41 @@ public abstract class BaseHiveEvent {
         ret.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputs));
         ret.setAttribute(ATTRIBUTE_NAME, queryStr);
         ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, getOperationName());
-        ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime());
-        ret.setAttribute(ATTRIBUTE_END_TIME, System.currentTimeMillis());
-        ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
-        ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
-        ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
-        ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
-        ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
+
+        // We are setting an empty value to these attributes, since now we have a new entity type called hive process
+        // execution which captures these values. We have to set empty values here because these attributes are
+        // mandatory attributes for hive process entity type.
+        ret.setAttribute(ATTRIBUTE_START_TIME, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_END_TIME, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE);
+        ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
+        ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
 
         return ret;
     }
 
+    protected AtlasEntity getHiveProcessExecutionEntity(AtlasEntity hiveProcess) throws Exception {
+        AtlasEntity ret      = new AtlasEntity(HIVE_TYPE_PROCESS_EXECUTION);
+        String      queryStr = getQueryString();
+
+        if (queryStr != null) {
+            queryStr = queryStr.toLowerCase().trim();
+        }
+
+        Long endTime = System.currentTimeMillis();
+
+        ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() +
+                QNAME_SEP_PROCESS + getQueryStartTime().toString() + QNAME_SEP_PROCESS + endTime.toString());
+        ret.setAttribute(ATTRIBUTE_NAME, queryStr + QNAME_SEP_PROCESS + getQueryStartTime().toString());
+        ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime());
+        ret.setAttribute(ATTRIBUTE_END_TIME, endTime);
+        ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
+        ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
+        ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
+        ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
+        ret.setAttribute(ATTRIBUTE_HOSTNAME, getContext().getHostName());
+        ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
+
+        return ret;
+    }
```
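The execution's qualifiedName extends the process's stable qualifiedName with the query start and end timestamps, which is what lets reruns of the same query map onto one hive_process while producing a distinct hive_process_execution per run. A hedged sketch of that composition — QNAME_SEP_PROCESS is imported from AtlasHiveHookContext and its value is not visible in this diff, so the ':' below is an assumption:

```java
final class QNameSketch {
    // Assumption: QNAME_SEP_PROCESS == ':' — its actual value is defined in
    // AtlasHiveHookContext, outside the hunks shown on this page.
    private static final char QNAME_SEP_PROCESS = ':';

    // Mirrors getHiveProcessExecutionEntity(): stable process name + per-run timestamps.
    static String executionQualifiedName(String processQualifiedName, long startTime, long endTime) {
        return processQualifiedName + QNAME_SEP_PROCESS + startTime + QNAME_SEP_PROCESS + endTime;
    }
}
```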
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java

```diff
@@ -117,6 +117,9 @@ public class CreateHiveProcess extends BaseHiveEvent {
                 ret.addEntity(process);
 
+                AtlasEntity processExecution = getHiveProcessExecutionEntity(process);
+                ret.addEntity(processExecution);
+
                 processColumnLineage(process, ret);
 
                 addProcessedEntities(ret);
```
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java

```diff
@@ -130,8 +130,11 @@ public class CreateTable extends BaseHiveEvent {
                 } else {
                     processEntity = getHiveProcessEntity(Collections.singletonList(tblEntity), Collections.singletonList(hbaseTableEntity));
                 }
 
                 ret.addEntity(processEntity);
+
+                AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
+                ret.addEntity(processExecution);
             }
         } else {
             if (EXTERNAL_TABLE.equals(table.getTableType())) {
@@ -140,6 +143,9 @@ public class CreateTable extends BaseHiveEvent {
                 ret.addEntity(processEntity);
                 ret.addReferredEntity(hdfsPathEntity);
+
+                AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity);
+                ret.addEntity(processExecution);
             }
         }
     }
```
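Both event handlers now follow the same two-step pattern: build (or re-resolve) the deduplicated process entity, then attach a fresh execution entity for this particular run. A condensed restatement of that pattern, with the surrounding handler plumbing elided:

```java
// Condensed from the CreateHiveProcess and CreateTable diffs above;
// 'ret' is the entity collection the event handler returns to Atlas.
AtlasEntity processEntity    = getHiveProcessEntity(inputs, outputs);        // stable: keyed by inputs/outputs
AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity); // unique: keyed by start/end time

ret.addEntity(processEntity);
ret.addEntity(processExecution);
```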
addons/hive-bridge/src/main/java/org/apache/atlas/hive/model/HiveDataTypes.java

```diff
@@ -43,7 +43,8 @@ public enum HiveDataTypes {
     HIVE_ROLE,
     HIVE_TYPE,
     HIVE_PROCESS,
-    HIVE_COLUMN_LINEAGE
+    HIVE_COLUMN_LINEAGE,
+    HIVE_PROCESS_EXECUTION,
     // HIVE_VIEW,
     ;
```
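HiveDataTypes maps enum constants to Atlas type names; its getName() accessor is not part of this diff, but by the convention the other constants follow, HIVE_PROCESS_EXECUTION should resolve to the "hive_process_execution" type name introduced in BaseHiveEvent. A hedged sketch of that convention:

```java
// Hedged sketch — getName() is not shown in this diff; this assumes the
// usual name().toLowerCase() convention used for the existing constants.
public enum HiveDataTypesSketch {
    HIVE_PROCESS,
    HIVE_PROCESS_EXECUTION;

    public String getName() {
        return name().toLowerCase(); // HIVE_PROCESS_EXECUTION -> "hive_process_execution"
    }
}
```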
addons/hive-bridge/src/test/java/org/apache/atlas/hive/HiveITBase.java

```diff
@@ -218,6 +218,25 @@ public class HiveITBase {
         return (String) entity.getGuid();
     }
 
+    protected String assertEntityIsRegisteredViaGuid(String guid, final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
+        waitFor(80000, new HiveHookIT.Predicate() {
+            @Override
+            public void evaluate() throws Exception {
+                AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
+                AtlasEntity                        entity                 = atlasEntityWithExtInfo.getEntity();
+
+                assertNotNull(entity);
+
+                if (assertPredicate != null) {
+                    assertPredicate.assertOnEntity(entity);
+                }
+            }
+        });
+
+        AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid);
+        AtlasEntity                        entity                 = atlasEntityWithExtInfo.getEntity();
+
+        return (String) entity.getGuid();
+    }
+
     protected AtlasEntity assertEntityIsRegistedViaEntity(final String typeName, final String property, final String value,
                                                           final HiveHookIT.AssertPredicate assertPredicate) throws Exception {
         waitFor(80000, new HiveHookIT.Predicate() {
@@ -473,7 +492,6 @@ public class HiveITBase {
         return buffer.toString();
     }
-
 
     protected static Entity getEntityByType(Set<? extends Entity> entities, Entity.Type entityType) {
         for (Entity entity : entities) {
             if (entity.getType() == entityType) {
```
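The new helper leans on the base class's waitFor polling loop (invoked here with an 80-second timeout): keep re-running the assertions until they pass or a deadline expires, because hook notifications are processed asynchronously. waitFor itself is not part of this diff; a minimal standalone sketch of the pattern, with an illustrative poll interval:

```java
// Illustrative sketch — the real waitFor/Predicate live elsewhere in HiveITBase.
final class PollSketch {
    interface Check {
        void evaluate() throws Exception; // throws while the entity is not there yet
    }

    static void waitFor(long timeoutMs, Check check) throws Exception {
        long deadline = System.currentTimeMillis() + timeoutMs;

        while (true) {
            try {
                check.evaluate();
                return;                      // condition met
            } catch (Exception | AssertionError e) {
                if (System.currentTimeMillis() > deadline) {
                    throw e;                 // give up once the deadline passes
                }
                Thread.sleep(1000);          // notification not processed yet; retry
            }
        }
    }
}
```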
addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java

```diff
@@ -29,13 +29,13 @@ import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
 import org.apache.atlas.hive.hook.events.BaseHiveEvent;
 import org.apache.atlas.hive.model.HiveDataTypes;
 import org.apache.atlas.model.instance.AtlasClassification;
-import org.apache.atlas.model.instance.AtlasEntityHeader;
-import org.apache.atlas.model.lineage.AtlasLineageInfo;
-import org.apache.atlas.model.typedef.AtlasClassificationDef;
 import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntityHeader;
 import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
 import org.apache.atlas.model.instance.AtlasObjectId;
 import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.model.lineage.AtlasLineageInfo;
+import org.apache.atlas.model.typedef.AtlasClassificationDef;
 import org.apache.atlas.model.typedef.AtlasTypesDef;
 import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.commons.collections.CollectionUtils;
@@ -49,15 +49,12 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
-import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
-import java.io.File;
-import java.nio.file.Files;
 import java.text.ParseException;
 import java.util.*;
```
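The test hunks that follow repeatedly call numberOfProcessExecutions(...); its definition falls outside the captured portion of this page. Judging by assertProcessExecutionIsRegistered at the bottom of the file, which reads the same processExecutions relationship via toAtlasObjectIdList, it presumably just counts that relationship, roughly:

```java
// Hypothetical reconstruction — the real method is defined in a part of
// HiveHookIT.java that this excerpt does not reach.
private int numberOfProcessExecutions(AtlasEntity hiveProcess) {
    return toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(
            BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size();
}
```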
```diff
@@ -308,7 +305,15 @@ public class HiveHookIT extends HiveITBase {
         final Set<ReadEntity>  readEntities  = getInputs(tableName, Entity.Type.TABLE);
         final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);
 
-        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities, writeEntities));
+        HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities, writeEntities);
+        AtlasEntity processEntity1 = validateProcess(hiveEventContext);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
+        AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
 
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);
     }
@@ -347,7 +352,12 @@ public class HiveHookIT extends HiveITBase {
         Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
 
         HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
-        String processId = assertProcessIsRegistered(hiveEventContext);
+        AtlasEntity processEntity1 = validateProcess(hiveEventContext);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
+        AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
 
         String drpquery = String.format("drop table %s ", ctasTableName);
         runCommandWithDelay(drpquery, 100);
@@ -360,13 +370,17 @@ public class HiveHookIT extends HiveITBase {
         outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
 
-        String process2Id = assertProcessIsRegistered(hiveEventContext, inputs, outputs);
+        AtlasEntity processEntity2 = validateProcess(hiveEventContext);
+        AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext);
+        AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
 
-        assertNotEquals(process2Id, processId);
-
-        AtlasEntity processsEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
+        assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
+        Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
 
-        validateOutputTables(processsEntity, outputs);
+        validateOutputTables(processEntity1, outputs);
     }
 
     @Test
@@ -377,7 +391,14 @@ public class HiveHookIT extends HiveITBase {
         runCommand(query);
 
-        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
+        HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        AtlasEntity processEntity1 = validateProcess(hiveEventContext);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
+        AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
 
         assertTableIsRegistered(DEFAULT_DB, viewName);
     }
@@ -392,7 +413,15 @@ public class HiveHookIT extends HiveITBase {
         String table1Id = assertTableIsRegistered(DEFAULT_DB, table1Name);
 
-        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
+        HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        String processId1 = assertProcessIsRegistered(hiveEventContext);
+        AtlasEntity processEntity1 = atlasClientV2.getEntityByGuid(processId1).getEntity();
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
+        AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
 
         String viewId = assertTableIsRegistered(DEFAULT_DB, viewName);
@@ -412,8 +441,16 @@ public class HiveHookIT extends HiveITBase {
         runCommand(query);
 
         //Check if alter view process is reqistered
-        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
+        HiveEventContext hiveEventContext2 = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        String processId2 = assertProcessIsRegistered(hiveEventContext2);
+        AtlasEntity processEntity2 = atlasClientV2.getEntityByGuid(processId2).getEntity();
+        AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext2);
+        AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2);
+        Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
 
         String table2Id = assertTableIsRegistered(DEFAULT_DB, table2Name);
@@ -526,6 +563,12 @@ public class HiveHookIT extends HiveITBase {
         return validateProcess(event, event.getInputs(), event.getOutputs());
     }
 
+    private AtlasEntity validateProcessExecution(AtlasEntity hiveProcess, HiveEventContext event) throws Exception {
+        String processExecutionId = assertProcessExecutionIsRegistered(hiveProcess, event);
+        AtlasEntity processExecutionEntity = atlasClientV2.getEntityByGuid(processExecutionId).getEntity();
+        return processExecutionEntity;
+    }
+
     @Test
     public void testInsertIntoTable() throws Exception {
         String inputTable1Name = createTable();
```
```diff
@@ -581,8 +624,85 @@ public class HiveHookIT extends HiveITBase {
         runCommandWithDelay(query, 1000);
 
         AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs);
+        Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2);
         Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
     }
 
+    @Test
+    public void testInsertIntoTableProcessExecution() throws Exception {
+        String inputTable1Name = createTable();
+        String inputTable2Name = createTable();
+        String insertTableName = createTable();
+
+        assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
+        assertTableIsRegistered(DEFAULT_DB, insertTableName);
+
+        String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
+
+        runCommand(query);
+
+        Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
+        inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
+
+        Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
+
+        (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
+
+        HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
+
+        Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {{
+            addAll(inputs);
+        }};
+
+        assertTableIsRegistered(DEFAULT_DB, insertTableName);
+
+        AtlasEntity processEntity1 = validateProcess(event, expectedInputs, outputs);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, event);
+        AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
+
+        //Test sorting of tbl names
+        SortedSet<String> sortedTblNames = new TreeSet<>();
+        sortedTblNames.add(inputTable1Name.toLowerCase());
+        sortedTblNames.add(inputTable2Name.toLowerCase());
+
+        //Verify sorted order of inputs in qualified name
+        Assert.assertEquals(processEntity1.getAttribute(ATTRIBUTE_QUALIFIED_NAME),
+                Joiner.on(SEP).join("QUERY",
+                        getQualifiedTblName(sortedTblNames.first()),
+                        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.first())),
+                        getQualifiedTblName(sortedTblNames.last()),
+                        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.last())))
+                + IO_SEP + SEP
+                + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
+                        getQualifiedTblName(insertTableName),
+                        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, insertTableName))));
+
+        //Rerun same query. Should result in same process
+        runCommandWithDelay(query, 1000);
+
+        AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs);
+        AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, event);
+        process = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity2.getGuid());
+        Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid());
+
+        String queryWithDifferentPredicate = "insert into " + insertTableName + " select t1.id, t1.name from " +
+                inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=100";
+        runCommandWithDelay(queryWithDifferentPredicate, 1000);
+
+        HiveEventContext event3 = constructEvent(queryWithDifferentPredicate, HiveOperation.QUERY, inputs, outputs);
+        AtlasEntity processEntity3 = validateProcess(event3, expectedInputs, outputs);
+        AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity3, event3);
+        process = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity3.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity3), 3);
+        Assert.assertEquals(processEntity2.getGuid(), processEntity3.getGuid());
+    }
+
     @Test
```
```diff
@@ -593,7 +713,14 @@ public class HiveHookIT extends HiveITBase {
         runCommand(query);
 
-        validateProcess(constructEvent(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null));
+        HiveEventContext event = constructEvent(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null);
+        AtlasEntity hiveProcess = validateProcess(event);
+        AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
+        AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
 
         assertTableIsRegistered(DEFAULT_DB, tableName);
     }
@@ -613,6 +740,10 @@ public class HiveHookIT extends HiveITBase {
         HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
         AtlasEntity processEntity = validateProcess(hiveEventContext);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, hiveEventContext);
+        AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity.getGuid());
 
         validateHDFSPaths(processEntity, OUTPUTS, pFile1);
@@ -626,6 +757,11 @@ public class HiveHookIT extends HiveITBase {
         assertTableIsRegistered(DEFAULT_DB, tableName);
 
         AtlasEntity process2Entity = validateProcess(hiveEventContext);
+        AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity, hiveEventContext);
+        AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process2.getGuid(), process2Entity.getGuid());
 
         validateHDFSPaths(process2Entity, OUTPUTS, pFile1);
@@ -646,9 +782,13 @@ public class HiveHookIT extends HiveITBase {
         }};
 
         AtlasEntity process3Entity = validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, p3Outputs));
+        AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity, hiveEventContext);
+        AtlasObjectId process3 = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process3.getGuid(), process3Entity.getGuid());
 
         validateHDFSPaths(process3Entity, OUTPUTS, pFile2);
 
+        Assert.assertEquals(numberOfProcessExecutions(process3Entity), 3);
         Assert.assertEquals(process3Entity.getGuid(), processEntity.getGuid());
     }
@@ -714,7 +854,13 @@ public class HiveHookIT extends HiveITBase {
         outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);
 
-        validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, outputs));
+        HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
+        AtlasEntity hiveProcess = validateProcess(event);
+        AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
+        AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
 
         assertTableIsRegistered(DEFAULT_DB, tableName);
         assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
@@ -748,8 +894,13 @@ public class HiveHookIT extends HiveITBase {
         };
 
-        validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps), inputs, outputs);
+        HiveEventContext event = constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps);
+        AtlasEntity hiveProcess = validateProcess(event, inputs, outputs);
+        AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
+        AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
 
         assertTableIsRegistered(DEFAULT_DB, tableName);
         assertTableIsRegistered(DEFAULT_DB, insertTableName);
@@ -769,8 +920,14 @@ public class HiveHookIT extends HiveITBase {
         Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
         Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
 
-        AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
+        HiveEventContext event = constructEvent(query, HiveOperation.EXPORT, inputs, outputs);
+        AtlasEntity processEntity = validateProcess(event);
+        AtlasEntity hiveProcessExecution = validateProcessExecution(processEntity, event);
+        AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
 
         validateHDFSPaths(processEntity, OUTPUTS, filename);
         validateInputTables(processEntity, inputs);
@@ -785,7 +942,16 @@ public class HiveHookIT extends HiveITBase {
         outputs = getOutputs(importTableName, Entity.Type.TABLE);
 
-        validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
+        HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs);
+        AtlasEntity processEntity2 = validateProcess(event2);
+        AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2);
+        AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
+        Assert.assertNotEquals(processEntity.getGuid(), processEntity2.getGuid());
 
         //Should create another process
         filename = "pfile://" + mkdir("export2UnPartitioned");
@@ -796,7 +962,18 @@ public class HiveHookIT extends HiveITBase {
         inputs = getInputs(tableName, Entity.Type.TABLE);
         outputs = getOutputs(filename, Entity.Type.DFS_DIR);
 
-        validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
+        HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, inputs, outputs);
+        AtlasEntity processEntity3 = validateProcess(event3);
+        AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3);
+        AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process3.getGuid(), processEntity3.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity3), 1);
+
+        // Should be a different process compared to the previous ones
+        Assert.assertNotEquals(processEntity.getGuid(), processEntity3.getGuid());
+        Assert.assertNotEquals(processEntity2.getGuid(), processEntity3.getGuid());
 
         //import again shouyld create another process
         query = "import table " + importTableName + " from '" + filename + "'";
@@ -805,7 +982,20 @@ public class HiveHookIT extends HiveITBase {
         outputs = getOutputs(importTableName, Entity.Type.TABLE);
 
-        validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
+        HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs);
+        AtlasEntity processEntity4 = validateProcess(event4);
+        AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4);
+        AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process4.getGuid(), processEntity4.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity4), 1);
+
+        // Should be a different process compared to the previous ones
+        Assert.assertNotEquals(processEntity.getGuid(), processEntity4.getGuid());
+        Assert.assertNotEquals(processEntity2.getGuid(), processEntity4.getGuid());
+        Assert.assertNotEquals(processEntity3.getGuid(), processEntity4.getGuid());
     }
 
     @Test
```
```diff
@@ -833,9 +1023,15 @@ public class HiveHookIT extends HiveITBase {
         partitionIps.addAll(expectedExportInputs);
 
-        AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
+        HiveEventContext event1 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs);
+        AtlasEntity processEntity1 = validateProcess(event1, expectedExportInputs, outputs);
+        AtlasEntity hiveProcessExecution1 = validateProcessExecution(processEntity1, event1);
+        AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
 
-        validateHDFSPaths(processEntity, OUTPUTS, filename);
+        validateHDFSPaths(processEntity1, OUTPUTS, filename);
 
         //Import
         String importTableName = createTable(true);
@@ -852,7 +1048,14 @@ public class HiveHookIT extends HiveITBase {
         partitionOps.addAll(importOutputs);
 
-        validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps), expectedImportInputs, importOutputs);
+        HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps);
+        AtlasEntity processEntity2 = validateProcess(event2, expectedImportInputs, importOutputs);
+        AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2);
+        AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process2.getGuid(), processEntity2.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1);
+        Assert.assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid());
 
         //Export should update same process
         filename = "pfile://" + mkdir("export2");
@@ -866,7 +1069,17 @@ public class HiveHookIT extends HiveITBase {
             addAll(outputs);
         }};
 
-        validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);
+        HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2);
+
+        // this process entity should return same as the processEntity1 since the inputs and outputs are the same,
+        // hence the qualifiedName will be the same
+        AtlasEntity processEntity3 = validateProcess(event3, expectedExportInputs, p3Outputs);
+        AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3);
+        AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process3.getGuid(), processEntity3.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity3), 2);
+        Assert.assertEquals(processEntity1.getGuid(), processEntity3.getGuid());
 
         query = "alter table " + importTableName + " drop partition (dt='" + PART_FILE + "')";
@@ -883,7 +1096,17 @@ public class HiveHookIT extends HiveITBase {
             addAll(expectedImportInputs);
         }};
 
-        validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
+        HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps);
+
+        // This process is going to be same as processEntity2
+        AtlasEntity processEntity4 = validateProcess(event4, expectedImport2Inputs, importOutputs);
+        AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4);
+        AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process4.getGuid(), processEntity4.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity4), 2);
+        Assert.assertEquals(processEntity2.getGuid(), processEntity4.getGuid());
+        Assert.assertNotEquals(processEntity1.getGuid(), processEntity4.getGuid());
     }
 
     @Test
```
@@ -1176,15 +1399,12 @@ public class HiveHookIT extends HiveITBase {
...
@@ -1176,15 +1399,12 @@ public class HiveHookIT extends HiveITBase {
);
);
}
}
/*
/**
The test is disabled by default
* Reenabling this test since HIVE-14706 is fixed now and the hive version we are using now sends
Reason : Atlas uses Hive version 1.2.x and the Hive patch HIVE-13112 which enables column level lineage is not
* us the column lineage information
committed in Hive version 1.2.x
* @throws Exception
This test will fail if the lineage information is not available from Hive
*/
Once the patch for HIVE-13112 is committed to Hive branch 1.2.x, the test can be enabled
@Test
Please track HIVE-14706 to know the status of column lineage availability in latest Hive versions i.e 2.1.x
*/
@Test
(
enabled
=
false
)
public
void
testColumnLevelLineage
()
throws
Exception
{
public
void
testColumnLevelLineage
()
throws
Exception
{
String
sourceTable
=
"table"
+
random
();
String
sourceTable
=
"table"
+
random
();
...
@@ -1204,8 +1424,14 @@ public class HiveHookIT extends HiveITBase {
...
@@ -1204,8 +1424,14 @@ public class HiveHookIT extends HiveITBase {
Set
<
ReadEntity
>
inputs
=
getInputs
(
sourceTable
,
Entity
.
Type
.
TABLE
);
Set
<
ReadEntity
>
inputs
=
getInputs
(
sourceTable
,
Entity
.
Type
.
TABLE
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
Set
<
WriteEntity
>
outputs
=
getOutputs
(
ctasTableName
,
Entity
.
Type
.
TABLE
);
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
inputs
,
outputs
);
HiveEventContext
event
=
constructEvent
(
query
,
HiveOperation
.
CREATETABLE_AS_SELECT
,
inputs
,
outputs
);
AtlasEntity
processEntity1
=
validateProcess
(
event
);
AtlasEntity
hiveProcessExecution1
=
validateProcessExecution
(
processEntity1
,
event
);
AtlasObjectId
process1
=
toAtlasObjectId
(
hiveProcessExecution1
.
getRelationshipAttribute
(
BaseHiveEvent
.
ATTRIBUTE_PROCESS
));
Assert
.
assertEquals
(
process1
.
getGuid
(),
processEntity1
.
getGuid
());
Assert
.
assertEquals
(
numberOfProcessExecutions
(
processEntity1
),
1
);
Assert
.
assertEquals
(
processEntity1
.
getGuid
(),
processEntity1
.
getGuid
());
assertProcessIsRegistered
(
event
);
assertTableIsRegistered
(
DEFAULT_DB
,
ctasTableName
);
assertTableIsRegistered
(
DEFAULT_DB
,
ctasTableName
);
String
processQName
=
sortEventsAndGetProcessQualifiedName
(
event
);
String
processQName
=
sortEventsAndGetProcessQualifiedName
(
event
);
...
@@ -1281,14 +1507,20 @@ public class HiveHookIT extends HiveITBase {
...
@@ -1281,14 +1507,20 @@ public class HiveHookIT extends HiveITBase {
         Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);

         String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
+        HiveEventContext event = constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs);
-        validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));
+        AtlasEntity processEntity = validateProcess(event);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, event);
+        AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process.getGuid(), processEntity.getGuid());

         //Check lineage
         String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
         AtlasLineageInfo atlasLineageInfoInput = atlasClientV2.getLineageInfo(tableId, AtlasLineageInfo.LineageDirection.INPUT, 0);
         Map<String, AtlasEntityHeader> entityMap = atlasLineageInfoInput.getGuidEntityMap();

+        Assert.assertEquals(numberOfProcessExecutions(processEntity), 1);
         //Below should be assertTrue - Fix https://issues.apache.org/jira/browse/ATLAS-653
         Assert.assertFalse(entityMap.containsKey(tableId));
     }
...
@@ -1402,7 +1634,8 @@ public class HiveHookIT extends HiveITBase {
         String processQualifiedName = getTableProcessQualifiedName(DEFAULT_DB, tableName);
         String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQualifiedName, null);

         AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
+        Assert.assertEquals(numberOfProcessExecutions(processEntity), 2);
+        //validateProcessExecution(processEntity, event);
         validateHDFSPaths(processEntity, INPUTS, testPath);
     }
...
@@ -1890,6 +2123,34 @@ public class HiveHookIT extends HiveITBase {
         }
     }
+    private String assertProcessExecutionIsRegistered(AtlasEntity hiveProcess, final HiveEventContext event) throws Exception {
+        try {
+            String guid = "";
+            List<AtlasObjectId> processExecutions = toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS));
+            for (AtlasObjectId processExecution : processExecutions) {
+                AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(processExecution.getGuid());
+                AtlasEntity entity = atlasEntityWithExtInfo.getEntity();
+
+                if (String.valueOf(entity.getAttribute(ATTRIBUTE_QUERY_TEXT)).equals(event.getQueryStr().toLowerCase().trim())) {
+                    guid = entity.getGuid();
+                }
+            }
+
+            return assertEntityIsRegisteredViaGuid(guid, new AssertPredicate() {
+                @Override
+                public void assertOnEntity(final AtlasEntity entity) throws Exception {
+                    String queryText = (String) entity.getAttribute(ATTRIBUTE_QUERY_TEXT);
+                    Assert.assertEquals(queryText, event.getQueryStr().toLowerCase().trim());
+                }
+            });
+        } catch (Exception e) {
+            LOG.error("Exception : ", e);
+            throw e;
+        }
+    }
     private String getDSTypeName(Entity entity) {
         return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : HiveMetaStoreBridge.HDFS_PATH;
     }
...
@@ -2080,4 +2341,9 @@ public class HiveHookIT extends HiveITBase {
         @Override
         public Entity.Type getType() { return type; }
     }
+    private int numberOfProcessExecutions(AtlasEntity hiveProcess) {
+        return toAtlasObjectIdList(hiveProcess.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size();
+    }
 }
addons/models/0000-Area0/0010-base_model.json
...
@@ -324,6 +324,15 @@
"isUnique"
:
false
"isUnique"
:
false
}
}
]
]
},
{
"name"
:
"ProcessExecution"
,
"superTypes"
:
[
"Asset"
],
"serviceType"
:
"atlas_core"
,
"typeVersion"
:
"1.0"
,
"attributeDefs"
:
[]
}
}
],
],
"relationshipDefs"
:
[
"relationshipDefs"
:
[
...
addons/models/1000-Hadoop/1030-hive_model.json
...
@@ -457,6 +457,80 @@
"isUnique"
:
false
"isUnique"
:
false
}
}
]
]
},
{
"name"
:
"hive_process_execution"
,
"superTypes"
:
[
"ProcessExecution"
],
"serviceType"
:
"hive"
,
"typeVersion"
:
"1.0"
,
"attributeDefs"
:
[
{
"name"
:
"startTime"
,
"typeName"
:
"date"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
false
,
"isOptional"
:
false
,
"isUnique"
:
false
},
{
"name"
:
"endTime"
,
"typeName"
:
"date"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
false
,
"isOptional"
:
false
,
"isUnique"
:
false
},
{
"name"
:
"userName"
,
"typeName"
:
"string"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
true
,
"isOptional"
:
false
,
"isUnique"
:
false
},
{
"name"
:
"queryText"
,
"typeName"
:
"string"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
false
,
"isOptional"
:
false
,
"isUnique"
:
false
},
{
"name"
:
"queryGraph"
,
"typeName"
:
"string"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
false
,
"isOptional"
:
true
,
"isUnique"
:
false
},
{
"name"
:
"queryId"
,
"typeName"
:
"string"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
false
,
"isOptional"
:
false
,
"isUnique"
:
false
},
{
"name"
:
"queryPlan"
,
"typeName"
:
"string"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
false
,
"isOptional"
:
false
,
"isUnique"
:
false
},
{
"name"
:
"hostName"
,
"typeName"
:
"string"
,
"cardinality"
:
"SINGLE"
,
"isIndexable"
:
true
,
"isOptional"
:
false
,
"isUnique"
:
false
}
]
}
}
],
],
"relationshipDefs"
:
[
"relationshipDefs"
:
[
...
@@ -567,6 +641,24 @@
"cardinality"
:
"SET"
"cardinality"
:
"SET"
},
},
"propagateTags"
:
"NONE"
"propagateTags"
:
"NONE"
},
{
"name"
:
"hive_process_process_executions"
,
"serviceType"
:
"hive"
,
"typeVersion"
:
"1.0"
,
"relationshipCategory"
:
"COMPOSITION"
,
"endDef1"
:
{
"type"
:
"hive_process"
,
"name"
:
"processExecutions"
,
"cardinality"
:
"SET"
,
"isContainer"
:
true
},
"endDef2"
:
{
"type"
:
"hive_process_execution"
,
"name"
:
"process"
,
"cardinality"
:
"SINGLE"
},
"propagateTags"
:
"NONE"
}
}
]
]
}
}
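The relationship definition above gives every hive_process a processExecutions set and every hive_process_execution a back-pointer named process. As a rough sketch of how a client could walk that relationship (not part of this commit; it assumes a reachable Atlas instance, a configured AtlasClientV2, and that the relationship attribute deserializes to a list of object-id maps; error handling omitted):

import java.util.List;
import java.util.Map;

import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.model.instance.AtlasEntity;

public class ListProcessExecutions {
    @SuppressWarnings("unchecked")
    static void printExecutions(AtlasClientV2 atlasClientV2, String hiveProcessGuid) throws Exception {
        // Fetch the hive_process and read the "processExecutions" relationship end defined above
        AtlasEntity process = atlasClientV2.getEntityByGuid(hiveProcessGuid).getEntity();

        List<Map<String, Object>> executions =
                (List<Map<String, Object>>) process.getRelationshipAttribute("processExecutions");

        for (Map<String, Object> objectId : executions) {
            // Each element references one hive_process_execution by GUID
            AtlasEntity execution = atlasClientV2.getEntityByGuid((String) objectId.get("guid")).getEntity();

            System.out.println(execution.getAttribute("queryText")
                    + " on "      + execution.getAttribute("hostName")
                    + " between " + execution.getAttribute("startTime")
                    + " and "     + execution.getAttribute("endTime"));
        }
    }
}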
webapp/src/main/java/org/apache/atlas/examples/QuickStartV2.java
...
@@ -19,6 +19,7 @@
 package org.apache.atlas.examples;

 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
 import com.sun.jersey.core.util.MultivaluedMapImpl;
 import org.apache.atlas.ApplicationProperties;
 import org.apache.atlas.AtlasClientV2;
...
@@ -42,6 +43,8 @@ import org.apache.atlas.model.typedef.AtlasEntityDef;
 import org.apache.atlas.model.typedef.AtlasRelationshipDef;
 import org.apache.atlas.model.typedef.AtlasRelationshipDef.PropagateTags;
 import org.apache.atlas.model.typedef.AtlasTypesDef;
+import org.apache.atlas.repository.Constants;
+import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.utils.AuthenticationUtil;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.configuration.Configuration;
...
@@ -107,9 +110,15 @@ public class QuickStartV2 {
     public static final String JDBC_CLASSIFICATION    = "JdbcAccess";
     public static final String LOGDATA_CLASSIFICATION = "Log Data";

     public static final String LOAD_SALES_DAILY_PROCESS         = "loadSalesDaily";
-    public static final String LOAD_SALES_MONTHLY_PROCESS       = "loadSalesMonthly";
-    public static final String LOAD_LOGS_MONTHLY_PROCESS        = "loadLogsMonthly";
+    public static final String LOAD_SALES_DAILY_PROCESS_EXEC1   = "loadSalesDailyExec1";
+    public static final String LOAD_SALES_DAILY_PROCESS_EXEC2   = "loadSalesDailyExec2";
+    public static final String LOAD_SALES_MONTHLY_PROCESS       = "loadSalesMonthly";
+    public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC1 = "loadSalesMonthlyExec1";
+    public static final String LOAD_SALES_MONTHLY_PROCESS_EXEC2 = "loadSalesMonthlyExec2";
+    public static final String LOAD_LOGS_MONTHLY_PROCESS        = "loadLogsMonthly";
+    public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC1  = "loadLogsMonthlyExec1";
+    public static final String LOAD_LOGS_MONTHLY_PROCESS_EXEC2  = "loadLogsMonthlyExec2";

     public static final String PRODUCT_DIM_VIEW  = "product_dim_view";
     public static final String CUSTOMER_DIM_VIEW = "customer_dim_view";
...
@@ -119,6 +128,7 @@ public class QuickStartV2 {
     public static final String TABLE_TYPE                  = "Table";
     public static final String VIEW_TYPE                   = "View";
     public static final String LOAD_PROCESS_TYPE           = "LoadProcess";
+    public static final String LOAD_PROCESS_EXECUTION_TYPE = "LoadProcessExecution";
     public static final String STORAGE_DESC_TYPE           = "StorageDesc";
     public static final String TABLE_DATABASE_TYPE         = "Table_DB";
...
@@ -126,13 +136,14 @@ public class QuickStartV2 {
     public static final String VIEW_TABLES_TYPE                    = "View_Tables";
     public static final String TABLE_COLUMNS_TYPE                  = "Table_Columns";
     public static final String TABLE_STORAGE_DESC_TYPE             = "Table_StorageDesc";
+    public static final String PROCESS_PROCESS_EXECUTION_DESC_TYPE = "Process_ProcessExecution";
     public static final String VERSION_1                           = "1.0";
     public static final String MANAGED_TABLE                       = "Managed";
     public static final String EXTERNAL_TABLE                      = "External";
     public static final String CLUSTER_SUFFIX                      = "@cl1";

-    public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE,
+    public static final String[] TYPES = { DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE, LOAD_PROCESS_EXECUTION_TYPE,
                                            VIEW_TYPE, JDBC_CLASSIFICATION, ETL_CLASSIFICATION, METRIC_CLASSIFICATION,
                                            PII_CLASSIFICATION, FACT_CLASSIFICATION, DIMENSION_CLASSIFICATION, LOGDATA_CLASSIFICATION,
                                            TABLE_DATABASE_TYPE, VIEW_DATABASE_TYPE, VIEW_TABLES_TYPE, TABLE_COLUMNS_TYPE, TABLE_STORAGE_DESC_TYPE };
...
@@ -256,28 +267,41 @@ public class QuickStartV2 {
                                                            createRequiredAttrDef("queryId", "string"),
                                                            createRequiredAttrDef("queryGraph", "string"));

+        AtlasEntityDef processExecutionTypeDef = createClassTypeDef(LOAD_PROCESS_EXECUTION_TYPE, LOAD_PROCESS_EXECUTION_TYPE, VERSION_1, Collections.singleton("ProcessExecution"),
+                                                           createOptionalAttrDef("userName", "string"),
+                                                           createOptionalAttrDef("startTime", "long"),
+                                                           createOptionalAttrDef("endTime", "long"),
+                                                           createRequiredAttrDef("queryText", "string"),
+                                                           createRequiredAttrDef("queryPlan", "string"),
+                                                           createRequiredAttrDef("queryId", "string"),
+                                                           createRequiredAttrDef("queryGraph", "string"));
+
         AtlasEntityDef viewTypeDef = createClassTypeDef(VIEW_TYPE, VIEW_TYPE, VERSION_1, Collections.singleton("DataSet"));

         // Relationship-Definitions
         AtlasRelationshipDef tableDatabaseTypeDef = createRelationshipTypeDef(TABLE_DATABASE_TYPE, TABLE_DATABASE_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
                                                            createRelationshipEndDef(TABLE_TYPE, "db", SINGLE, false),
                                                            createRelationshipEndDef(DATABASE_TYPE, "tables", SET, true));

         AtlasRelationshipDef viewDatabaseTypeDef = createRelationshipTypeDef(VIEW_DATABASE_TYPE, VIEW_DATABASE_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
                                                            createRelationshipEndDef(VIEW_TYPE, "db", SINGLE, false),
                                                            createRelationshipEndDef(DATABASE_TYPE, "views", SET, true));

         AtlasRelationshipDef viewTablesTypeDef = createRelationshipTypeDef(VIEW_TABLES_TYPE, VIEW_TABLES_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
                                                            createRelationshipEndDef(VIEW_TYPE, "inputTables", SET, true),
                                                            createRelationshipEndDef(TABLE_TYPE, "view", SINGLE, false));

         AtlasRelationshipDef tableColumnsTypeDef = createRelationshipTypeDef(TABLE_COLUMNS_TYPE, TABLE_COLUMNS_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
                                                            createRelationshipEndDef(TABLE_TYPE, "columns", SET, true),
                                                            createRelationshipEndDef(COLUMN_TYPE, "table", SINGLE, false));

         AtlasRelationshipDef tableStorageDescTypeDef = createRelationshipTypeDef(TABLE_STORAGE_DESC_TYPE, TABLE_STORAGE_DESC_TYPE, VERSION_1, COMPOSITION, PropagateTags.NONE,
                                                            createRelationshipEndDef(TABLE_TYPE, "sd", SINGLE, true),
                                                            createRelationshipEndDef(STORAGE_DESC_TYPE, "table", SINGLE, false));

+        AtlasRelationshipDef processProcessExecutionTypeDef = createRelationshipTypeDef(PROCESS_PROCESS_EXECUTION_DESC_TYPE, PROCESS_PROCESS_EXECUTION_DESC_TYPE, VERSION_1, AGGREGATION, PropagateTags.NONE,
+                                                           createRelationshipEndDef(LOAD_PROCESS_TYPE, "processExecutions", SET, true),
+                                                           createRelationshipEndDef(LOAD_PROCESS_EXECUTION_TYPE, "process", SINGLE, false));
+
         // Classification-Definitions
         AtlasClassificationDef dimClassifDef = createTraitTypeDef(DIMENSION_CLASSIFICATION, "Dimension Classification", VERSION_1, Collections.emptySet());
...
@@ -288,8 +312,8 @@ public class QuickStartV2 {
         AtlasClassificationDef jdbcClassifDef = createTraitTypeDef(JDBC_CLASSIFICATION, "JdbcAccess Classification", VERSION_1, Collections.emptySet());
         AtlasClassificationDef logClassifDef  = createTraitTypeDef(LOGDATA_CLASSIFICATION, "LogData Classification", VERSION_1, Collections.emptySet());

-        List<AtlasEntityDef>         entityDefs       = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, viewTypeDef);
-        List<AtlasRelationshipDef>   relationshipDefs = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef);
+        List<AtlasEntityDef>         entityDefs       = asList(dbTypeDef, sdTypeDef, colTypeDef, tableTypeDef, processTypeDef, processExecutionTypeDef, viewTypeDef);
+        List<AtlasRelationshipDef>   relationshipDefs = asList(tableDatabaseTypeDef, viewDatabaseTypeDef, viewTablesTypeDef, tableColumnsTypeDef, tableStorageDescTypeDef, processProcessExecutionTypeDef);
         List<AtlasClassificationDef> classificationDefs = asList(dimClassifDef, factClassifDef, piiClassifDef, metricClassifDef, etlClassifDef, jdbcClassifDef, logClassifDef);

         return new AtlasTypesDef(Collections.emptyList(), Collections.emptyList(), classificationDefs, entityDefs, relationshipDefs);
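For context, a typesDef assembled this way is pushed to Atlas in a single client call; a minimal sketch, assuming a configured AtlasClientV2 handle (QuickStartV2 does the equivalent elsewhere in the class, outside this hunk):

import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.model.typedef.AtlasTypesDef;

class RegisterTypesSketch {
    static void register(AtlasClientV2 atlasClientV2, AtlasTypesDef typesDef) throws Exception {
        // One call registers classification, entity, and relationship defs together,
        // including the new LoadProcessExecution and Process_ProcessExecution types
        AtlasTypesDef created = atlasClientV2.createAtlasTypeDefs(typesDef);

        System.out.println("Registered " + created.getEntityDefs().size() + " entity defs and "
                + created.getRelationshipDefs().size() + " relationship defs");
    }
}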
...
@@ -362,20 +386,36 @@ public class QuickStartV2 {
         createView(CUSTOMER_DIM_VIEW, reportingDB, asList(customerDim), DIMENSION_CLASSIFICATION, JDBC_CLASSIFICATION);

         // Process entities
-        createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
-                      asList(salesFact, timeDim),
-                      asList(salesFactDaily),
-                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        AtlasEntity loadProcess = createProcess(LOAD_SALES_DAILY_PROCESS, "hive query for daily summary", "John ETL",
+                                                asList(salesFact, timeDim),
+                                                asList(salesFactDaily),
+                                                "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC1, "hive query execution 1 for daily summary", "John ETL", "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess, LOAD_SALES_DAILY_PROCESS_EXEC2, "hive query execution 2 for daily summary", "John ETL", "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);

-        createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
-                      asList(salesFactDaily),
-                      asList(salesFactMonthly),
-                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        AtlasEntity loadProcess2 = createProcess(LOAD_SALES_MONTHLY_PROCESS, "hive query for monthly summary", "John ETL",
+                                                 asList(salesFactDaily),
+                                                 asList(salesFactMonthly),
+                                                 "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "John ETL", "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess2, LOAD_SALES_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "John ETL", "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);

-        createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
-                      asList(loggingFactDaily),
-                      asList(loggingFactMonthly),
-                      "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        AtlasEntity loadProcess3 = createProcess(LOAD_LOGS_MONTHLY_PROCESS, "hive query for monthly summary", "Tim ETL",
+                                                 asList(loggingFactDaily),
+                                                 asList(loggingFactMonthly),
+                                                 "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC1, "hive query execution 1 for monthly summary", "Tim ETL", "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
+        createProcessExecution(loadProcess3, LOAD_LOGS_MONTHLY_PROCESS_EXEC2, "hive query execution 2 for monthly summary", "Tim ETL", "create table as select ", "plan", "id", "graph", ETL_CLASSIFICATION);
     }

     private AtlasEntity createInstance(AtlasEntity entity) throws Exception {
...
@@ -522,6 +562,31 @@ public class QuickStartV2 {
         return createInstance(entity);
     }
+    AtlasEntity createProcessExecution(AtlasEntity hiveProcess, String name, String description, String user,
+                                       String queryText, String queryPlan, String queryId, String queryGraph,
+                                       String... classificationNames) throws Exception {
+        AtlasEntity entity    = new AtlasEntity(LOAD_PROCESS_EXECUTION_TYPE);
+        Long        startTime = System.currentTimeMillis();
+        Long        endTime   = System.currentTimeMillis() + 10000;
+
+        // set attributes
+        entity.setAttribute("name", name);
+        entity.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, name + CLUSTER_SUFFIX + startTime.toString() + endTime.toString());
+        entity.setAttribute("description", description);
+        entity.setAttribute("user", user);
+        entity.setAttribute("startTime", startTime);
+        entity.setAttribute("endTime", endTime);
+        entity.setAttribute("queryText", queryText);
+        entity.setAttribute("queryPlan", queryPlan);
+        entity.setAttribute("queryId", queryId);
+        entity.setAttribute("queryGraph", queryGraph);
+        entity.setRelationshipAttribute("process", AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess));
+
+        // set classifications
+        entity.setClassifications(toAtlasClassifications(classificationNames));
+
+        return createInstance(entity);
+    }
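Note the identity scheme in the helper above: the qualifiedName concatenates the name, CLUSTER_SUFFIX, and the start/end timestamps, so each run of a load process becomes a unique referenceable entity while the owning process keeps its stable name, and the "process" relationship attribute ties every execution back to its LoadProcess.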
     AtlasEntity createView(String name, AtlasEntity database, List<AtlasEntity> inputTables, String... classificationNames) throws Exception {
         AtlasEntity entity = new AtlasEntity(VIEW_TYPE);
...