dataplatform / atlas / Commits / f51c8861

Commit f51c8861 authored Jul 01, 2016 by Suma Shivaprasad
ATLAS-917 Add hdfs paths to process qualified name for non-partition based queries(sumasai)
parent f623bddf
Showing 5 changed files with 473 additions and 199 deletions
HiveMetaStoreBridge.java   ...ava/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java   +2 -2
HiveHook.java              ...ge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java   +132 -50
HiveHookIT.java            .../src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java   +337 -146
AtlasHook.java             ...cation/src/main/java/org/apache/atlas/hook/AtlasHook.java   +1 -1
release-log.txt            release-log.txt                                                +1 -0
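The heart of the change is in HiveHook.getProcessQualifiedName (see the HiveHook.java diff below): HDFS input and output paths are now folded into the lineage process qualified name for LOAD, IMPORT and EXPORT operations that are not partition based, while QUERY-type operations keep ignoring them. As a rough orientation only, the sketch below mimics the string layout the hook builds, using the SEP (":") and IO_SEP ("->") separators from the diff; the dataset names are made up for illustration and the real code operates on Hive ReadEntity/WriteEntity objects, not plain strings.

    import java.util.Arrays;
    import java.util.List;

    // Illustrative sketch only -- mirrors the qualified-name layout assembled by
    // HiveHook.getProcessQualifiedName in this commit; it is not the hook code itself.
    public class ProcessQualifiedNameSketch {
        static final String SEP = ":";     // dataset separator (same constant as in HiveHook)
        static final String IO_SEP = "->"; // input/output separator (same constant as in HiveHook)

        // Builds "<OPERATION>:<input>...-><output>..." with '/' stripped from each
        // dataset qualified name, as the hook does before appending it.
        static String processQualifiedName(String operation, List<String> inputs, List<String> outputs) {
            StringBuilder buffer = new StringBuilder(operation);
            for (String in : inputs) {
                buffer.append(SEP).append(in.toLowerCase().replaceAll("/", ""));
            }
            buffer.append(IO_SEP);
            for (String out : outputs) {
                buffer.append(SEP).append(out.toLowerCase().replaceAll("/", ""));
            }
            return buffer.toString();
        }

        public static void main(String[] args) {
            // Hypothetical dataset qualified names: one HDFS path input, one table output.
            String name = processQualifiedName("LOAD",
                    Arrays.asList("hdfs://namenode:8020/data/loadfile"),
                    Arrays.asList("default.mytable@test"));
            // Prints: LOAD:hdfs:namenode:8020dataloadfile->:default.mytable@test
            System.out.println(name);
        }
    }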
addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java

@@ -426,8 +426,8 @@ public class HiveMetaStoreBridge {
                 createDate = new Date(hiveTable.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR);
                 LOG.debug("Setting create time to {} ", createDate);
                 tableReference.set(HiveDataModelGenerator.CREATE_TIME, createDate);
-            } catch (NumberFormatException ne) {
-                LOG.error("Error while updating createTime for the table {} ", hiveTable.getCompleteName(), ne);
+            } catch (Exception ne) {
+                LOG.error("Error while setting createTime for the table {} ", hiveTable.getCompleteName(), ne);
             }
         }
     }
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java

@@ -21,6 +21,7 @@ package org.apache.atlas.hive.hook;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import kafka.security.auth.Write;
 import org.apache.atlas.AtlasClient;
 import org.apache.atlas.AtlasConstants;
 import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
@@ -66,7 +67,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
+import java.util.SortedSet;
 import java.util.TreeMap;
+import java.util.TreeSet;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -86,8 +89,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     public static final String QUEUE_SIZE = CONF_PREFIX + "queueSize";
     public static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";

-    private static final String SEP = ":".intern();
-    private static final String IO_SEP = "->".intern();
+    static final String SEP = ":".intern();
+    static final String IO_SEP = "->".intern();

     private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
@@ -291,6 +294,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private void deleteDatabase(HiveMetaStoreBridge dgiBridge, HiveEventContext event) {
         if (event.getOutputs().size() > 1) {
             LOG.info("Starting deletion of tables and databases with cascade {} ", event.getQueryStr());
+        } else {
+            LOG.info("Starting deletion of database {} ", event.getQueryStr());
         }

         for (WriteEntity output : event.getOutputs()) {
@@ -549,10 +554,6 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         return str.toLowerCase().trim();
     }

-    public static String normalize(String queryStr) {
-        return lower(queryStr);
-    }
-
     private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception {
         Set<ReadEntity> inputs = event.getInputs();
         Set<WriteEntity> outputs = event.getOutputs();
@@ -567,8 +568,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
             LOG.info("Query id/plan is missing for {}", event.getQueryStr());
         }

-        final SortedMap<Entity, Referenceable> source = new TreeMap<>(entityComparator);
-        final SortedMap<Entity, Referenceable> target = new TreeMap<>(entityComparator);
+        final SortedMap<ReadEntity, Referenceable> source = new TreeMap<>(entityComparator);
+        final SortedMap<WriteEntity, Referenceable> target = new TreeMap<>(entityComparator);

         final Set<String> dataSets = new HashSet<>();
         final Set<Referenceable> entities = new LinkedHashSet<>();
@@ -577,16 +578,27 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         // filter out select queries which do not modify data
         if (!isSelectQuery) {
-            for (ReadEntity readEntity : event.getInputs()) {
+
+            SortedSet<ReadEntity> sortedHiveInputs = new TreeSet<>(entityComparator);;
+            if (event.getInputs() != null) {
+                sortedHiveInputs.addAll(event.getInputs());
+            }
+
+            SortedSet<WriteEntity> sortedHiveOutputs = new TreeSet<>(entityComparator);
+            if (event.getOutputs() != null) {
+                sortedHiveOutputs.addAll(event.getOutputs());
+            }
+
+            for (ReadEntity readEntity : sortedHiveInputs) {
                 processHiveEntity(dgiBridge, event, readEntity, dataSets, source, entities);
             }

-            for (WriteEntity writeEntity : event.getOutputs()) {
+            for (WriteEntity writeEntity : sortedHiveOutputs) {
                 processHiveEntity(dgiBridge, event, writeEntity, dataSets, target, entities);
             }

             if (source.size() > 0 || target.size() > 0) {
-                Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, source, target);
+                Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, sortedHiveInputs, sortedHiveOutputs, source, target);
                 entities.add(processReferenceable);
                 event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), new ArrayList<>(entities)));
             } else {
@@ -597,8 +609,8 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         }
     }

-    private void processHiveEntity(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Entity entity, Set<String> dataSetsProcessed,
-        SortedMap<Entity, Referenceable> dataSets, Set<Referenceable> entities) throws Exception {
+    private <T extends Entity> void processHiveEntity(HiveMetaStoreBridge dgiBridge, HiveEventContext event, T entity, Set<String> dataSetsProcessed,
+        SortedMap<T, Referenceable> dataSets, Set<Referenceable> entities) throws Exception {
         if (entity.getType() == Type.TABLE || entity.getType() == Type.PARTITION) {
             final String tblQFName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), entity.getTable());
             if (!dataSetsProcessed.contains(tblQFName)) {
@@ -609,7 +621,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
             }
         } else if (entity.getType() == Type.DFS_DIR) {
             final String pathUri = lower(new Path(entity.getLocation()).toString());
-            LOG.info("Registering DFS Path {} ", pathUri);
+            LOG.debug("Registering DFS Path {} ", pathUri);
             if (!dataSetsProcessed.contains(pathUri)) {
                 Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri);
                 dataSets.put(entity, hdfsPath);
@@ -653,7 +665,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
         List<Referenceable> entities = new ArrayList<>();
-        final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
+        final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);
         Table hiveTable = hiveEntity.getTable();
         //Refresh to get the correct location
         hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());
@@ -665,18 +677,25 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
             dfsEntity.setTyp(Type.DFS_DIR);
             dfsEntity.setName(location);

-            SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator) {{
+            SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {{
                 put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
             }};

-            SortedMap<Entity, Referenceable> outputs = new TreeMap<Entity, Referenceable>(entityComparator) {{
+            SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {{
                 put(hiveEntity, tables.get(Type.TABLE));
             }};

-            Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, inputs, outputs);
+            SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
+            sortedIps.addAll(hiveInputsMap.keySet());
+            SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
+            sortedOps.addAll(hiveOutputsMap.keySet());
+
+            Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);
             String tableQualifiedName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);

             if (isCreateOp(event)){
+                LOG.info("Overriding process qualified name to {}", tableQualifiedName);
                 processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
             }
             entities.addAll(tables.values());
@@ -689,6 +708,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
         if (HiveOperation.CREATETABLE.equals(hiveEvent.getOperation())
             || HiveOperation.CREATEVIEW.equals(hiveEvent.getOperation())
             || HiveOperation.ALTERVIEW_AS.equals(hiveEvent.getOperation())
+            || HiveOperation.ALTERTABLE_LOCATION.equals(hiveEvent.getOperation())
             || HiveOperation.CREATETABLE_AS_SELECT.equals(hiveEvent.getOperation())) {
             return true;
         }
@@ -696,11 +716,11 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     }

     private Referenceable getProcessReferenceable(HiveMetaStoreBridge dgiBridge, HiveEventContext hiveEvent,
-        SortedMap<Entity, Referenceable> source, SortedMap<Entity, Referenceable> target) {
+        final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> source, SortedMap<WriteEntity, Referenceable> target) {
         Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());

         String queryStr = lower(hiveEvent.getQueryStr());
-        processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getProcessQualifiedName(hiveEvent.getOperation(), source, target));
+        processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getProcessQualifiedName(hiveEvent, sortedHiveInputs, sortedHiveOutputs, source, target));

         LOG.debug("Registering query: {}", queryStr);
         List<Referenceable> sourceList = new ArrayList<>(source.values());
@@ -733,51 +753,113 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
     }

     @VisibleForTesting
-    static String getProcessQualifiedName(HiveOperation op, SortedMap<Entity, Referenceable> inputs, SortedMap<Entity, Referenceable> outputs) {
+    static String getProcessQualifiedName(HiveEventContext eventContext, final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> hiveInputsMap, SortedMap<WriteEntity, Referenceable> hiveOutputsMap) {
+        HiveOperation op = eventContext.getOperation();
         StringBuilder buffer = new StringBuilder(op.getOperationName());
-        addDatasets(op, buffer, inputs);
+
+        boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
+        if (ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
+            LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
+        }
+
+        addInputs(op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
         buffer.append(IO_SEP);
-        addDatasets(op, buffer, outputs);
+        addOutputs(op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
         LOG.info("Setting process qualified name to {}", buffer);
         return buffer.toString();
     }

-    private static void addDatasets(HiveOperation op, StringBuilder buffer, final Map<Entity, Referenceable> refs) {
-        if (refs != null) {
-            for (Entity input : refs.keySet()) {
-                final Entity entity = input;
-
-                //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
-                if (addQueryType(op, entity)) {
-                    buffer.append(SEP);
-                    buffer.append(((WriteEntity) entity).getWriteType().name());
-                }
-
-                if (Type.DFS_DIR.equals(entity.getType()) ||
-                    Type.LOCAL_DIR.equals(entity.getType())) {
-                    LOG.debug("Skipping dfs dir addition into process qualified name {} ", refs.get(input).get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME));
-                } else {
-                    buffer.append(SEP);
-                    String dataSetQlfdName = (String) refs.get(input).get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
-                    // '/' breaks query parsing on ATLAS
-                    buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
-                }
-            }
-        }
-    }
+    private static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
+        switch (op) {
+        case LOAD:
+        case IMPORT:
+            return isPartitionBasedQuery(outputs);
+        case EXPORT:
+            return isPartitionBasedQuery(inputs);
+        case QUERY:
+            return true;
+        }
+        return false;
+    }
+
+    private static boolean isPartitionBasedQuery(Set<? extends Entity> entities) {
+        for (Entity entity : entities) {
+            if (Type.PARTITION.equals(entity.getType())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private static void addInputs(HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) {
+        if (refs != null) {
+            if (sortedInputs != null) {
+                Set<String> dataSetsProcessed = new LinkedHashSet<>();
+                for (Entity input : sortedInputs) {
+                    if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
+                        //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
+                        if (ignoreHDFSPathsInQFName &&
+                            (Type.DFS_DIR.equals(input.getType()) || Type.LOCAL_DIR.equals(input.getType()))) {
+                            LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
+                        } else if (refs.containsKey(input)) {
+                            addDataset(buffer, refs.get(input));
+                        }
+                        dataSetsProcessed.add(input.getName().toLowerCase());
+                    }
+                }
+            }
+        }
+    }
+
+    private static void addDataset(StringBuilder buffer, Referenceable ref) {
+        buffer.append(SEP);
+        String dataSetQlfdName = (String) ref.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
+        // '/' breaks query parsing on ATLAS
+        buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", ""));
+    }
+
+    private static void addOutputs(HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) {
+        if (refs != null) {
+            Set<String> dataSetsProcessed = new LinkedHashSet<>();
+            if (sortedOutputs != null) {
+                for (Entity output : sortedOutputs) {
+                    final Entity entity = output;
+                    if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
+                        //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
+                        if (addQueryType(op, (WriteEntity) entity)) {
+                            buffer.append(SEP);
+                            buffer.append(((WriteEntity) entity).getWriteType().name());
+                        }
+                        if (ignoreHDFSPathsInQFName &&
+                            (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
+                            LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
+                        } else if (refs.containsKey(output)) {
+                            addDataset(buffer, refs.get(output));
+                        }
+                        dataSetsProcessed.add(output.getName().toLowerCase());
+                    }
+                }
+            }
+        }
+    }

-    private static boolean addQueryType(HiveOperation op, Entity entity) {
-        if (WriteEntity.class.isAssignableFrom(entity.getClass())) {
-            if (((WriteEntity) entity).getWriteType() != null && HiveOperation.QUERY.equals(op)) {
-                switch (((WriteEntity) entity).getWriteType()) {
-                case INSERT:
-                case INSERT_OVERWRITE:
-                case UPDATE:
-                case DELETE:
-                case PATH_WRITE:
-                    return true;
-                default:
-                }
-            }
-        }
-        return false;
+    private static boolean addQueryType(HiveOperation op, WriteEntity entity) {
+        if (((WriteEntity) entity).getWriteType() != null &&
+            op.equals(HiveOperation.QUERY)) {
+            switch (((WriteEntity) entity).getWriteType()) {
+            case INSERT:
+            case INSERT_OVERWRITE:
+            case UPDATE:
+            case DELETE:
+                return true;
+            case PATH_WRITE:
+                //Add query type only for DFS paths and ignore local paths since they are not added as outputs
+                if (!Type.LOCAL_DIR.equals(entity.getType())) {
+                    return true;
+                }
+                break;
+            default:
+            }
+        }
+        return false;
     }
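For reference, the ignoreHDFSPathsinQFName logic added above means HDFS paths only survive into the qualified name for LOAD/IMPORT/EXPORT events whose entities are not partitions, and never for QUERY events. A self-contained sketch of that decision follows, with a simplified operation enum and boolean flags standing in for Hive's HiveOperation and the isPartitionBasedQuery() checks; it is an illustration of the rule, not the hook implementation.

    // Illustrative decision sketch: when the commit's HiveHook drops HDFS paths
    // from the process qualified name. Not the actual hook code.
    public class IgnoreHdfsPathsSketch {
        enum Op { LOAD, IMPORT, EXPORT, QUERY, OTHER }

        // partitionInputs/partitionOutputs stand in for isPartitionBasedQuery(inputs/outputs).
        static boolean ignoreHdfsPathsInQualifiedName(Op op, boolean partitionInputs, boolean partitionOutputs) {
            switch (op) {
                case LOAD:
                case IMPORT:
                    return partitionOutputs;  // partition-based load/import: paths ignored
                case EXPORT:
                    return partitionInputs;   // partition-based export: paths ignored
                case QUERY:
                    return true;              // insert/update/delete/path_write queries always ignore paths
                default:
                    return false;             // other operations keep HDFS paths in the name
            }
        }

        public static void main(String[] args) {
            System.out.println(ignoreHdfsPathsInQualifiedName(Op.LOAD, false, false));  // false -> path added to name
            System.out.println(ignoreHdfsPathsInQualifiedName(Op.LOAD, false, true));   // true  -> path ignored
            System.out.println(ignoreHdfsPathsInQualifiedName(Op.QUERY, false, false)); // true
        }
    }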
addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java

@@ -62,15 +62,22 @@ import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.SortedMap;
+import java.util.SortedSet;
 import java.util.TreeMap;
+import java.util.TreeSet;

 import static org.apache.atlas.AtlasClient.NAME;
 import static org.apache.atlas.hive.hook.HiveHook.entityComparator;
 import static org.apache.atlas.hive.hook.HiveHook.getProcessQualifiedName;
 import static org.apache.atlas.hive.hook.HiveHook.lower;
+import static org.apache.atlas.hive.hook.HiveHook.IO_SEP;
+import static org.apache.atlas.hive.hook.HiveHook.SEP;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertTrue;
@@ -82,6 +89,8 @@ public class HiveHookIT {
     private static final String DGI_URL = "http://localhost:21000/";
     private static final String CLUSTER_NAME = "test";
     public static final String DEFAULT_DB = "default";
+    private static final String PART_FILE = "2015-01-01";
+
     private Driver driver;
     private AtlasClient atlasClient;
     private HiveMetaStoreBridge hiveMetaStoreBridge;
@@ -262,7 +271,7 @@ public class HiveHookIT {
         validateHDFSPaths(processReference, INPUTS, pFile);
     }

-    private List<Entity> getInputs(String inputName, Entity.Type entityType) {
+    private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) {
         final ReadEntity entity = new ReadEntity();

         if (Entity.Type.DFS_DIR.equals(entityType)) {
@@ -270,14 +279,13 @@ public class HiveHookIT {
             entity.setTyp(Entity.Type.DFS_DIR);
         } else {
             entity.setName(getQualifiedTblName(inputName));
-            entity.setTyp(Entity.Type.TABLE);
+            entity.setTyp(entityType);
         }

-        return new ArrayList<Entity>() {{ add(entity); }};
+        return new LinkedHashSet<ReadEntity>() {{ add(entity); }};
     }

-    private List<Entity> getOutputs(String inputName, Entity.Type entityType) {
+    private Set<WriteEntity> getOutputs(String inputName, Entity.Type entityType) {
         final WriteEntity entity = new WriteEntity();

         if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
@@ -285,27 +293,32 @@ public class HiveHookIT {
             entity.setTyp(entityType);
         } else {
             entity.setName(getQualifiedTblName(inputName));
-            entity.setTyp(Entity.Type.TABLE);
+            entity.setTyp(entityType);
         }

-        return new ArrayList<Entity>() {{ add(entity); }};
+        return new LinkedHashSet<WriteEntity>() {{ add(entity); }};
     }

-    private void validateOutputTables(Referenceable processReference, List<Entity> expectedTables) throws Exception {
+    private void validateOutputTables(Referenceable processReference, Set<WriteEntity> expectedTables) throws Exception {
         validateTables(processReference, OUTPUTS, expectedTables);
     }

-    private void validateInputTables(Referenceable processReference, List<Entity> expectedTables) throws Exception {
+    private void validateInputTables(Referenceable processReference, Set<ReadEntity> expectedTables) throws Exception {
         validateTables(processReference, INPUTS, expectedTables);
     }

-    private void validateTables(Referenceable processReference, String attrName, List<Entity> expectedTables) throws Exception {
+    private void validateTables(Referenceable processReference, String attrName, Set<? extends Entity> expectedTables) throws Exception {
         List<Id> tableRef = (List<Id>) processReference.get(attrName);
+        Iterator<? extends Entity> iterator = expectedTables.iterator();
         for (int i = 0; i < expectedTables.size(); i++) {
-            Referenceable entity = atlasClient.getEntity(tableRef.get(i)._getId());
-            LOG.debug("Validating output {} {} ", i, entity);
-            Assert.assertEquals(entity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), expectedTables.get(i).getName());
+            Entity hiveEntity = iterator.next();
+            if (Entity.Type.TABLE.equals(hiveEntity.getType()) ||
+                Entity.Type.DFS_DIR.equals(hiveEntity.getType())) {
+                Referenceable entity = atlasClient.getEntity(tableRef.get(i)._getId());
+                LOG.debug("Validating output {} {} ", i, entity);
+                Assert.assertEquals(entity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), hiveEntity.getName());
+            }
         }
     }
@@ -338,18 +351,22 @@ public class HiveHookIT {
         String query = "create table " + ctasTableName + " as select * from " + tableName;
         runCommand(query);

-        final ReadEntity entity = new ReadEntity();
-        entity.setName(getQualifiedTblName(tableName));
-        entity.setTyp(Entity.Type.TABLE);
-        final WriteEntity writeEntity = new WriteEntity();
-        writeEntity.setTyp(Entity.Type.TABLE);
-        writeEntity.setName(getQualifiedTblName(ctasTableName));
-        assertProcessIsRegistered(query, HiveOperation.CREATETABLE_AS_SELECT, new ArrayList<Entity>() {{ add(entity); }}, new ArrayList<Entity>() {{ add(writeEntity); }});
+        final Set<ReadEntity> readEntities = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities, writeEntities));
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);
     }

+    private HiveHook.HiveEventContext constructEvent(String query, HiveOperation op, Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
+        HiveHook.HiveEventContext event = new HiveHook.HiveEventContext();
+        event.setQueryStr(query);
+        event.setOperation(op);
+        event.setInputs(inputs);
+        event.setOutputs(outputs);
+        return event;
+    }
+
     @Test
     public void testDropAndRecreateCTASOutput() throws Exception {
         String tableName = createTable();
@@ -359,10 +376,11 @@ public class HiveHookIT {
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
-        String processId = assertProcessIsRegistered(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
+        final HiveHook.HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
+        String processId = assertProcessIsRegistered(hiveEventContext);

         final String drpquery = String.format("drop table %s ", ctasTableName);
         runCommand(drpquery);
@@ -371,14 +389,13 @@ public class HiveHookIT {
         //Fix after ATLAS-876
         runCommand(query);
         assertTableIsRegistered(DEFAULT_DB, ctasTableName);
-        String process2Id = assertProcessIsRegistered(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
+        String process2Id = assertProcessIsRegistered(hiveEventContext, inputs, outputs);
         Assert.assertEquals(process2Id, processId);

         Referenceable processRef = atlasClient.getEntity(processId);
+        validateInputTables(processRef, inputs);

-        outputs.add(outputs.get(0));
+        outputs.add(outputs.iterator().next());
         validateOutputTables(processRef, outputs);
     }
@@ -389,7 +406,7 @@ public class HiveHookIT {
         String query = "create view " + viewName + " as select * from " + tableName;
         runCommand(query);

-        assertProcessIsRegistered(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
         assertTableIsRegistered(DEFAULT_DB, viewName);
     }
@@ -403,7 +420,7 @@ public class HiveHookIT {
         runCommand(query);

         String table1Id = assertTableIsRegistered(DEFAULT_DB, table1Name);
-        assertProcessIsRegistered(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
         String viewId = assertTableIsRegistered(DEFAULT_DB, viewName);

         //Check lineage which includes table1
@@ -419,7 +436,7 @@ public class HiveHookIT {
         runCommand(query);

         //Check if alter view process is reqistered
-        assertProcessIsRegistered(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name, Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)));
         String table2Id = assertTableIsRegistered(DEFAULT_DB, table2Name);
         Assert.assertEquals(assertTableIsRegistered(DEFAULT_DB, viewName), viewId);
@@ -456,9 +473,7 @@ public class HiveHookIT {
         String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName;
         runCommand(query);

-        List<Entity> outputs = getOutputs(tableName, Entity.Type.TABLE);
-
-        assertProcessIsRegistered(query, HiveOperation.LOAD, null, outputs);
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE)));
     }

     @Test
@@ -466,41 +481,56 @@ public class HiveHookIT {
         String tableName = createTable(true);
         String loadFile = file("load");
-        String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName + " partition(dt = '2015-01-01')";
+        String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
         runCommand(query);

-        validateProcess(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE));
+        assertProcessIsRegistered(constructEvent(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE)));
     }

     @Test
-    public void testLoadDFSPath() throws Exception {
+    public void testLoadDFSPathPartitioned() throws Exception {
         String tableName = createTable(true, true, false);
-        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
+        assertTableIsRegistered(DEFAULT_DB, tableName);

-        String loadFile = createTestDFSFile("loadDFSFile");
-        String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '2015-01-01')";
+        final String loadFile = createTestDFSFile("loadDFSFile");
+        String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
         runCommand(query);

-        final List<Entity> outputs = getOutputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
+        final Set<ReadEntity> inputs = getInputs(loadFile, Entity.Type.DFS_DIR);

-        Referenceable processReference = validateProcess(query, HiveOperation.LOAD, getInputs(loadFile, Entity.Type.DFS_DIR), outputs);
-        validateHDFSPaths(processReference, INPUTS, loadFile);
+        final Set<WriteEntity> partitionOps = new LinkedHashSet<>(outputs);
+        partitionOps.addAll(getOutputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION));

+        Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.LOAD, inputs, partitionOps), inputs, outputs);
+        validateHDFSPaths(processReference, INPUTS, loadFile);
         validateOutputTables(processReference, outputs);

+        final String loadFile2 = createTestDFSFile("loadDFSFile1");
+        query = "load data inpath '" + loadFile2 + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
+        runCommand(query);
+
+        Set<ReadEntity> process2Inputs = getInputs(loadFile2, Entity.Type.DFS_DIR);
+        Set<ReadEntity> expectedInputs = new LinkedHashSet<>();
+        expectedInputs.addAll(process2Inputs);
+        expectedInputs.addAll(inputs);
+
+        validateProcess(constructEvent(query, HiveOperation.LOAD, expectedInputs, partitionOps), expectedInputs, outputs);
     }

     private String getQualifiedTblName(String inputTable) {
         String inputtblQlfdName = inputTable;

-        if (inputTable != null && !inputTable.contains(".")) {
+        if (inputTable != null && !inputTable.contains("@")) {
             inputtblQlfdName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, inputTable);
         }
         return inputtblQlfdName;
     }

-    private Referenceable validateProcess(String query, HiveOperation op, List<Entity> inputTables, List<Entity> outputTables) throws Exception {
-        String processId = assertProcessIsRegistered(query, op, inputTables, outputTables);
+    private Referenceable validateProcess(HiveHook.HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
+        String processId = assertProcessIsRegistered(event, inputTables, outputTables);
         Referenceable process = atlasClient.getEntity(processId);
         if (inputTables == null) {
             Assert.assertNull(process.get(INPUTS));
@@ -519,25 +549,47 @@ public class HiveHookIT {
         return process;
     }

+    private Referenceable validateProcess(HiveHook.HiveEventContext event) throws Exception {
+        return validateProcess(event, event.getInputs(), event.getOutputs());
+    }
+
     @Test
     public void testInsertIntoTable() throws Exception {
-        String tableName = createTable();
+        String inputTable1Name = createTable();
+        String inputTable2Name = createTable();
         String insertTableName = createTable();
-        assertTableIsRegistered(DEFAULT_DB, tableName);
+        assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
         assertTableIsRegistered(DEFAULT_DB, insertTableName);

-        String query = "insert into " + insertTableName + " select id, name from " + tableName;
+        String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
-        ((WriteEntity) outputs.get(0)).setWriteType(WriteEntity.WriteType.INSERT);
+        final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
+        inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
+
+        Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
+        (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
+
+        HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
+        Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {{
+            addAll(inputs);
+        }};
+        assertTableIsRegistered(DEFAULT_DB, insertTableName);

-        Referenceable processRef1 = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
+
+        //Test sorting of tbl names
+        SortedSet<String> sortedTblNames = new TreeSet<>();
+        sortedTblNames.add(getQualifiedTblName(inputTable1Name));
+        sortedTblNames.add(getQualifiedTblName(inputTable2Name));
+
+        //Verify sorted orer of inputs in qualified name
+        Assert.assertEquals(
+            Joiner.on(SEP).join("QUERY", sortedTblNames.first(), sortedTblNames.last()) + IO_SEP + SEP
+                + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(), getQualifiedTblName(insertTableName)),
+            processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME));

         //Rerun same query. Should result in same process
         runCommandWithDelay(query, 1000);
-        Referenceable processRef2 = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
         Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
     }
@@ -550,7 +602,7 @@ public class HiveHookIT {
             "insert overwrite LOCAL DIRECTORY '" + randomLocalPath.getAbsolutePath() + "' select id, name from " + tableName;
         runCommand(query);
-        validateProcess(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null);
+        validateProcess(constructEvent(query, HiveOperation.QUERY, getInputs(tableName, Entity.Type.TABLE), null));

         assertTableIsRegistered(DEFAULT_DB, tableName);
     }
@@ -564,72 +616,78 @@ public class HiveHookIT {
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        final List<Entity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
-        ((WriteEntity) outputs.get(0)).setWriteType(WriteEntity.WriteType.PATH_WRITE);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
+        ((WriteEntity) outputs.iterator().next()).setWriteType(WriteEntity.WriteType.PATH_WRITE);

-        Referenceable processReference = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        final HiveHook.HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
+        Referenceable processReference = validateProcess(hiveEventContext);
         validateHDFSPaths(processReference, OUTPUTS, pFile1);

         String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
         validateInputTables(processReference, inputs);

         //Rerun same query with same HDFS path
-        runCommand(query);
-        Referenceable process2Reference = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        runCommandWithDelay(query, 1000);
+        assertTableIsRegistered(DEFAULT_DB, tableName);
+        Referenceable process2Reference = validateProcess(hiveEventContext);
         validateHDFSPaths(process2Reference, OUTPUTS, pFile1);

         Assert.assertEquals(process2Reference.getId()._getId(), processReference.getId()._getId());

-        //Rerun same query with a new HDFS path. Will result in same process since HDFS paths are not part of qualifiedName.
+        //Rerun same query with a new HDFS path. Will result in same process since HDFS paths is not part of qualified name for QUERY operations
         final String pFile2 = createTestDFSPath("somedfspath2");
         query = "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName;
-        runCommand(query);
-        List<Entity> p3Outputs = new ArrayList<Entity>() {{
+        runCommandWithDelay(query, 1000);
+        assertTableIsRegistered(DEFAULT_DB, tableName);
+        Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {{
             addAll(getOutputs(pFile2, Entity.Type.DFS_DIR));
             addAll(outputs);
         }};

-        Referenceable process3Reference = validateProcess(query, HiveOperation.QUERY, inputs, p3Outputs);
+        Referenceable process3Reference = validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, p3Outputs));
         validateHDFSPaths(process3Reference, OUTPUTS, pFile2);

         Assert.assertEquals(process3Reference.getId()._getId(), processReference.getId()._getId());
     }

     @Test
-    public void testInsertIntoDFSDir() throws Exception {
-        String tableName = createTable();
+    public void testInsertIntoDFSDirPartitioned() throws Exception {
+
+        //Test with partitioned table
+        String tableName = createTable(true);
         String pFile1 = createTestDFSPath("somedfspath1");
         String query =
-            "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName;
+            "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";

         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        final List<Entity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
-        ((WriteEntity) outputs.get(0)).setWriteType(WriteEntity.WriteType.PATH_WRITE);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
+        ((WriteEntity) outputs.iterator().next()).setWriteType(WriteEntity.WriteType.PATH_WRITE);

-        Referenceable processReference = validateProcess(query, HiveOperation.QUERY, inputs, outputs);
-        validateHDFSPaths(processReference, OUTPUTS, pFile1);
-
-        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
-        validateInputTables(processReference, inputs);
+        final Set<ReadEntity> partitionIps = new LinkedHashSet<>(inputs);
+        partitionIps.addAll(getInputs(DEFAULT_DB + "@" + tableName + "@dt='" + PART_FILE + "'", Entity.Type.PARTITION));
+
+        Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, outputs), inputs, outputs);

-        //Rerun same query with different HDFS path. Should not create another process and should update it.
+        //Rerun same query with different HDFS path
         final String pFile2 = createTestDFSPath("somedfspath2");
         query =
-            "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName;
+            "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
         runCommand(query);

-        List<Entity> p2Outputs = new ArrayList<Entity>() {{
-            addAll(getOutputs(pFile2, Entity.Type.DFS_DIR));
+        final Set<WriteEntity> pFile2Outputs = getOutputs(pFile2, Entity.Type.DFS_DIR);
+        ((WriteEntity) pFile2Outputs.iterator().next()).setWriteType(WriteEntity.WriteType.PATH_WRITE);
+        //Now the process has 2 paths - one older with deleted reference to partition and another with the the latest partition
+        Set<WriteEntity> p2Outputs = new LinkedHashSet<WriteEntity>() {{
+            addAll(pFile2Outputs);
             addAll(outputs);
         }};

-        Referenceable process2Reference = validateProcess(query, HiveOperation.QUERY, inputs, p2Outputs);
+        Referenceable process2Reference = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, pFile2Outputs), inputs, p2Outputs);
         validateHDFSPaths(process2Reference, OUTPUTS, pFile2);

         Assert.assertEquals(process2Reference.getId()._getId(), processReference.getId()._getId());
@@ -647,12 +705,12 @@ public class HiveHookIT {
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
-        outputs.get(0).setName(getQualifiedTblName(insertTableName + HiveMetaStoreBridge.TEMP_TABLE_PREFIX + SessionState.get().getSessionId()));
-        ((WriteEntity) outputs.get(0)).setWriteType(WriteEntity.WriteType.INSERT);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
+        outputs.iterator().next().setName(getQualifiedTblName(insertTableName + HiveMetaStoreBridge.TEMP_TABLE_PREFIX + SessionState.get().getSessionId()));
+        ((WriteEntity) outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);

-        validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, outputs));

         assertTableIsRegistered(DEFAULT_DB, tableName);
         assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
@@ -660,21 +718,40 @@ public class HiveHookIT {
     @Test
     public void testInsertIntoPartition() throws Exception {
-        String tableName = createTable(true);
-        String insertTableName = createTable(true);
+        final boolean isPartitionedTable = true;
+        String tableName = createTable(isPartitionedTable);
+        String insertTableName = createTable(isPartitionedTable);
         String query =
-            "insert into " + insertTableName + " partition(dt = '2015-01-01') select id, name from " + tableName
-                + " where dt = '2015-01-01'";
+            "insert into " + insertTableName + " partition(dt = '" + PART_FILE + "') select id, name from " + tableName
+                + " where dt = '" + PART_FILE + "'";
         runCommand(query);

-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
-        ((WriteEntity) outputs.get(0)).setWriteType(WriteEntity.WriteType.INSERT);
+        final Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
+        ((WriteEntity) outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
+
+        final Set<ReadEntity> partitionIps = new LinkedHashSet<ReadEntity>() {
+            {
+                addAll(inputs);
+                add(getPartitionInput());
+            }
+        };
+
+        final Set<WriteEntity> partitionOps = new LinkedHashSet<WriteEntity>() {
+            {
+                addAll(outputs);
+                add(getPartitionOutput());
+            }
+        };

-        validateProcess(query, HiveOperation.QUERY, inputs, outputs);
+        validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps), inputs, outputs);

         assertTableIsRegistered(DEFAULT_DB, tableName);
         assertTableIsRegistered(DEFAULT_DB, insertTableName);
+
+        //TODO - update
     }

     private String random() {
@@ -701,65 +778,111 @@ public class HiveHookIT {
...
@@ -701,65 +778,111 @@ public class HiveHookIT {
         assertTableIsRegistered(DEFAULT_DB, tableName);
-        String filename = "pfile://" + mkdir("export");
+        String filename = "pfile://" + mkdir("exportUnPartitioned");
         String query = "export table " + tableName + " to \"" + filename + "\"";
         runCommand(query);
-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
-        Referenceable processReference = validateProcess(query, HiveOperation.EXPORT, inputs, outputs);
+        Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
         validateHDFSPaths(processReference, OUTPUTS, filename);
         validateInputTables(processReference, inputs);

         //Import
-        tableName = createTable(false);
-        assertTableIsRegistered(DEFAULT_DB, tableName);
-        query = "import table " + tableName + " from '" + filename + "'";
+        String importTableName = createTable(false);
+        assertTableIsRegistered(DEFAULT_DB, importTableName);
+        query = "import table " + importTableName + " from '" + filename + "'";
         runCommand(query);
-        outputs = getOutputs(tableName, Entity.Type.TABLE);
-        processReference = validateProcess(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs);
-        validateHDFSPaths(processReference, INPUTS, filename);
-        validateOutputTables(processReference, outputs);
+        outputs = getOutputs(importTableName, Entity.Type.TABLE);
+        validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));

+        //Should create another process
+        filename = "pfile://" + mkdir("export2UnPartitioned");
+        query = "export table " + tableName + " to \"" + filename + "\"";
+        runCommand(query);
+        inputs = getInputs(tableName, Entity.Type.TABLE);
+        outputs = getOutputs(filename, Entity.Type.DFS_DIR);
+        validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
+        //import again should create another process
+        query = "import table " + importTableName + " from '" + filename + "'";
+        runCommand(query);
+        outputs = getOutputs(importTableName, Entity.Type.TABLE);
+        validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
    }
    @Test
    public void testExportImportPartitionedTable() throws Exception {
-        String tableName = createTable(true);
-        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
+        boolean isPartitionedTable = true;
+        final String tableName = createTable(isPartitionedTable);
+        assertTableIsRegistered(DEFAULT_DB, tableName);

         //Add a partition
         String partFile = "pfile://" + mkdir("partition");
-        String query = "alter table " + tableName + " add partition (dt='2015-01-01') location '" + partFile + "'";
+        String query = "alter table " + tableName + " add partition (dt='" + PART_FILE + "') location '" + partFile + "'";
         runCommand(query);

         String filename = "pfile://" + mkdir("export");
         query = "export table " + tableName + " to \"" + filename + "\"";
         runCommand(query);
-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        List<Entity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
-        Referenceable processReference = validateProcess(query, HiveOperation.EXPORT, inputs, outputs);
-        validateHDFSPaths(processReference, OUTPUTS, filename);
-        validateInputTables(processReference, inputs);
+        final Set<ReadEntity> expectedExportInputs = getInputs(tableName, Entity.Type.TABLE);
+        final Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
+        //Note that export has only partition as input in this case
+        final Set<ReadEntity> partitionIps = getInputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
+        partitionIps.addAll(expectedExportInputs);
+        Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
+        validateHDFSPaths(processReference, OUTPUTS, filename);

         //Import
-        tableName = createTable(true);
-        tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
-        query = "import table " + tableName + " from '" + filename + "'";
+        String importTableName = createTable(true);
+        assertTableIsRegistered(DEFAULT_DB, tableName);
+        query = "import table " + importTableName + " from '" + filename + "'";
         runCommand(query);
-        outputs = getOutputs(tableName, Entity.Type.TABLE);
-        processReference = validateProcess(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs);
-        validateHDFSPaths(processReference, INPUTS, filename);
-        validateOutputTables(processReference, outputs);
+        final Set<ReadEntity> expectedImportInputs = getInputs(filename, Entity.Type.DFS_DIR);
+        final Set<WriteEntity> importOutputs = getOutputs(importTableName, Entity.Type.TABLE);
+        final Set<WriteEntity> partitionOps = getOutputs(DEFAULT_DB + "@" + importTableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
+        partitionOps.addAll(importOutputs);
+        validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps), expectedImportInputs, importOutputs);

+        //Export should update same process
+        filename = "pfile://" + mkdir("export2");
+        query = "export table " + tableName + " to \"" + filename + "\"";
+        runCommand(query);
+        final Set<WriteEntity> outputs2 = getOutputs(filename, Entity.Type.DFS_DIR);
+        Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {{
+            addAll(outputs2);
+            addAll(outputs);
+        }};
+        validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);

+        query = "alter table " + importTableName + " drop partition (dt='" + PART_FILE + "')";
+        runCommand(query);

+        //Import should update same process
+        query = "import table " + importTableName + " from '" + filename + "'";
+        runCommandWithDelay(query, 1000);
+        final Set<ReadEntity> importInputs = getInputs(filename, Entity.Type.DFS_DIR);
+        final Set<ReadEntity> expectedImport2Inputs = new LinkedHashSet<ReadEntity>() {{
+            addAll(importInputs);
+            addAll(expectedImportInputs);
+        }};
+        validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
    }
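Per the commit message, HDFS paths now feed into the process qualified name for non-partition based queries, which is why the unpartitioned test above expects a fresh process for each new export directory while the partitioned test expects the same process to be updated. A rough, hypothetical illustration of assembling such a name from an operation plus sorted dataset names; this is not the actual HiveHook.getProcessQualifiedName implementation, only the idea the assertions exercise:

    // Illustration only: sorted, lower-cased dataset names (tables or HDFS paths)
    // folded into one deterministic string, so a different target directory
    // produces a different process qualified name.
    private static String illustrateProcessQualifiedName(String operation,
            SortedSet<String> inputNames, SortedSet<String> outputNames) {
        StringBuilder sb = new StringBuilder(operation.toLowerCase());
        sb.append("->");
        for (String in : inputNames) {      // e.g. "default.t1@primary" or "pfile:///tmp/export2"
            sb.append(':').append(in.toLowerCase());
        }
        sb.append("->");
        for (String out : outputNames) {
            sb.append(':').append(out.toLowerCase());
        }
        return sb.toString();
    }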
    @Test
...
@@ -767,13 +890,14 @@ public class HiveHookIT {
         String tableName = createTable();
         String query = "select * from " + tableName;
         runCommand(query);
-        List<Entity> inputs = getInputs(tableName, Entity.Type.TABLE);
-        assertProcessIsNotRegistered(query, HiveOperation.QUERY, inputs, null);
+        Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
+        HiveHook.HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, null);
+        assertProcessIsNotRegistered(hiveEventContext);

         //check with uppercase table name
         query = "SELECT * from " + tableName.toUpperCase();
         runCommand(query);
-        assertProcessIsNotRegistered(query, HiveOperation.QUERY, inputs, null);
+        assertProcessIsNotRegistered(hiveEventContext);
    }
    @Test
...
@@ -1042,10 +1166,10 @@ public class HiveHookIT {
         String query = String.format("truncate table %s", tableName);
         runCommand(query);
-        List<Entity> outputs = getInputs(tableName, Entity.Type.TABLE);
+        Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);

         String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
-        validateProcess(query, HiveOperation.TRUNCATETABLE, null, outputs);
+        validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));

         //Check lineage
         String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
...
@@ -1144,7 +1268,7 @@ public class HiveHookIT {
         String query = "alter table " + tableName + " set location '" + testPath + "'";
         runCommand(query);
-        String tableId = assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
+        assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
             @Override
             public void assertOnEntity(Referenceable tableRef) throws Exception {
                 Referenceable sdRef = (Referenceable) tableRef.get(HiveDataModelGenerator.STORAGE_DESC);
...
@@ -1152,10 +1276,11 @@ public class HiveHookIT {
             }
         });
-        List<Entity> inputs = getInputs(testPath, Entity.Type.DFS_DIR);
-        List<Entity> outputs = getOutputs(tableName, Entity.Type.TABLE);
-        Referenceable processReference = validateProcess(query, HiveOperation.ALTERTABLE_LOCATION, inputs, outputs);
+        String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+            HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName, false), null);
+        Referenceable processReference = atlasClient.getEntity(processId);
         validateHDFSPaths(processReference, INPUTS, testPath);
    }
...
@@ -1302,6 +1427,20 @@ public class HiveHookIT {
         assertTableIsNotRegistered(DEFAULT_DB, tableName);
     }

+    private WriteEntity getPartitionOutput() {
+        WriteEntity partEntity = new WriteEntity();
+        partEntity.setName(PART_FILE);
+        partEntity.setTyp(Entity.Type.PARTITION);
+        return partEntity;
+    }
+
+    private ReadEntity getPartitionInput() {
+        ReadEntity partEntity = new ReadEntity();
+        partEntity.setName(PART_FILE);
+        partEntity.setTyp(Entity.Type.PARTITION);
+        return partEntity;
+    }
+
    @Test
    public void testDropDatabaseWithCascade() throws Exception {
        //Test Deletion of database and its corresponding tables
...
@@ -1550,26 +1689,66 @@ public class HiveHookIT {
        }
    }

-    private String assertProcessIsRegistered(final String queryStr, HiveOperation op, final List<Entity> inputTbls, final List<Entity> outputTbls) throws Exception {
-        String processQFName = getProcessQualifiedName(op, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
-        LOG.debug("Searching for process with query {}", processQFName);
-        return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
-            @Override
-            public void assertOnEntity(final Referenceable entity) throws Exception {
-                List<String> recentQueries = (List<String>) entity.get("recentQueries");
-                Assert.assertEquals(recentQueries.get(0), lower(queryStr));
-            }
-        });
+    private String assertProcessIsRegistered(final HiveHook.HiveEventContext event) throws Exception {
+        try {
+            SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
+            SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
+            if (event.getInputs() != null) {
+                sortedHiveInputs.addAll(event.getInputs());
+            }
+            if (event.getOutputs() != null) {
+                sortedHiveOutputs.addAll(event.getOutputs());
+            }
+            String processQFName = getProcessQualifiedName(event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
+            LOG.debug("Searching for process with query {}", processQFName);
+            return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
+                @Override
+                public void assertOnEntity(final Referenceable entity) throws Exception {
+                    List<String> recentQueries = (List<String>) entity.get("recentQueries");
+                    Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
+                }
+            });
+        } catch(Exception e) {
+            LOG.error("Exception : ", e);
+            throw e;
+        }
    }
+    private String assertProcessIsRegistered(final HiveHook.HiveEventContext event, final Set<ReadEntity> inputTbls, final Set<WriteEntity> outputTbls) throws Exception {
+        try {
+            SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
+            SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
+            if (event.getInputs() != null) {
+                sortedHiveInputs.addAll(event.getInputs());
+            }
+            if (event.getOutputs() != null) {
+                sortedHiveOutputs.addAll(event.getOutputs());
+            }
+            String processQFName = getProcessQualifiedName(event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
+            LOG.debug("Searching for process with query {}", processQFName);
+            return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
+                @Override
+                public void assertOnEntity(final Referenceable entity) throws Exception {
+                    List<String> recentQueries = (List<String>) entity.get("recentQueries");
+                    Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
+                }
+            });
+        } catch(Exception e) {
+            LOG.error("Exception : ", e);
+            throw e;
+        }
+    }
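Both asserts above order the Hive ReadEntity/WriteEntity objects with an entityComparator before computing the expected process qualified name. The comparator itself is outside this excerpt; a minimal sketch of one that orders entities by name (an assumption about its behavior), which is enough to make the TreeSet/TreeMap iteration order, and hence the computed name, deterministic:

    // Sketch of an entity comparator like the one referenced above; the real
    // entityComparator in HiveHookIT is not shown in this diff. Requires
    // java.util.Comparator and org.apache.hadoop.hive.ql.hooks.Entity.
    private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
        @Override
        public int compare(Entity o1, Entity o2) {
            String n1 = o1.getName();
            String n2 = o2.getName();
            if (n1 == null && n2 == null) {
                return 0;
            }
            if (n1 == null) {
                return -1;
            }
            if (n2 == null) {
                return 1;
            }
            // case-insensitive ordering so "T1" and "t1" sort together
            return n1.toLowerCase().compareTo(n2.toLowerCase());
        }
    };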
    private String getDSTypeName(Entity entity) {
        return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : FSDataTypes.HDFS_PATH().toString();
    }
-    private SortedMap<Entity, Referenceable> getSortedProcessDataSets(List<Entity> inputTbls) {
-        SortedMap<Entity, Referenceable> inputs = new TreeMap<Entity, Referenceable>(entityComparator);
+    private <T extends Entity> SortedMap<T, Referenceable> getSortedProcessDataSets(Set<T> inputTbls) {
+        SortedMap<T, Referenceable> inputs = new TreeMap<T, Referenceable>(entityComparator);
         if (inputTbls != null) {
-            for (final Entity tbl : inputTbls) {
+            for (final T tbl : inputTbls) {
                 Referenceable inputTableRef = new Referenceable(getDSTypeName(tbl), new HashMap<String, Object>() {{
                     put(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tbl.getName());
                 }});
...
@@ -1579,10 +1758,22 @@ public class HiveHookIT {
        return inputs;
    }
-    private void assertProcessIsNotRegistered(String queryStr, HiveOperation op, final List<Entity> inputTbls, final List<Entity> outputTbls) throws Exception {
-        String processQFName = getProcessQualifiedName(op, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
-        LOG.debug("Searching for process with query {}", processQFName);
-        assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName);
+    private void assertProcessIsNotRegistered(HiveHook.HiveEventContext event) throws Exception {
+        try {
+            SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
+            SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
+            if (event.getInputs() != null) {
+                sortedHiveInputs.addAll(event.getInputs());
+            }
+            if (event.getOutputs() != null) {
+                sortedHiveOutputs.addAll(event.getOutputs());
+            }
+            String processQFName = getProcessQualifiedName(event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
+            LOG.debug("Searching for process with query {}", processQFName);
+            assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName);
+        } catch(Exception e) {
+            LOG.error("Exception : ", e);
+        }
    }
    private void assertTableIsNotRegistered(String dbName, String tableName, boolean isTemporaryTable) throws Exception {
...
notification/src/main/java/org/apache/atlas/hook/AtlasHook.java
View file @ f51c8861
...
@@ -128,7 +128,7 @@ public abstract class AtlasHook {
        } catch (Exception e) {
            numRetries++;
            if (numRetries < maxRetries) {
-                LOG.debug("Failed to notify atlas for entity {}. Retrying", message, e);
+                LOG.info("Failed to notify atlas for entity {}. Retrying", message, e);
            } else {
                if (shouldLogFailedMessages && e instanceof NotificationException) {
                    List<String> failedMessages = ((NotificationException) e).getFailedMessages();
...
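The AtlasHook hunk only raises the retry message from debug to info level. For context, a hedged sketch of the kind of bounded-retry loop this fragment sits in; the method name notifyWithRetries, the sendToAtlas call, and the surrounding control flow are assumptions for illustration, not the verbatim AtlasHook code:

    // Sketch of a bounded-retry notification loop consistent with the fragment above.
    void notifyWithRetries(List<String> messages, int maxRetries) {
        int numRetries = 0;
        while (true) {
            try {
                sendToAtlas(messages);   // hypothetical transport call; may throw NotificationException
                return;                  // success, stop retrying
            } catch (Exception e) {
                numRetries++;
                if (numRetries < maxRetries) {
                    LOG.info("Failed to notify atlas for entity {}. Retrying", messages, e);
                } else {
                    if (shouldLogFailedMessages && e instanceof NotificationException) {
                        List<String> failedMessages = ((NotificationException) e).getFailedMessages();
                        // hand failedMessages to a failed-message log for later replay
                    }
                    return;              // retries exhausted, give up
                }
            }
        }
    }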
release-log.txt
View file @ f51c8861
...
@@ -7,6 +7,7 @@ INCOMPATIBLE CHANGES:
ALL CHANGES:
ATLAS-966 Exit execution of import_hive.sh if HIVE_HOME is not set (svimal2106 via sumasai)
+ATLAS-917 Add hdfs paths to process qualified name for non-partition based queries (sumasai)
--Release 0.7-incubating
...