Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
f7df0f1b
Commit
f7df0f1b
authored
Oct 13, 2019
by
Sid
Committed by
Sarath Subramanian
Oct 23, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ATLAS-3461: Changed from hardcoded match to pattern-based regex match ATLAS-3461: UT Added
Signed-off-by:
Sarath Subramanian
<
sarath@apache.org
>
parent
848c799e
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
402 additions
and
13 deletions
+402
-13
ImpalaOperationParser.java
...a/org/apache/atlas/impala/hook/ImpalaOperationParser.java
+25
-13
ImpalaLineageToolIT.java
...est/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
+176
-0
impalaAlterViewAsSelectWithCommentSpaces.json
...t/resources/impalaAlterViewAsSelectWithCommentSpaces.json
+67
-0
impalaCreateTableAsSelectWithCommentSpaces.json
...resources/impalaCreateTableAsSelectWithCommentSpaces.json
+67
-0
impalaCreateViewWithCommentSpaces.json
...src/test/resources/impalaCreateViewWithCommentSpaces.json
+67
-0
No files found.
addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
View file @
f7df0f1b
...
@@ -20,32 +20,40 @@ package org.apache.atlas.impala.hook;
...
@@ -20,32 +20,40 @@ package org.apache.atlas.impala.hook;
import
org.apache.atlas.impala.model.ImpalaOperationType
;
import
org.apache.atlas.impala.model.ImpalaOperationType
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.commons.lang.StringUtils
;
import
java.util.regex.Pattern
;
/**
/**
* Parse an Impala query text and output the impala operation type
* Parse an Impala query text and output the impala operation type
*/
*/
public
class
ImpalaOperationParser
{
public
class
ImpalaOperationParser
{
private
static
final
Pattern
COMMENT_PATTERN
=
Pattern
.
compile
(
"/\\*.*?\\*/"
,
Pattern
.
DOTALL
);
private
static
final
Pattern
CREATE_VIEW_PATTERN
=
Pattern
.
compile
(
"^[ ]*\\bcreate\\b.*\\bview\\b.*"
,
Pattern
.
DOTALL
|
Pattern
.
CASE_INSENSITIVE
);
private
static
final
Pattern
CREATE_TABLE_AS_SELECT_PATTERN
=
Pattern
.
compile
(
"^[ ]*\\bcreate\\b.*\\btable\\b.*\\bas\\b.*\\bselect\\b.*"
,
Pattern
.
DOTALL
|
Pattern
.
CASE_INSENSITIVE
);
private
static
final
Pattern
ALTER_VIEW_AS_SELECT_PATTERN
=
Pattern
.
compile
(
"^[ ]*\\balter\\b.*\\bview\\b.*\\bas.*\\bselect\\b.*"
,
Pattern
.
DOTALL
|
Pattern
.
CASE_INSENSITIVE
);
private
static
final
Pattern
INSERT_SELECT_FROM_PATTERN
=
Pattern
.
compile
(
"^[ ]*\\binsert\\b.*\\b(into|overwrite)\\b.*\\bselect\\b.*\\bfrom\\b.*"
,
Pattern
.
DOTALL
|
Pattern
.
CASE_INSENSITIVE
);
public
ImpalaOperationParser
()
{
public
ImpalaOperationParser
()
{
}
}
public
static
ImpalaOperationType
getImpalaOperationType
(
String
queryText
)
{
public
static
ImpalaOperationType
getImpalaOperationType
(
String
queryText
)
{
// Impala does not generate lineage record for command "LOAD DATA INPATH"
// Impala does not generate lineage record for command "LOAD DATA INPATH"
if
(
StringUtils
.
startsWithIgnoreCase
(
queryText
,
"create view"
))
{
String
queryTextWithNoComments
=
COMMENT_PATTERN
.
matcher
(
queryText
).
replaceAll
(
""
);
if
(
doesMatch
(
queryTextWithNoComments
,
CREATE_VIEW_PATTERN
))
{
return
ImpalaOperationType
.
CREATEVIEW
;
return
ImpalaOperationType
.
CREATEVIEW
;
}
else
if
(
StringUtils
.
startsWithIgnoreCase
(
queryText
,
"create table"
)
&&
}
else
if
(
doesMatch
(
queryTextWithNoComments
,
CREATE_TABLE_AS_SELECT_PATTERN
))
{
StringUtils
.
containsIgnoreCase
(
queryText
,
"as select"
))
{
return
ImpalaOperationType
.
CREATETABLE_AS_SELECT
;
return
ImpalaOperationType
.
CREATETABLE_AS_SELECT
;
}
else
if
(
StringUtils
.
startsWithIgnoreCase
(
queryText
,
"alter view"
)
&&
}
else
if
(
doesMatch
(
queryTextWithNoComments
,
ALTER_VIEW_AS_SELECT_PATTERN
))
{
StringUtils
.
containsIgnoreCase
(
queryText
,
"as select"
))
{
return
ImpalaOperationType
.
ALTERVIEW_AS
;
return
ImpalaOperationType
.
ALTERVIEW_AS
;
}
else
if
(
StringUtils
.
containsIgnoreCase
(
queryText
,
"insert into"
)
&&
}
else
if
(
doesMatch
(
queryTextWithNoComments
,
INSERT_SELECT_FROM_PATTERN
))
{
StringUtils
.
containsIgnoreCase
(
queryText
,
"select"
)
&&
StringUtils
.
containsIgnoreCase
(
queryText
,
"from"
))
{
return
ImpalaOperationType
.
QUERY
;
}
else
if
(
StringUtils
.
containsIgnoreCase
(
queryText
,
"insert overwrite"
)
&&
StringUtils
.
containsIgnoreCase
(
queryText
,
"select"
)
&&
StringUtils
.
containsIgnoreCase
(
queryText
,
"from"
))
{
return
ImpalaOperationType
.
QUERY
;
return
ImpalaOperationType
.
QUERY
;
}
}
...
@@ -64,5 +72,8 @@ public class ImpalaOperationParser {
...
@@ -64,5 +72,8 @@ public class ImpalaOperationParser {
return
ImpalaOperationType
.
UNKNOWN
;
return
ImpalaOperationType
.
UNKNOWN
;
}
}
/**
 * Checks a query text against one of the precompiled operation patterns.
 *
 * @param queryText the text to test
 * @param pattern   the compiled pattern to apply
 * @return {@code true} only when the entire {@code queryText} matches {@code pattern}
 *         ({@link java.util.regex.Matcher#matches()} semantics, not a partial find)
 */
private static boolean doesMatch(final String queryText, final Pattern pattern) {
    final boolean entireTextMatches = pattern.matcher(queryText).matches();

    return entireTextMatches;
}
}
}
\ No newline at end of file
addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
View file @
f7df0f1b
...
@@ -102,6 +102,68 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
...
@@ -102,6 +102,68 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
}
/**
/**
* This test is for a create view query with extra comments and spaces added in between:
* 1) ImpalaLineageTool can parse one lineage file that contains " create view" command lineage
* 2) Lineage is sent to Atlas
* 3) Atlas can get this lineage from Atlas
*/
@Test
public void testCreateViewWithCommentSpacesFromFile() throws Exception {
    // Exceptions are propagated instead of being caught and printed, so that a failure
    // during setup, import or verification fails the test rather than passing silently.

    // this file contains a single lineage record for "create view".
    // It has table vertex with createTime
    String IMPALA     = dir + "impalaCreateViewWithCommentSpaces.json";
    String IMPALA_WAL = dir + "WALimpala.wal";

    ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();

    // create database and tables to simulate Impala behavior that Impala updates metadata
    // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
    // Atlas can handle lineage notification
    String dbName = "db_8";
    createDatabase(dbName);

    String sourceTableName = "table_1";
    createTable(dbName, sourceTableName, "(id string, count int)", false);

    String targetTableName = "view_1";
    createTable(dbName, targetTableName, "(count int, id string)", false);

    // process lineage record, and send corresponding notification to Atlas
    String[] args = new String[]{"-d", "./", "-p", "impala"};
    ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
    toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);

    // verify the process is saved in Atlas
    // 1554750072 is the tableCreateTime recorded for db_8.view_1 in the lineage file;
    // it is multiplied by 1000 (presumably seconds -> milliseconds; confirm against the hook)
    String createTime = Long.toString(1554750072L * 1000);
    String processQFName =
        "db_8.view_1" + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
            CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;

    processQFName = processQFName.toLowerCase();

    // must equal the queryText stored in the lineage file, including the leading space and comments
    String queryString = " create   /* comment1 */ view db_8.view_1 as   select /* comment2 */ count, id from db_8.table_1";
    AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
    AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
    AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
        BaseImpalaEvent.ATTRIBUTE_PROCESS));

    // the execution must point back at the process, and there must be exactly one execution
    Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);

    // the target view must carry exactly one DDL query
    String guid = assertTableIsRegistered(dbName, targetTableName);
    AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
    List<?> ddlQueries = (List<?>) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);

    assertNotNull(ddlQueries);
    assertEquals(ddlQueries.size(), 1);
}
/**
* This tests
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage,
* 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage,
* but there is no table vertex with createTime.
* but there is no table vertex with createTime.
...
@@ -232,6 +294,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
...
@@ -232,6 +294,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
}
/**
/**
* This test covers a create table as select query with extra comments and spaces added
* 1) ImpalaLineageTool can parse one lineage file that contains "create table as select" command lineage,
* there is table vertex with createTime.
* 2) Lineage is sent to Atlas
* 3) Atlas can get this lineage from Atlas
*/
@Test
public void testCreateTableAsSelectWithCommentSpacesFromFile() throws Exception {
    final String lineageFile = dir + "impalaCreateTableAsSelectWithCommentSpaces.json";
    final String walFile     = dir + "WALimpala.wal";

    final ImpalaLineageHook hook = new ImpalaLineageHook();

    // Register the database and both tables first: this stands in for Impala updating HMS
    // and the HMS hook notifying Atlas, which must precede any lineage notification.
    final String dbName = "db_9";
    createDatabase(dbName);

    final String sourceTableName = "table_1";
    createTable(dbName, sourceTableName, "(id string, count int)", false);

    final String targetTableName = "table_2";
    createTable(dbName, targetTableName, "(count int, id string)", false);

    // Feed the lineage record through the tool so the notification reaches Atlas.
    final String[] toolArgs = {"-d", "./", "-p", "impala"};
    final ImpalaLineageTool tool = new ImpalaLineageTool(toolArgs);
    tool.importHImpalaEntities(hook, lineageFile, walFile);

    // Build the lower-cased qualified name under which the process is expected in Atlas.
    final String createTime = Long.toString(TABLE_CREATE_TIME * 1000);
    final String processQFName =
        (dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
            CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime).toLowerCase();

    final String queryString = "create   /* Test */   table " + dbName + "."
        + targetTableName + "   as /* Test */ select count, id from " + dbName + "." + sourceTableName;

    final AtlasEntity process   = validateProcess(processQFName, queryString);
    final AtlasEntity execution = validateProcessExecution(process, queryString);
    final AtlasObjectId processRef =
        toAtlasObjectId(execution.getRelationshipAttribute(BaseImpalaEvent.ATTRIBUTE_PROCESS));

    // The execution references the process, and the process has a single execution.
    Assert.assertEquals(processRef.getGuid(), process.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(process), 1);

    // The target table is registered and carries exactly one DDL query.
    final String tableGuid = assertTableIsRegistered(dbName, targetTableName);
    final AtlasEntity tableEntity = atlasClientV2.getEntityByGuid(tableGuid).getEntity();
    final List<?> ddlQueries = (List<?>) tableEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);

    assertNotNull(ddlQueries);
    assertEquals(ddlQueries.size(), 1);
}
/**
* This tests
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
* 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
* there is table vertex with createTime.
* there is table vertex with createTime.
...
@@ -288,6 +407,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
...
@@ -288,6 +407,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
}
/**
/**
* This test covers an alter view as select query with extra comments and spaces present
* 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
* there is table vertex with createTime.
* 2) Lineage is sent to Atlas
* 3) Atlas can get this lineage from Atlas
*/
@Test
public void testAlterViewAsSelectWithCommentSpacesFromFile() throws Exception {
    final String lineageFile = dir + "impalaAlterViewAsSelectWithCommentSpaces.json";
    final String walFile     = dir + "WALimpala.wal";

    final ImpalaLineageHook hook = new ImpalaLineageHook();

    // Register the database, source table and target view first: this stands in for Impala
    // updating HMS and the HMS hook notifying Atlas, which must precede any lineage notification.
    final String dbName = "db_10";
    createDatabase(dbName);

    final String sourceTableName = "table_1";
    createTable(dbName, sourceTableName, "(id string, count int)", false);

    final String targetTableName = "view_1";
    createTable(dbName, targetTableName, "(count int, id string)", false);

    // Feed the lineage record through the tool so the notification reaches Atlas.
    final String[] toolArgs = {"-d", "./", "-p", "impala"};
    final ImpalaLineageTool tool = new ImpalaLineageTool(toolArgs);
    tool.importHImpalaEntities(hook, lineageFile, walFile);

    // Build the lower-cased qualified name under which the process is expected in Atlas.
    final String createTime = Long.toString(TABLE_CREATE_TIME * 1000);
    final String processQFName =
        (dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
            CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime).toLowerCase();

    final String queryString = "alter   /* comment1 */ view " + dbName + "." + targetTableName
        + " as   select /* comment1 */ count, id from " + dbName + "." + sourceTableName;

    final AtlasEntity process   = validateProcess(processQFName, queryString);
    final AtlasEntity execution = validateProcessExecution(process, queryString);
    final AtlasObjectId processRef =
        toAtlasObjectId(execution.getRelationshipAttribute(BaseImpalaEvent.ATTRIBUTE_PROCESS));

    // The execution references the process, and the process has a single execution.
    Assert.assertEquals(processRef.getGuid(), process.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(process), 1);

    // The target view is registered and carries exactly one DDL query.
    final String tableGuid = assertTableIsRegistered(dbName, targetTableName);
    final AtlasEntity tableEntity = atlasClientV2.getEntityByGuid(tableGuid).getEntity();
    final List<?> ddlQueries = (List<?>) tableEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);

    assertNotNull(ddlQueries);
    assertEquals(ddlQueries.size(), 1);
}
/**
* This tests
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "insert into" command lineage,
* 1) ImpalaLineageTool can parse one lineage file that contains "insert into" command lineage,
* there is table vertex with createTime.
* there is table vertex with createTime.
...
...
addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json
0 → 100644
View file @
f7df0f1b
{
"queryText"
:
"alter /* comment1 */ view db_10.view_1 as select /* comment1 */ count, id from db_10.table_1"
,
"queryId"
:
"3a441d0c130962f8:7f634aec00000000"
,
"hash"
:
"64ff0425ccdfaada53e3f2fd76f566f7"
,
"user"
:
"admin"
,
"timestamp"
:
1554750072
,
"endTime"
:
1554750554
,
"edges"
:[
{
"sources"
:[
1
],
"targets"
:[
0
],
"edgeType"
:
"PROJECTION"
},
{
"sources"
:[
3
],
"targets"
:[
2
],
"edgeType"
:
"PROJECTION"
}
],
"vertices"
:[
{
"id"
:
0
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_10.view_1.count"
,
"metadata"
:
{
"tableName"
:
"db_10.view_1"
,
"tableCreateTime"
:
1554750072
}
},
{
"id"
:
1
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_10.table_1.count"
,
"metadata"
:
{
"tableName"
:
"db_10.table_1"
,
"tableCreateTime"
:
1554750070
}
},
{
"id"
:
2
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_10.view_1.id"
,
"metadata"
:
{
"tableName"
:
"db_10.view_1"
,
"tableCreateTime"
:
1554750072
}
},
{
"id"
:
3
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_10.table_1.id"
,
"metadata"
:
{
"tableName"
:
"db_10.table_1"
,
"tableCreateTime"
:
1554750070
}
}
]
}
\ No newline at end of file
addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json
0 → 100644
View file @
f7df0f1b
{
"queryText"
:
"create /* Test */ table db_9.table_2 as /* Test */ select count, id from db_9.table_1"
,
"queryId"
:
"3a441d0c130962f8:7f634aec00000000"
,
"hash"
:
"64ff0425ccdfaada53e3f2fd76f566f7"
,
"user"
:
"admin"
,
"timestamp"
:
1554750072
,
"endTime"
:
1554750554
,
"edges"
:[
{
"sources"
:[
1
],
"targets"
:[
0
],
"edgeType"
:
"PROJECTION"
},
{
"sources"
:[
3
],
"targets"
:[
2
],
"edgeType"
:
"PROJECTION"
}
],
"vertices"
:[
{
"id"
:
0
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_9.table_2.count"
,
"metadata"
:
{
"tableName"
:
"db_9.table_2"
,
"tableCreateTime"
:
1554750072
}
},
{
"id"
:
1
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_9.table_1.count"
,
"metadata"
:
{
"tableName"
:
"db_9.table_1"
,
"tableCreateTime"
:
1554750070
}
},
{
"id"
:
2
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_9.table_2.id"
,
"metadata"
:
{
"tableName"
:
"db_9.table_2"
,
"tableCreateTime"
:
1554750072
}
},
{
"id"
:
3
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_9.table_1.id"
,
"metadata"
:
{
"tableName"
:
"db_9.table_1"
,
"tableCreateTime"
:
1554750070
}
}
]
}
\ No newline at end of file
addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json
0 → 100644
View file @
f7df0f1b
{
"queryText"
:
" create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1"
,
"queryId"
:
"3a441d0c130962f8:7f634aec00000000"
,
"hash"
:
"64ff0425ccdfaada53e3f2fd76f566f7"
,
"user"
:
"admin"
,
"timestamp"
:
1554750072
,
"endTime"
:
1554750554
,
"edges"
:[
{
"sources"
:[
1
],
"targets"
:[
0
],
"edgeType"
:
"PROJECTION"
},
{
"sources"
:[
3
],
"targets"
:[
2
],
"edgeType"
:
"PROJECTION"
}
],
"vertices"
:[
{
"id"
:
0
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_8.view_1.count"
,
"metadata"
:
{
"tableName"
:
"db_8.view_1"
,
"tableCreateTime"
:
1554750072
}
},
{
"id"
:
1
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_8.table_1.count"
,
"metadata"
:
{
"tableName"
:
"db_8.table_1"
,
"tableCreateTime"
:
1554750070
}
},
{
"id"
:
2
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_8.view_1.id"
,
"metadata"
:
{
"tableName"
:
"db_8.view_1"
,
"tableCreateTime"
:
1554750072
}
},
{
"id"
:
3
,
"vertexType"
:
"COLUMN"
,
"vertexId"
:
"db_8.table_1.id"
,
"metadata"
:
{
"tableName"
:
"db_8.table_1"
,
"tableCreateTime"
:
1554750070
}
}
]
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment