Commit f7df0f1b by Sid Committed by Sarath Subramanian

ATLAS-3461: Changed from hardcoded match to pattern-based regex match. ATLAS-3461: UT added

parent 848c799e
......@@ -20,32 +20,40 @@ package org.apache.atlas.impala.hook;
import org.apache.atlas.impala.model.ImpalaOperationType;
import org.apache.commons.lang.StringUtils;
import java.util.regex.Pattern;
/**
* Parse an Impala query text and output the impala operation type
*/
public class ImpalaOperationParser {
// Matches a /* ... */ block comment. The reluctant ".*?" stops at the first closing "*/",
// and DOTALL lets a single comment span multiple lines.
private static final Pattern COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);
// Matches text that begins (after optional leading spaces) with the word "create" and
// later contains the word "view"; case-insensitive, and "." also matches line breaks.
private static final Pattern CREATE_VIEW_PATTERN =
Pattern.compile("^[ ]*\\bcreate\\b.*\\bview\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
// Matches text that begins (after optional leading spaces) with the word "create" followed,
// in this order, by the whole words "table", "as" and "select"; case-insensitive, DOTALL.
private static final Pattern CREATE_TABLE_AS_SELECT_PATTERN =
Pattern.compile("^[ ]*\\bcreate\\b.*\\btable\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
// Matches text that begins (after optional leading spaces) with the word "alter" followed,
// in this order, by the whole words "view", "as" and "select"; case-insensitive, DOTALL.
// "as" is bounded on both sides (\bas\b) so identifiers that merely start with "as"
// (for example "asset_view") are not mistaken for the AS keyword.
private static final Pattern ALTER_VIEW_AS_SELECT_PATTERN =
    Pattern.compile("^[ ]*\\balter\\b.*\\bview\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
// Matches text that begins (after optional leading spaces) with the word "insert" followed,
// in this order, by "into" or "overwrite", then "select", then "from" (all as whole words);
// case-insensitive, and "." also matches line breaks.
private static final Pattern INSERT_SELECT_FROM_PATTERN =
Pattern.compile("^[ ]*\\binsert\\b.*\\b(into|overwrite)\\b.*\\bselect\\b.*\\bfrom\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Creates a parser instance. The constructor body is empty and initializes no state.
 */
public ImpalaOperationParser() {
}
public static ImpalaOperationType getImpalaOperationType(String queryText) {
// Impala does not generate a lineage record for the command "LOAD DATA INPATH"
if (StringUtils.startsWithIgnoreCase(queryText, "create view")) {
// Impala does not generate a lineage record for the command "LOAD DATA INPATH"
String queryTextWithNoComments = COMMENT_PATTERN.matcher(queryText).replaceAll("");
if (doesMatch(queryTextWithNoComments, CREATE_VIEW_PATTERN)) {
return ImpalaOperationType.CREATEVIEW;
} else if (StringUtils.startsWithIgnoreCase(queryText, "create table") &&
StringUtils.containsIgnoreCase(queryText, "as select")) {
} else if (doesMatch(queryTextWithNoComments, CREATE_TABLE_AS_SELECT_PATTERN)) {
return ImpalaOperationType.CREATETABLE_AS_SELECT;
} else if (StringUtils.startsWithIgnoreCase(queryText, "alter view") &&
StringUtils.containsIgnoreCase(queryText, "as select")) {
} else if (doesMatch(queryTextWithNoComments, ALTER_VIEW_AS_SELECT_PATTERN)) {
return ImpalaOperationType.ALTERVIEW_AS;
} else if (StringUtils.containsIgnoreCase(queryText, "insert into") &&
StringUtils.containsIgnoreCase(queryText, "select") &&
StringUtils.containsIgnoreCase(queryText, "from")) {
return ImpalaOperationType.QUERY;
} else if (StringUtils.containsIgnoreCase(queryText,"insert overwrite") &&
StringUtils.containsIgnoreCase(queryText, "select") &&
StringUtils.containsIgnoreCase(queryText, "from")) {
} else if (doesMatch(queryTextWithNoComments, INSERT_SELECT_FROM_PATTERN)) {
return ImpalaOperationType.QUERY;
}
......@@ -64,5 +72,8 @@ public class ImpalaOperationParser {
return ImpalaOperationType.UNKNOWN;
}
/**
 * Tests whether the whole of {@code queryText} matches {@code pattern}.
 *
 * @param queryText text to test
 * @param pattern   compiled regular expression to apply
 * @return {@code true} only when the entire text matches the pattern
 */
private static boolean doesMatch(final String queryText, final Pattern pattern) {
    // Matcher.matches() requires the entire input to match, not just a prefix or substring.
    final boolean entireTextMatches = pattern.matcher(queryText).matches();

    return entireTextMatches;
}
}
\ No newline at end of file
{
"queryText":"alter /* comment1 */ view db_10.view_1 as select /* comment1 */ count, id from db_10.table_1",
"queryId":"3a441d0c130962f8:7f634aec00000000",
"hash":"64ff0425ccdfaada53e3f2fd76f566f7",
"user":"admin",
"timestamp":1554750072,
"endTime":1554750554,
"edges":[
{
"sources":[
1
],
"targets":[
0
],
"edgeType":"PROJECTION"
},
{
"sources":[
3
],
"targets":[
2
],
"edgeType":"PROJECTION"
}
],
"vertices":[
{
"id":0,
"vertexType":"COLUMN",
"vertexId":"db_10.view_1.count",
"metadata": {
"tableName": "db_10.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":1,
"vertexType":"COLUMN",
"vertexId":"db_10.table_1.count",
"metadata": {
"tableName": "db_10.table_1",
"tableCreateTime": 1554750070
}
},
{
"id":2,
"vertexType":"COLUMN",
"vertexId":"db_10.view_1.id",
"metadata": {
"tableName": "db_10.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":3,
"vertexType":"COLUMN",
"vertexId":"db_10.table_1.id",
"metadata": {
"tableName": "db_10.table_1",
"tableCreateTime": 1554750070
}
}
]
}
\ No newline at end of file
{
"queryText":"create /* Test */ table db_9.table_2 as /* Test */ select count, id from db_9.table_1",
"queryId":"3a441d0c130962f8:7f634aec00000000",
"hash":"64ff0425ccdfaada53e3f2fd76f566f7",
"user":"admin",
"timestamp":1554750072,
"endTime":1554750554,
"edges":[
{
"sources":[
1
],
"targets":[
0
],
"edgeType":"PROJECTION"
},
{
"sources":[
3
],
"targets":[
2
],
"edgeType":"PROJECTION"
}
],
"vertices":[
{
"id":0,
"vertexType":"COLUMN",
"vertexId":"db_9.table_2.count",
"metadata": {
"tableName": "db_9.table_2",
"tableCreateTime": 1554750072
}
},
{
"id":1,
"vertexType":"COLUMN",
"vertexId":"db_9.table_1.count",
"metadata": {
"tableName": "db_9.table_1",
"tableCreateTime": 1554750070
}
},
{
"id":2,
"vertexType":"COLUMN",
"vertexId":"db_9.table_2.id",
"metadata": {
"tableName": "db_9.table_2",
"tableCreateTime": 1554750072
}
},
{
"id":3,
"vertexType":"COLUMN",
"vertexId":"db_9.table_1.id",
"metadata": {
"tableName": "db_9.table_1",
"tableCreateTime": 1554750070
}
}
]
}
\ No newline at end of file
{
"queryText":" create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1",
"queryId":"3a441d0c130962f8:7f634aec00000000",
"hash":"64ff0425ccdfaada53e3f2fd76f566f7",
"user":"admin",
"timestamp":1554750072,
"endTime":1554750554,
"edges":[
{
"sources":[
1
],
"targets":[
0
],
"edgeType":"PROJECTION"
},
{
"sources":[
3
],
"targets":[
2
],
"edgeType":"PROJECTION"
}
],
"vertices":[
{
"id":0,
"vertexType":"COLUMN",
"vertexId":"db_8.view_1.count",
"metadata": {
"tableName": "db_8.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":1,
"vertexType":"COLUMN",
"vertexId":"db_8.table_1.count",
"metadata": {
"tableName": "db_8.table_1",
"tableCreateTime": 1554750070
}
},
{
"id":2,
"vertexType":"COLUMN",
"vertexId":"db_8.view_1.id",
"metadata": {
"tableName": "db_8.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":3,
"vertexType":"COLUMN",
"vertexId":"db_8.table_1.id",
"metadata": {
"tableName": "db_8.table_1",
"tableCreateTime": 1554750070
}
}
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment