Commit f7df0f1b by Sid Committed by Sarath Subramanian

ATLAS-3461: Changed from hardcoded match to pattern-based regex match ATLAS-3461: UT Added

parent 848c799e
......@@ -20,32 +20,40 @@ package org.apache.atlas.impala.hook;
import org.apache.atlas.impala.model.ImpalaOperationType;
import org.apache.commons.lang.StringUtils;
import java.util.regex.Pattern;
/**
* Parse an Impala query text and output the impala operation type
*/
public class ImpalaOperationParser {
// Strips /* ... */ block comments from the query text; DOTALL lets one comment span several lines.
private static final Pattern COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);

// The statement patterns below are evaluated with Matcher.matches(), so each one has to cover the
// entire (comment-free) query text. Leading whitespace is matched with \s* rather than a literal
// space so that a tab or line break left at the start of the text (for example after a leading
// comment has been removed) does not prevent classification.
// NOTE(review): CREATE_VIEW_PATTERN only requires the words "create" and "view" to appear in that
// order, so it is intentionally loose; confirm that is acceptable for statements mentioning "view"
// elsewhere in their text.
private static final Pattern CREATE_VIEW_PATTERN =
        Pattern.compile("^\\s*\\bcreate\\b.*\\bview\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

private static final Pattern CREATE_TABLE_AS_SELECT_PATTERN =
        Pattern.compile("^\\s*\\bcreate\\b.*\\btable\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

// "as" is delimited by \b on both sides, consistent with CREATE_TABLE_AS_SELECT_PATTERN, so that a
// word merely starting with "as" is not taken for the AS keyword.
private static final Pattern ALTER_VIEW_AS_SELECT_PATTERN =
        Pattern.compile("^\\s*\\balter\\b.*\\bview\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

// Covers both "insert into" and "insert overwrite" statements that contain a select ... from.
private static final Pattern INSERT_SELECT_FROM_PATTERN =
        Pattern.compile("^\\s*\\binsert\\b.*\\b(into|overwrite)\\b.*\\bselect\\b.*\\bfrom\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
public ImpalaOperationParser() {
}
public static ImpalaOperationType getImpalaOperationType(String queryText) {
// Impala does not generate lineage record for command "LOAD DATA INPATH"
if (StringUtils.startsWithIgnoreCase(queryText, "create view")) {
// Impala does not generate lineage record for command "LOAD DATA INPATH"
String queryTextWithNoComments = COMMENT_PATTERN.matcher(queryText).replaceAll("");
if (doesMatch(queryTextWithNoComments, CREATE_VIEW_PATTERN)) {
return ImpalaOperationType.CREATEVIEW;
} else if (StringUtils.startsWithIgnoreCase(queryText, "create table") &&
StringUtils.containsIgnoreCase(queryText, "as select")) {
} else if (doesMatch(queryTextWithNoComments, CREATE_TABLE_AS_SELECT_PATTERN)) {
return ImpalaOperationType.CREATETABLE_AS_SELECT;
} else if (StringUtils.startsWithIgnoreCase(queryText, "alter view") &&
StringUtils.containsIgnoreCase(queryText, "as select")) {
} else if (doesMatch(queryTextWithNoComments, ALTER_VIEW_AS_SELECT_PATTERN)) {
return ImpalaOperationType.ALTERVIEW_AS;
} else if (StringUtils.containsIgnoreCase(queryText, "insert into") &&
StringUtils.containsIgnoreCase(queryText, "select") &&
StringUtils.containsIgnoreCase(queryText, "from")) {
return ImpalaOperationType.QUERY;
} else if (StringUtils.containsIgnoreCase(queryText,"insert overwrite") &&
StringUtils.containsIgnoreCase(queryText, "select") &&
StringUtils.containsIgnoreCase(queryText, "from")) {
} else if (doesMatch(queryTextWithNoComments, INSERT_SELECT_FROM_PATTERN)) {
return ImpalaOperationType.QUERY;
}
......@@ -64,5 +72,8 @@ public class ImpalaOperationParser {
return ImpalaOperationType.UNKNOWN;
}
/**
 * Tells whether the given pattern matches the query text in its entirety.
 *
 * @param queryText the text to test against the pattern
 * @param pattern   the compiled pattern to apply
 * @return {@code true} only when {@code pattern} matches the whole of {@code queryText}
 */
private static boolean doesMatch(final String queryText, final Pattern pattern) {
    // matches() requires the full input to match, unlike find().
    final boolean wholeTextMatches = pattern.matcher(queryText).matches();

    return wholeTextMatches;
}
}
\ No newline at end of file
......@@ -102,6 +102,68 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
/**
 * Tests a "create view" query that has leading spaces and inline block comments:
 * 1) ImpalaLineageTool can parse one lineage file that contains the " create view" command lineage
 * 2) Lineage is sent to Atlas
 * 3) Atlas can get this lineage from Atlas
 *
 * @throws Exception if setup, lineage import or verification fails; the failure is propagated so
 *                   that the test is reported as failed instead of being silently swallowed
 */
@Test
public void testCreateViewWithCommentSpacesFromFile() throws Exception {
    // this file contains a single lineage record for "create view".
    // It has table vertex with createTime
    String IMPALA = dir + "impalaCreateViewWithCommentSpaces.json";
    String IMPALA_WAL = dir + "WALimpala.wal";
    ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();

    // create database and tables to simulate Impala behavior that Impala updates metadata
    // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
    // Atlas can handle lineage notification
    String dbName = "db_8";
    createDatabase(dbName);

    String sourceTableName = "table_1";
    createTable(dbName, sourceTableName,"(id string, count int)", false);

    String targetTableName = "view_1";
    createTable(dbName, targetTableName,"(count int, id string)", false);

    // process lineage record, and send corresponding notification to Atlas
    String[] args = new String[]{"-d", "./", "-p", "impala"};
    ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
    toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);

    // verify the process is saved in Atlas
    // 1554750072 is the tableCreateTime recorded for db_8.view_1 in the lineage record;
    // it is multiplied by 1000 here (presumably seconds to milliseconds)
    String createTime = Long.toString(1554750072L * 1000);
    String processQFName =
            "db_8.view_1" + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
                    CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;

    processQFName = processQFName.toLowerCase();

    String queryString = " create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1";
    AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
    AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
    AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
            BaseImpalaEvent.ATTRIBUTE_PROCESS));
    Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);

    String guid = assertTableIsRegistered(dbName, targetTableName);
    AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
    List<?> ddlQueries = (List<?>) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);

    assertNotNull(ddlQueries);
    assertEquals(ddlQueries.size(), 1);
}
/**
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage,
* but there is no table vertex with createTime.
......@@ -232,6 +294,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
/**
 * Tests a "create table as select" query that has extra block comments and spaces:
 * 1) ImpalaLineageTool can parse one lineage file that contains "create table as select" command lineage,
 *    there is table vertex with createTime.
 * 2) Lineage is sent to Atlas
 * 3) Atlas can get this lineage from Atlas
 *
 * @throws Exception if setup, lineage import or verification fails
 */
@Test
public void testCreateTableAsSelectWithCommentSpacesFromFile() throws Exception {
    String IMPALA = dir + "impalaCreateTableAsSelectWithCommentSpaces.json";
    String IMPALA_WAL = dir + "WALimpala.wal";

    ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();

    // create database and tables to simulate Impala behavior that Impala updates metadata
    // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
    // Atlas can handle lineage notification
    String dbName = "db_9";
    createDatabase(dbName);

    String sourceTableName = "table_1";
    createTable(dbName, sourceTableName,"(id string, count int)", false);

    String targetTableName = "table_2";
    createTable(dbName, targetTableName,"(count int, id string)", false);

    // process lineage record, and send corresponding notification to Atlas
    String[] args = new String[]{"-d", "./", "-p", "impala"};
    ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
    toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);

    // verify the process is saved in Atlas
    // the process qualified name ends with TABLE_CREATE_TIME multiplied by 1000
    // (presumably seconds to milliseconds)
    String createTime = Long.toString(TABLE_CREATE_TIME * 1000);
    String processQFName =
            dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
                    CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;

    processQFName = processQFName.toLowerCase();

    String queryString = "create /* Test */ table " + dbName + "."
            + targetTableName + " as /* Test */ select count, id from " + dbName + "." + sourceTableName;
    AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
    AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
    AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
            BaseImpalaEvent.ATTRIBUTE_PROCESS));
    Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);

    String guid = assertTableIsRegistered(dbName, targetTableName);
    AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
    List<?> ddlQueries = (List<?>) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);

    assertNotNull(ddlQueries);
    assertEquals(ddlQueries.size(), 1);
}
/**
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
* there is table vertex with createTime.
......@@ -288,6 +407,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
/**
 * Tests an "alter view as select" query that has extra block comments and spaces:
 * 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
 *    there is table vertex with createTime.
 * 2) Lineage is sent to Atlas
 * 3) Atlas can get this lineage from Atlas
 *
 * @throws Exception if setup, lineage import or verification fails
 */
@Test
public void testAlterViewAsSelectWithCommentSpacesFromFile() throws Exception {
    String IMPALA = dir + "impalaAlterViewAsSelectWithCommentSpaces.json";
    String IMPALA_WAL = dir + "WALimpala.wal";

    ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();

    // create database and tables to simulate Impala behavior that Impala updates metadata
    // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
    // Atlas can handle lineage notification
    String dbName = "db_10";
    createDatabase(dbName);

    String sourceTableName = "table_1";
    createTable(dbName, sourceTableName,"(id string, count int)", false);

    String targetTableName = "view_1";
    createTable(dbName, targetTableName,"(count int, id string)", false);

    // process lineage record, and send corresponding notification to Atlas
    String[] args = new String[]{"-d", "./", "-p", "impala"};
    ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
    toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);

    // verify the process is saved in Atlas
    // the process qualified name ends with TABLE_CREATE_TIME multiplied by 1000
    // (presumably seconds to milliseconds)
    String createTime = Long.toString(TABLE_CREATE_TIME * 1000);
    String processQFName =
            dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
                    CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;

    processQFName = processQFName.toLowerCase();

    String queryString = "alter /* comment1 */ view " + dbName + "." + targetTableName
            + " as select /* comment1 */ count, id from " + dbName + "." + sourceTableName;
    AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
    AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
    AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
            BaseImpalaEvent.ATTRIBUTE_PROCESS));
    Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
    Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);

    String guid = assertTableIsRegistered(dbName, targetTableName);
    AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
    List<?> ddlQueries = (List<?>) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);

    assertNotNull(ddlQueries);
    assertEquals(ddlQueries.size(), 1);
}
/**
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "insert into" command lineage,
* there is table vertex with createTime.
......
{
"queryText":"alter /* comment1 */ view db_10.view_1 as select /* comment1 */ count, id from db_10.table_1",
"queryId":"3a441d0c130962f8:7f634aec00000000",
"hash":"64ff0425ccdfaada53e3f2fd76f566f7",
"user":"admin",
"timestamp":1554750072,
"endTime":1554750554,
"edges":[
{
"sources":[
1
],
"targets":[
0
],
"edgeType":"PROJECTION"
},
{
"sources":[
3
],
"targets":[
2
],
"edgeType":"PROJECTION"
}
],
"vertices":[
{
"id":0,
"vertexType":"COLUMN",
"vertexId":"db_10.view_1.count",
"metadata": {
"tableName": "db_10.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":1,
"vertexType":"COLUMN",
"vertexId":"db_10.table_1.count",
"metadata": {
"tableName": "db_10.table_1",
"tableCreateTime": 1554750070
}
},
{
"id":2,
"vertexType":"COLUMN",
"vertexId":"db_10.view_1.id",
"metadata": {
"tableName": "db_10.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":3,
"vertexType":"COLUMN",
"vertexId":"db_10.table_1.id",
"metadata": {
"tableName": "db_10.table_1",
"tableCreateTime": 1554750070
}
}
]
}
\ No newline at end of file
{
"queryText":"create /* Test */ table db_9.table_2 as /* Test */ select count, id from db_9.table_1",
"queryId":"3a441d0c130962f8:7f634aec00000000",
"hash":"64ff0425ccdfaada53e3f2fd76f566f7",
"user":"admin",
"timestamp":1554750072,
"endTime":1554750554,
"edges":[
{
"sources":[
1
],
"targets":[
0
],
"edgeType":"PROJECTION"
},
{
"sources":[
3
],
"targets":[
2
],
"edgeType":"PROJECTION"
}
],
"vertices":[
{
"id":0,
"vertexType":"COLUMN",
"vertexId":"db_9.table_2.count",
"metadata": {
"tableName": "db_9.table_2",
"tableCreateTime": 1554750072
}
},
{
"id":1,
"vertexType":"COLUMN",
"vertexId":"db_9.table_1.count",
"metadata": {
"tableName": "db_9.table_1",
"tableCreateTime": 1554750070
}
},
{
"id":2,
"vertexType":"COLUMN",
"vertexId":"db_9.table_2.id",
"metadata": {
"tableName": "db_9.table_2",
"tableCreateTime": 1554750072
}
},
{
"id":3,
"vertexType":"COLUMN",
"vertexId":"db_9.table_1.id",
"metadata": {
"tableName": "db_9.table_1",
"tableCreateTime": 1554750070
}
}
]
}
\ No newline at end of file
{
"queryText":" create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1",
"queryId":"3a441d0c130962f8:7f634aec00000000",
"hash":"64ff0425ccdfaada53e3f2fd76f566f7",
"user":"admin",
"timestamp":1554750072,
"endTime":1554750554,
"edges":[
{
"sources":[
1
],
"targets":[
0
],
"edgeType":"PROJECTION"
},
{
"sources":[
3
],
"targets":[
2
],
"edgeType":"PROJECTION"
}
],
"vertices":[
{
"id":0,
"vertexType":"COLUMN",
"vertexId":"db_8.view_1.count",
"metadata": {
"tableName": "db_8.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":1,
"vertexType":"COLUMN",
"vertexId":"db_8.table_1.count",
"metadata": {
"tableName": "db_8.table_1",
"tableCreateTime": 1554750070
}
},
{
"id":2,
"vertexType":"COLUMN",
"vertexId":"db_8.view_1.id",
"metadata": {
"tableName": "db_8.view_1",
"tableCreateTime": 1554750072
}
},
{
"id":3,
"vertexType":"COLUMN",
"vertexId":"db_8.table_1.id",
"metadata": {
"tableName": "db_8.table_1",
"tableCreateTime": 1554750070
}
}
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment