@@ -176,54 +128,81 @@ public class GraphBackedDiscoveryServiceTest {
@DataProvider(name="dslQueriesProvider")
privateObject[][]createDSLQueries(){
returnnewString[][]{
{"from DB"},{"DB"},{"DB where DB.name=\"Reporting\""},{"DB DB.name = \"Reporting\""},
{"DB where DB.name=\"Reporting\" select name, owner"},{"DB has name"},{"DB, Table"},
{"DB is JdbcAccess"},
returnnewObject[][]{
{"from hive_db",2},
{"hive_db",2},
{"hive_db where hive_db.name=\"Reporting\"",1},
{"hive_db hive_db.name = \"Reporting\"",1},
{"hive_db where hive_db.name=\"Reporting\" select name, owner",1},
{"hive_db has name",2},
{"hive_db, hive_table",6},
{"View is JdbcAccess",2},
{"hive_db as db1, hive_table where db1.name = \"Reporting\"",0},//Not working - ATLAS-145
// - Final working query -> discoveryService.searchByGremlin("L:{_var_0 = [] as Set;g.V().has(\"__typeName\", \"hive_db\").fill(_var_0);g.V().has(\"__superTypeNames\", \"hive_db\").fill(_var_0);_var_0._().as(\"db1\").in(\"__hive_table.db\").back(\"db1\").and(_().has(\"hive_db.name\", T.eq, \"Reporting\")).toList()}")
/*
{"DB, LoadProcess has name"},
{"DB as db1, Table where db1.name = \"Reporting\""},
{"DB where DB.name=\"Reporting\" and DB.createTime < " + System.currentTimeMillis()},
{"hive_db, hive_process has name"}, //Invalid query
{"hive_db where hive_db.name=\"Reporting\" and hive_db.createTime < " + System.currentTimeMillis()}
*/
{"from Table"},{"Table"},{"Table is Dimension"},{"Column where Column isa PII"},
{"View is Dimension"},
/*{"Column where Column isa PII select Column.name"},*/
{"Column select Column.name"},{"Column select name"},{"Column where Column.name=\"customer_id\""},
{"from Table select Table.name"},{"DB where (name = \"Reporting\")"},
{"DB where (name = \"Reporting\") select name as _col_0, owner as _col_1"},
{"DB where DB is JdbcAccess"},{"DB where DB has name"},{"DB Table"},{"DB where DB has name"},
{"DB as db1 Table where (db1.name = \"Reporting\")"},
{"DB where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 "},
{"Table where (name = \"sales_fact\" and created > \"2014-01-01\" ) select name as _col_0, created as _col_1 "},
{"Table where (name = \"sales_fact\" and created > \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, created as _col_1 "},
{"from hive_table",6},
{"hive_table",6},
{"hive_table isa Dimension",3},
{"hive_column where hive_column isa PII",6},
{"View is Dimension",2},
// {"hive_column where hive_column isa PII select hive_column.name", 6}, //Not working - ATLAS-175
{"hive_column select hive_column.name",27},
{"hive_column select name",27},
{"hive_column where hive_column.name=\"customer_id\"",4},
{"from hive_table select hive_table.name",6},
{"hive_db where (name = \"Reporting\")",1},
{"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1",1},
{"hive_db where hive_db is JdbcAccess",0},//Not supposed to work
{"hive_db hive_table",6},
{"hive_db where hive_db has name",2},
{"hive_db as db1 hive_table where (db1.name = \"Reporting\")",0},//Not working -> ATLAS-145
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ",1},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ",1},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 ",1},
/*
todo: does not work
{"DB where (name = \"Reporting\") and ((createTime + 1) > 0)"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") select db1.name
todo: does not work - ATLAS-146
{"hive_db where (name = \"Reporting\") and ((createTime + 1) > 0)"},
{"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") select db1.name
as dbName, tab.name as tabName"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) or (db1.name = \"Reporting\") select db1.name
{"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) or (db1.name = \"Reporting\") select db1.name
as dbName, tab.name as tabName"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner
{"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner
select db1.name as dbName, tab.name as tabName"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner
{"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner
select db1.name as dbName, tab.name as tabName"},
*/
// trait searches
{"Dimension"},
/*{"Fact"}, - todo: does not work*/
{"JdbcAccess"},{"ETL"},{"Metric"},{"PII"},
{"Dimension",5},
{"JdbcAccess",2},
{"ETL",2},
{"Metric",5},
{"PII",6},
/* Lineage queries are fired through ClosureQuery and are tested through HiveLineageJerseyResourceIt in webapp module.
Commenting out the below queries since DSL to Gremlin parsing/translation fails with lineage queries when there are array types
used within loop expressions which is the case with DataSet.inputs and outputs.
validateJson(r,"{\n \"query\":\"DB as db1 where (name = \\\"Sales\\\") Table as tab where DB as db1 where (name = \\\"Sales\\\") Table as tab is Dimension as _src1 select db1 as dbO, tab.name as tabName\",\n \"dataType\":{\n \"typeName\":\"\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbO\",\n \"dataTypeName\":\"DB\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"\",\n \"dbO\":{\n \"id\":\"256\",\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"\",\n \"dbO\":{\n \"id\":\"256\",\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"\",\n \"dbO\":{\n \"id\":\"256\",\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"tabName\":\"customer_dim\"\n }\n ]\n}")
validateJson(r,"{\n \"query\":\"DB as db1 where (name = \\\"Sales\\\") Table as tab where DB as db1 where (name = \\\"Sales\\\") Table as tab is Dimension as _src1 select db1 as dbO, tab.name as tabName\",\n \"dataType\":{\n \"typeName\":\"\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbO\",\n \"dataTypeName\":\"DB\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"\",\n \"dbO\":{\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"\",\n \"dbO\":{\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"\",\n \"dbO\":{\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"tabName\":\"customer_dim\"\n }\n ]\n}")
}
// Evaluates a DSL query that compares the array-valued `values` attribute of
// Partition against a literal array, chained with `table` and `db` filters,
// and selects the partition alias `p`. The expected JSON below contains a
// single row whose `_col_0` is a Partition.
// NOTE(review): `g`, `gp` and `validateJson` are defined outside this block;
// their exact semantics are not visible here.
test("testArrayComparision") {
  val p = new QueryParser
  // `.right.get` unwraps the parsed expression; presumably the parser returns
  // an Either and a parse failure would surface here as an exception.
  val e = p("Partition as p where values = ['2015-01-01']," +
    " table where name = 'sales_fact_daily_mv'," +
    " db where name = 'Reporting' and clusterName = 'test' select p").right.get
  val r = QueryProcessor.evaluate(e, g, gp)
  // stripMargin drops everything up to and including the leading `|` on each
  // line, so the indentation in front of the margin is not part of the value.
  validateJson(r, """{
    | "query":"Partition as p where (values = [\"2015-01-01\"]) table where (name = \"sales_fact_daily_mv\") db where (name = \"Reporting\") and (clusterName = \"test\") as _src1 select p as _col_0",
    | "dataType":{
    | "typeName":"__tempQueryResultStruct2",
    | "attributeDefinitions":[
    | {
    | "name":"_col_0",
    | "dataTypeName":"Partition",
    | "multiplicity":{
    | "lower":0,
    | "upper":1,
    | "isUnique":false
    | },
    | "isComposite":false,
    | "isUnique":false,
    | "isIndexable":true,
    | "reverseAttributeName":null
    | }
    | ]
    | },
    | "rows":[
    | {
    | "$typeName$":"__tempQueryResultStruct2",
    | "_col_0":{
    | "$typeName$":"Partition",
    | "version":0
    | }
    | }
    | ]
    |}""".stripMargin)
}
// Same array-comparison query as the plain array comparison case, but the
// select clause projects the array attribute itself (`p.values`). The expected
// JSON below declares `_col_0` as `array<string>` and contains one row holding
// ["2015-01-01"].
// NOTE(review): `g`, `gp` and `validateJson` are defined outside this block;
// their exact semantics are not visible here.
test("testArrayComparisionWithSelectOnArray") {
  val p = new QueryParser
  // `.right.get` unwraps the parsed expression; presumably the parser returns
  // an Either and a parse failure would surface here as an exception.
  val e = p("Partition as p where values = ['2015-01-01']," +
    " table where name = 'sales_fact_daily_mv'," +
    " db where name = 'Reporting' and clusterName = 'test' select p.values").right.get
  val r = QueryProcessor.evaluate(e, g, gp)
  // stripMargin removes the indentation in front of each `|`. The closing
  // delimiter is deliberately left at column 0: that last line has no margin
  // character, so indenting it would add trailing whitespace to the literal.
  validateJson(r,
    """{
      | "query":"Partition as p where (values = [\"2015-01-01\"]) table where (name = \"sales_fact_daily_mv\") db where (name = \"Reporting\") and (clusterName = \"test\") as _src1 select p.values as _col_0",
      | "dataType":{
      | "typeName":"__tempQueryResultStruct2",
      | "attributeDefinitions":[
      | {
      | "name":"_col_0",
      | "dataTypeName":"array<string>",
      | "multiplicity":{
      | "lower":0,
      | "upper":1,
      | "isUnique":false
      | },
      | "isComposite":false,
      | "isUnique":false,
      | "isIndexable":true,
      | "reverseAttributeName":null
      | }
      | ]
      | },
      | "rows":[
      | {
      | "$typeName$":"__tempQueryResultStruct2",
      | "_col_0":[
      | "2015-01-01"
      | ]
      | }
      | ]
      |}
""".stripMargin)
}
test("testArrayInWhereClause"){
valp=newQueryParser
vale=p("Partition as p where values = ['2015-01-01']").right.get
valr=QueryProcessor.evaluate(e,g,gp)
validateJson(r,"""{
| "query":"Partition as p where (values = [\"2015-01-01\"])",