Commit 30a2ec1f by Shwetha GS

ATLAS-263 Searching for a multi word trait always returns empty result (girishrp via shwethags)

parent a230f4ff
......@@ -81,6 +81,8 @@ Language Notes:
* There are couple of Predicate functions different from SQL:
* _is_ or _isa_can be used to filter Entities that have a particular Trait.
* _has_ can be used to filter Entities that have a value for a particular Attribute.
* When querying for a space delimited multiple-word identifier, it need to be enclosed within
backquote (`)
---+++ DSL Examples
......@@ -91,6 +93,7 @@ Language Notes:
* Column where Column isa PII
* Table where name="sales_fact", columns
* Table where name="sales_fact", columns as column select column.name, column.dataType, column.comment
* `Log Data`
---++ Full-text Search
......
......@@ -5,7 +5,7 @@ Apache Atlas Release Notes
INCOMPATIBLE CHANGES:
ALL CHANGES:
ATLAS-263 Searching for a multi word trait always returns empty result (girishrp via shwethags)
--Release 0.6-incubating
INCOMPATIBLE CHANGES:
......
......@@ -166,8 +166,10 @@ public class BaseHiveRepositoryTest {
HierarchicalTypeDefinition<TraitType> jdbcTraitDef = TypesUtil.createTraitTypeDef("JdbcAccess", null);
HierarchicalTypeDefinition<TraitType> logTraitDef = TypesUtil.createTraitTypeDef("Log Data", null);
return TypesUtil.getTypesDef(ImmutableList.<EnumTypeDefinition>of(), ImmutableList.<StructTypeDefinition>of(),
ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef),
ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef, logTraitDef),
ImmutableList.of(dbClsDef, storageDescClsDef, columnClsDef, tblClsDef, loadProcessClsDef, viewClsDef, partClsDef));
}
......@@ -201,6 +203,10 @@ public class BaseHiveRepositoryTest {
Id salesFact = table("sales_fact", "sales fact table", salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact");
List<Referenceable> logFactColumns = ImmutableList
.of(column("time_id", "int", "time id"), column("app_id", "int", "app id"),
column("machine_id", "int", "machine id"), column("log", "string", "log data", "Log Data"));
List<Referenceable> timeDimColumns = ImmutableList
.of(column("time_id", "int", "time id"),
column("dayOfYear", "int", "day Of Year"),
......@@ -219,6 +225,12 @@ public class BaseHiveRepositoryTest {
loadProcess("loadSalesDaily", "hive query for daily summary", "John ETL", ImmutableList.of(salesFact, timeDim),
ImmutableList.of(salesFactDaily), "create table as select ", "plan", "id", "graph", "ETL");
Id logDB = database("Logging", "logging database", "Tim ETL", "hdfs://host:8000/apps/warehouse/logging");
Id loggingFactDaily =
table("log_fact_daily_mv", "log fact daily materialized view", logDB, sd, "Tim ETL", "Managed",
logFactColumns, "Log Data");
List<Referenceable> productDimColumns = ImmutableList
.of(column("product_id", "int", "product id"),
column("product_name", "string", "product name"),
......@@ -248,6 +260,13 @@ public class BaseHiveRepositoryTest {
loadProcess("loadSalesMonthly", "hive query for monthly summary", "John ETL", ImmutableList.of(salesFactDaily),
ImmutableList.of(salesFactMonthly), "create table as select ", "plan", "id", "graph", "ETL");
Id loggingFactMonthly =
table("logging_fact_monthly_mv", "logging fact monthly materialized view", logDB, sd, "Tim ETL",
"Managed", logFactColumns, "Log Data");
loadProcess("loadLogsMonthly", "hive query for monthly summary", "Tim ETL", ImmutableList.of(loggingFactDaily),
ImmutableList.of(loggingFactMonthly), "create table as select ", "plan", "id", "graph", "ETL");
partition(new ArrayList() {{ add("2015-01-01"); }}, salesFactDaily);
}
......
......@@ -129,13 +129,13 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
@DataProvider(name = "dslQueriesProvider")
private Object[][] createDSLQueries() {
return new Object[][]{
{"from hive_db", 2},
{"hive_db", 2},
{"from hive_db", 3},
{"hive_db", 3},
{"hive_db where hive_db.name=\"Reporting\"", 1},
{"hive_db hive_db.name = \"Reporting\"", 1},
{"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
{"hive_db has name", 2},
{"hive_db, hive_table", 6},
{"hive_db has name", 3},
{"hive_db, hive_table", 8},
{"View is JdbcAccess", 2},
{"hive_db as db1, hive_table where db1.name = \"Reporting\"", 0}, //Not working - ATLAS-145
// - Final working query -> discoveryService.searchByGremlin("L:{_var_0 = [] as Set;g.V().has(\"__typeName\", \"hive_db\").fill(_var_0);g.V().has(\"__superTypeNames\", \"hive_db\").fill(_var_0);_var_0._().as(\"db1\").in(\"__hive_table.db\").back(\"db1\").and(_().has(\"hive_db.name\", T.eq, \"Reporting\")).toList()}")
......@@ -143,21 +143,21 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
{"hive_db, hive_process has name"}, //Invalid query
{"hive_db where hive_db.name=\"Reporting\" and hive_db.createTime < " + System.currentTimeMillis()}
*/
{"from hive_table", 6},
{"hive_table", 6},
{"from hive_table", 8},
{"hive_table", 8},
{"hive_table isa Dimension", 3},
{"hive_column where hive_column isa PII", 6},
{"View is Dimension" , 2},
// {"hive_column where hive_column isa PII select hive_column.name", 6}, //Not working - ATLAS-175
{"hive_column select hive_column.name", 27},
{"hive_column select name", 27},
{"hive_column select hive_column.name", 37},
{"hive_column select name", 37},
{"hive_column where hive_column.name=\"customer_id\"", 4},
{"from hive_table select hive_table.name", 6},
{"from hive_table select hive_table.name", 8},
{"hive_db where (name = \"Reporting\")", 1},
{"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1},
{"hive_db where hive_db is JdbcAccess", 0}, //Not supposed to work
{"hive_db hive_table", 6},
{"hive_db where hive_db has name", 2},
{"hive_db hive_table", 8},
{"hive_db where hive_db has name", 3},
{"hive_db as db1 hive_table where (db1.name = \"Reporting\")", 0}, //Not working -> ATLAS-145
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1},
......@@ -178,9 +178,10 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
// trait searches
{"Dimension", 5},
{"JdbcAccess", 2},
{"ETL", 2},
{"ETL", 3},
{"Metric", 5},
{"PII", 6},
{"`Log Data`", 4},
/* Lineage queries are fired through ClosureQuery and are tested through HiveLineageJerseyResourceIt in webapp module.
Commenting out the below queries since DSL to Gremlin parsing/translation fails with lineage queries when there are array types
......
......@@ -89,7 +89,7 @@ public class QuickStart {
private static final String[] TYPES =
{DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE, VIEW_TYPE, "JdbcAccess",
"ETL", "Metric", "PII", "Fact", "Dimension"};
"ETL", "Metric", "PII", "Fact", "Dimension", "Log Data"};
private final AtlasClient metadataServiceClient;
......@@ -162,8 +162,10 @@ public class QuickStart {
HierarchicalTypeDefinition<TraitType> jdbcTraitDef = TypesUtil.createTraitTypeDef("JdbcAccess", null);
HierarchicalTypeDefinition<TraitType> logTraitDef = TypesUtil.createTraitTypeDef("Log Data", null);
return TypesUtil.getTypesDef(ImmutableList.<EnumTypeDefinition>of(), ImmutableList.<StructTypeDefinition>of(),
ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef),
ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef, logTraitDef),
ImmutableList.of(dbClsDef, storageDescClsDef, columnClsDef, tblClsDef, loadProcessClsDef, viewClsDef));
}
......@@ -195,6 +197,10 @@ public class QuickStart {
rawColumn("customer_id", "int", "customer id", "PII"),
rawColumn("sales", "double", "product id", "Metric"));
List<Referenceable> logFactColumns = ImmutableList
.of(rawColumn("time_id", "int", "time id"), rawColumn("app_id", "int", "app id"),
rawColumn("machine_id", "int", "machine id"), rawColumn("log", "string", "log data", "Log Data"));
Id salesFact = table("sales_fact", "sales fact table", salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact");
List<Referenceable> productDimColumns = ImmutableList
......@@ -225,10 +231,16 @@ public class QuickStart {
Id reportingDB =
database("Reporting", "reporting database", "Jane BI", "hdfs://host:8000/apps/warehouse/reporting");
Id logDB = database("Logging", "logging database", "Tim ETL", "hdfs://host:8000/apps/warehouse/logging");
Id salesFactDaily =
table("sales_fact_daily_mv", "sales fact daily materialized view", reportingDB, sd, "Joe BI", "Managed",
salesFactColumns, "Metric");
Id loggingFactDaily =
table("log_fact_daily_mv", "log fact daily materialized view", logDB, sd, "Tim ETL", "Managed",
logFactColumns, "Log Data");
loadProcess("loadSalesDaily", "hive query for daily summary", "John ETL", ImmutableList.of(salesFact, timeDim),
ImmutableList.of(salesFactDaily), "create table as select ", "plan", "id", "graph", "ETL");
......@@ -242,6 +254,13 @@ public class QuickStart {
loadProcess("loadSalesMonthly", "hive query for monthly summary", "John ETL", ImmutableList.of(salesFactDaily),
ImmutableList.of(salesFactMonthly), "create table as select ", "plan", "id", "graph", "ETL");
Id loggingFactMonthly =
table("logging_fact_monthly_mv", "logging fact monthly materialized view", logDB, sd, "Tim ETL",
"Managed", logFactColumns, "Log Data");
loadProcess("loadLogsMonthly", "hive query for monthly summary", "Tim ETL", ImmutableList.of(loggingFactDaily),
ImmutableList.of(loggingFactMonthly), "create table as select ", "plan", "id", "graph", "ETL");
}
private Id createInstance(Referenceable referenceable) throws Exception {
......@@ -377,7 +396,7 @@ public class QuickStart {
// trait searches
"Dimension",
/*"Fact", - todo: does not work*/
"JdbcAccess", "ETL", "Metric", "PII",
"JdbcAccess", "ETL", "Metric", "PII", "`Log Data`",
/*
// Lineage - todo - fix this, its not working
"Table hive_process outputTables",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment