Commit 309af260 by Venkatesh Seetharam

Add more Search DSL tests and fix Example

parent 0d38165d
......@@ -58,12 +58,10 @@ import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.MetadataServiceException;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import java.io.BufferedWriter;
......@@ -258,7 +256,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
String typeName = HiveDataTypes.HIVE_DB.getName();
MetadataServiceClient dgiClient = dgiBridge.getMetadataServiceClient();
JSONObject result = dgiClient.search(typeName, "name", dbName);
JSONObject result = dgiClient.rawSearch(typeName, "name", dbName);
JSONArray results = (JSONArray) result.get("results");
if (results.length() == 0) {
......@@ -283,7 +281,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
String typeName = HiveDataTypes.HIVE_TABLE.getName();
MetadataServiceClient dgiClient = dgiBridge.getMetadataServiceClient();
JSONObject result = dgiClient.search(typeName, "tableName", tableName);
JSONObject result = dgiClient.rawSearch(typeName, "tableName", tableName);
JSONArray results = (JSONArray) result.get("results");
if (results.length() == 0) {
......
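The rename above keeps the hook's lookup-or-register flow intact: it asks the metadata service for an existing instance by attribute and only registers a new one when nothing comes back. A minimal sketch of that pattern follows; only rawSearch and the JSON result shape come from this commit, the helper mentioned in the comment is hypothetical.

import org.apache.hadoop.metadata.MetadataServiceClient;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;

class LookupOrRegisterSketch {
    // Register a database entity only if no instance with this name exists yet.
    void registerDatabaseIfAbsent(MetadataServiceClient dgiClient, String typeName,
                                  String dbName) throws Exception {
        JSONObject result = dgiClient.rawSearch(typeName, "name", dbName);
        JSONArray results = (JSONArray) result.get("results");
        if (results.length() == 0) {
            // Not found: build a Referenceable for the database and submit it.
            // registerDatabase(dgiClient, dbName);   // hypothetical helper
        }
    }
}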
......@@ -20,7 +20,6 @@ package org.apache.hadoop.metadata.hive.hook;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.metadata.MetadataServiceClient;
......@@ -29,7 +28,6 @@ import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.TypesDef;
import org.apache.hadoop.metadata.typesystem.json.TypesSerialization;
import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.testng.Assert;
......@@ -130,7 +128,7 @@ public class HiveHookIT {
}
private void assertInstanceIsRegistered(String typeName, String colName, String colValue) throws Exception{
JSONObject result = dgiCLient.search(typeName, colName, colValue);
JSONObject result = dgiCLient.rawSearch(typeName, colName, colValue);
JSONArray results = (JSONArray) result.get("results");
Assert.assertEquals(results.length(), 1);
JSONObject resultRow = (JSONObject) results.get(0);
......
......@@ -137,6 +137,12 @@ public class MetadataServiceClient {
return callAPI(API.GET_ENTITY, null, guid);
}
public JSONObject searchEntity(String searchQuery) throws MetadataServiceException {
WebResource resource = getResource(API.SEARCH);
resource = resource.queryParam("query", searchQuery);
return callAPIWithResource(API.SEARCH, resource);
}
/**
* Search given type name, an attribute and its value. Uses search dsl
* @param typeName name of the entity type
......@@ -145,10 +151,12 @@ public class MetadataServiceClient {
* @return result json object
* @throws MetadataServiceException
*/
public JSONObject search(String typeName, String attributeName, Object attributeValue) throws MetadataServiceException {
String gremlinQuery = String.format("g.V.has(\"typeName\",\"%s\").and(_().has(\"%s.%s\", T.eq, \"%s\")).toList()",
public JSONObject rawSearch(String typeName, String attributeName,
Object attributeValue) throws MetadataServiceException {
String gremlinQuery = String.format(
"g.V.has(\"typeName\",\"%s\").and(_().has(\"%s.%s\", T.eq, \"%s\")).toList()",
typeName, typeName, attributeName, attributeValue);
return search(gremlinQuery);
return searchByGremlin(gremlinQuery);
}
/**
......@@ -169,10 +177,10 @@ public class MetadataServiceClient {
* @return result json object
* @throws MetadataServiceException
*/
public JSONObject search(String gremlinQuery) throws MetadataServiceException {
WebResource resource = getResource(API.SEARCH);
public JSONObject searchByGremlin(String gremlinQuery) throws MetadataServiceException {
WebResource resource = getResource(API.SEARCH_GREMLIN);
resource = resource.queryParam("query", gremlinQuery);
return callAPIWithResource(API.SEARCH, resource);
return callAPIWithResource(API.SEARCH_GREMLIN, resource);
}
public String getRequestId(JSONObject json) throws MetadataServiceException {
......@@ -193,13 +201,16 @@ public class MetadataServiceClient {
return resource;
}
private JSONObject callAPIWithResource(API api, WebResource resource) throws MetadataServiceException {
private JSONObject callAPIWithResource(API api,
WebResource resource) throws MetadataServiceException {
return callAPIWithResource(api, resource, null);
}
private JSONObject callAPIWithResource(API api, WebResource resource, Object requestObject)
throws MetadataServiceException {
ClientResponse clientResponse = resource.accept(MediaType.APPLICATION_JSON).type(MediaType.APPLICATION_JSON)
ClientResponse clientResponse = resource
.accept(MediaType.APPLICATION_JSON)
.type(MediaType.APPLICATION_JSON)
.method(api.getMethod(), ClientResponse.class, requestObject);
if (clientResponse.getStatus() == Response.Status.OK.getStatusCode()) {
......@@ -213,7 +224,8 @@ public class MetadataServiceClient {
throw new MetadataServiceException(api, clientResponse.getClientResponseStatus());
}
private JSONObject callAPI(API api, Object requestObject, String... pathParams) throws MetadataServiceException {
private JSONObject callAPI(API api, Object requestObject,
String... pathParams) throws MetadataServiceException {
WebResource resource = getResource(api, pathParams);
return callAPIWithResource(api, resource, requestObject);
}
......
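After this rename the client exposes three distinct entry points: searchEntity sends a DSL query to the SEARCH endpoint, searchByGremlin sends raw Gremlin to SEARCH_GREMLIN, and rawSearch is a convenience that formats a Gremlin attribute lookup and delegates to searchByGremlin. A hedged usage sketch, with the type name and attribute values chosen purely for illustration:

import org.apache.hadoop.metadata.MetadataServiceClient;
import org.codehaus.jettison.json.JSONObject;

class SearchClientSketch {
    void demo(MetadataServiceClient client) throws Exception {
        // DSL search: the query string goes straight to the SEARCH endpoint.
        JSONObject dslResult = client.searchEntity("DB where name=\"Reporting\"");

        // Attribute search: rawSearch formats a Gremlin query along the lines of
        //   g.V.has("typeName","DB").and(_().has("DB.name", T.eq, "Sales")).toList()
        // and hands it to searchByGremlin, which targets the SEARCH_GREMLIN endpoint.
        JSONObject attrResult = client.rawSearch("DB", "name", "Sales");

        System.out.println(dslResult);
        System.out.println(attrResult);
    }
}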
......@@ -47,6 +47,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Graph backed implementation of Search.
*/
public class GraphBackedDiscoveryService implements DiscoveryService {
private static final Logger LOG = LoggerFactory.getLogger(GraphBackedDiscoveryService.class);
......@@ -69,6 +72,8 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
*/
@Override
public String searchByDSL(String dslQuery) throws DiscoveryException {
LOG.info("Executing dsl query={}", dslQuery);
try {
QueryParser queryParser = new QueryParser();
Either<Parsers.NoSuccess, Expressions.Expression> either = queryParser.apply(dslQuery);
if (either.isRight()) {
......@@ -76,6 +81,9 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
GremlinQueryResult queryResult = evaluate(expression);
return queryResult.toJson();
}
} catch (Exception e) { // unable to catch ExpressionException
throw new DiscoveryException("Invalid expression : " + dslQuery);
}
throw new DiscoveryException("Invalid expression : " + dslQuery);
}
......@@ -102,6 +110,7 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
@Override
public List<Map<String, String>> searchByGremlin(String gremlinQuery)
throws DiscoveryException {
LOG.info("Executing gremlin query={}", gremlinQuery);
ScriptEngineManager manager = new ScriptEngineManager();
ScriptEngine engine = manager.getEngineByName("gremlin-groovy");
Bindings bindings = engine.createBindings();
......
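searchByGremlin uses the standard javax.script machinery with the gremlin-groovy engine, binding the graph into the script scope before evaluation. A stripped-down sketch of that pattern, assuming the graph is bound under the name g (which matches the g.V... queries the client generates) and ignoring the result conversion the service performs:

import javax.script.Bindings;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

class GremlinEvalSketch {
    // Evaluate a Gremlin-Groovy script against a graph bound as 'g'.
    Object evaluate(Object graph, String gremlinQuery) throws Exception {
        ScriptEngineManager manager = new ScriptEngineManager();
        ScriptEngine engine = manager.getEngineByName("gremlin-groovy");
        Bindings bindings = engine.createBindings();
        bindings.put("g", graph);                   // the script refers to the graph as g
        return engine.eval(gremlinQuery, bindings); // e.g. "g.V.has('typeName').toList()"
    }
}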
......@@ -167,17 +167,55 @@ public class GraphBackedDiscoveryServiceTest {
return new String[][] {
{"from DB"},
{"DB"},
{"DB where DB.name=\"Reporting\""},
{"DB DB.name = \"Reporting\""},
{"DB where DB.name=\"Reporting\" select name, owner"},
{"DB has name"},
{"DB, Table"},
{"DB is JdbcAccess"},
/*
{"DB, LoadProcess has name"},
{"DB as db1, Table where db1.name = \"Reporting\""},
{"DB where DB.name=\"Reporting\" and DB.createTime < " + System.currentTimeMillis()},
*/
{"from Table"},
{"Table"},
{"DB, Table"},
/*{"DB as db1 Table where db1.name = \"Reporting\""},*/
{"DB name = \"Reporting\""},
{"Column where Column isa PII"},
{"Table is Dimension"},
{"Column where Column isa PII"},
{"View is Dimension"},
/*{"Column where Column isa PII select Column.name"},*/
{"Column select Column.name"},
{"Column select name"},
{"Column where Column.name=\"customer_id\""},
{"from Table select Table.name"},
{"DB where (name = \"Reporting\")"},
{"DB where (name = \"Reporting\") select name as _col_0, owner as _col_1"},
{"DB where DB is JdbcAccess"},
{"DB where DB has name"},
{"DB Table"},
{"DB where DB has name"},
{"DB as db1 Table where (db1.name = \"Reporting\")"},
{"DB where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 "},
/*
todo: does not work
{"DB where (name = \"Reporting\") and ((createTime + 1) > 0)"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") select db1.name as dbName, tab.name as tabName"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) or (db1.name = \"Reporting\") select db1.name as dbName, tab.name as tabName"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName"},
{"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName"},
*/
// trait searches
{"Dimension"},
/*{"Fact"}, - todo: does not work*/
{"JdbcAccess"},
{"ETL"},
{"Metric"},
{"PII"},
// Lineage
{"Table LoadProcess outputTable"},
{"Table loop (LoadProcess outputTable)"},
{"Table as _loop0 loop (LoadProcess outputTable) withPath"},
{"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable withPath"},
};
}
......@@ -201,7 +239,24 @@ public class GraphBackedDiscoveryServiceTest {
JSONArray rows = results.getJSONArray("rows");
Assert.assertNotNull(rows);
Assert.assertTrue(rows.length() > 0);
Assert.assertTrue(rows.length() >= 0); // some queries may not have any results
System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
}
@DataProvider(name = "invalidDslQueriesProvider")
private Object[][] createInvalidDSLQueries() {
return new String[][] {
{"from Unknown"},
{"Unknown"},
{"Unknown is Blah"},
};
}
@Test (dataProvider = "invalidDslQueriesProvider", expectedExceptions = DiscoveryException.class)
public void testSearchByDSLInvalidQueries(String dslQuery) throws Exception {
System.out.println("Executing dslQuery = " + dslQuery);
discoveryService.searchByDSL(dslQuery);
Assert.fail();
}
@Test
......
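The new negative tests lean on TestNG's data-provider plus expectedExceptions combination: each invalid query is fed to the test method, which must throw DiscoveryException for the test to pass, and the trailing Assert.fail() only executes if no exception was raised. A self-contained sketch of that pattern with stand-in names (the real test calls discoveryService.searchByDSL):

import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

public class ExpectedExceptionPatternTest {
    @DataProvider(name = "badQueries")
    private Object[][] badQueries() {
        return new String[][] {{"from Unknown"}, {"Unknown"}, {"Unknown is Blah"}};
    }

    @Test(dataProvider = "badQueries", expectedExceptions = IllegalArgumentException.class)
    public void invalidQueriesAreRejected(String query) {
        // Stand-in for discoveryService.searchByDSL(query): reject unknown types.
        if (query.contains("Unknown")) {
            throw new IllegalArgumentException("Invalid expression : " + query);
        }
        Assert.fail(); // reached only if the call above did not throw
    }
}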
......@@ -36,15 +36,16 @@ import org.apache.hadoop.metadata.typesystem.types.StructTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.TraitType;
import org.apache.hadoop.metadata.typesystem.types.TypeUtils;
import org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A driver that sets up sample types and data for testing purposes.
* todo - move this to examples module. Fix collections as well.
* Please take a look at QueryDSL in docs for the Meta Model.
* todo - move this to examples module. Fix failing collections.
*/
public class QuickStart {
......@@ -52,13 +53,14 @@ public class QuickStart {
String baseUrl = getServerUrl(args);
QuickStart quickStart = new QuickStart(baseUrl);
// Shows how to create types in DGI for your meta model
quickStart.createTypes();
// verify types created
quickStart.verifyTypesCreated();
// Shows how to create entities (instances) for the added types in DGI
quickStart.createEntities();
// verify entity created
quickStart.verifyEntityCreated();
// Shows some search queries using DSL based on types
quickStart.search();
}
static String getServerUrl(String[] args) {
......@@ -89,12 +91,16 @@ public class QuickStart {
}
void createTypes() throws Exception {
TypesDef typesDef = setupTypes();
TypesDef typesDef = createTypeDefinitions();
String typesAsJSON = TypesSerialization.toJson(typesDef);
metadataServiceClient.createType(typesAsJSON);
// verify types created
verifyTypesCreated();
}
TypesDef setupTypes() throws Exception {
TypesDef createTypeDefinitions() throws Exception {
HierarchicalTypeDefinition<ClassType> dbClsDef
= TypesUtil.createClassTypeDef(DATABASE_TYPE, null,
attrDef("name", DataTypes.STRING_TYPE),
......@@ -119,9 +125,9 @@ public class QuickStart {
attrDef("dataType", DataTypes.STRING_TYPE),
attrDef("comment", DataTypes.STRING_TYPE),
new AttributeDefinition("sd", STORAGE_DESC_TYPE,
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("table", "Table",
Multiplicity.OPTIONAL, false, null)
Multiplicity.REQUIRED, false, null)
// new AttributeDefinition("table", DataTypes.STRING_TYPE.getName(),
// Multiplicity.REQUIRED, false, null)
);
HierarchicalTypeDefinition<ClassType> tblClsDef =
......@@ -140,10 +146,10 @@ public class QuickStart {
attrDef("viewExpandedText", DataTypes.STRING_TYPE),
attrDef("tableType", DataTypes.STRING_TYPE),
attrDef("temporary", DataTypes.BOOLEAN_TYPE),
// todo - fix this post serialization support for collections
new AttributeDefinition("columns",
DataTypes.arrayTypeName(DataTypes.STRING_TYPE.getName()),
Multiplicity.COLLECTION, false, null)
// todo - fix this post serialization support for collections
// new AttributeDefinition("columns", DataTypes.arrayTypeName(COLUMN_TYPE),
// Multiplicity.COLLECTION, true, null)
);
......@@ -233,35 +239,35 @@ public class QuickStart {
ArrayList<Referenceable> salesFactColumns = new ArrayList<>();
Referenceable column = column("time_id", "int", "time id", null);
Referenceable column = column("time_id", "int", "time id", sd);
salesFactColumns.add(column);
column = column("product_id", "int", "product id", null);
column = column("product_id", "int", "product id", sd);
salesFactColumns.add(column);
column = column("customer_id", "int", "customer id", null, "PII");
column = column("customer_id", "int", "customer id", sd, "PII");
salesFactColumns.add(column);
column = column("sales", "double", "product id", null, "Metric");
column = column("sales", "double", "product id", sd, "Metric");
salesFactColumns.add(column);
Referenceable salesFact = table("sales_fact", "sales fact table",
salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact");
ArrayList<Referenceable> productDimColumns = new ArrayList<>();
column = column("product_id", "int", "product id", null);
column = column("product_id", "int", "product id", sd);
productDimColumns.add(column);
column = column("product_name", "string", "product name", null);
column = column("product_name", "string", "product name", sd);
productDimColumns.add(column);
column = column("brand_name", "int", "brand name", null);
column = column("brand_name", "int", "brand name", sd);
productDimColumns.add(column);
Referenceable productDim = table("product_dim", "product dimension table",
salesDB, sd, "John Doe", "Managed", productDimColumns, "Dimension");
ArrayList<Referenceable> timeDimColumns = new ArrayList<>();
column = column("time_id", "int", "time id", null);
column = column("time_id", "int", "time id", sd);
timeDimColumns.add(column);
column = column("dayOfYear", "int", "day Of Year", null);
column = column("dayOfYear", "int", "day Of Year", sd);
timeDimColumns.add(column);
column = column("weekDay", "int", "week Day", null);
column = column("weekDay", "int", "week Day", sd);
timeDimColumns.add(column);
Referenceable timeDim = table("time_dim", "time dimension table",
......@@ -269,11 +275,11 @@ public class QuickStart {
ArrayList<Referenceable> customerDimColumns = new ArrayList<>();
column = column("customer_id", "int", "customer id", null, "PII");
column = column("customer_id", "int", "customer id", sd, "PII");
customerDimColumns.add(column);
column = column("name", "string", "customer name", null, "PII");
column = column("name", "string", "customer name", sd, "PII");
customerDimColumns.add(column);
column = column("address", "string", "customer address", null, "PII");
column = column("address", "string", "customer address", sd, "PII");
customerDimColumns.add(column);
Referenceable customerDim = table("customer_dim", "customer dimension table",
......@@ -288,17 +294,17 @@ public class QuickStart {
"Joe BI", "Managed", salesFactColumns, "Metric");
Referenceable loadSalesFactDaily = loadProcess("loadSalesDaily", "John ETL",
Arrays.asList(salesFact, timeDim), salesFactDaily,
ImmutableList.of(salesFact, timeDim), salesFactDaily,
"create table as select ", "plan", "id", "graph",
"ETL");
System.out.println("added loadSalesFactDaily = " + loadSalesFactDaily);
Referenceable productDimView = view("product_dim_view", reportingDB,
Arrays.asList(productDim), "Dimension", "JdbcAccess");
ImmutableList.of(productDim), "Dimension", "JdbcAccess");
System.out.println("added productDimView = " + productDimView);
Referenceable customerDimView = view("customer_dim_view", reportingDB,
Arrays.asList(customerDim), "Dimension", "JdbcAccess");
ImmutableList.of(customerDim), "Dimension", "JdbcAccess");
System.out.println("added customerDimView = " + customerDimView);
Referenceable salesFactMonthly = table("sales_fact_monthly_mv",
......@@ -306,7 +312,7 @@ public class QuickStart {
reportingDB, sd, "Jane BI", "Managed", salesFactColumns, "Metric");
Referenceable loadSalesFactMonthly = loadProcess("loadSalesMonthly", "John ETL",
Arrays.asList(salesFactDaily), salesFactMonthly,
ImmutableList.of(salesFactDaily), salesFactMonthly,
"create table as select ", "plan", "id", "graph",
"ETL");
System.out.println("added loadSalesFactMonthly = " + loadSalesFactMonthly);
......@@ -314,7 +320,6 @@ public class QuickStart {
private Referenceable createInstance(Referenceable referenceable) throws Exception {
String typeName = referenceable.getTypeName();
System.out.println("creating instance of type " + typeName);
String entityJSON = InstanceSerialization.toJson(referenceable, true);
System.out.println("Submitting new entity= " + entityJSON);
......@@ -380,6 +385,7 @@ public class QuickStart {
referenceable.set("sd", sd);
// todo - fix this post serialization support for collections
// referenceable.set("columns", columns);
ArrayList<String> columnNames = new ArrayList<>(columns.size());
for (Referenceable column : columns) {
columnNames.add(String.valueOf(column.get("name")));
......@@ -402,6 +408,10 @@ public class QuickStart {
referenceable.set("endTime", System.currentTimeMillis() + 10000);
// todo - fix this post serialization support for collections
/*
referenceable.set("inputTables", inputTables);
referenceable.set("outputTable", outputTable);
*/
ArrayList<String> inputTableNames = new ArrayList<>(inputTables.size());
for (Referenceable inputTable : inputTables) {
inputTableNames.add(String.valueOf(inputTable.get("name")));
......@@ -442,7 +452,78 @@ public class QuickStart {
}
}
private void verifyEntityCreated() {
// todo
private String[] getDSLQueries() {
return new String[]{
"from DB",
"DB",
"DB where name=\"Reporting\"",
"DB where DB.name=\"Reporting\"",
"DB name = \"Reporting\"",
"DB DB.name = \"Reporting\"",
"DB where name=\"Reporting\" select name, owner",
"DB where DB.name=\"Reporting\" select name, owner",
"DB has name",
"DB where DB has name",
"DB, Table",
"DB is JdbcAccess",
/*
"DB, LoadProcess has name",
"DB as db1, Table where db1.name = \"Reporting\"",
"DB where DB.name=\"Reporting\" and DB.createTime < " + System.currentTimeMillis()},
*/
"from Table",
"Table",
"Table is Dimension",
"Column where Column isa PII",
"View is Dimension",
/*"Column where Column isa PII select Column.name",*/
"Column select Column.name",
"Column select name",
"Column where Column.name=\"customer_id\"",
"from Table select Table.name",
"DB where (name = \"Reporting\")",
"DB where (name = \"Reporting\") select name as _col_0, owner as _col_1",
"DB where DB is JdbcAccess",
"DB where DB has name",
"DB Table",
"DB where DB has name",
"DB as db1 Table where (db1.name = \"Reporting\")",
"DB where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ",
/*
todo: does not work
"DB where (name = \"Reporting\") and ((createTime + 1) > 0)",
"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") select db1.name as dbName, tab.name as tabName",
"DB as db1 Table as tab where ((db1.createTime + 1) > 0) or (db1.name = \"Reporting\") select db1.name as dbName, tab.name as tabName",
"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName",
"DB as db1 Table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName",
*/
// trait searches
"Dimension",
/*"Fact", - todo: does not work*/
"JdbcAccess",
"ETL",
"Metric",
"PII",
/*
// Lineage - todo - fix this, its not working
"Table LoadProcess outputTable",
"Table loop (LoadProcess outputTable)",
"Table as _loop0 loop (LoadProcess outputTable) withPath",
"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable withPath",
*/
};
}
private void search() throws Exception {
for (String dslQuery : getDSLQueries()) {
JSONObject response = metadataServiceClient.searchEntity(dslQuery);
JSONObject results = response.getJSONObject(MetadataServiceClient.RESULTS);
if (!results.isNull("rows")) {
JSONArray rows = results.getJSONArray("rows");
System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
} else {
System.out.println("query [" + dslQuery + "] failed, results:" + results.toString());
}
}
}
}
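Because collection-of-reference attributes do not serialize yet, QuickStart stores lists of plain string names on the entities instead of the Referenceables themselves; the todo comments mark where the real references should go later. A minimal sketch of that workaround, with the attribute name columnNames assumed for illustration:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.metadata.typesystem.Referenceable;

class CollectionsWorkaroundSketch {
    // Build a table entity that records its columns by name only.
    Referenceable tableWithColumnNames(String tableName, List<Referenceable> columns) {
        Referenceable table = new Referenceable("Table");
        table.set("name", tableName);
        // table.set("columns", columns);  // todo: restore once collections serialize
        ArrayList<String> columnNames = new ArrayList<>(columns.size());
        for (Referenceable column : columns) {
            columnNames.add(String.valueOf(column.get("name")));
        }
        table.set("columnNames", columnNames);
        return table;
    }
}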
......@@ -44,7 +44,9 @@ import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.util.List;
/**
* Search Integration Tests.
*/
public class MetadataDiscoveryJerseyResourceIT extends BaseResourceIT {
@BeforeClass
......@@ -96,7 +98,7 @@ public class MetadataDiscoveryJerseyResourceIT extends BaseResourceIT {
.type(MediaType.APPLICATION_JSON)
.method(HttpMethod.GET, ClientResponse.class);
Assert.assertEquals(clientResponse.getStatus(),
Response.Status.INTERNAL_SERVER_ERROR.getStatusCode());
Response.Status.BAD_REQUEST.getStatusCode());
}
@Test
......
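The status-code change means an invalid DSL expression now surfaces as a client error rather than an internal server error. A hedged Jersey 1.x sketch of that assertion; the server URL and resource path below are assumptions, only the expected 400 comes from this commit:

import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import javax.ws.rs.HttpMethod;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

class InvalidDslStatusSketch {
    void checkInvalidQueryIsBadRequest() {
        WebResource resource = Client.create()
                .resource("http://localhost:21000")              // assumed server URL
                .path("api/metadata/discovery/search/dsl")       // assumed resource path
                .queryParam("query", "blah blah is not a query");
        ClientResponse clientResponse = resource
                .accept(MediaType.APPLICATION_JSON)
                .type(MediaType.APPLICATION_JSON)
                .method(HttpMethod.GET, ClientResponse.class);
        // Invalid expressions should now come back as 400, not 500.
        if (clientResponse.getStatus() != Response.Status.BAD_REQUEST.getStatusCode()) {
            throw new IllegalStateException("expected 400, got " + clientResponse.getStatus());
        }
    }
}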