Commit 574da752 by Suma Shivaprasad

ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)

parent 53574720
......@@ -13,7 +13,7 @@ queryWithPath: query ~ opt(WITHPATH)
query: rep1sep(singleQuery, opt(COMMA))
singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause)
singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset)
singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
WHERE ~ (expr ^? notIdExpression) |
......@@ -22,6 +22,14 @@ singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
fromSrc: identifier ~ AS ~ alias | identifier
orderby: ORDERBY ~ order ~ opt (sortOrder)
limitOffset: LIMIT ~ lmt ~ opt (offset)
offset: OFFSET ~ offsetValue
sortOrder = ASC | DESC
loopExpression: LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias)
selectClause: SELECT ~ rep1sep(selectExpression, COMMA)
......@@ -79,6 +87,9 @@ Language Notes:
* The _!WithPath_ clause can be used with transitive closure queries to retrieve the Path that
connects the two related Entities. (We also provide a higher level interface for Closure Queries
see scaladoc for 'org.apache.atlas.query.ClosureQuery')
* ORDERBY is optional. The order-by attribute should be specified in single quotes ('). When an order-by clause is given, sorting is case-insensitive and ascending by default.
For descending order, specify 'desc' after the order-by attribute. If no order-by clause is given, no default sorting is applied.
* LIMIT is optional. It caps the number of objects fetched, starting from an optional OFFSET. If no offset is specified, counting starts from the beginning of the result set.
* There are couple of Predicate functions different from SQL:
* _is_ or _isa_can be used to filter Entities that have a particular Trait.
* _has_ can be used to filter Entities that have a value for a particular Attribute.
......@@ -89,6 +100,11 @@ Language Notes:
* from DB
* DB where name="Reporting" select name, owner
* DB where name="Reporting" select name, owner orderby 'name'
* DB where name="Reporting" select name limit 10
* DB where name="Reporting" select name, owner limit 10 offset 0
* DB where name="Reporting" select name, owner orderby 'name' limit 10 offset 5
* DB where name="Reporting" select name, owner orderby 'name' desc limit 10 offset 5
* DB has name
* DB is !JdbcAccess
* Column where Column isa PII
......
......@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES:
ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)
ATLAS-543 Entity Instance requests should not require ID element for new Entities (harishjp via shwethags)
ATLAS-681 update committer/ppmc members in the pom.xml (sneethiraj via shwethags)
ATLAS-616 Resolve OOM - Zookeeper throws exceptions when trying to fire DSL queries at Atlas at large scale. (yhemanth via sumasai)
......
......@@ -330,6 +330,10 @@ object Expressions {
def instance() = new InstanceExpression(this)
def path() = new PathExpression(this)
// Wraps this expression in a LimitExpression (cap row count, skip `offset` rows first).
def limit(lmt: Literal[Integer], offset : Literal[Integer]) = new LimitExpression(this, lmt, offset)
// Wraps this expression in an OrderExpression (sort on attribute `odr`, ascending when `asc`).
def order(odr: String, asc: Boolean) = new OrderExpression(this, odr, asc)
}
trait BinaryNode {
......@@ -766,4 +770,30 @@ object Expressions {
override def toString = s"$child withPath"
}
case class LimitExpression(child: Expression, limit: Literal[Integer], offset: Literal[Integer]) extends Expression with UnaryNode {

  // Fixed: dropped the stray trailing space after "$offset" so the rendering is
  // consistent with the other expression toStrings (e.g. OrderExpression).
  override def toString = s"$child limit $limit offset $offset"

  // A limit/offset does not change the row shape, so the data type is simply
  // the child's. Asking for it before resolution completes is a programming
  // error and fails loudly.
  lazy val dataType = {
    if (!resolved) {
      throw new UnresolvedException(this,
        s"datatype. Can not resolve due to unresolved children")
    }
    child.dataType
  }
}
case class OrderExpression(child: Expression, odr: String, asc: Boolean) extends Expression with UnaryNode {

  /** Renders as "&lt;child&gt; order &lt;attr&gt; asc &lt;flag&gt;" for diagnostics. */
  override def toString = s"$child order $odr asc $asc"

  /** Ordering preserves the row shape, so the resolved type is the child's. */
  lazy val dataType =
    if (resolved) child.dataType
    else throw new UnresolvedException(this, s"datatype. Can not resolve due to unresolved children")
}
}
......@@ -19,7 +19,7 @@
package org.apache.atlas.query
import javax.script.{Bindings, ScriptEngine, ScriptEngineManager}
import org.apache.atlas.query.Expressions._
import com.thinkaurelius.titan.core.TitanGraph
import com.tinkerpop.pipes.util.structures.Row
import org.apache.atlas.query.TypeUtils.ResultWithPathStruct
......@@ -27,8 +27,8 @@ import org.apache.atlas.typesystem.json._
import org.apache.atlas.typesystem.types._
import org.json4s._
import org.json4s.native.Serialization._
import scala.language.existentials
import org.apache.atlas.query.Expressions._
case class GremlinQueryResult(query: String,
resultDataType: IDataType[_],
......@@ -82,7 +82,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val rType = qry.expr.dataType
val oType = if (qry.isPathExpresion) qry.expr.children(0).dataType else rType
val rawRes = engine.eval(qry.queryStr, bindings)
if (!qry.hasSelectList) {
val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { v =>
val iV = instanceObject(v)
......@@ -95,14 +94,16 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { r =>
val rV = instanceObject(r).asInstanceOf[Row[java.util.List[AnyRef]]]
val sInstance = sType.createInstance()
val selExpr =
(if (qry.isPathExpresion) qry.expr.children(0) else qry.expr).
asInstanceOf[Expressions.SelectExpression]
selExpr.selectListWithAlias.foreach { aE =>
val cName = aE.alias
val (src, idx) = qry.resultMaping(cName)
val v = rV.getColumn(src).get(idx)
sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
val selObj = SelectExpressionHelper.extractSelectExpression(qry.expr)
if (selObj.isDefined)
{
val selExpr = selObj.get.asInstanceOf[Expressions.SelectExpression]
selExpr.selectListWithAlias.foreach { aE =>
val cName = aE.alias
val (src, idx) = qry.resultMaping(cName)
val v = rV.getColumn(src).get(idx)
sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
}
}
addPathStruct(r, sInstance)
}
......
......@@ -35,6 +35,7 @@ case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[St
// A non-null result mapping means the query projects named columns (a select clause was found).
def hasSelectList = resultMaping != null
// True when the whole expression tree is wrapped in a PathExpression (withPath query).
// Note: the "Expresion" spelling is part of the existing API and must not change here.
def isPathExpresion = expr.isInstanceOf[PathExpression]
}
trait SelectExpressionHandling {
......@@ -122,7 +123,6 @@ trait SelectExpressionHandling {
}
m.toMap
}
}
class GremlinTranslationException(expr: Expression, reason: String) extends
......@@ -186,7 +186,8 @@ class GremlinTranslator(expr: Expression,
}
def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = {
case te : TraitExpression if (te fastEquals topE) => {
// This topE check prevented the comparison of trait expression when it is a child. Like trait as t limit 2
case te : TraitExpression => {
val theTrait = te.as("theTrait")
val theInstance = theTrait.traitInstance().as("theInstance")
val outE =
......@@ -202,6 +203,7 @@ class GremlinTranslator(expr: Expression,
stats.last
}
private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
case ClassExpression(clsName) =>
typeTestExpression(clsName)
......@@ -324,12 +326,26 @@ class GremlinTranslator(expr: Expression,
case pe@PathExpression(child) => {
s"${genQuery(child, inSelect)}.path"
}
case order@OrderExpression(child, odr, asc) => {
var orderby = ""
asc match {
//builds a closure comparison function based on provided order by clause in DSL. This will be used to sort the results by gremlin order pipe.
//Ordering is case insensitive.
case false=> orderby = s"order{it.b.getProperty('$odr').toLowerCase() <=> it.a.getProperty('$odr').toLowerCase()}"//descending
case _ => orderby = s"order{it.a.getProperty('$odr').toLowerCase() <=> it.b.getProperty('$odr').toLowerCase()}"
}
s"""${genQuery(child, inSelect)}.$orderby"""
}
case limitOffset@LimitExpression(child, limit, offset) => {
val totalResultRows = limit.value + offset.value
s"""${genQuery(child, inSelect)} [$offset..<$totalResultRows]"""
}
case x => throw new GremlinTranslationException(x, "expression not yet supported")
}
def genFullQuery(expr: Expression): String = {
var q = genQuery(expr, false)
if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) {
q = s"g.V.$q"
}
......@@ -354,21 +370,48 @@ class GremlinTranslator(expr: Expression,
e1 = e1.transformUp(addAliasToLoopInput())
e1 = e1.transformUp(instanceClauseToTop(e1))
e1 = e1.transformUp(traitClauseWithInstanceForTop(e1))
e1 match {
case e1: SelectExpression => {
val rMap = buildResultMapping(e1)
//Following code extracts the select expressions from expression tree.
val se = SelectExpressionHelper.extractSelectExpression(e1)
if (se.isDefined)
{
val rMap = buildResultMapping(se.get)
GremlinQuery(e1, genFullQuery(e1), rMap)
}
case pe@PathExpression(se@SelectExpression(child, selectList)) => {
val rMap = buildResultMapping(se)
GremlinQuery(e1, genFullQuery(e1), rMap)
}
case e1 => GremlinQuery(e1, genFullQuery(e1), null)
}
else
{
GremlinQuery(e1, genFullQuery(e1), null)
}
}
}
object SelectExpressionHelper {

  /**
   * Finds the SelectExpression, if any, inside an expression tree by
   * unwrapping the nodes that may legally sit above it: limit/offset,
   * order-by and path wrappers. Any other node terminates the search.
   */
  def extractSelectExpression(child: Expression): Option[SelectExpression] = child match {
    case se: SelectExpression         => Some(se)
    case LimitExpression(inner, _, _) => extractSelectExpression(inner)
    case OrderExpression(inner, _, _) => extractSelectExpression(inner)
    case PathExpression(inner)        => extractSelectExpression(inner)
    case _                            => None
  }
}
/*
* TODO
* Translation Issues:
......@@ -379,4 +422,3 @@ class GremlinTranslator(expr: Expression,
* The solution is to to do predicate pushdown and apply the filter immediately on top of the referred Expression.
*/
}
\ No newline at end of file
......@@ -64,10 +64,14 @@ trait QueryKeywords {
protected val AS = Keyword("as")
protected val TIMES = Keyword("times")
protected val WITHPATH = Keyword("withPath")
protected val LIMIT = Keyword("limit") // caps the number of results returned
protected val OFFSET = Keyword("offset") // rows to skip before counting results
protected val ORDERBY = Keyword("orderby") // single token, not "order by"
}
trait ExpressionUtils {
protected val DESC = "desc"
def loop(input: Expression, l: (Expression, Option[Literal[Integer]], Option[String])) = l match {
case (c, None, None) => input.loop(c)
case (c, t, None) => input.loop(c, t.get)
......@@ -85,6 +89,14 @@ trait ExpressionUtils {
input.select(selList: _*)
}
/** Attaches a LIMIT/OFFSET node on top of the given expression tree. */
def limit(input: Expression, lmt: Literal[Integer], offset: Literal[Integer]) =
  input.limit(lmt, offset)

/** Attaches an ORDER BY node (attribute `odr`, ascending when `asc`) on top of `input`. */
def order(input: Expression, odr: String, asc: Boolean) =
  input.order(odr, asc)
def leftmostId(e: Expression) = {
var le: IdExpression = null
e.traverseUp { case i: IdExpression if le == null => le = i}
......@@ -106,7 +118,10 @@ trait ExpressionUtils {
sngQuery2.transformUp(replaceIdWithField(leftSrcId, snglQuery1.field(leftSrcId.name)))
}
}
/**
* Query parser is used to parse the DSL query. It uses scala PackratParsers and pattern matching to extract the expressions.
* It builds up a expression tree.
*/
object QueryParser extends StandardTokenParsers with QueryKeywords with ExpressionUtils with PackratParsers {
import scala.language.higherKinds
......@@ -137,12 +152,33 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
case h :: t => t.foldLeft(h)(merge(_, _))
}
}
def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ^^ {
case s ~ None ~ None => s
case s ~ l ~ None => loop(s, l.get)
case s ~ l ~ sel if l.isDefined => select(loop(s, l.get), sel.get)
case s ~ None ~ sel => select(s, sel.get)
/**
* A singleQuery can have the following forms:
* 1. SrcQuery [select] [orderby desc] [Limit x offset y] -> source query followed by optional select statement followed by optional order by followed by optional limit
* eg: Select "hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1"
* @return
*/
def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset) ^^ {
case s ~ l ~ sel ~ odr ~ lmtoff => {
// Start from the source expression and wrap it with each optional clause in turn.
var expressiontree = s
if (l.isDefined) //Note: The order of if statements is important.
{
expressiontree = loop(expressiontree, l.get);
}
if (odr.isDefined)
{
// odr is (attributeName, ascending) produced by the orderby parser.
expressiontree = order(expressiontree, odr.get._1, odr.get._2)
}
if (lmtoff.isDefined)
{
// lmtoff is (limit, offset); both are turned into integer literals here.
// NOTE(review): when no offset is given, limitOffset supplies the Int 0 while
// the parsed limit is a string constant — confirm `int` accepts both forms.
expressiontree = limit(expressiontree, int (lmtoff.get._1), int (lmtoff.get._2))
}
if (sel.isDefined)
{
// Select is applied last so it projects over the ordered/limited result.
expressiontree = select(expressiontree, sel.get)
}
expressiontree
}
}
/**
......@@ -182,7 +218,25 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def fromSrc = identifier ~ AS ~ alias ^^ { case s ~ a ~ al => s.as(al)} |
identifier
/**
 * Parses "orderby &lt;attr&gt; [asc|desc]".
 * Yields (attribute, ascending); ascending defaults to true when no
 * direction token is present.
 */
def orderby = ORDERBY ~ order ~ opt(asce) ^^ {
  case _ ~ attr ~ direction => (attr, direction.getOrElse(true))
}
// Parses "limit <n> [offset <m>]", yielding (limit, offset); offset defaults to 0.
// NOTE(review): the parsed limit is a string constant while the default offset is
// the Int 0, so the tuple's second component is widened — confirm the downstream
// `int` conversion in singleQuery accepts both forms.
def limitOffset = LIMIT ~ lmt ~ opt (offset) ^^ {
case l ~ lt ~ None => (lt, 0)
case l ~ lt ~ of => (lt, of.get)
}
/** Parses "offset &lt;n&gt;", keeping only the numeric offset constant. */
def offset = OFFSET ~> ofset
/**
 * Maps the sort-direction token to a Boolean "ascending" flag.
 * Fixed: the comparison is now case-insensitive, so "DESC"/"Desc"/"desc" all
 * select descending order — the language notes advertise 'DESC', but the old
 * stable-identifier match (`case DESC`) accepted only the exact lowercase
 * token. Any other token still falls back to ascending, as before.
 */
def asce = asc ^^ {
  case direction if direction.equalsIgnoreCase(DESC) => false
  case _ => true
}
def loopExpression: Parser[(Expression, Option[Literal[Integer]], Option[String])] =
LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) ^^ {
case l ~ e ~ None ~ a => (e, None, a)
......@@ -192,7 +246,6 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def selectClause: Parser[List[(Expression, Option[String])]] = SELECT ~ rep1sep(selectExpression, COMMA) ^^ {
case s ~ cs => cs
}
def selectExpression: Parser[(Expression, Option[String])] = expr ~ opt(AS ~> alias) ^^ {
case e ~ a => (e, a)
}
......@@ -239,7 +292,15 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
}
def alias = ident | stringLit
def lmt = intConstant // row-count argument of LIMIT
def ofset = intConstant // row-skip argument of OFFSET
def order = ident | stringLit // attribute to sort on; may be a quoted string
def asc = ident | stringLit // sort-direction token; anything other than desc means ascending
def literal = booleanConstant ^^ {
boolean(_)
} |
......
......@@ -90,6 +90,14 @@ class Resolver(srcExpr: Option[Expression] = None, aliases: Map[String, Expressi
case l@LoopExpression(inputExpr, loopExpr, t) if inputExpr.resolved => {
val r = new Resolver(Some(inputExpr), inputExpr.namedExpressions, true)
return new LoopExpression(inputExpr, loopExpr.transformUp(r), t)
}
case lmt@LimitExpression(child, limit, offset) => {
val r = new Resolver(Some(child), child.namedExpressions)
return new LimitExpression(child.transformUp(r), limit, offset)
}
case order@OrderExpression(child, odr, asc) => {
val r = new Resolver(Some(child), child.namedExpressions)
return new OrderExpression(child.transformUp(r), odr, asc)
}
case x => x
}
......
......@@ -19,6 +19,7 @@
package org.apache.atlas.discovery;
import com.google.common.collect.ImmutableSet;
import org.apache.atlas.BaseHiveRepositoryTest;
import org.apache.atlas.RepositoryMetadataModule;
import org.apache.atlas.RequestContext;
......@@ -45,8 +46,10 @@ import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
......@@ -248,6 +251,297 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
};
}
/**
 * DSL queries exercising LIMIT/OFFSET semantics, each paired with the expected
 * row count against the BaseHiveRepositoryTest sample data. Queries without a
 * limit clause are included to pin the unlimited baseline counts.
 */
@DataProvider(name = "dslLimitQueriesProvider")
private Object[][] createDSLQueriesWithLimit() {
return new Object[][]{
{"hive_column limit 10 ", 10},
{"hive_column select hive_column.name limit 10 ", 10},
{"hive_column select hive_column.name withPath", 29},
{"hive_column select hive_column.name limit 10 withPath", 10},
{"from hive_db", 3},
{"from hive_db limit 2", 2},
{"from hive_db limit 2 offset 0", 2},
{"from hive_db limit 2 offset 1", 2},
{"from hive_db limit 3 offset 1", 2},
{"hive_db", 3},
{"hive_db where hive_db.name=\"Reporting\"", 1},
{"hive_db where hive_db.name=\"Reporting\" limit 10 ", 1},
{"hive_db hive_db.name = \"Reporting\"", 1},
{"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
{"hive_db has name", 3},
{"hive_db has name limit 2 offset 0", 2},
{"hive_db has name limit 2 offset 1", 2},
{"hive_db has name limit 10 offset 1", 2},
{"hive_db has name limit 10 offset 0", 3},
{"hive_db, hive_table", 8},
{"hive_db, hive_table limit 5", 5},
{"hive_db, hive_table limit 5 offset 0", 5},
{"hive_db, hive_table limit 5 offset 5", 3},
{"View is JdbcAccess", 2},
{"View is JdbcAccess limit 1", 1},
{"View is JdbcAccess limit 2 offset 1", 1},
{"hive_db as db1, hive_table where db1.name = \"Reporting\"", 0}, //Not working - ATLAS-145
{"from hive_table", 8},
{"from hive_table limit 5", 5},
{"from hive_table limit 5 offset 5", 3},
{"hive_table", 8},
{"hive_table limit 5", 5},
{"hive_table limit 5 offset 5", 3},
{"hive_table isa Dimension", 3},
{"hive_table isa Dimension limit 2", 2},
{"hive_table isa Dimension limit 2 offset 0", 2},
{"hive_table isa Dimension limit 2 offset 1", 2},
{"hive_table isa Dimension limit 3 offset 1", 2},
{"hive_column where hive_column isa PII", 6},
{"hive_column where hive_column isa PII limit 5", 5},
{"hive_column where hive_column isa PII limit 5 offset 1", 5},
{"hive_column where hive_column isa PII limit 5 offset 5", 1},
{"View is Dimension" , 2},
{"View is Dimension limit 1" , 1},
{"View is Dimension limit 1 offset 1" , 1},
{"View is Dimension limit 10 offset 1" , 1},
{"hive_column select hive_column.name", 29},
{"hive_column select hive_column.name limit 5", 5},
{"hive_column select hive_column.name limit 5 offset 28", 1},
{"hive_column select name", 29},
{"hive_column select name limit 5", 5},
{"hive_column select name limit 5 offset 28 ", 1},
{"hive_column where hive_column.name=\"customer_id\"", 4},
{"hive_column where hive_column.name=\"customer_id\" limit 2", 2},
{"hive_column where hive_column.name=\"customer_id\" limit 2 offset 1", 2},
{"hive_column where hive_column.name=\"customer_id\" limit 10 offset 3", 1},
{"from hive_table select hive_table.name", 8},
{"from hive_table select hive_table.name limit 5", 5},
{"from hive_table select hive_table.name limit 5 offset 5", 3},
{"hive_db where (name = \"Reporting\")", 1},
{"hive_db where (name = \"Reporting\") limit 10", 1},
{"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1},
{"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 limit 10", 1},
{"hive_db where hive_db is JdbcAccess", 0}, //Not supposed to work
{"hive_db hive_table", 8},
{"hive_db hive_table limit 5", 5},
{"hive_db hive_table limit 5 offset 5", 3},
{"hive_db where hive_db has name", 3},
{"hive_db where hive_db has name limit 5", 3},
{"hive_db where hive_db has name limit 2 offset 0", 2},
{"hive_db where hive_db has name limit 2 offset 1", 2},
{"hive_db as db1 hive_table where (db1.name = \"Reporting\")", 0}, //Not working -> ATLAS-145
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10", 1},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 1", 0},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 0", 1},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 ", 1},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 5", 0},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 ", 1},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 1", 0},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10", 1},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 0 offset 1", 0},
// trait searches
{"Dimension", 5},
{"Dimension limit 2", 2},
{"Dimension limit 2 offset 1", 2},
{"Dimension limit 5 offset 4", 1},
{"JdbcAccess", 2},
{"JdbcAccess limit 5 offset 0", 2},
{"JdbcAccess limit 2 offset 1", 1},
{"JdbcAccess limit 1", 1},
{"ETL", 3},
{"ETL limit 2", 2},
{"ETL limit 1", 1},
{"ETL limit 1 offset 0", 1},
{"ETL limit 2 offset 1", 2},
{"Metric", 5},
{"Metric limit 10", 5},
{"Metric limit 2", 2},
{"Metric limit 10 offset 1", 4},
{"PII", 6},
{"PII limit 10", 6},
{"PII limit 2", 2},
{"PII limit 10 offset 1", 5},
{"`Log Data`", 4},
{"`Log Data` limit 3", 3},
{"`Log Data` limit 10 offset 2", 2},
{"hive_table where name='sales_fact', db where name='Sales'", 1},
{"hive_table where name='sales_fact', db where name='Sales' limit 10", 1},
{"hive_table where name='sales_fact', db where name='Sales' limit 10 offset 1", 0},
{"hive_table where name='sales_fact', db where name='Reporting'", 0},
{"hive_table where name='sales_fact', db where name='Reporting' limit 10", 0},
{"hive_table where name='sales_fact', db where name='Reporting' limit 10 offset 1", 0},
{"hive_partition as p where values = ['2015-01-01']", 1},
{"hive_partition as p where values = ['2015-01-01'] limit 10", 1},
{"hive_partition as p where values = ['2015-01-01'] limit 10 offset 1", 0},
};
}
/**
 * DSL queries exercising the orderby clause. Each row supplies the query, the
 * expected row count, the result column to check ordering on, and the expected
 * sort direction (true = ascending).
 */
@DataProvider(name = "dslOrderByQueriesProvider")
private Object[][] createDSLQueriesWithOrderBy() {
Boolean isAscending = Boolean.TRUE;
return new Object[][]{
//Not working, the problem is in server code not figuring out how to sort. not sure if it is valid use case.
// {"hive_db hive_table orderby 'hive_db.owner'", 8, "owner", isAscending},
// {"hive_db hive_table orderby 'hive_db.owner' limit 5", 5, "owner", isAscending},
// {"hive_db hive_table orderby 'hive_db.owner' limit 5 offset 5", 3, "owner", isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' limit 10 withPath", 10, "name", isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' asc limit 10 withPath", 10, "name", isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' desc limit 10 withPath", 10, "name", !isAscending},
{"from hive_db orderby 'hive_db.owner' limit 3", 3, "owner", isAscending},
{"hive_db where hive_db.name=\"Reporting\" orderby 'owner'", 1, "owner", isAscending},
{"hive_db where hive_db.name=\"Reporting\" orderby 'hive_db.owner' limit 10 ", 1, "owner", isAscending},
{"hive_db where hive_db.name=\"Reporting\" select name, owner orderby 'hive_db.name' ", 1, "name", isAscending},
{"hive_db has name orderby 'hive_db.owner' limit 10 offset 0", 3, "owner", isAscending},
{"from hive_table orderby 'hive_table.owner'", 8, "owner", isAscending},
{"from hive_table orderby 'hive_table.owner' limit 8", 8, "owner", isAscending},
{"hive_table orderby 'hive_table.owner'", 8, "owner", isAscending},
{"hive_table orderby 'hive_table.owner' limit 8", 8, "owner", isAscending},
{"hive_table orderby 'hive_table.owner' limit 8 offset 0", 8, "owner", isAscending},
{"hive_table orderby 'hive_table.owner' desc limit 8 offset 0", 8, "owner", !isAscending},
{"hive_table isa Dimension orderby 'hive_table.owner'", 3, "owner", isAscending},//order not working
{"hive_table isa Dimension orderby 'hive_table.owner' limit 3", 3, "owner", isAscending},
{"hive_table isa Dimension orderby 'hive_table.owner' limit 3 offset 0", 3, "owner", isAscending},
{"hive_table isa Dimension orderby 'hive_table.owner' desc limit 3 offset 0", 3, "owner", !isAscending},
{"hive_column where hive_column isa PII orderby 'hive_column.name'", 6, "name", isAscending},
{"hive_column where hive_column isa PII orderby 'hive_column.name' limit 5", 5, "name", isAscending},
{"hive_column where hive_column isa PII orderby 'hive_column.name' limit 5 offset 1", 5, "name", isAscending},
{"hive_column where hive_column isa PII orderby 'hive_column.name' desc limit 5 offset 1", 5, "name", !isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' ", 29, "hive_column.name", isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' limit 5", 5, "hive_column.name", isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' desc limit 5", 5, "hive_column.name", !isAscending},
{"hive_column select hive_column.name orderby 'hive_column.name' limit 5 offset 28", 1, "hive_column.name", isAscending},
{"hive_column select name orderby 'hive_column.name'", 29, "name", isAscending},
{"hive_column select name orderby 'hive_column.name' limit 5", 5, "name", isAscending},
{"hive_column select name orderby 'hive_column.name' desc", 29, "name", !isAscending},
{"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name'", 4, "name", isAscending},
{"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name' limit 2", 2, "name", isAscending},
{"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name' limit 2 offset 1", 2, "name", isAscending},
{"from hive_table select owner orderby 'hive_table.owner'", 8, "owner", isAscending},
{"from hive_table select owner orderby 'hive_table.owner' limit 5", 5, "owner", isAscending},
{"from hive_table select owner orderby 'hive_table.owner' desc limit 5", 5, "owner", !isAscending},
{"from hive_table select owner orderby 'hive_table.owner' limit 5 offset 5", 3, "owner", isAscending},
{"hive_db where (name = \"Reporting\") orderby 'hive_db.name'", 1, "name", isAscending},
{"hive_db where (name = \"Reporting\") orderby 'hive_db.name' limit 10", 1, "name", isAscending},
{"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 orderby '_col_1'", 1, "_col_1", isAscending}, //will it work
{"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
{"hive_db where hive_db has name orderby 'hive_db.owner'", 3, "owner", isAscending},
{"hive_db where hive_db has name orderby 'hive_db.owner' limit 5", 3, "owner", isAscending},
{"hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 0", 2, "owner", isAscending},
{"hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1", 2, "owner", isAscending},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1'", 1, "_col_1", isAscending},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 1", 0, "_col_1", isAscending},
{"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' ", 1, "_col_1", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 ", 1, "_col_1", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 5", 0, "_col_1", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' ", 1, "_col_0", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 0", 1, "_col_0", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 1", 0, "_col_0", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10", 1, "_col_0", isAscending},
{"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 0 offset 1", 0, "_col_0", isAscending},
};
}
/**
 * Executes an order-by DSL query and checks (a) the expected number of rows
 * and (b) that the rows come back sorted on the given column in the expected
 * direction. Values are lowercased before comparison, mirroring the
 * case-insensitive sort built by the Gremlin translator.
 */
@Test(dataProvider = "dslOrderByQueriesProvider")
public void testSearchByDSLQueriesWithOrderBy(String dslQuery, Integer expectedNumRows, String orderBy, boolean ascending) throws Exception {
System.out.println("Executing dslQuery = " + dslQuery);
String jsonResults = discoveryService.searchByDSL(dslQuery);
Assert.assertNotNull(jsonResults);
JSONObject results = new JSONObject(jsonResults);
Assert.assertEquals(results.length(), 3);
Object query = results.get("query");
Assert.assertNotNull(query);
JSONObject dataType = results.getJSONObject("dataType");
Assert.assertNotNull(dataType);
String typeName = dataType.getString("typeName");
Assert.assertNotNull(typeName);
JSONArray rows = results.getJSONArray("rows");
Assert.assertNotNull(rows);
Assert.assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results
// Collect the order-by column value of every returned row.
List<String> returnedList = new ArrayList<String> ();
for (int i = 0; i < rows.length(); i++) {
JSONObject row = rows.getJSONObject(i);
try
{
returnedList.add(row.get(orderBy).toString());
}
catch(Exception ex)
{
// NOTE(review): rows missing the order-by column are skipped rather than
// failed, which weakens the ordering check — confirm this is intended.
System.out.println( " Exception occured " + ex.getMessage());
}
}
Iterator<String> iter = returnedList.iterator();
String _current = null, _prev = null;
//Following code compares the results in rows and makes sure data is sorted as expected.
while(iter.hasNext())
{
_prev = _current;
_current = iter.next().toLowerCase();
if (_prev != null && _prev.compareTo(_current) != 0)
{
if(ascending)
{
Assert.assertTrue(_prev.compareTo(_current) < 0);
}
else
{
Assert.assertTrue(_prev.compareTo(_current) > 0);
}
}
}
System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
}
@Test(dataProvider = "dslQueriesProvider")
public void testSearchByDSLQueries(String dslQuery, Integer expectedNumRows) throws Exception {
System.out.println("Executing dslQuery = " + dslQuery);
......@@ -272,6 +566,30 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
}
/**
 * Executes a LIMIT/OFFSET DSL query and verifies the JSON result envelope
 * (query, dataType, rows) plus the expected number of returned rows.
 */
@Test(dataProvider = "dslLimitQueriesProvider")
public void testSearchByDSLQueriesWithLimit(String dslQuery, Integer expectedNumRows) throws Exception {
System.out.println("Executing dslQuery = " + dslQuery);
String jsonResults = discoveryService.searchByDSL(dslQuery);
Assert.assertNotNull(jsonResults);
JSONObject results = new JSONObject(jsonResults);
Assert.assertEquals(results.length(), 3);
System.out.println("results = " + results);
Object query = results.get("query");
Assert.assertNotNull(query);
JSONObject dataType = results.getJSONObject("dataType");
Assert.assertNotNull(dataType);
String typeName = dataType.getString("typeName");
Assert.assertNotNull(typeName);
JSONArray rows = results.getJSONArray("rows");
Assert.assertNotNull(rows);
Assert.assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results
System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
}
@DataProvider(name = "invalidDslQueriesProvider")
private Object[][] createInvalidDSLQueries() {
return new String[][]{{"from Unknown"}, {"Unknown"}, {"Unknown is Blah"},};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment