Commit 574da752 by Suma Shivaprasad

ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)

parent 53574720
...@@ -13,7 +13,7 @@ queryWithPath: query ~ opt(WITHPATH) ...@@ -13,7 +13,7 @@ queryWithPath: query ~ opt(WITHPATH)
query: rep1sep(singleQuery, opt(COMMA)) query: rep1sep(singleQuery, opt(COMMA))
singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset)
singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) | singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
WHERE ~ (expr ^? notIdExpression) | WHERE ~ (expr ^? notIdExpression) |
...@@ -22,6 +22,14 @@ singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) | ...@@ -22,6 +22,14 @@ singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
fromSrc: identifier ~ AS ~ alias | identifier fromSrc: identifier ~ AS ~ alias | identifier
orderby: ORDERBY ~ order ~ opt (sortOrder)
limitOffset: LIMIT ~ lmt ~ opt (offset)
offset: OFFSET ~ offsetValue
sortOrder = ASC | DESC
loopExpression: LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) loopExpression: LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias)
selectClause: SELECT ~ rep1sep(selectExpression, COMMA) selectClause: SELECT ~ rep1sep(selectExpression, COMMA)
...@@ -79,6 +87,9 @@ Language Notes: ...@@ -79,6 +87,9 @@ Language Notes:
* The _!WithPath_ clause can be used with transitive closure queries to retrieve the Path that * The _!WithPath_ clause can be used with transitive closure queries to retrieve the Path that
connects the two related Entities. (We also provide a higher level interface for Closure Queries connects the two related Entities. (We also provide a higher level interface for Closure Queries
see scaladoc for 'org.apache.atlas.query.ClosureQuery') see scaladoc for 'org.apache.atlas.query.ClosureQuery')
* ORDERBY is optional. Orderby clause should be specified in single quote ('). When order by clause is specified case insensitive sorting is done in ascending order.
For sorting in descending order specify 'DESC' after order by clause. If no order by is specified then no default sorting is applied.
* LIMIT is optional. It limits the maximum number of objects to be fetched starting from specified optional offset. If no offset is specified count starts from beginning.
* There are couple of Predicate functions different from SQL: * There are couple of Predicate functions different from SQL:
* _is_ or _isa_can be used to filter Entities that have a particular Trait. * _is_ or _isa_can be used to filter Entities that have a particular Trait.
* _has_ can be used to filter Entities that have a value for a particular Attribute. * _has_ can be used to filter Entities that have a value for a particular Attribute.
...@@ -89,6 +100,11 @@ Language Notes: ...@@ -89,6 +100,11 @@ Language Notes:
* from DB * from DB
* DB where name="Reporting" select name, owner * DB where name="Reporting" select name, owner
* DB where name="Reporting" select name, owner orderby 'name'
* DB where name="Reporting" select name limit 10
* DB where name="Reporting" select name, owner limit 10 offset 0
* DB where name="Reporting" select name, owner orderby 'name' limit 10 offset 5
* DB where name="Reporting" select name, owner orderby 'name' desc limit 10 offset 5
* DB has name * DB has name
* DB is !JdbcAccess * DB is !JdbcAccess
* Column where Column isa PII * Column where Column isa PII
......
...@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset ...@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags) ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES: ALL CHANGES:
ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)
ATLAS-543 Entity Instance requests should not require ID element for new Entities (harishjp via shwethags) ATLAS-543 Entity Instance requests should not require ID element for new Entities (harishjp via shwethags)
ATLAS-681 update committer/ppmc members in the pom.xml (sneethiraj via shwethags) ATLAS-681 update committer/ppmc members in the pom.xml (sneethiraj via shwethags)
ATLAS-616 Resolve OOM - Zookeeper throws exceptions when trying to fire DSL queries at Atlas at large scale. (yhemanth via sumasai) ATLAS-616 Resolve OOM - Zookeeper throws exceptions when trying to fire DSL queries at Atlas at large scale. (yhemanth via sumasai)
......
...@@ -330,6 +330,10 @@ object Expressions { ...@@ -330,6 +330,10 @@ object Expressions {
def instance() = new InstanceExpression(this) def instance() = new InstanceExpression(this)
def path() = new PathExpression(this) def path() = new PathExpression(this)
def limit(lmt: Literal[Integer], offset : Literal[Integer]) = new LimitExpression(this, lmt, offset)
def order(odr: String, asc: Boolean) = new OrderExpression(this, odr, asc)
} }
trait BinaryNode { trait BinaryNode {
...@@ -766,4 +770,30 @@ object Expressions { ...@@ -766,4 +770,30 @@ object Expressions {
override def toString = s"$child withPath" override def toString = s"$child withPath"
} }
case class LimitExpression(child: Expression, limit: Literal[Integer], offset: Literal[Integer]) extends Expression with UnaryNode {
override def toString = s"$child limit $limit offset $offset "
lazy val dataType = {
if (!resolved) {
throw new UnresolvedException(this,
s"datatype. Can not resolve due to unresolved children")
}
child.dataType
}
}
case class OrderExpression(child: Expression, odr: String, asc: Boolean) extends Expression with UnaryNode {
override def toString = s"$child order $odr asc $asc"
lazy val dataType = {
if (!resolved) {
throw new UnresolvedException(this,
s"datatype. Can not resolve due to unresolved children")
}
child.dataType
}
}
} }
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
package org.apache.atlas.query package org.apache.atlas.query
import javax.script.{Bindings, ScriptEngine, ScriptEngineManager} import javax.script.{Bindings, ScriptEngine, ScriptEngineManager}
import org.apache.atlas.query.Expressions._
import com.thinkaurelius.titan.core.TitanGraph import com.thinkaurelius.titan.core.TitanGraph
import com.tinkerpop.pipes.util.structures.Row import com.tinkerpop.pipes.util.structures.Row
import org.apache.atlas.query.TypeUtils.ResultWithPathStruct import org.apache.atlas.query.TypeUtils.ResultWithPathStruct
...@@ -27,8 +27,8 @@ import org.apache.atlas.typesystem.json._ ...@@ -27,8 +27,8 @@ import org.apache.atlas.typesystem.json._
import org.apache.atlas.typesystem.types._ import org.apache.atlas.typesystem.types._
import org.json4s._ import org.json4s._
import org.json4s.native.Serialization._ import org.json4s.native.Serialization._
import scala.language.existentials import scala.language.existentials
import org.apache.atlas.query.Expressions._
case class GremlinQueryResult(query: String, case class GremlinQueryResult(query: String,
resultDataType: IDataType[_], resultDataType: IDataType[_],
...@@ -82,7 +82,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS ...@@ -82,7 +82,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val rType = qry.expr.dataType val rType = qry.expr.dataType
val oType = if (qry.isPathExpresion) qry.expr.children(0).dataType else rType val oType = if (qry.isPathExpresion) qry.expr.children(0).dataType else rType
val rawRes = engine.eval(qry.queryStr, bindings) val rawRes = engine.eval(qry.queryStr, bindings)
if (!qry.hasSelectList) { if (!qry.hasSelectList) {
val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { v => val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { v =>
val iV = instanceObject(v) val iV = instanceObject(v)
...@@ -95,15 +94,17 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS ...@@ -95,15 +94,17 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { r => val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { r =>
val rV = instanceObject(r).asInstanceOf[Row[java.util.List[AnyRef]]] val rV = instanceObject(r).asInstanceOf[Row[java.util.List[AnyRef]]]
val sInstance = sType.createInstance() val sInstance = sType.createInstance()
val selExpr = val selObj = SelectExpressionHelper.extractSelectExpression(qry.expr)
(if (qry.isPathExpresion) qry.expr.children(0) else qry.expr). if (selObj.isDefined)
asInstanceOf[Expressions.SelectExpression] {
val selExpr = selObj.get.asInstanceOf[Expressions.SelectExpression]
selExpr.selectListWithAlias.foreach { aE => selExpr.selectListWithAlias.foreach { aE =>
val cName = aE.alias val cName = aE.alias
val (src, idx) = qry.resultMaping(cName) val (src, idx) = qry.resultMaping(cName)
val v = rV.getColumn(src).get(idx) val v = rV.getColumn(src).get(idx)
sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v)) sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
} }
}
addPathStruct(r, sInstance) addPathStruct(r, sInstance)
} }
GremlinQueryResult(qry.expr.toString, rType, rows.toList) GremlinQueryResult(qry.expr.toString, rType, rows.toList)
......
...@@ -35,6 +35,7 @@ case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[St ...@@ -35,6 +35,7 @@ case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[St
def hasSelectList = resultMaping != null def hasSelectList = resultMaping != null
def isPathExpresion = expr.isInstanceOf[PathExpression] def isPathExpresion = expr.isInstanceOf[PathExpression]
} }
trait SelectExpressionHandling { trait SelectExpressionHandling {
...@@ -122,7 +123,6 @@ trait SelectExpressionHandling { ...@@ -122,7 +123,6 @@ trait SelectExpressionHandling {
} }
m.toMap m.toMap
} }
} }
class GremlinTranslationException(expr: Expression, reason: String) extends class GremlinTranslationException(expr: Expression, reason: String) extends
...@@ -186,7 +186,8 @@ class GremlinTranslator(expr: Expression, ...@@ -186,7 +186,8 @@ class GremlinTranslator(expr: Expression,
} }
def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = { def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = {
case te : TraitExpression if (te fastEquals topE) => { // This topE check prevented the comparison of trait expression when it is a child. Like trait as t limit 2
case te : TraitExpression => {
val theTrait = te.as("theTrait") val theTrait = te.as("theTrait")
val theInstance = theTrait.traitInstance().as("theInstance") val theInstance = theTrait.traitInstance().as("theInstance")
val outE = val outE =
...@@ -202,6 +203,7 @@ class GremlinTranslator(expr: Expression, ...@@ -202,6 +203,7 @@ class GremlinTranslator(expr: Expression,
stats.last stats.last
} }
private def genQuery(expr: Expression, inSelect: Boolean): String = expr match { private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
case ClassExpression(clsName) => case ClassExpression(clsName) =>
typeTestExpression(clsName) typeTestExpression(clsName)
...@@ -324,12 +326,26 @@ class GremlinTranslator(expr: Expression, ...@@ -324,12 +326,26 @@ class GremlinTranslator(expr: Expression,
case pe@PathExpression(child) => { case pe@PathExpression(child) => {
s"${genQuery(child, inSelect)}.path" s"${genQuery(child, inSelect)}.path"
} }
case order@OrderExpression(child, odr, asc) => {
var orderby = ""
asc match {
//builds a closure comparison function based on provided order by clause in DSL. This will be used to sort the results by gremlin order pipe.
//Ordering is case insensitive.
case false=> orderby = s"order{it.b.getProperty('$odr').toLowerCase() <=> it.a.getProperty('$odr').toLowerCase()}"//descending
case _ => orderby = s"order{it.a.getProperty('$odr').toLowerCase() <=> it.b.getProperty('$odr').toLowerCase()}"
}
s"""${genQuery(child, inSelect)}.$orderby"""
}
case limitOffset@LimitExpression(child, limit, offset) => {
val totalResultRows = limit.value + offset.value
s"""${genQuery(child, inSelect)} [$offset..<$totalResultRows]"""
}
case x => throw new GremlinTranslationException(x, "expression not yet supported") case x => throw new GremlinTranslationException(x, "expression not yet supported")
} }
def genFullQuery(expr: Expression): String = { def genFullQuery(expr: Expression): String = {
var q = genQuery(expr, false) var q = genQuery(expr, false)
if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) { if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) {
q = s"g.V.$q" q = s"g.V.$q"
} }
...@@ -355,20 +371,47 @@ class GremlinTranslator(expr: Expression, ...@@ -355,20 +371,47 @@ class GremlinTranslator(expr: Expression,
e1 = e1.transformUp(instanceClauseToTop(e1)) e1 = e1.transformUp(instanceClauseToTop(e1))
e1 = e1.transformUp(traitClauseWithInstanceForTop(e1)) e1 = e1.transformUp(traitClauseWithInstanceForTop(e1))
e1 match { //Following code extracts the select expressions from expression tree.
case e1: SelectExpression => {
val rMap = buildResultMapping(e1) val se = SelectExpressionHelper.extractSelectExpression(e1)
if (se.isDefined)
{
val rMap = buildResultMapping(se.get)
GremlinQuery(e1, genFullQuery(e1), rMap) GremlinQuery(e1, genFullQuery(e1), rMap)
} }
case pe@PathExpression(se@SelectExpression(child, selectList)) => { else
val rMap = buildResultMapping(se) {
GremlinQuery(e1, genFullQuery(e1), rMap) GremlinQuery(e1, genFullQuery(e1), null)
}
} }
case e1 => GremlinQuery(e1, genFullQuery(e1), null)
} }
object SelectExpressionHelper {
/**
* This method extracts the child select expression from parent expression
*/
def extractSelectExpression(child: Expression): Option[SelectExpression] = {
child match {
case se@SelectExpression(child, selectList) =>{
Some(se)
}
case limit@LimitExpression(child, lmt, offset) => {
extractSelectExpression(child)
}
case order@OrderExpression(child, odr, odrBy) => {
extractSelectExpression(child)
}
case path@PathExpression(child) => {
extractSelectExpression(child)
}
case _ => {
None
} }
}
}
}
/* /*
* TODO * TODO
* Translation Issues: * Translation Issues:
...@@ -379,4 +422,3 @@ class GremlinTranslator(expr: Expression, ...@@ -379,4 +422,3 @@ class GremlinTranslator(expr: Expression,
* The solution is to to do predicate pushdown and apply the filter immediately on top of the referred Expression. * The solution is to to do predicate pushdown and apply the filter immediately on top of the referred Expression.
*/ */
}
\ No newline at end of file
...@@ -64,10 +64,14 @@ trait QueryKeywords { ...@@ -64,10 +64,14 @@ trait QueryKeywords {
protected val AS = Keyword("as") protected val AS = Keyword("as")
protected val TIMES = Keyword("times") protected val TIMES = Keyword("times")
protected val WITHPATH = Keyword("withPath") protected val WITHPATH = Keyword("withPath")
protected val LIMIT = Keyword("limit")
protected val OFFSET = Keyword("offset")
protected val ORDERBY = Keyword("orderby")
} }
trait ExpressionUtils { trait ExpressionUtils {
protected val DESC = "desc"
def loop(input: Expression, l: (Expression, Option[Literal[Integer]], Option[String])) = l match { def loop(input: Expression, l: (Expression, Option[Literal[Integer]], Option[String])) = l match {
case (c, None, None) => input.loop(c) case (c, None, None) => input.loop(c)
case (c, t, None) => input.loop(c, t.get) case (c, t, None) => input.loop(c, t.get)
...@@ -85,6 +89,14 @@ trait ExpressionUtils { ...@@ -85,6 +89,14 @@ trait ExpressionUtils {
input.select(selList: _*) input.select(selList: _*)
} }
def limit(input: Expression, lmt: Literal[Integer], offset: Literal[Integer]) = {
input.limit(lmt, offset)
}
def order(input: Expression, odr: String, asc: Boolean) = {
input.order(odr, asc)
}
def leftmostId(e: Expression) = { def leftmostId(e: Expression) = {
var le: IdExpression = null var le: IdExpression = null
e.traverseUp { case i: IdExpression if le == null => le = i} e.traverseUp { case i: IdExpression if le == null => le = i}
...@@ -106,7 +118,10 @@ trait ExpressionUtils { ...@@ -106,7 +118,10 @@ trait ExpressionUtils {
sngQuery2.transformUp(replaceIdWithField(leftSrcId, snglQuery1.field(leftSrcId.name))) sngQuery2.transformUp(replaceIdWithField(leftSrcId, snglQuery1.field(leftSrcId.name)))
} }
} }
/**
* Query parser is used to parse the DSL query. It uses scala PackratParsers and pattern matching to extract the expressions.
* It builds up a expression tree.
*/
object QueryParser extends StandardTokenParsers with QueryKeywords with ExpressionUtils with PackratParsers { object QueryParser extends StandardTokenParsers with QueryKeywords with ExpressionUtils with PackratParsers {
import scala.language.higherKinds import scala.language.higherKinds
...@@ -137,12 +152,33 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi ...@@ -137,12 +152,33 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
case h :: t => t.foldLeft(h)(merge(_, _)) case h :: t => t.foldLeft(h)(merge(_, _))
} }
} }
/**
def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ^^ { * A singleQuery can have the following forms:
case s ~ None ~ None => s * 1. SrcQuery [select] [orderby desc] [Limit x offset y] -> source query followed by optional select statement followed by optional order by followed by optional limit
case s ~ l ~ None => loop(s, l.get) * eg: Select "hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1"
case s ~ l ~ sel if l.isDefined => select(loop(s, l.get), sel.get) * @return
case s ~ None ~ sel => select(s, sel.get) */
def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset) ^^ {
case s ~ l ~ sel ~ odr ~ lmtoff => {
var expressiontree = s
if (l.isDefined) //Note: The order of if statements is important.
{
expressiontree = loop(expressiontree, l.get);
}
if (odr.isDefined)
{
expressiontree = order(expressiontree, odr.get._1, odr.get._2)
}
if (lmtoff.isDefined)
{
expressiontree = limit(expressiontree, int (lmtoff.get._1), int (lmtoff.get._2))
}
if (sel.isDefined)
{
expressiontree = select(expressiontree, sel.get)
}
expressiontree
}
} }
/** /**
...@@ -182,6 +218,24 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi ...@@ -182,6 +218,24 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def fromSrc = identifier ~ AS ~ alias ^^ { case s ~ a ~ al => s.as(al)} | def fromSrc = identifier ~ AS ~ alias ^^ { case s ~ a ~ al => s.as(al)} |
identifier identifier
def orderby = ORDERBY ~ order ~ opt (asce) ^^ {
case o ~ odr ~ None => (odr, true)
case o ~ odr ~ asc => (odr, asc.get)
}
def limitOffset = LIMIT ~ lmt ~ opt (offset) ^^ {
case l ~ lt ~ None => (lt, 0)
case l ~ lt ~ of => (lt, of.get)
}
def offset = OFFSET ~ ofset ^^ {
case offset ~ of => of
}
def asce = asc ^^ {
case DESC => false
case _ => true
}
def loopExpression: Parser[(Expression, Option[Literal[Integer]], Option[String])] = def loopExpression: Parser[(Expression, Option[Literal[Integer]], Option[String])] =
LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) ^^ { LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) ^^ {
...@@ -192,7 +246,6 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi ...@@ -192,7 +246,6 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def selectClause: Parser[List[(Expression, Option[String])]] = SELECT ~ rep1sep(selectExpression, COMMA) ^^ { def selectClause: Parser[List[(Expression, Option[String])]] = SELECT ~ rep1sep(selectExpression, COMMA) ^^ {
case s ~ cs => cs case s ~ cs => cs
} }
def selectExpression: Parser[(Expression, Option[String])] = expr ~ opt(AS ~> alias) ^^ { def selectExpression: Parser[(Expression, Option[String])] = expr ~ opt(AS ~> alias) ^^ {
case e ~ a => (e, a) case e ~ a => (e, a)
} }
...@@ -240,6 +293,14 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi ...@@ -240,6 +293,14 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def alias = ident | stringLit def alias = ident | stringLit
def lmt = intConstant
def ofset = intConstant
def order = ident | stringLit
def asc = ident | stringLit
def literal = booleanConstant ^^ { def literal = booleanConstant ^^ {
boolean(_) boolean(_)
} | } |
......
...@@ -91,6 +91,14 @@ class Resolver(srcExpr: Option[Expression] = None, aliases: Map[String, Expressi ...@@ -91,6 +91,14 @@ class Resolver(srcExpr: Option[Expression] = None, aliases: Map[String, Expressi
val r = new Resolver(Some(inputExpr), inputExpr.namedExpressions, true) val r = new Resolver(Some(inputExpr), inputExpr.namedExpressions, true)
return new LoopExpression(inputExpr, loopExpr.transformUp(r), t) return new LoopExpression(inputExpr, loopExpr.transformUp(r), t)
} }
case lmt@LimitExpression(child, limit, offset) => {
val r = new Resolver(Some(child), child.namedExpressions)
return new LimitExpression(child.transformUp(r), limit, offset)
}
case order@OrderExpression(child, odr, asc) => {
val r = new Resolver(Some(child), child.namedExpressions)
return new OrderExpression(child.transformUp(r), odr, asc)
}
case x => x case x => x
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment