Commit 574da752 by Suma Shivaprasad

ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)

parent 53574720
......@@ -13,7 +13,7 @@ queryWithPath: query ~ opt(WITHPATH)
query: rep1sep(singleQuery, opt(COMMA))
singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause)
singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset)
singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
WHERE ~ (expr ^? notIdExpression) |
......@@ -22,6 +22,14 @@ singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
fromSrc: identifier ~ AS ~ alias | identifier
orderby: ORDERBY ~ order ~ opt (sortOrder)
limitOffset: LIMIT ~ lmt ~ opt (offset)
offset: OFFSET ~ offsetValue
sortOrder = ASC | DESC
loopExpression: LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias)
selectClause: SELECT ~ rep1sep(selectExpression, COMMA)
......@@ -79,6 +87,9 @@ Language Notes:
* The _!WithPath_ clause can be used with transitive closure queries to retrieve the Path that
connects the two related Entities. (We also provide a higher level interface for Closure Queries
see scaladoc for 'org.apache.atlas.query.ClosureQuery')
* ORDERBY is optional. Orderby clause should be specified in single quote ('). When order by clause is specified case insensitive sorting is done in ascending order.
For sorting in descending order specify 'DESC' after order by clause. If no order by is specified then no default sorting is applied.
* LIMIT is optional. It limits the maximum number of objects to be fetched starting from specified optional offset. If no offset is specified count starts from beginning.
* There are couple of Predicate functions different from SQL:
* _is_ or _isa_can be used to filter Entities that have a particular Trait.
* _has_ can be used to filter Entities that have a value for a particular Attribute.
......@@ -89,6 +100,11 @@ Language Notes:
* from DB
* DB where name="Reporting" select name, owner
* DB where name="Reporting" select name, owner orderby 'name'
* DB where name="Reporting" select name limit 10
* DB where name="Reporting" select name, owner limit 10 offset 0
* DB where name="Reporting" select name, owner orderby 'name' limit 10 offset 5
* DB where name="Reporting" select name, owner orderby 'name' desc limit 10 offset 5
* DB has name
* DB is !JdbcAccess
* Column where Column isa PII
......
......@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES:
ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)
ATLAS-543 Entity Instance requests should not require ID element for new Entities (harishjp via shwethags)
ATLAS-681 update committer/ppmc members in the pom.xml (sneethiraj via shwethags)
ATLAS-616 Resolve OOM - Zookeeper throws exceptions when trying to fire DSL queries at Atlas at large scale. (yhemanth via sumasai)
......
......@@ -330,6 +330,10 @@ object Expressions {
def instance() = new InstanceExpression(this)
def path() = new PathExpression(this)
def limit(lmt: Literal[Integer], offset : Literal[Integer]) = new LimitExpression(this, lmt, offset)
def order(odr: String, asc: Boolean) = new OrderExpression(this, odr, asc)
}
trait BinaryNode {
......@@ -766,4 +770,30 @@ object Expressions {
override def toString = s"$child withPath"
}
case class LimitExpression(child: Expression, limit: Literal[Integer], offset: Literal[Integer]) extends Expression with UnaryNode {
override def toString = s"$child limit $limit offset $offset "
lazy val dataType = {
if (!resolved) {
throw new UnresolvedException(this,
s"datatype. Can not resolve due to unresolved children")
}
child.dataType
}
}
case class OrderExpression(child: Expression, odr: String, asc: Boolean) extends Expression with UnaryNode {
override def toString = s"$child order $odr asc $asc"
lazy val dataType = {
if (!resolved) {
throw new UnresolvedException(this,
s"datatype. Can not resolve due to unresolved children")
}
child.dataType
}
}
}
......@@ -19,7 +19,7 @@
package org.apache.atlas.query
import javax.script.{Bindings, ScriptEngine, ScriptEngineManager}
import org.apache.atlas.query.Expressions._
import com.thinkaurelius.titan.core.TitanGraph
import com.tinkerpop.pipes.util.structures.Row
import org.apache.atlas.query.TypeUtils.ResultWithPathStruct
......@@ -27,8 +27,8 @@ import org.apache.atlas.typesystem.json._
import org.apache.atlas.typesystem.types._
import org.json4s._
import org.json4s.native.Serialization._
import scala.language.existentials
import org.apache.atlas.query.Expressions._
case class GremlinQueryResult(query: String,
resultDataType: IDataType[_],
......@@ -82,7 +82,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val rType = qry.expr.dataType
val oType = if (qry.isPathExpresion) qry.expr.children(0).dataType else rType
val rawRes = engine.eval(qry.queryStr, bindings)
if (!qry.hasSelectList) {
val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { v =>
val iV = instanceObject(v)
......@@ -95,14 +94,16 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { r =>
val rV = instanceObject(r).asInstanceOf[Row[java.util.List[AnyRef]]]
val sInstance = sType.createInstance()
val selExpr =
(if (qry.isPathExpresion) qry.expr.children(0) else qry.expr).
asInstanceOf[Expressions.SelectExpression]
selExpr.selectListWithAlias.foreach { aE =>
val cName = aE.alias
val (src, idx) = qry.resultMaping(cName)
val v = rV.getColumn(src).get(idx)
sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
val selObj = SelectExpressionHelper.extractSelectExpression(qry.expr)
if (selObj.isDefined)
{
val selExpr = selObj.get.asInstanceOf[Expressions.SelectExpression]
selExpr.selectListWithAlias.foreach { aE =>
val cName = aE.alias
val (src, idx) = qry.resultMaping(cName)
val v = rV.getColumn(src).get(idx)
sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
}
}
addPathStruct(r, sInstance)
}
......
......@@ -35,6 +35,7 @@ case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[St
def hasSelectList = resultMaping != null
def isPathExpresion = expr.isInstanceOf[PathExpression]
}
trait SelectExpressionHandling {
......@@ -122,7 +123,6 @@ trait SelectExpressionHandling {
}
m.toMap
}
}
class GremlinTranslationException(expr: Expression, reason: String) extends
......@@ -186,7 +186,8 @@ class GremlinTranslator(expr: Expression,
}
def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = {
case te : TraitExpression if (te fastEquals topE) => {
// This topE check prevented the comparison of trait expression when it is a child. Like trait as t limit 2
case te : TraitExpression => {
val theTrait = te.as("theTrait")
val theInstance = theTrait.traitInstance().as("theInstance")
val outE =
......@@ -202,6 +203,7 @@ class GremlinTranslator(expr: Expression,
stats.last
}
private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
case ClassExpression(clsName) =>
typeTestExpression(clsName)
......@@ -324,12 +326,26 @@ class GremlinTranslator(expr: Expression,
case pe@PathExpression(child) => {
s"${genQuery(child, inSelect)}.path"
}
case order@OrderExpression(child, odr, asc) => {
var orderby = ""
asc match {
//builds a closure comparison function based on provided order by clause in DSL. This will be used to sort the results by gremlin order pipe.
//Ordering is case insensitive.
case false=> orderby = s"order{it.b.getProperty('$odr').toLowerCase() <=> it.a.getProperty('$odr').toLowerCase()}"//descending
case _ => orderby = s"order{it.a.getProperty('$odr').toLowerCase() <=> it.b.getProperty('$odr').toLowerCase()}"
}
s"""${genQuery(child, inSelect)}.$orderby"""
}
case limitOffset@LimitExpression(child, limit, offset) => {
val totalResultRows = limit.value + offset.value
s"""${genQuery(child, inSelect)} [$offset..<$totalResultRows]"""
}
case x => throw new GremlinTranslationException(x, "expression not yet supported")
}
def genFullQuery(expr: Expression): String = {
var q = genQuery(expr, false)
if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) {
q = s"g.V.$q"
}
......@@ -354,21 +370,48 @@ class GremlinTranslator(expr: Expression,
e1 = e1.transformUp(addAliasToLoopInput())
e1 = e1.transformUp(instanceClauseToTop(e1))
e1 = e1.transformUp(traitClauseWithInstanceForTop(e1))
e1 match {
case e1: SelectExpression => {
val rMap = buildResultMapping(e1)
//Following code extracts the select expressions from expression tree.
val se = SelectExpressionHelper.extractSelectExpression(e1)
if (se.isDefined)
{
val rMap = buildResultMapping(se.get)
GremlinQuery(e1, genFullQuery(e1), rMap)
}
case pe@PathExpression(se@SelectExpression(child, selectList)) => {
val rMap = buildResultMapping(se)
GremlinQuery(e1, genFullQuery(e1), rMap)
}
case e1 => GremlinQuery(e1, genFullQuery(e1), null)
}
else
{
GremlinQuery(e1, genFullQuery(e1), null)
}
}
}
object SelectExpressionHelper {
/**
* This method extracts the child select expression from parent expression
*/
def extractSelectExpression(child: Expression): Option[SelectExpression] = {
child match {
case se@SelectExpression(child, selectList) =>{
Some(se)
}
case limit@LimitExpression(child, lmt, offset) => {
extractSelectExpression(child)
}
case order@OrderExpression(child, odr, odrBy) => {
extractSelectExpression(child)
}
case path@PathExpression(child) => {
extractSelectExpression(child)
}
case _ => {
None
}
}
}
}
/*
* TODO
* Translation Issues:
......@@ -379,4 +422,3 @@ class GremlinTranslator(expr: Expression,
* The solution is to to do predicate pushdown and apply the filter immediately on top of the referred Expression.
*/
}
\ No newline at end of file
......@@ -64,10 +64,14 @@ trait QueryKeywords {
protected val AS = Keyword("as")
protected val TIMES = Keyword("times")
protected val WITHPATH = Keyword("withPath")
protected val LIMIT = Keyword("limit")
protected val OFFSET = Keyword("offset")
protected val ORDERBY = Keyword("orderby")
}
trait ExpressionUtils {
protected val DESC = "desc"
def loop(input: Expression, l: (Expression, Option[Literal[Integer]], Option[String])) = l match {
case (c, None, None) => input.loop(c)
case (c, t, None) => input.loop(c, t.get)
......@@ -85,6 +89,14 @@ trait ExpressionUtils {
input.select(selList: _*)
}
def limit(input: Expression, lmt: Literal[Integer], offset: Literal[Integer]) = {
input.limit(lmt, offset)
}
def order(input: Expression, odr: String, asc: Boolean) = {
input.order(odr, asc)
}
def leftmostId(e: Expression) = {
var le: IdExpression = null
e.traverseUp { case i: IdExpression if le == null => le = i}
......@@ -106,7 +118,10 @@ trait ExpressionUtils {
sngQuery2.transformUp(replaceIdWithField(leftSrcId, snglQuery1.field(leftSrcId.name)))
}
}
/**
* Query parser is used to parse the DSL query. It uses scala PackratParsers and pattern matching to extract the expressions.
* It builds up a expression tree.
*/
object QueryParser extends StandardTokenParsers with QueryKeywords with ExpressionUtils with PackratParsers {
import scala.language.higherKinds
......@@ -137,12 +152,33 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
case h :: t => t.foldLeft(h)(merge(_, _))
}
}
def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ^^ {
case s ~ None ~ None => s
case s ~ l ~ None => loop(s, l.get)
case s ~ l ~ sel if l.isDefined => select(loop(s, l.get), sel.get)
case s ~ None ~ sel => select(s, sel.get)
/**
* A singleQuery can have the following forms:
* 1. SrcQuery [select] [orderby desc] [Limit x offset y] -> source query followed by optional select statement followed by optional order by followed by optional limit
* eg: Select "hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1"
* @return
*/
def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset) ^^ {
case s ~ l ~ sel ~ odr ~ lmtoff => {
var expressiontree = s
if (l.isDefined) //Note: The order of if statements is important.
{
expressiontree = loop(expressiontree, l.get);
}
if (odr.isDefined)
{
expressiontree = order(expressiontree, odr.get._1, odr.get._2)
}
if (lmtoff.isDefined)
{
expressiontree = limit(expressiontree, int (lmtoff.get._1), int (lmtoff.get._2))
}
if (sel.isDefined)
{
expressiontree = select(expressiontree, sel.get)
}
expressiontree
}
}
/**
......@@ -182,7 +218,25 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def fromSrc = identifier ~ AS ~ alias ^^ { case s ~ a ~ al => s.as(al)} |
identifier
def orderby = ORDERBY ~ order ~ opt (asce) ^^ {
case o ~ odr ~ None => (odr, true)
case o ~ odr ~ asc => (odr, asc.get)
}
def limitOffset = LIMIT ~ lmt ~ opt (offset) ^^ {
case l ~ lt ~ None => (lt, 0)
case l ~ lt ~ of => (lt, of.get)
}
def offset = OFFSET ~ ofset ^^ {
case offset ~ of => of
}
def asce = asc ^^ {
case DESC => false
case _ => true
}
def loopExpression: Parser[(Expression, Option[Literal[Integer]], Option[String])] =
LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) ^^ {
case l ~ e ~ None ~ a => (e, None, a)
......@@ -192,7 +246,6 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
def selectClause: Parser[List[(Expression, Option[String])]] = SELECT ~ rep1sep(selectExpression, COMMA) ^^ {
case s ~ cs => cs
}
def selectExpression: Parser[(Expression, Option[String])] = expr ~ opt(AS ~> alias) ^^ {
case e ~ a => (e, a)
}
......@@ -239,7 +292,15 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
}
def alias = ident | stringLit
def lmt = intConstant
def ofset = intConstant
def order = ident | stringLit
def asc = ident | stringLit
def literal = booleanConstant ^^ {
boolean(_)
} |
......
......@@ -90,6 +90,14 @@ class Resolver(srcExpr: Option[Expression] = None, aliases: Map[String, Expressi
case l@LoopExpression(inputExpr, loopExpr, t) if inputExpr.resolved => {
val r = new Resolver(Some(inputExpr), inputExpr.namedExpressions, true)
return new LoopExpression(inputExpr, loopExpr.transformUp(r), t)
}
case lmt@LimitExpression(child, limit, offset) => {
val r = new Resolver(Some(child), child.namedExpressions)
return new LimitExpression(child.transformUp(r), limit, offset)
}
case order@OrderExpression(child, odr, asc) => {
val r = new Resolver(Some(child), child.namedExpressions)
return new OrderExpression(child.transformUp(r), odr, asc)
}
case x => x
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment