Commit 8b4bc508 by Harish Butani

BUG-38340: gen gremlinQuery in such a way to trigger an index scan

parent 3d9b865d
......@@ -20,9 +20,7 @@ package org.apache.hadoop.metadata.discovery.graph;
import com.thinkaurelius.titan.core.TitanVertex;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.query.Expressions;
import org.apache.hadoop.metadata.query.GraphPersistenceStrategies;
import org.apache.hadoop.metadata.query.GraphPersistenceStrategies$class;
import org.apache.hadoop.metadata.query.*;
import org.apache.hadoop.metadata.query.TypeUtils;
import org.apache.hadoop.metadata.repository.MetadataRepository;
import org.apache.hadoop.metadata.repository.Constants;
......@@ -33,6 +31,7 @@ import org.apache.hadoop.metadata.typesystem.persistence.Id;
import org.apache.hadoop.metadata.typesystem.types.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.Traversable;
import java.util.List;
......@@ -187,8 +186,18 @@ public class DefaultGraphPersistenceStrategy implements GraphPersistenceStrategi
public String idAttributeName() { return metadataRepository.getIdAttributeName(); }
@Override
public String typeTestExpression(String typeName) {
return GraphPersistenceStrategies$class.typeTestExpression(this, typeName);
public scala.collection.Seq<String> typeTestExpression(String typeName, IntSequence intSeq) {
return GraphPersistenceStrategies$class.typeTestExpression(this, typeName, intSeq);
}
@Override
public boolean collectTypeInstancesIntoVar() {
return GraphPersistenceStrategies$class.collectTypeInstancesIntoVar(this);
}
@Override
public boolean addGraphVertexPrefix(scala.collection.Traversable<String> preStatements) {
return GraphPersistenceStrategies$class.addGraphVertexPrefix(this, preStatements);
}
}
......@@ -115,8 +115,39 @@ trait GraphPersistenceStrategies {
_typeTestExpression(dataType.getName, "it.object")
}
def typeTestExpression(typeName : String) :String = {
_typeTestExpression(typeName, "it")
def addGraphVertexPrefix(preStatements : Traversable[String]) = !collectTypeInstancesIntoVar
/**
* Controls behavior of how instances of a Type are discovered.
* - query is generated in a way that indexes are exercised using a local set variable across multiple lookups
* - query is generated using an 'or' expression.
*
* '''This is a very bad idea: controlling query execution behavior via query generation.''' But our current
* knowledge of seems to indicate we have no choice. See
* [[https://groups.google.com/forum/#!topic/gremlin-users/n1oV86yr4yU discussion in Gremlin group]].
* Also this seems a fragile solution, dependend on the memory requirements of the Set variable.
* For now enabling via the '''collectTypeInstancesIntoVar''' behavior setting. Reverting back would require
* setting this to false.
*
* Long term have to get to the bottom of Gremlin:
* - there doesn't seem to be way to see the physical query plan. Maybe we should directly interface with Titan.
* - At least from querying perspective a columnar db maybe a better route. Daniel Abadi did some good work
* on showing how to use a columnar store as a Graph Db.
*
*
* @return
*/
def collectTypeInstancesIntoVar = true
def typeTestExpression(typeName : String, intSeq : IntSequence) : Seq[String] = {
if (collectTypeInstancesIntoVar)
typeTestExpressionMultiStep(typeName, intSeq)
else
typeTestExpressionUsingFilter(typeName)
}
private def typeTestExpressionUsingFilter(typeName : String) : Seq[String] = {
Seq(s"""filter${_typeTestExpression(typeName, "it")}""")
}
private def _typeTestExpression(typeName: String, itRef: String): String = {
......@@ -125,6 +156,27 @@ trait GraphPersistenceStrategies {
|${itRef}.'${superTypeAttributeName}'.contains('${typeName}') : false)}""".
stripMargin.replace(System.getProperty("line.separator"), "")
}
private def typeTestExpressionMultiStep(typeName : String, intSeq : IntSequence) : Seq[String] = {
val varName = s"_var_${intSeq.next}"
Seq(
newSetVar(varName),
fillVarWithTypeInstances(typeName, varName),
fillVarWithSubTypeInstances(typeName, varName),
s"$varName._()"
)
}
private def newSetVar(varName : String) = s"$varName = [] as Set"
private def fillVarWithTypeInstances(typeName : String, fillVar : String) = {
s"""g.V().has("${typeAttributeName}", "${typeName}").fill($fillVar)"""
}
private def fillVarWithSubTypeInstances(typeName : String, fillVar : String) = {
s"""g.V().has("${superTypeAttributeName}", "${typeName}").fill($fillVar)"""
}
}
object GraphPersistenceStrategy1 extends GraphPersistenceStrategies {
......
......@@ -25,6 +25,9 @@ import org.apache.hadoop.metadata.typesystem.types.TypeSystem
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
trait IntSequence {
def next: Int
}
case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[String, (String, Int)]) {
......@@ -127,6 +130,9 @@ class GremlinTranslator(expr: Expression,
gPersistenceBehavior: GraphPersistenceStrategies)
extends SelectExpressionHandling {
val preStatements = ArrayBuffer[String]()
val postStatements = ArrayBuffer[String]()
val wrapAndRule: PartialFunction[Expression, Expression] = {
case f: FilterExpression if !f.condExpr.isInstanceOf[LogicalExpression] =>
FilterExpression(f.child, new LogicalExpression("and", List(f.condExpr)))
......@@ -144,7 +150,7 @@ class GremlinTranslator(expr: Expression,
()
}
class counter {
val counter = new IntSequence {
var i: Int = -1;
def next: Int = {
......@@ -152,7 +158,7 @@ class GremlinTranslator(expr: Expression,
}
}
def addAliasToLoopInput(c: counter = new counter()): PartialFunction[Expression, Expression] = {
def addAliasToLoopInput(c: IntSequence = counter): PartialFunction[Expression, Expression] = {
case l@LoopExpression(aliasE@AliasExpression(_, _), _, _) => l
case l@LoopExpression(inputExpr, loopExpr, t) => {
val aliasE = AliasExpression(inputExpr, s"_loop${c.next}")
......@@ -183,11 +189,17 @@ class GremlinTranslator(expr: Expression,
}
}
def typeTestExpression(typeName : String) : String = {
val stats = gPersistenceBehavior.typeTestExpression(typeName, counter)
preStatements ++= stats.init
stats.last
}
private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
case ClassExpression(clsName) =>
s"""filter${gPersistenceBehavior.typeTestExpression(clsName)}"""
typeTestExpression(clsName)
case TraitExpression(clsName) =>
s"""filter${gPersistenceBehavior.typeTestExpression(clsName)}"""
typeTestExpression(clsName)
case fe@FieldExpression(fieldName, fInfo, child) if fe.dataType.getTypeCategory == TypeCategory.PRIMITIVE => {
val fN = "\"" + gPersistenceBehavior.fieldNameInVertex(fInfo.dataType, fInfo.attrInfo) + "\""
child match {
......@@ -283,6 +295,23 @@ class GremlinTranslator(expr: Expression,
case x => throw new GremlinTranslationException(x, "expression not yet supported")
}
def genFullQuery(expr: Expression): String = {
var q = genQuery(expr, false)
if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) {
q = s"g.V.$q"
}
q = s"$q.toList()"
q = (preStatements ++ Seq(q) ++ postStatements).mkString("", ";", "")
/*
* the L:{} represents a groovy code block; the label is needed
* to distinguish it from a groovy closure.
*/
s"L:{$q}"
}
def translate(): GremlinQuery = {
var e1 = expr.transformUp(wrapAndRule)
......@@ -297,13 +326,13 @@ class GremlinTranslator(expr: Expression,
e1 match {
case e1: SelectExpression => {
val rMap = buildResultMapping(e1)
GremlinQuery(e1, s"g.V.${genQuery(e1, false)}.toList()", rMap)
GremlinQuery(e1, genFullQuery(e1), rMap)
}
case pe@PathExpression(se@SelectExpression(child, selectList)) => {
val rMap = buildResultMapping(se)
GremlinQuery(e1, s"g.V.${genQuery(pe, false)}.toList()", rMap)
GremlinQuery(e1, genFullQuery(e1), rMap)
}
case e1 => GremlinQuery(e1, s"g.V.${genQuery(e1, false)}.toList()", null)
case e1 => GremlinQuery(e1, genFullQuery(e1), null)
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment