Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
8b4bc508
Commit
8b4bc508
authored
Jun 08, 2015
by
Harish Butani
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
BUG-38340: gen gremlinQuery in such a way to trigger an index scan
parent
3d9b865d
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
104 additions
and
14 deletions
+104
-14
DefaultGraphPersistenceStrategy.java
...data/discovery/graph/DefaultGraphPersistenceStrategy.java
+14
-5
GraphPersistenceStrategies.scala
...he/hadoop/metadata/query/GraphPersistenceStrategies.scala
+54
-2
GremlinQuery.scala
...scala/org/apache/hadoop/metadata/query/GremlinQuery.scala
+36
-7
No files found.
repository/src/main/java/org/apache/hadoop/metadata/discovery/graph/DefaultGraphPersistenceStrategy.java
View file @
8b4bc508
...
...
@@ -20,9 +20,7 @@ package org.apache.hadoop.metadata.discovery.graph;
import
com.thinkaurelius.titan.core.TitanVertex
;
import
org.apache.hadoop.metadata.MetadataException
;
import
org.apache.hadoop.metadata.query.Expressions
;
import
org.apache.hadoop.metadata.query.GraphPersistenceStrategies
;
import
org.apache.hadoop.metadata.query.GraphPersistenceStrategies
$class
;
import
org.apache.hadoop.metadata.query.*
;
import
org.apache.hadoop.metadata.query.TypeUtils
;
import
org.apache.hadoop.metadata.repository.MetadataRepository
;
import
org.apache.hadoop.metadata.repository.Constants
;
...
...
@@ -33,6 +31,7 @@ import org.apache.hadoop.metadata.typesystem.persistence.Id;
import
org.apache.hadoop.metadata.typesystem.types.*
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
scala.collection.Traversable
;
import
java.util.List
;
...
...
@@ -187,8 +186,18 @@ public class DefaultGraphPersistenceStrategy implements GraphPersistenceStrategi
public
String
idAttributeName
()
{
return
metadataRepository
.
getIdAttributeName
();
}
@Override
public
String
typeTestExpression
(
String
typeName
)
{
return
GraphPersistenceStrategies$class
.
typeTestExpression
(
this
,
typeName
);
public
scala
.
collection
.
Seq
<
String
>
typeTestExpression
(
String
typeName
,
IntSequence
intSeq
)
{
return
GraphPersistenceStrategies$class
.
typeTestExpression
(
this
,
typeName
,
intSeq
);
}
@Override
public
boolean
collectTypeInstancesIntoVar
()
{
return
GraphPersistenceStrategies$class
.
collectTypeInstancesIntoVar
(
this
);
}
@Override
public
boolean
addGraphVertexPrefix
(
scala
.
collection
.
Traversable
<
String
>
preStatements
)
{
return
GraphPersistenceStrategies$class
.
addGraphVertexPrefix
(
this
,
preStatements
);
}
}
repository/src/main/scala/org/apache/hadoop/metadata/query/GraphPersistenceStrategies.scala
View file @
8b4bc508
...
...
@@ -115,8 +115,39 @@ trait GraphPersistenceStrategies {
_typeTestExpression
(
dataType
.
getName
,
"it.object"
)
}
def
typeTestExpression
(
typeName
:
String
)
:
String
=
{
_typeTestExpression
(
typeName
,
"it"
)
def
addGraphVertexPrefix
(
preStatements
:
Traversable
[
String
])
=
!
collectTypeInstancesIntoVar
/**
* Controls behavior of how instances of a Type are discovered.
* - query is generated in a way that indexes are exercised using a local set variable across multiple lookups
* - query is generated using an 'or' expression.
*
* '''This is a very bad idea: controlling query execution behavior via query generation.''' But our current
* knowledge of seems to indicate we have no choice. See
* [[https://groups.google.com/forum/#!topic/gremlin-users/n1oV86yr4yU discussion in Gremlin group]].
* Also this seems a fragile solution, dependend on the memory requirements of the Set variable.
* For now enabling via the '''collectTypeInstancesIntoVar''' behavior setting. Reverting back would require
* setting this to false.
*
* Long term have to get to the bottom of Gremlin:
* - there doesn't seem to be way to see the physical query plan. Maybe we should directly interface with Titan.
* - At least from querying perspective a columnar db maybe a better route. Daniel Abadi did some good work
* on showing how to use a columnar store as a Graph Db.
*
*
* @return
*/
def
collectTypeInstancesIntoVar
=
true
def
typeTestExpression
(
typeName
:
String
,
intSeq
:
IntSequence
)
:
Seq
[
String
]
=
{
if
(
collectTypeInstancesIntoVar
)
typeTestExpressionMultiStep
(
typeName
,
intSeq
)
else
typeTestExpressionUsingFilter
(
typeName
)
}
private
def
typeTestExpressionUsingFilter
(
typeName
:
String
)
:
Seq
[
String
]
=
{
Seq
(
s
"""filter${_typeTestExpression(typeName, "it")}"""
)
}
private
def
_typeTestExpression
(
typeName
:
String
,
itRef
:
String
)
:
String
=
{
...
...
@@ -125,6 +156,27 @@ trait GraphPersistenceStrategies {
|${itRef}.'${superTypeAttributeName}'.contains('${typeName}') : false)}"""
.
stripMargin
.
replace
(
System
.
getProperty
(
"line.separator"
),
""
)
}
private
def
typeTestExpressionMultiStep
(
typeName
:
String
,
intSeq
:
IntSequence
)
:
Seq
[
String
]
=
{
val
varName
=
s
"_var_${intSeq.next}"
Seq
(
newSetVar
(
varName
),
fillVarWithTypeInstances
(
typeName
,
varName
),
fillVarWithSubTypeInstances
(
typeName
,
varName
),
s
"$varName._()"
)
}
private
def
newSetVar
(
varName
:
String
)
=
s
"$varName = [] as Set"
private
def
fillVarWithTypeInstances
(
typeName
:
String
,
fillVar
:
String
)
=
{
s
"""g.V().has("${typeAttributeName}", "${typeName}").fill($fillVar)"""
}
private
def
fillVarWithSubTypeInstances
(
typeName
:
String
,
fillVar
:
String
)
=
{
s
"""g.V().has("${superTypeAttributeName}", "${typeName}").fill($fillVar)"""
}
}
object
GraphPersistenceStrategy1
extends
GraphPersistenceStrategies
{
...
...
repository/src/main/scala/org/apache/hadoop/metadata/query/GremlinQuery.scala
View file @
8b4bc508
...
...
@@ -25,6 +25,9 @@ import org.apache.hadoop.metadata.typesystem.types.TypeSystem
import
scala.collection.mutable
import
scala.collection.mutable.ArrayBuffer
trait
IntSequence
{
def
next
:
Int
}
case
class
GremlinQuery
(
expr
:
Expression
,
queryStr
:
String
,
resultMaping
:
Map
[
String
,
(
String
,
Int
)])
{
...
...
@@ -127,6 +130,9 @@ class GremlinTranslator(expr: Expression,
gPersistenceBehavior
:
GraphPersistenceStrategies
)
extends
SelectExpressionHandling
{
val
preStatements
=
ArrayBuffer
[
String
]()
val
postStatements
=
ArrayBuffer
[
String
]()
val
wrapAndRule
:
PartialFunction
[
Expression
,
Expression
]
=
{
case
f
:
FilterExpression
if
!f.condExpr.isInstanceOf
[
LogicalExpression
]
=>
FilterExpression
(
f
.
child
,
new
LogicalExpression
(
"and"
,
List
(
f
.
condExpr
)))
...
...
@@ -144,7 +150,7 @@ class GremlinTranslator(expr: Expression,
()
}
class
counter
{
val
counter
=
new
IntSequence
{
var
i
:
Int
=
-
1
;
def
next
:
Int
=
{
...
...
@@ -152,7 +158,7 @@ class GremlinTranslator(expr: Expression,
}
}
def
addAliasToLoopInput
(
c
:
counter
=
new
counter
()
)
:
PartialFunction
[
Expression
,
Expression
]
=
{
def
addAliasToLoopInput
(
c
:
IntSequence
=
counter
)
:
PartialFunction
[
Expression
,
Expression
]
=
{
case
l
@LoopExpression
(
aliasE
@AliasExpression
(
_
,
_
),
_
,
_
)
=>
l
case
l
@LoopExpression
(
inputExpr
,
loopExpr
,
t
)
=>
{
val
aliasE
=
AliasExpression
(
inputExpr
,
s
"_loop${c.next}"
)
...
...
@@ -183,11 +189,17 @@ class GremlinTranslator(expr: Expression,
}
}
def
typeTestExpression
(
typeName
:
String
)
:
String
=
{
val
stats
=
gPersistenceBehavior
.
typeTestExpression
(
typeName
,
counter
)
preStatements
++=
stats
.
init
stats
.
last
}
private
def
genQuery
(
expr
:
Expression
,
inSelect
:
Boolean
)
:
String
=
expr
match
{
case
ClassExpression
(
clsName
)
=>
s
"""filter${gPersistenceBehavior.typeTestExpression(clsName)}"""
typeTestExpression
(
clsName
)
case
TraitExpression
(
clsName
)
=>
s
"""filter${gPersistenceBehavior.typeTestExpression(clsName)}"""
typeTestExpression
(
clsName
)
case
fe
@FieldExpression
(
fieldName
,
fInfo
,
child
)
if
fe
.
dataType
.
getTypeCategory
==
TypeCategory
.
PRIMITIVE
=>
{
val
fN
=
"\""
+
gPersistenceBehavior
.
fieldNameInVertex
(
fInfo
.
dataType
,
fInfo
.
attrInfo
)
+
"\""
child
match
{
...
...
@@ -283,6 +295,23 @@ class GremlinTranslator(expr: Expression,
case
x
=>
throw
new
GremlinTranslationException
(
x
,
"expression not yet supported"
)
}
def
genFullQuery
(
expr
:
Expression
)
:
String
=
{
var
q
=
genQuery
(
expr
,
false
)
if
(
gPersistenceBehavior
.
addGraphVertexPrefix
(
preStatements
))
{
q
=
s
"g.V.$q"
}
q
=
s
"$q.toList()"
q
=
(
preStatements
++
Seq
(
q
)
++
postStatements
).
mkString
(
""
,
";"
,
""
)
/*
* the L:{} represents a groovy code block; the label is needed
* to distinguish it from a groovy closure.
*/
s
"L:{$q}"
}
def
translate
()
:
GremlinQuery
=
{
var
e1
=
expr
.
transformUp
(
wrapAndRule
)
...
...
@@ -297,13 +326,13 @@ class GremlinTranslator(expr: Expression,
e1
match
{
case
e1
:
SelectExpression
=>
{
val
rMap
=
buildResultMapping
(
e1
)
GremlinQuery
(
e1
,
s
"g.V.${genQuery(e1, false)}.toList()"
,
rMap
)
GremlinQuery
(
e1
,
genFullQuery
(
e1
)
,
rMap
)
}
case
pe
@PathExpression
(
se
@SelectExpression
(
child
,
selectList
))
=>
{
val
rMap
=
buildResultMapping
(
se
)
GremlinQuery
(
e1
,
s
"g.V.${genQuery(pe, false)}.toList()"
,
rMap
)
GremlinQuery
(
e1
,
genFullQuery
(
e1
)
,
rMap
)
}
case
e1
=>
GremlinQuery
(
e1
,
s
"g.V.${genQuery(e1, false)}.toList()"
,
null
)
case
e1
=>
GremlinQuery
(
e1
,
genFullQuery
(
e1
)
,
null
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment