Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
c71b5b31
Commit
c71b5b31
authored
9 years ago
by
Harish Butani
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
BUG-38340: gen gremlinQuery in such a way to trigger an index scan
parent
e1bdafa2
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
104 additions
and
14 deletions
+104
-14
DefaultGraphPersistenceStrategy.java
...data/discovery/graph/DefaultGraphPersistenceStrategy.java
+14
-5
GraphPersistenceStrategies.scala
...he/hadoop/metadata/query/GraphPersistenceStrategies.scala
+54
-2
GremlinQuery.scala
...scala/org/apache/hadoop/metadata/query/GremlinQuery.scala
+36
-7
No files found.
repository/src/main/java/org/apache/hadoop/metadata/discovery/graph/DefaultGraphPersistenceStrategy.java
View file @
c71b5b31
...
@@ -20,9 +20,7 @@ package org.apache.hadoop.metadata.discovery.graph;
...
@@ -20,9 +20,7 @@ package org.apache.hadoop.metadata.discovery.graph;
import
com.thinkaurelius.titan.core.TitanVertex
;
import
com.thinkaurelius.titan.core.TitanVertex
;
import
org.apache.hadoop.metadata.MetadataException
;
import
org.apache.hadoop.metadata.MetadataException
;
import
org.apache.hadoop.metadata.query.Expressions
;
import
org.apache.hadoop.metadata.query.*
;
import
org.apache.hadoop.metadata.query.GraphPersistenceStrategies
;
import
org.apache.hadoop.metadata.query.GraphPersistenceStrategies
$class
;
import
org.apache.hadoop.metadata.query.TypeUtils
;
import
org.apache.hadoop.metadata.query.TypeUtils
;
import
org.apache.hadoop.metadata.repository.MetadataRepository
;
import
org.apache.hadoop.metadata.repository.MetadataRepository
;
import
org.apache.hadoop.metadata.repository.Constants
;
import
org.apache.hadoop.metadata.repository.Constants
;
...
@@ -33,6 +31,7 @@ import org.apache.hadoop.metadata.typesystem.persistence.Id;
...
@@ -33,6 +31,7 @@ import org.apache.hadoop.metadata.typesystem.persistence.Id;
import
org.apache.hadoop.metadata.typesystem.types.*
;
import
org.apache.hadoop.metadata.typesystem.types.*
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
scala.collection.Traversable
;
import
java.util.List
;
import
java.util.List
;
...
@@ -187,8 +186,18 @@ public class DefaultGraphPersistenceStrategy implements GraphPersistenceStrategi
...
@@ -187,8 +186,18 @@ public class DefaultGraphPersistenceStrategy implements GraphPersistenceStrategi
public
String
idAttributeName
()
{
return
metadataRepository
.
getIdAttributeName
();
}
public
String
idAttributeName
()
{
return
metadataRepository
.
getIdAttributeName
();
}
@Override
@Override
public
String
typeTestExpression
(
String
typeName
)
{
public
scala
.
collection
.
Seq
<
String
>
typeTestExpression
(
String
typeName
,
IntSequence
intSeq
)
{
return
GraphPersistenceStrategies$class
.
typeTestExpression
(
this
,
typeName
);
return
GraphPersistenceStrategies$class
.
typeTestExpression
(
this
,
typeName
,
intSeq
);
}
@Override
public
boolean
collectTypeInstancesIntoVar
()
{
return
GraphPersistenceStrategies$class
.
collectTypeInstancesIntoVar
(
this
);
}
@Override
public
boolean
addGraphVertexPrefix
(
scala
.
collection
.
Traversable
<
String
>
preStatements
)
{
return
GraphPersistenceStrategies$class
.
addGraphVertexPrefix
(
this
,
preStatements
);
}
}
}
}
This diff is collapsed.
Click to expand it.
repository/src/main/scala/org/apache/hadoop/metadata/query/GraphPersistenceStrategies.scala
View file @
c71b5b31
...
@@ -115,8 +115,39 @@ trait GraphPersistenceStrategies {
...
@@ -115,8 +115,39 @@ trait GraphPersistenceStrategies {
_typeTestExpression
(
dataType
.
getName
,
"it.object"
)
_typeTestExpression
(
dataType
.
getName
,
"it.object"
)
}
}
def
typeTestExpression
(
typeName
:
String
)
:
String
=
{
def
addGraphVertexPrefix
(
preStatements
:
Traversable
[
String
])
=
!
collectTypeInstancesIntoVar
_typeTestExpression
(
typeName
,
"it"
)
/**
* Controls behavior of how instances of a Type are discovered.
* - query is generated in a way that indexes are exercised using a local set variable across multiple lookups
* - query is generated using an 'or' expression.
*
* '''This is a very bad idea: controlling query execution behavior via query generation.''' But our current
* knowledge of Gremlin seems to indicate we have no choice. See
* [[https://groups.google.com/forum/#!topic/gremlin-users/n1oV86yr4yU discussion in Gremlin group]].
* Also this seems a fragile solution, dependent on the memory requirements of the Set variable.
* For now enabling via the '''collectTypeInstancesIntoVar''' behavior setting. Reverting back would require
* setting this to false.
*
* Long term have to get to the bottom of Gremlin:
* - there doesn't seem to be a way to see the physical query plan. Maybe we should directly interface with Titan.
* - At least from querying perspective a columnar db may be a better route. Daniel Abadi did some good work
* on showing how to use a columnar store as a Graph Db.
*
*
* @return
*/
def
collectTypeInstancesIntoVar
=
true
def
typeTestExpression
(
typeName
:
String
,
intSeq
:
IntSequence
)
:
Seq
[
String
]
=
{
if
(
collectTypeInstancesIntoVar
)
typeTestExpressionMultiStep
(
typeName
,
intSeq
)
else
typeTestExpressionUsingFilter
(
typeName
)
}
private
def
typeTestExpressionUsingFilter
(
typeName
:
String
)
:
Seq
[
String
]
=
{
Seq
(
s
"""filter${_typeTestExpression(typeName, "it")}"""
)
}
}
private
def
_typeTestExpression
(
typeName
:
String
,
itRef
:
String
)
:
String
=
{
private
def
_typeTestExpression
(
typeName
:
String
,
itRef
:
String
)
:
String
=
{
...
@@ -125,6 +156,27 @@ trait GraphPersistenceStrategies {
...
@@ -125,6 +156,27 @@ trait GraphPersistenceStrategies {
|${itRef}.'${superTypeAttributeName}'.contains('${typeName}') : false)}"""
.
|${itRef}.'${superTypeAttributeName}'.contains('${typeName}') : false)}"""
.
stripMargin
.
replace
(
System
.
getProperty
(
"line.separator"
),
""
)
stripMargin
.
replace
(
System
.
getProperty
(
"line.separator"
),
""
)
}
}
private
def
typeTestExpressionMultiStep
(
typeName
:
String
,
intSeq
:
IntSequence
)
:
Seq
[
String
]
=
{
val
varName
=
s
"_var_${intSeq.next}"
Seq
(
newSetVar
(
varName
),
fillVarWithTypeInstances
(
typeName
,
varName
),
fillVarWithSubTypeInstances
(
typeName
,
varName
),
s
"$varName._()"
)
}
private
def
newSetVar
(
varName
:
String
)
=
s
"$varName = [] as Set"
private
def
fillVarWithTypeInstances
(
typeName
:
String
,
fillVar
:
String
)
=
{
s
"""g.V().has("${typeAttributeName}", "${typeName}").fill($fillVar)"""
}
private
def
fillVarWithSubTypeInstances
(
typeName
:
String
,
fillVar
:
String
)
=
{
s
"""g.V().has("${superTypeAttributeName}", "${typeName}").fill($fillVar)"""
}
}
}
object
GraphPersistenceStrategy1
extends
GraphPersistenceStrategies
{
object
GraphPersistenceStrategy1
extends
GraphPersistenceStrategies
{
...
...
This diff is collapsed.
Click to expand it.
repository/src/main/scala/org/apache/hadoop/metadata/query/GremlinQuery.scala
View file @
c71b5b31
...
@@ -25,6 +25,9 @@ import org.apache.hadoop.metadata.typesystem.types.TypeSystem
...
@@ -25,6 +25,9 @@ import org.apache.hadoop.metadata.typesystem.types.TypeSystem
import
scala.collection.mutable
import
scala.collection.mutable
import
scala.collection.mutable.ArrayBuffer
import
scala.collection.mutable.ArrayBuffer
trait
IntSequence
{
def
next
:
Int
}
case
class
GremlinQuery
(
expr
:
Expression
,
queryStr
:
String
,
resultMaping
:
Map
[
String
,
(
String
,
Int
)])
{
case
class
GremlinQuery
(
expr
:
Expression
,
queryStr
:
String
,
resultMaping
:
Map
[
String
,
(
String
,
Int
)])
{
...
@@ -127,6 +130,9 @@ class GremlinTranslator(expr: Expression,
...
@@ -127,6 +130,9 @@ class GremlinTranslator(expr: Expression,
gPersistenceBehavior
:
GraphPersistenceStrategies
)
gPersistenceBehavior
:
GraphPersistenceStrategies
)
extends
SelectExpressionHandling
{
extends
SelectExpressionHandling
{
val
preStatements
=
ArrayBuffer
[
String
]()
val
postStatements
=
ArrayBuffer
[
String
]()
val
wrapAndRule
:
PartialFunction
[
Expression
,
Expression
]
=
{
val
wrapAndRule
:
PartialFunction
[
Expression
,
Expression
]
=
{
case
f
:
FilterExpression
if
!f.condExpr.isInstanceOf
[
LogicalExpression
]
=>
case
f
:
FilterExpression
if
!f.condExpr.isInstanceOf
[
LogicalExpression
]
=>
FilterExpression
(
f
.
child
,
new
LogicalExpression
(
"and"
,
List
(
f
.
condExpr
)))
FilterExpression
(
f
.
child
,
new
LogicalExpression
(
"and"
,
List
(
f
.
condExpr
)))
...
@@ -144,7 +150,7 @@ class GremlinTranslator(expr: Expression,
...
@@ -144,7 +150,7 @@ class GremlinTranslator(expr: Expression,
()
()
}
}
class
counter
{
val
counter
=
new
IntSequence
{
var
i
:
Int
=
-
1
;
var
i
:
Int
=
-
1
;
def
next
:
Int
=
{
def
next
:
Int
=
{
...
@@ -152,7 +158,7 @@ class GremlinTranslator(expr: Expression,
...
@@ -152,7 +158,7 @@ class GremlinTranslator(expr: Expression,
}
}
}
}
def
addAliasToLoopInput
(
c
:
counter
=
new
counter
()
)
:
PartialFunction
[
Expression
,
Expression
]
=
{
def
addAliasToLoopInput
(
c
:
IntSequence
=
counter
)
:
PartialFunction
[
Expression
,
Expression
]
=
{
case
l
@LoopExpression
(
aliasE
@AliasExpression
(
_
,
_
),
_
,
_
)
=>
l
case
l
@LoopExpression
(
aliasE
@AliasExpression
(
_
,
_
),
_
,
_
)
=>
l
case
l
@LoopExpression
(
inputExpr
,
loopExpr
,
t
)
=>
{
case
l
@LoopExpression
(
inputExpr
,
loopExpr
,
t
)
=>
{
val
aliasE
=
AliasExpression
(
inputExpr
,
s
"_loop${c.next}"
)
val
aliasE
=
AliasExpression
(
inputExpr
,
s
"_loop${c.next}"
)
...
@@ -183,11 +189,17 @@ class GremlinTranslator(expr: Expression,
...
@@ -183,11 +189,17 @@ class GremlinTranslator(expr: Expression,
}
}
}
}
def
typeTestExpression
(
typeName
:
String
)
:
String
=
{
val
stats
=
gPersistenceBehavior
.
typeTestExpression
(
typeName
,
counter
)
preStatements
++=
stats
.
init
stats
.
last
}
private
def
genQuery
(
expr
:
Expression
,
inSelect
:
Boolean
)
:
String
=
expr
match
{
private
def
genQuery
(
expr
:
Expression
,
inSelect
:
Boolean
)
:
String
=
expr
match
{
case
ClassExpression
(
clsName
)
=>
case
ClassExpression
(
clsName
)
=>
s
"""filter${gPersistenceBehavior.typeTestExpression(clsName)}"""
typeTestExpression
(
clsName
)
case
TraitExpression
(
clsName
)
=>
case
TraitExpression
(
clsName
)
=>
s
"""filter${gPersistenceBehavior.typeTestExpression(clsName)}"""
typeTestExpression
(
clsName
)
case
fe
@FieldExpression
(
fieldName
,
fInfo
,
child
)
if
fe
.
dataType
.
getTypeCategory
==
TypeCategory
.
PRIMITIVE
=>
{
case
fe
@FieldExpression
(
fieldName
,
fInfo
,
child
)
if
fe
.
dataType
.
getTypeCategory
==
TypeCategory
.
PRIMITIVE
=>
{
val
fN
=
"\""
+
gPersistenceBehavior
.
fieldNameInVertex
(
fInfo
.
dataType
,
fInfo
.
attrInfo
)
+
"\""
val
fN
=
"\""
+
gPersistenceBehavior
.
fieldNameInVertex
(
fInfo
.
dataType
,
fInfo
.
attrInfo
)
+
"\""
child
match
{
child
match
{
...
@@ -283,6 +295,23 @@ class GremlinTranslator(expr: Expression,
...
@@ -283,6 +295,23 @@ class GremlinTranslator(expr: Expression,
case
x
=>
throw
new
GremlinTranslationException
(
x
,
"expression not yet supported"
)
case
x
=>
throw
new
GremlinTranslationException
(
x
,
"expression not yet supported"
)
}
}
def
genFullQuery
(
expr
:
Expression
)
:
String
=
{
var
q
=
genQuery
(
expr
,
false
)
if
(
gPersistenceBehavior
.
addGraphVertexPrefix
(
preStatements
))
{
q
=
s
"g.V.$q"
}
q
=
s
"$q.toList()"
q
=
(
preStatements
++
Seq
(
q
)
++
postStatements
).
mkString
(
""
,
";"
,
""
)
/*
* the L:{} represents a groovy code block; the label is needed
* to distinguish it from a groovy closure.
*/
s
"L:{$q}"
}
def
translate
()
:
GremlinQuery
=
{
def
translate
()
:
GremlinQuery
=
{
var
e1
=
expr
.
transformUp
(
wrapAndRule
)
var
e1
=
expr
.
transformUp
(
wrapAndRule
)
...
@@ -297,13 +326,13 @@ class GremlinTranslator(expr: Expression,
...
@@ -297,13 +326,13 @@ class GremlinTranslator(expr: Expression,
e1
match
{
e1
match
{
case
e1
:
SelectExpression
=>
{
case
e1
:
SelectExpression
=>
{
val
rMap
=
buildResultMapping
(
e1
)
val
rMap
=
buildResultMapping
(
e1
)
GremlinQuery
(
e1
,
s
"g.V.${genQuery(e1, false)}.toList()"
,
rMap
)
GremlinQuery
(
e1
,
genFullQuery
(
e1
)
,
rMap
)
}
}
case
pe
@PathExpression
(
se
@SelectExpression
(
child
,
selectList
))
=>
{
case
pe
@PathExpression
(
se
@SelectExpression
(
child
,
selectList
))
=>
{
val
rMap
=
buildResultMapping
(
se
)
val
rMap
=
buildResultMapping
(
se
)
GremlinQuery
(
e1
,
s
"g.V.${genQuery(pe, false)}.toList()"
,
rMap
)
GremlinQuery
(
e1
,
genFullQuery
(
e1
)
,
rMap
)
}
}
case
e1
=>
GremlinQuery
(
e1
,
s
"g.V.${genQuery(e1, false)}.toList()"
,
null
)
case
e1
=>
GremlinQuery
(
e1
,
genFullQuery
(
e1
)
,
null
)
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment