Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
b06e45b8
Commit
b06e45b8
authored
Feb 04, 2015
by
Harish Butani
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add support for loop expression, add lineage tests
parent
47594923
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
492 additions
and
18 deletions
+492
-18
Expressions.scala
.../scala/org/apache/hadoop/metadata/query/Expressions.scala
+30
-1
GraphPersistenceStrategies.scala
...he/hadoop/metadata/query/GraphPersistenceStrategies.scala
+4
-0
GremlinQuery.scala
...scala/org/apache/hadoop/metadata/query/GremlinQuery.scala
+20
-0
Resolver.scala
...ain/scala/org/apache/hadoop/metadata/query/Resolver.scala
+42
-12
ExpressionTest.scala
...ala/org/apache/hadoop/metadata/query/ExpressionTest.scala
+5
-0
GremlinTest.scala
.../scala/org/apache/hadoop/metadata/query/GremlinTest.scala
+5
-5
LineageQueryTest.scala
...a/org/apache/hadoop/metadata/query/LineageQueryTest.scala
+386
-0
No files found.
typesystem/src/main/scala/org/apache/hadoop/metadata/query/Expressions.scala
View file @
b06e45b8
...
...
@@ -298,7 +298,10 @@ object Expressions {
/**
 * Wraps this expression and the given condition in a FilterExpression.
 *
 * @param condExpr the condition expression passed as the filter's second argument
 * @return a new FilterExpression built from `this` and `condExpr`
 */
def where(condExpr: Expression): FilterExpression = new FilterExpression(this, condExpr)
/**
 * Wraps this expression and the given select list in a SelectExpression.
 *
 * @param selectList the expressions to select; the varargs are converted to a List
 * @return a new SelectExpression built from `this` and `selectList`
 */
def select(selectList: Expression*): SelectExpression =
  new SelectExpression(this, selectList.toList)
/**
 * Builds a LoopExpression over this expression with no iteration count (`times` is None).
 *
 * @param loopingExpr the expression used as the loop's looping expression
 * @return a new LoopExpression with `this` as its input
 */
def loop(loopingExpr: Expression): LoopExpression = new LoopExpression(this, loopingExpr, None)
/**
 * Builds a LoopExpression over this expression with an explicit iteration-count literal.
 *
 * @param loopingExpr the expression used as the loop's looping expression
 * @param times       integer literal stored (wrapped in Some) as the loop's `times`
 * @return a new LoopExpression with `this` as its input
 */
def loop(loopingExpr: Expression, times: Literal[Int]): LoopExpression =
  new LoopExpression(this, loopingExpr, Some(times))
}
...
...
@@ -623,5 +626,31 @@ object Expressions {
// Renders as "<child> select <item1>, <item2>, ...".
override def toString = {
  val projections = selectListWithAlias.mkString(", ")
  s"""$child select $projections"""
}
}
/**
 * Expression that pairs an input expression with a looping expression and an
 * optional iteration-count literal.
 *
 * @param input             the expression the loop starts from
 * @param loopingExpression the expression applied by the loop
 * @param times             optional integer literal; only used here when rendering `toString`
 */
case class LoopExpression(input: Expression,
                          loopingExpression: Expression,
                          times: Option[Literal[Int]]) extends Expression {

  val children = List(input, loopingExpression)

  /**
   * The data type of the loop, which is the data type of `input`.
   *
   * Throws UnresolvedException while this expression is not resolved, and
   * ExpressionException when the input is not of the CLASS type category or when
   * the input and looping expressions have different data types.
   */
  lazy val dataType = {
    if (!resolved) {
      throw new UnresolvedException(this,
        "datatype. Can not resolve due to unresolved children")
    }
    if (input.dataType.getTypeCategory != TypeCategory.CLASS) {
      throw new ExpressionException(this,
        s"Loop Expression applied to type : '${input.dataType.getName}';" +
          " loop can only be applied to Class Expressions")
    }
    if (input.dataType != loopingExpression.dataType) {
      // The original message ended in "}})" and printed a stray '}' before the ')'.
      throw new ExpressionException(this,
        "Invalid Loop Expression; input and loopExpression dataTypes don't match: " +
          s"(${input.dataType.getName},${loopingExpression.dataType.getName})")
    }
    input.dataType
  }

  // Named expressions are taken from the input expression only.
  override def namedExpressions = input.namedExpressions

  override def toString = times match {
    case Some(count) => s"$input loop ($loopingExpression) times ${count.value}"
    case None => s"$input loop ($loopingExpression)"
  }
}
}
typesystem/src/main/scala/org/apache/hadoop/metadata/query/GraphPersistenceStrategies.scala
View file @
b06e45b8
...
...
@@ -91,6 +91,10 @@ trait GraphPersistenceStrategies {
case
"<="
=>
"T.lte"
case
_
=>
throw
new
ExpressionException
(
op
,
"Comparison operator not supported in Gremlin"
)
}
/**
 * Builds the closure-style string `{it.object.'<typeAttributeName>' == '<type name>'}`
 * for the given data type.
 * NOTE(review): presumably consumed as a closure of a Gremlin loop step - confirm at the call site.
 *
 * @param dataType the data type whose name is compared against the type attribute
 */
def loopObjectExpression(dataType: IDataType[_]) = {
  val attrName = typeAttributeName
  val typeName = dataType.getName
  s"{it.object.'$attrName' == '$typeName'}"
}
}
object
GraphPersistenceStrategy1
extends
GraphPersistenceStrategies
{
...
...
typesystem/src/main/scala/org/apache/hadoop/metadata/query/GremlinQuery.scala
View file @
b06e45b8
...
...
@@ -20,6 +20,7 @@ package org.apache.hadoop.metadata.query
import
org.apache.hadoop.metadata.query.Expressions._
import
org.apache.hadoop.metadata.types.DataTypes.TypeCategory
import
org.apache.hadoop.metadata.types.IDataType
import
scala.collection.mutable
import
scala.collection.mutable.ArrayBuffer
...
...
@@ -141,6 +142,16 @@ class GremlinTranslator(expr: Expression,
()
}
/**
 * Mutable integer counter. `i` starts at -1, so the first call to `next` yields 0.
 */
class counter {
  var i: Int = -1

  /** Increments `i` by one and returns the new value. */
  def next: Int = {
    i = i + 1
    i
  }
}
/**
 * Rewrite rule that makes sure the input of every LoopExpression is an AliasExpression.
 *
 * A loop whose input is already an AliasExpression is returned unchanged. Any other
 * loop is rebuilt with its input wrapped in an AliasExpression named `_loop<n>`,
 * where `<n>` is the next value of the counter `c`.
 *
 * @param c counter supplying the numeric suffix of generated aliases
 */
def addAliasToLoopInput(c: counter = new counter()): PartialFunction[Expression, Expression] = {
  case alreadyAliased@LoopExpression(_: AliasExpression, _, _) => alreadyAliased
  case LoopExpression(inputExpr, loopExpr, t) =>
    LoopExpression(AliasExpression(inputExpr, s"_loop${c.next}"), loopExpr, t)
}
private
def
genQuery
(
expr
:
Expression
,
inSelect
:
Boolean
)
:
String
=
expr
match
{
case
ClassExpression
(
clsName
)
=>
s
"""has("${gPersistenceBehavior.typeAttributeName}","$clsName")"""
case
TraitExpression
(
clsName
)
=>
s
"""has("${gPersistenceBehavior.typeAttributeName}","$clsName")"""
...
...
@@ -192,6 +203,14 @@ class GremlinTranslator(expr: Expression,
val
srcExprsString
=
srcExprsStringList
.
foldLeft
(
""
)(
_
+
"{"
+
_
+
"}"
)
s
"${genQuery(child, inSelect)}.select($srcNamesString)$srcExprsString"
}
case
loop
@LoopExpression
(
input
,
loopExpr
,
t
)
=>
{
val
inputQry
=
genQuery
(
input
,
inSelect
)
val
loopingPathGExpr
=
genQuery
(
loopExpr
,
inSelect
)
val
loopGExpr
=
s
"""loop("${input.asInstanceOf[AliasExpression].alias}")"""
val
untilCriteria
=
if
(
t
.
isDefined
)
s
"{it.loops < ${t.get.value}}"
else
"{true}"
val
loopObjectGExpr
=
gPersistenceBehavior
.
loopObjectExpression
(
input
.
dataType
)
s
"""${inputQry}.${loopingPathGExpr}.${loopGExpr}${untilCriteria}${loopObjectGExpr}"""
}
case
BackReference
(
alias
,
_
,
_
)
=>
if
(
inSelect
)
gPersistenceBehavior
.
fieldPrefixInSelect
else
s
"""back("$alias")"""
case
AliasExpression
(
child
,
alias
)
=>
s
"""${genQuery(child, inSelect)}.as("$alias")"""
...
...
@@ -215,6 +234,7 @@ class GremlinTranslator(expr: Expression,
e1
=
e1
.
transformUp
(
new
AddAliasToSelectInput
)
e1
.
traverseUp
(
validateSelectExprHaveOneSrc
)
e1
=
e1
.
transformUp
(
addAliasToLoopInput
())
e1
match
{
case
e1
:
SelectExpression
=>
{
...
...
typesystem/src/main/scala/org/apache/hadoop/metadata/query/Resolver.scala
View file @
b06e45b8
...
...
@@ -21,7 +21,8 @@ package org.apache.hadoop.metadata.query
import
Expressions._
import
org.apache.hadoop.metadata.types.IDataType
class
Resolver
(
srcExpr
:
Option
[
Expression
]
=
None
,
aliases
:
Map
[
String
,
Expression
]
=
Map
())
class
Resolver
(
srcExpr
:
Option
[
Expression
]
=
None
,
aliases
:
Map
[
String
,
Expression
]
=
Map
(),
connectClassExprToSrc
:
Boolean
=
false
)
extends
PartialFunction
[
Expression
,
Expression
]
{
import
TypeUtils._
...
...
@@ -30,13 +31,9 @@ extends PartialFunction[Expression, Expression] {
def
apply
(
e
:
Expression
)
:
Expression
=
e
match
{
case
idE
@IdExpression
(
name
)
=>
{
val
cType
=
resolveAsClassType
(
name
)
if
(
cType
.
isDefined
)
{
return
new
ClassExpression
(
name
)
}
val
tType
=
resolveAsTraitType
(
name
)
if
(
tType
.
isDefined
)
{
return
new
TraitExpression
(
name
)
val
backExpr
=
aliases
.
get
(
name
)
if
(
backExpr
.
isDefined
)
{
return
new
BackReference
(
name
,
backExpr
.
get
,
None
)
}
if
(
srcExpr
.
isDefined
)
{
val
fInfo
=
resolveReference
(
srcExpr
.
get
.
dataType
,
name
)
...
...
@@ -44,12 +41,23 @@ extends PartialFunction[Expression, Expression] {
return
new
FieldExpression
(
name
,
fInfo
.
get
,
None
)
}
}
val
backExpr
=
aliases
.
get
(
name
)
if
(
backExpr
.
isDefined
)
{
return
new
BackReference
(
name
,
backExpr
.
get
,
None
)
val
cType
=
resolveAsClassType
(
name
)
if
(
cType
.
isDefined
)
{
return
new
ClassExpression
(
name
)
}
val
tType
=
resolveAsTraitType
(
name
)
if
(
tType
.
isDefined
)
{
return
new
TraitExpression
(
name
)
}
idE
}
case
ce
@ClassExpression
(
clsName
)
if
connectClassExprToSrc
&&
srcExpr
.
isDefined
=>
{
val
fInfo
=
resolveReference
(
srcExpr
.
get
.
dataType
,
clsName
)
if
(
fInfo
.
isDefined
)
{
return
new
FieldExpression
(
clsName
,
fInfo
.
get
,
None
)
}
ce
}
case
f
@UnresolvedFieldExpression
(
child
,
fieldName
)
if
child
.
resolved
=>
{
var
fInfo
:
Option
[
FieldInfo
]
=
None
...
...
@@ -73,6 +81,10 @@ extends PartialFunction[Expression, Expression] {
val
r
=
new
Resolver
(
Some
(
child
),
child
.
namedExpressions
)
return
new
SelectExpression
(
child
,
selectList
.
map
{
_
.
transformUp
(
r
)})
}
case
l
@LoopExpression
(
inputExpr
,
loopExpr
,
t
)
if
inputExpr
.
resolved
=>
{
val
r
=
new
Resolver
(
Some
(
inputExpr
),
inputExpr
.
namedExpressions
,
true
)
return
new
LoopExpression
(
inputExpr
,
loopExpr
.
transformUp
(
r
),
t
)
}
case
x
=>
x
}
}
...
...
@@ -88,7 +100,8 @@ object FieldValidator extends PartialFunction[Expression, Expression] {
/** True when `e` is a ClassExpression or a TraitExpression. */
def isSrc(e: Expression) = e match {
  case _: ClassExpression | _: TraitExpression => true
  case _ => false
}
def
validateQualifiedField
(
srcDataType
:
IDataType
[
_
])
:
PartialFunction
[
Expression
,
Expression
]
=
{
case
FieldExpression
(
fNm
,
fInfo
,
Some
(
child
))
if
(
child
.
children
==
Nil
&&
child
.
dataType
==
srcDataType
)
=>
case
FieldExpression
(
fNm
,
fInfo
,
Some
(
child
))
if
(
child
.
children
==
Nil
&&
!
child
.
isInstanceOf
[
BackReference
]
&&
child
.
dataType
==
srcDataType
)
=>
FieldExpression
(
fNm
,
fInfo
,
None
)
case
fe
@FieldExpression
(
fNm
,
fInfo
,
Some
(
child
))
if
isSrc
(
child
)
=>
throw
new
ExpressionException
(
fe
,
s
"srcType of field doesn't match input type"
)
...
...
@@ -102,6 +115,13 @@ object FieldValidator extends PartialFunction[Expression, Expression] {
throw
new
ExpressionException
(
iT
,
s
"srcType of field doesn't match input type"
)
}
/**
 * Visitor that accepts FieldExpression nodes and rejects every other node.
 *
 * @param loopExpr the loop expression reported as the source of the error
 * @return a partial function that yields Unit for a FieldExpression and throws
 *         ExpressionException for any other expression
 */
def validateOnlyFieldReferencesInLoopExpressions(loopExpr: LoopExpression)
: PartialFunction[Expression, Unit] = {
  case _: FieldExpression => ()
  case other =>
    val message =
      s"Loop Expression can only contain field references; '${other.toString}' not supported."
    throw new ExpressionException(loopExpr, message)
}
def
apply
(
e
:
Expression
)
:
Expression
=
e
match
{
case
f
@FilterExpression
(
inputExpr
,
condExpr
)
=>
{
val
validatedCE
=
condExpr
.
transformUp
(
validateQualifiedField
(
inputExpr
.
dataType
))
...
...
@@ -115,6 +135,15 @@ object FieldValidator extends PartialFunction[Expression, Expression] {
val
v
=
validateQualifiedField
(
child
.
dataType
)
return
new
SelectExpression
(
child
,
selectList
.
map
{
_
.
transformUp
(
v
)})
}
case
l
@LoopExpression
(
inputExpr
,
loopExpr
,
t
)
=>
{
val
validatedLE
=
loopExpr
.
transformUp
(
validateQualifiedField
(
inputExpr
.
dataType
))
val
l1
=
{
if
(
validatedLE
.
fastEquals
(
loopExpr
)
)
l
else
new
LoopExpression
(
inputExpr
,
validatedLE
,
t
)
}
l1
.
loopingExpression
.
traverseUp
(
validateOnlyFieldReferencesInLoopExpressions
(
l1
))
l1
}
case
x
=>
x
}
}
\ No newline at end of file
typesystem/src/test/scala/org/apache/hadoop/metadata/query/ExpressionTest.scala
View file @
b06e45b8
...
...
@@ -165,4 +165,9 @@ class ExpressionTest extends BaseTest {
)
println
(
e
)
}
// Validates a loop query over Table following LoadProcess.outputTable and prints the result.
@Test def testLineageAll: Unit = {
  val tables = _class("Table")
  val lineageStep = id("LoadProcess").field("outputTable")
  val validated = QueryProcessor.validate(tables.loop(lineageStep))
  println(validated)
}
}
typesystem/src/test/scala/org/apache/hadoop/metadata/query/GremlinTest.scala
View file @
b06e45b8
...
...
@@ -29,6 +29,7 @@ class GremlinTest extends FunSuite with BeforeAndAfterAll {
if
(
expected
!=
null
)
{
val
a
=
STRUCT_NAME_REGEX
.
replaceAllIn
(
rJ
,
""
)
val
b
=
STRUCT_NAME_REGEX
.
replaceAllIn
(
expected
,
""
)
println
(
rJ
)
Assertions
.
assert
(
a
==
b
)
}
else
{
println
(
rJ
)
...
...
@@ -81,8 +82,7 @@ class GremlinTest extends FunSuite with BeforeAndAfterAll {
test
(
"testBackReference"
)
{
val
r
=
QueryProcessor
.
evaluate
(
_class
(
"DB"
).
as
(
"db"
).
field
(
"Table"
).
where
(
id
(
"db"
).
field
(
"name"
).
`=`
(
string
(
"Reporting"
))),
g
)
validateJson
(
r
,
"{\n \"query\":\"DB as db Table where (db.name = \\\"Reporting\\\")\",\n \"dataType\":{\n \"superTypes\":[\n \n ],\n \"hierarchicalMetaTypeName\":\"org.apache.hadoop.metadata.types.ClassType\",\n \"typeName\":\"Table\",\n \"attributeDefinitions\":[\n {\n \"name\":\"name\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"db\",\n \"dataTypeName\":\"DB\",\n \"multiplicity\":{\n \"lower\":1,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"sd\",\n \"dataTypeName\":\"StorageDesc\",\n \"multiplicity\":{\n \"lower\":1,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"Table\",\n \"$id$\":{\n \"id\":\"8960\",\n \"$typeName$\":\"Table\",\n \"version\":0\n },\n \"sd\":{\n \"id\":\"7424\",\n \"$typeName$\":\"StorageDesc\",\n \"version\":0\n },\n \"db\":{\n \"id\":\"7168\",\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"name\":\"sales_fact_daily_mv\"\n },\n {\n \"$typeName$\":\"Table\",\n \"$id$\":{\n \"id\":\"12800\",\n \"$typeName$\":\"Table\",\n \"version\":0\n },\n \"sd\":{\n \"id\":\"11264\",\n \"$typeName$\":\"StorageDesc\",\n \"version\":0\n },\n \"db\":{\n \"id\":\"7168\",\n \"$typeName$\":\"DB\",\n \"version\":0\n },\n \"name\":\"sales_fact_monthly_mv\"\n }\n ]\n}"
)
}
validateJson
(
r
,
null
)
}
test
(
"testArith"
)
{
val
r
=
QueryProcessor
.
evaluate
(
_class
(
"DB"
).
where
(
id
(
"name"
).
`=`
(
string
(
"Reporting"
))).
...
...
@@ -102,7 +102,7 @@ class GremlinTest extends FunSuite with BeforeAndAfterAll {
where
((
isTrait
(
"Dimension"
))).
select
(
id
(
"db1"
).
field
(
"name"
).
as
(
"dbName"
),
id
(
"tab"
).
field
(
"name"
).
as
(
"tabName"
)),
g
)
validateJson
(
r
,
"{\n \"query\":\"DB as db1 where (name = \\\"Sales\\\") Table as tab where DB as db1 where (name = \\\"Sales\\\") Table as tab is Dimension as _src1 select db1.name as dbName,
_src1
.name as tabName\",\n \"dataType\":{\n \"typeName\":\"__tempQueryResultStruct5\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"__tempQueryResultStruct5\",\n \"dbName\":\"Sales\",\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct5\",\n \"dbName\":\"Sales\",\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct5\",\n \"dbName\":\"Sales\",\n \"tabName\":\"customer_dim\"\n }\n ]\n}"
)
validateJson
(
r
,
"{\n \"query\":\"DB as db1 where (name = \\\"Sales\\\") Table as tab where DB as db1 where (name = \\\"Sales\\\") Table as tab is Dimension as _src1 select db1.name as dbName,
tab
.name as tabName\",\n \"dataType\":{\n \"typeName\":\"__tempQueryResultStruct5\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"__tempQueryResultStruct5\",\n \"dbName\":\"Sales\",\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct5\",\n \"dbName\":\"Sales\",\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct5\",\n \"dbName\":\"Sales\",\n \"tabName\":\"customer_dim\"\n }\n ]\n}"
)
}
test
(
"testJoinAndSelect2"
)
{
...
...
@@ -111,7 +111,7 @@ class GremlinTest extends FunSuite with BeforeAndAfterAll {
.
or
(
id
(
"name"
).
`=`
(
string
(
"Reporting"
)))).
field
(
"Table"
).
as
(
"tab"
)
.
select
(
id
(
"db1"
).
field
(
"name"
).
as
(
"dbName"
),
id
(
"tab"
).
field
(
"name"
).
as
(
"tabName"
)),
g
)
validateJson
(
r
,
"{\n \"query\":\"DB as db1 where (createTime > 0) or (name = \\\"Reporting\\\") Table as tab select db1.name as dbName, tab.name as tabName\",\n \"dataType\":{\n \"typeName\":\"__tempQueryResultStruct6\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"sales_fact\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"customer_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_daily_mv\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_monthly_mv\"\n }\n ]\n}"
)
validateJson
(
r
,
"{\n \"query\":\"DB as db1 where (
db1.
createTime > 0) or (name = \\\"Reporting\\\") Table as tab select db1.name as dbName, tab.name as tabName\",\n \"dataType\":{\n \"typeName\":\"__tempQueryResultStruct6\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"sales_fact\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Sales\",\n \"tabName\":\"customer_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_daily_mv\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct6\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_monthly_mv\"\n }\n ]\n}"
)
}
test
(
"testJoinAndSelect3"
)
{
...
...
@@ -121,7 +121,7 @@ class GremlinTest extends FunSuite with BeforeAndAfterAll {
.
or
(
id
(
"db1"
).
hasField
(
"owner"
))).
field
(
"Table"
).
as
(
"tab"
)
.
select
(
id
(
"db1"
).
field
(
"name"
).
as
(
"dbName"
),
id
(
"tab"
).
field
(
"name"
).
as
(
"tabName"
)),
g
)
validateJson
(
r
,
"{\n \"query\":\"DB as db1 where (
createTime > 0) and (
name = \\\"Reporting\\\") or DB as db1 has owner Table as tab select db1.name as dbName, tab.name as tabName\",\n \"dataType\":{\n \"typeName\":\"__tempQueryResultStruct7\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"sales_fact\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"customer_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_daily_mv\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_monthly_mv\"\n }\n ]\n}"
)
validateJson
(
r
,
"{\n \"query\":\"DB as db1 where (
db1.createTime > 0) and (db1.
name = \\\"Reporting\\\") or DB as db1 has owner Table as tab select db1.name as dbName, tab.name as tabName\",\n \"dataType\":{\n \"typeName\":\"__tempQueryResultStruct7\",\n \"attributeDefinitions\":[\n {\n \"name\":\"dbName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n },\n {\n \"name\":\"tabName\",\n \"dataTypeName\":\"string\",\n \"multiplicity\":{\n \"lower\":0,\n \"upper\":1,\n \"isUnique\":false\n },\n \"isComposite\":false,\n \"isUnique\":false,\n \"isIndexable\":true,\n \"reverseAttributeName\":null\n }\n ]\n },\n \"rows\":[\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"sales_fact\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"product_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"time_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Sales\",\n \"tabName\":\"customer_dim\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_daily_mv\"\n },\n {\n \"$typeName$\":\"__tempQueryResultStruct7\",\n \"dbName\":\"Reporting\",\n \"tabName\":\"sales_fact_monthly_mv\"\n }\n ]\n}"
)
}
}
typesystem/src/test/scala/org/apache/hadoop/metadata/query/LineageQueryTest.scala
0 → 100644
View file @
b06e45b8
package
org.apache.hadoop.metadata.query
import
com.thinkaurelius.titan.core.TitanGraph
import
org.apache.hadoop.metadata.query.Expressions._
import
org.apache.hadoop.metadata.types.TypeSystem
import
org.junit.Test
import
org.junit.runner.RunWith
import
org.scalatest.
{
Assertions
,
BeforeAndAfterAll
,
FunSuite
}
import
org.scalatest.junit.JUnitRunner
@RunWith
(
classOf
[
JUnitRunner
])
class
LineageQueryTest
extends
FunSuite
with
BeforeAndAfterAll
{
var
g
:
TitanGraph
=
null
// Resets the TypeSystem singleton, sets up the test types and stores the test graph in `g`.
// Declared with an explicit `: Unit =` instead of the deprecated procedure syntax.
override def beforeAll(): Unit = {
  TypeSystem.getInstance().reset()
  QueryTestsUtils.setupTypes
  g = QueryTestsUtils.setupTestGraph
}
// Shuts the test graph down. `g` is initialised to null and only assigned in beforeAll,
// so guard against null instead of throwing a NullPointerException during teardown.
override def afterAll(): Unit = {
  Option(g).foreach(_.shutdown())
}
val
STRUCT_NAME_REGEX
=
(
TypeUtils
.
TEMP_STRUCT_NAME_PREFIX
+
"\\d+"
).
r
val
PREFIX_SPACES_REGEX
=
(
"\\n\\s*"
).
r
/**
 * Compares the JSON of a query result with an expected JSON string, or prints the
 * result JSON when no expectation is given.
 *
 * Before comparing, both strings have every STRUCT_NAME_REGEX match and every
 * PREFIX_SPACES_REGEX match (a newline followed by whitespace) removed.
 *
 * @param r        the query result whose `toJson` output is checked
 * @param expected the expected JSON, or null to only print the actual JSON
 */
def validateJson(r: GremlinQueryResult, expected: String = null): Unit = {
  def normalize(json: String): String =
    PREFIX_SPACES_REGEX.replaceAllIn(STRUCT_NAME_REGEX.replaceAllIn(json, ""), "")

  val actualJson = r.toJson
  Option(expected) match {
    case Some(expectedJson) =>
      val actual = normalize(actualJson)
      val wanted = normalize(expectedJson)
      Assertions.assert(actual == wanted)
    case None =>
      println(actualJson)
  }
}
// Evaluates LoadProcess.inputTables against the test graph and checks the JSON result.
// The unused local `val x = r.toJson` from the original has been removed.
test("testInputTables") {
  val r = QueryProcessor.evaluate(_class("LoadProcess").field("inputTables"), g)
  validateJson(r, """{
"query":"LoadProcess inputTables",
"dataType":{
"superTypes":[

],
"hierarchicalMetaTypeName":"org.apache.hadoop.metadata.types.ClassType",
"typeName":"Table",
"attributeDefinitions":[
{
"name":"name",
"dataTypeName":"string",
"multiplicity":{
"lower":0,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
},
{
"name":"db",
"dataTypeName":"DB",
"multiplicity":{
"lower":1,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
},
{
"name":"sd",
"dataTypeName":"StorageDesc",
"multiplicity":{
"lower":1,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
}
]
},
"rows":[
{
"$typeName$":"Table",
"$id$":{
"id":"2048",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"512",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"256",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact"
},
{
"$typeName$":"Table",
"$id$":{
"id":"4864",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"3840",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"256",
"$typeName$":"DB",
"version":0
},
"name":"time_dim",
"$traits$":{
"Dimension":{
"$typeName$":"Dimension"
}
}
},
{
"$typeName$":"Table",
"$id$":{
"id":"8960",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"7424",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"7168",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact_daily_mv"
}
]
}""")
}
// Evaluates Table -> LoadProcess -> outputTable; no expected JSON is given, so the result is only printed.
test("testLoadProcessOut") {
  val query = _class("Table").field("LoadProcess").field("outputTable")
  val result = QueryProcessor.evaluate(query, g)
  validateJson(result, expected = null)
}
// Evaluates a loop over Table following LoadProcess.outputTable and checks the JSON result.
test("testLineageAll") {
  val query = _class("Table").loop(id("LoadProcess").field("outputTable"))
  val result = QueryProcessor.evaluate(query, g)
  val expectedJson = """{
"query":"Table as _loop0 loop (LoadProcess outputTable)",
"dataType":{
"superTypes":[

],
"hierarchicalMetaTypeName":"org.apache.hadoop.metadata.types.ClassType",
"typeName":"Table",
"attributeDefinitions":[
{
"name":"name",
"dataTypeName":"string",
"multiplicity":{
"lower":0,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
},
{
"name":"db",
"dataTypeName":"DB",
"multiplicity":{
"lower":1,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
},
{
"name":"sd",
"dataTypeName":"StorageDesc",
"multiplicity":{
"lower":1,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
}
]
},
"rows":[
{
"$typeName$":"Table",
"$id$":{
"id":"8960",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"7424",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"7168",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact_daily_mv"
},
{
"$typeName$":"Table",
"$id$":{
"id":"12800",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"11264",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"7168",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact_monthly_mv"
},
{
"$typeName$":"Table",
"$id$":{
"id":"8960",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"7424",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"7168",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact_daily_mv"
},
{
"$typeName$":"Table",
"$id$":{
"id":"12800",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"11264",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"7168",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact_monthly_mv"
},
{
"$typeName$":"Table",
"$id$":{
"id":"12800",
"$typeName$":"Table",
"version":0
},
"sd":{
"id":"11264",
"$typeName$":"StorageDesc",
"version":0
},
"db":{
"id":"7168",
"$typeName$":"DB",
"version":0
},
"name":"sales_fact_monthly_mv"
}
]
}"""
  validateJson(result, expectedJson)
}
// Evaluates the aliased loop query with a select of source and destination table names
// and checks the JSON result.
test("testLineageAllSelect") {
  val lineage = _class("Table").as("src").loop(id("LoadProcess").field("outputTable")).as("dest")
  val query = lineage.select(
    id("src").field("name").as("srcTable"),
    id("dest").field("name").as("destTable"))
  val result = QueryProcessor.evaluate(query, g)
  val expectedJson = """{
"query":"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable",
"dataType":{
"typeName":"__tempQueryResultStruct2",
"attributeDefinitions":[
{
"name":"srcTable",
"dataTypeName":"string",
"multiplicity":{
"lower":0,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
},
{
"name":"destTable",
"dataTypeName":"string",
"multiplicity":{
"lower":0,
"upper":1,
"isUnique":false
},
"isComposite":false,
"isUnique":false,
"isIndexable":true,
"reverseAttributeName":null
}
]
},
"rows":[
{
"$typeName$":"__tempQueryResultStruct2",
"srcTable":"sales_fact",
"destTable":"sales_fact_daily_mv"
},
{
"$typeName$":"__tempQueryResultStruct2",
"srcTable":"sales_fact",
"destTable":"sales_fact_monthly_mv"
},
{
"$typeName$":"__tempQueryResultStruct2",
"srcTable":"time_dim",
"destTable":"sales_fact_daily_mv"
},
{
"$typeName$":"__tempQueryResultStruct2",
"srcTable":"time_dim",
"destTable":"sales_fact_monthly_mv"
},
{
"$typeName$":"__tempQueryResultStruct2",
"srcTable":"sales_fact_daily_mv",
"destTable":"sales_fact_monthly_mv"
}
]
}"""
  validateJson(result, expectedJson)
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment