dataplatform / atlas

Commit 879eda63, authored Apr 08, 2015 by Venkatesh Seetharam
Add schema API to Hive and minor refactoring

parent b1e6c379

Showing 15 changed files with 491 additions and 183 deletions (+491 −183)
.../apache/hadoop/metadata/discovery/HiveLineageService.java            +52  −30
.../org/apache/hadoop/metadata/discovery/LineageService.java            +8   −0
...metadata/discovery/graph/GraphBackedDiscoveryService.java            +10  −5
...adata/repository/graph/GraphBackedMetadataRepository.java            +15  −8
...y/src/test/java/org/apache/hadoop/metadata/TestUtils.java            +32  −0
...che/hadoop/metadata/discovery/HiveLineageServiceTest.java            +136 −48
repository/src/test/resources/application.properties                   +9   −2
src/conf/application.properties                                         +10  −0
.../java/org/apache/hadoop/metadata/examples/QuickStart.java            +30  −29
...he/hadoop/metadata/web/resources/HiveLineageResource.java            +34  −0
webapp/src/main/resources/application.properties                        +9   −2
.../apache/hadoop/metadata/web/resources/BaseResourceIT.java            +4   −3
...hadoop/metadata/web/resources/EntityJerseyResourceIT.java            +12  −22
...p/metadata/web/resources/HiveLineageJerseyResourceIT.java            +126 −31
...data/web/resources/MetadataDiscoveryJerseyResourceIT.java            +4   −3
repository/src/main/java/org/apache/hadoop/metadata/discovery/HiveLineageService.java
@@ -19,13 +19,13 @@
 package org.apache.hadoop.metadata.discovery;

 import com.thinkaurelius.titan.core.TitanGraph;
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.hadoop.metadata.discovery.graph.DefaultGraphPersistenceStrategy;
+import org.apache.hadoop.metadata.discovery.graph.GraphBackedDiscoveryService;
 import org.apache.hadoop.metadata.query.Expressions;
-import org.apache.hadoop.metadata.query.GremlinQuery;
-import org.apache.hadoop.metadata.query.GremlinTranslator;
 import org.apache.hadoop.metadata.query.HiveLineageQuery;
 import org.apache.hadoop.metadata.query.HiveWhereUsedQuery;
-import org.apache.hadoop.metadata.query.QueryProcessor;
 import org.apache.hadoop.metadata.repository.MetadataRepository;
 import org.apache.hadoop.metadata.repository.graph.GraphProvider;
 import org.slf4j.Logger;
@@ -45,23 +45,47 @@ public class HiveLineageService implements LineageService {

     private static final Logger LOG = LoggerFactory.getLogger(HiveLineageService.class);

-    // todo - externalize these into configuration
-    private static final String HIVE_TABLE_TYPE_NAME = "hive_table";
-    private static final String HIVE_PROCESS_TYPE_NAME = "hive_process";
-    private static final String HIVE_PROCESS_INPUT_ATTRIBUTE_NAME = "inputTables";
-    private static final String HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME = "outputTables";
-    private static final Option<List<String>> SELECT_ATTRIBUTES =
-            Some.<List<String>>apply(List.<String>fromArray(new String[]{"name"}));
+    private static final String HIVE_TABLE_TYPE_NAME;
+    private static final String HIVE_TABLE_COLUMNS_ATTRIBUTE_NAME;
+    private static final String HIVE_PROCESS_TYPE_NAME;
+    private static final String HIVE_PROCESS_INPUT_ATTRIBUTE_NAME;
+    private static final String HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME;
+
+    static {
+        // todo - externalize this using type system - dog food
+        try {
+            PropertiesConfiguration conf = new PropertiesConfiguration("application.properties");
+            HIVE_TABLE_TYPE_NAME =
+                    conf.getString("metadata.lineage.hive.table.type.name", "hive_table");
+            HIVE_TABLE_COLUMNS_ATTRIBUTE_NAME =
+                    conf.getString("metadata.lineage.hive.table.column.name", "columns");
+            HIVE_PROCESS_TYPE_NAME =
+                    conf.getString("metadata.lineage.hive.process.type.name", "hive_process");
+            HIVE_PROCESS_INPUT_ATTRIBUTE_NAME =
+                    conf.getString("metadata.lineage.hive.process.inputs.name", "inputTables");
+            HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME =
+                    conf.getString("metadata.lineage.hive.process.outputs.name", "outputTables");
+        } catch (ConfigurationException e) {
+            throw new RuntimeException(e);
+        }
+    }

     private final TitanGraph titanGraph;
     private final DefaultGraphPersistenceStrategy graphPersistenceStrategy;
+    private final GraphBackedDiscoveryService discoveryService;

     @Inject
     HiveLineageService(GraphProvider<TitanGraph> graphProvider,
-                       MetadataRepository metadataRepository) throws DiscoveryException {
+                       MetadataRepository metadataRepository,
+                       GraphBackedDiscoveryService discoveryService) throws DiscoveryException {
         this.titanGraph = graphProvider.get();
         this.graphPersistenceStrategy = new DefaultGraphPersistenceStrategy(metadataRepository);
+        this.discoveryService = discoveryService;
     }

     /**
@@ -82,16 +106,7 @@ public class HiveLineageService implements LineageService {
                     graphPersistenceStrategy, titanGraph);
             Expressions.Expression expression = outputsQuery.expr();
-            Expressions.Expression validatedExpression = QueryProcessor.validate(expression);
-            GremlinQuery gremlinQuery = new GremlinTranslator(
-                    validatedExpression, graphPersistenceStrategy).translate();
-            if (LOG.isDebugEnabled()) {
-                System.out.println("Query = " + validatedExpression);
-                System.out.println("Expression Tree = " + validatedExpression.treeString());
-                System.out.println("Gremlin Query = " + gremlinQuery.queryStr());
-            }
-            return outputsQuery.evaluate().toJson();
+            return discoveryService.evaluate(expression).toJson();
         } catch (Exception e) { // unable to catch ExpressionException
             throw new DiscoveryException("Invalid expression", e);
         }
@@ -115,18 +130,25 @@ public class HiveLineageService implements LineageService {
                     graphPersistenceStrategy, titanGraph);
             Expressions.Expression expression = inputsQuery.expr();
-            Expressions.Expression validatedExpression = QueryProcessor.validate(expression);
-            GremlinQuery gremlinQuery = new GremlinTranslator(
-                    validatedExpression, graphPersistenceStrategy).translate();
-            if (LOG.isDebugEnabled()) {
-                System.out.println("Query = " + validatedExpression);
-                System.out.println("Expression Tree = " + validatedExpression.treeString());
-                System.out.println("Gremlin Query = " + gremlinQuery.queryStr());
-            }
-            return inputsQuery.evaluate().toJson();
+            return discoveryService.evaluate(expression).toJson();
         } catch (Exception e) { // unable to catch ExpressionException
             throw new DiscoveryException("Invalid expression", e);
         }
     }
+
+    /**
+     * Return the schema for the given tableName.
+     *
+     * @param tableName tableName
+     * @return Schema as JSON
+     */
+    @Override
+    public String getSchema(String tableName) throws DiscoveryException {
+        // todo - validate if indeed this is a table type and exists
+        String schemaQuery = HIVE_TABLE_TYPE_NAME
+                + " where name=\"" + tableName + "\", "
+                + HIVE_TABLE_COLUMNS_ATTRIBUTE_NAME;
+                // + " as column select column.name, column.dataType, column.comment";
+        return discoveryService.searchByDSL(schemaQuery);
+    }
 }
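
With the defaults above, getSchema("sales_fact") resolves to the following DSL string before it is handed to discoveryService.searchByDSL (a sketch of the resolved query, assuming no property overrides):

    hive_table where name="sales_fact", columns

The discovery service evaluates this against the graph and returns the hive_column rows that the new tests assert on.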
repository/src/main/java/org/apache/hadoop/metadata/discovery/LineageService.java
@@ -38,4 +38,12 @@ public interface LineageService {
      * @return Inputs as JSON
      */
     String getInputs(String tableName) throws DiscoveryException;
+
+    /**
+     * Return the schema for the given tableName.
+     *
+     * @param tableName tableName
+     * @return Schema as JSON
+     */
+    String getSchema(String tableName) throws DiscoveryException;
 }
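
For callers, the interface now exposes inputs, outputs, and schema in one place. A minimal consumer sketch (the Guice wiring and the table name here are illustrative assumptions, not part of this commit):

    import javax.inject.Inject;

    public class LineageClientExample {
        @Inject
        private LineageService lineageService; // bound to HiveLineageService

        public void printLineage() throws DiscoveryException {
            System.out.println(lineageService.getInputs("sales_fact"));
            System.out.println(lineageService.getOutputs("sales_fact"));
            System.out.println(lineageService.getSchema("sales_fact")); // new in this commit
        }
    }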
repository/src/main/java/org/apache/hadoop/metadata/discovery/graph/GraphBackedDiscoveryService.java
@@ -43,6 +43,7 @@ import scala.util.Either;
 import scala.util.parsing.combinator.Parsers;

 import javax.inject.Inject;
+import javax.inject.Singleton;
 import javax.script.Bindings;
 import javax.script.ScriptEngine;
 import javax.script.ScriptEngineManager;
@@ -56,6 +57,7 @@ import java.util.Map;
 /**
  * Graph backed implementation of Search.
  */
+@Singleton
 public class GraphBackedDiscoveryService implements DiscoveryService {

     private static final Logger LOG = LoggerFactory.getLogger(GraphBackedDiscoveryService.class);
@@ -72,7 +74,10 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
     @Override
     public String searchByFullText(String query) throws DiscoveryException {
-        Iterator iterator = titanGraph.query().has(Constants.ENTITY_TEXT_PROPERTY_KEY, Text.CONTAINS, query).vertices().iterator();
+        Iterator iterator = titanGraph.query()
+                .has(Constants.ENTITY_TEXT_PROPERTY_KEY, Text.CONTAINS, query)
+                .vertices()
+                .iterator();
         JsonArray results = new JsonArray();
         while (iterator.hasNext()) {
             Vertex vertex = (Vertex) iterator.next();
@@ -111,13 +116,13 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
         throw new DiscoveryException("Invalid expression : " + dslQuery);
     }

-    private GremlinQueryResult evaluate(Expressions.Expression expression) {
+    public GremlinQueryResult evaluate(Expressions.Expression expression) {
         Expressions.Expression validatedExpression = QueryProcessor.validate(expression);
         GremlinQuery gremlinQuery = new GremlinTranslator(
                 validatedExpression, graphPersistenceStrategy).translate();
-        LOG.debug("Query = " + validatedExpression);
-        LOG.debug("Expression Tree = " + validatedExpression.treeString());
-        LOG.debug("Gremlin Query = " + gremlinQuery.queryStr());
+        LOG.debug("Query = {}", validatedExpression);
+        LOG.debug("Expression Tree = {}", validatedExpression.treeString());
+        LOG.debug("Gremlin Query = {}", gremlinQuery.queryStr());
         return new GremlinEvaluator(gremlinQuery, graphPersistenceStrategy, titanGraph).evaluate();
     }
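
GraphBackedDiscoveryService is now injected into HiveLineageService as well as the web resources, so the new @Singleton annotation makes them share one instance. A minimal sketch of what that guarantees under Guice (assuming RepositoryMetadataModule provides the bindings, as the tests do via @Guice):

    import com.google.inject.Guice;
    import com.google.inject.Injector;
    import org.apache.hadoop.metadata.RepositoryMetadataModule;
    import org.apache.hadoop.metadata.discovery.graph.GraphBackedDiscoveryService;

    public class SingletonCheck {
        public static void main(String[] args) {
            Injector injector = Guice.createInjector(new RepositoryMetadataModule());
            // With @Singleton, repeated lookups return the same instance.
            GraphBackedDiscoveryService first = injector.getInstance(GraphBackedDiscoveryService.class);
            GraphBackedDiscoveryService second = injector.getInstance(GraphBackedDiscoveryService.class);
            assert first == second;
        }
    }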
repository/src/main/java/org/apache/hadoop/metadata/repository/graph/GraphBackedMetadataRepository.java
@@ -50,6 +50,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import javax.inject.Inject;
+import javax.inject.Singleton;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.util.ArrayList;
@@ -64,6 +65,7 @@ import java.util.concurrent.atomic.AtomicInteger;
  * An implementation backed by a Graph database provided
  * as a Graph Service.
  */
+@Singleton
 public class GraphBackedMetadataRepository implements MetadataRepository {

     private static final Logger LOG =
@@ -431,7 +433,10 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
         return guid;
     }

     private void addFullTextProperty(EntityProcessor entityProcessor,
                                      List<ITypedReferenceableInstance> newTypedInstances)
             throws MetadataException {
         for (ITypedReferenceableInstance typedInstance : newTypedInstances) { // Traverse
             Id id = typedInstance.getId();
             Vertex instanceVertex = entityProcessor.idToVertexMap.get(id);
@@ -440,13 +445,16 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
         }
     }

     private String getFullText(Vertex instanceVertex,
                                boolean followReferences) throws MetadataException {
         String guid = instanceVertex.getProperty(Constants.GUID_PROPERTY_KEY);
         ITypedReferenceableInstance typedReference =
                 graphToInstanceMapper.mapGraphToTypedInstance(guid, instanceVertex);
         return getFullText(typedReference, followReferences);
     }

     private String getFullText(ITypedInstance typedInstance,
                                boolean followReferences) throws MetadataException {
         StringBuilder fullText = new StringBuilder();
         for (AttributeInfo attributeInfo : typedInstance.fieldMapping().fields.values()) {
             Object attrValue = typedInstance.get(attributeInfo.name);
@@ -502,8 +510,7 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
     private List<ITypedReferenceableInstance> discoverInstances(EntityProcessor entityProcessor)
             throws RepositoryException {
         List<ITypedReferenceableInstance> newTypedInstances = new ArrayList<>();
-        for (IReferenceableInstance transientInstance :
-                entityProcessor.idToInstanceMap.values()) {
+        for (IReferenceableInstance transientInstance : entityProcessor.idToInstanceMap.values()) {
             LOG.debug("Discovered instance {}", transientInstance.getTypeName());
             try {
                 ClassType cT = typeSystem.getDataType(
@@ -530,9 +537,9 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
             EntityProcessor entityProcessor,
             List<ITypedReferenceableInstance> newTypedInstances) throws MetadataException {
         String typedInstanceGUID = null;
-        for (ITypedReferenceableInstance typedInstance : newTypedInstances) { // Traverse
-            // over newInstances
+        for (ITypedReferenceableInstance typedInstance : newTypedInstances) {
+            // Traverse over newInstances
             LOG.debug("Adding typed instance {}", typedInstance.getTypeName());
             Id id = typedInstance.getId();
repository/src/test/java/org/apache/hadoop/metadata/TestUtils.java
@@ -19,6 +19,11 @@
 package org.apache.hadoop.metadata;

 import com.google.common.collect.ImmutableList;
+import com.thinkaurelius.titan.core.TitanGraph;
+import com.tinkerpop.blueprints.Edge;
+import com.tinkerpop.blueprints.Vertex;
+import com.tinkerpop.blueprints.util.io.graphson.GraphSONWriter;
+import org.apache.hadoop.metadata.repository.graph.GraphHelper;
 import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
 import org.apache.hadoop.metadata.typesystem.Referenceable;
 import org.apache.hadoop.metadata.typesystem.types.AttributeDefinition;
@@ -34,6 +39,8 @@ import org.apache.hadoop.metadata.typesystem.types.TraitType;
 import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
 import org.testng.Assert;

+import java.io.File;
+
 import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createClassTypeDef;
 import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createOptionalAttrDef;
 import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createRequiredAttrDef;
@@ -49,6 +56,31 @@ public final class TestUtils {
     }

+    /**
+     * Dumps the graph in GSON format in the path returned.
+     *
+     * @param titanGraph handle to graph
+     * @return path to the dump file
+     * @throws Exception
+     */
+    public static String dumpGraph(TitanGraph titanGraph) throws Exception {
+        File tempFile = File.createTempFile("graph", ".gson");
+        System.out.println("tempFile.getPath() = " + tempFile.getPath());
+        GraphSONWriter.outputGraph(titanGraph, tempFile.getPath());
+
+        System.out.println("Vertices:");
+        for (Vertex vertex : titanGraph.getVertices()) {
+            System.out.println(GraphHelper.vertexString(vertex));
+        }
+
+        System.out.println("Edges:");
+        for (Edge edge : titanGraph.getEdges()) {
+            System.out.println(GraphHelper.edgeString(edge));
+        }
+
+        return tempFile.getPath();
+    }
+
     /**
      * Class Hierarchy is:
      * Department(name : String, employees : Array[Person])
      * Person(name : String, department : Department, manager : Manager)
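
A sketch of calling the new helper from a test (it mirrors the commented-out TestUtils.dumpGraph call in HiveLineageServiceTest below; the injected GraphProvider is assumed from the surrounding test harness):

    import com.thinkaurelius.titan.core.TitanGraph;
    import org.apache.hadoop.metadata.TestUtils;
    import org.apache.hadoop.metadata.repository.graph.GraphProvider;

    import javax.inject.Inject;

    public class GraphDumpExample {
        @Inject
        private GraphProvider<TitanGraph> graphProvider;

        void debugDump() throws Exception {
            // Writes a GSON dump to a temp file and prints every vertex and edge.
            String dumpPath = TestUtils.dumpGraph(graphProvider.get());
            System.out.println("graph dumped to " + dumpPath);
        }
    }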
repository/src/test/java/org/apache/hadoop/metadata/discovery/HiveLineageServiceTest.java
@@ -20,14 +20,8 @@ package org.apache.hadoop.metadata.discovery;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
-import com.thinkaurelius.titan.core.TitanGraph;
-import com.tinkerpop.blueprints.Edge;
-import com.tinkerpop.blueprints.Vertex;
-import com.tinkerpop.blueprints.util.io.graphson.GraphSONWriter;
 import org.apache.hadoop.metadata.RepositoryMetadataModule;
 import org.apache.hadoop.metadata.discovery.graph.GraphBackedDiscoveryService;
-import org.apache.hadoop.metadata.repository.graph.GraphHelper;
-import org.apache.hadoop.metadata.repository.graph.GraphProvider;
 import org.apache.hadoop.metadata.services.DefaultMetadataService;
 import org.apache.hadoop.metadata.typesystem.Referenceable;
 import org.apache.hadoop.metadata.typesystem.TypesDef;
@@ -55,8 +49,6 @@ import org.testng.annotations.Guice;
 import org.testng.annotations.Test;

 import javax.inject.Inject;
-import java.io.File;
-import java.util.ArrayList;
 import java.util.List;

 /**
@@ -74,8 +66,8 @@ public class HiveLineageServiceTest {
     @Inject
     private HiveLineageService hiveLineageService;

-    @Inject
-    private GraphProvider<TitanGraph> graphProvider;
+    // @Inject
+    // private GraphProvider<TitanGraph> graphProvider;

     @BeforeClass
     public void setUp() throws Exception {
@@ -84,24 +76,7 @@ public class HiveLineageServiceTest {
         setUpTypes();
         setupInstances();
+        // dumpGraph();
     }

     private void dumpGraph() throws Exception {
-        TitanGraph titanGraph = graphProvider.get();
-        File tempFile = File.createTempFile("graph", ".gson");
-        System.out.println("tempFile.getPath() = " + tempFile.getPath());
-        GraphSONWriter.outputGraph(titanGraph, tempFile.getPath());
-
-        System.out.println("Vertices:");
-        for (Vertex vertex : titanGraph.getVertices()) {
-            System.out.println(GraphHelper.vertexString(vertex));
-        }
-
-        System.out.println("Edges:");
-        for (Edge edge : titanGraph.getEdges()) {
-            System.out.println(GraphHelper.edgeString(edge));
-        }
+        // TestUtils.dumpGraph(graphProvider.get());
     }

     @DataProvider(name = "dslQueriesProvider")
@@ -204,6 +179,34 @@ public class HiveLineageServiceTest {
         Assert.assertTrue(paths.length() > 0);
     }

+    @DataProvider(name = "tableNamesProvider")
+    private Object[][] tableNames() {
+        return new String[][] {
+                {"sales_fact", "4"},
+                {"time_dim", "3"},
+                {"sales_fact_daily_mv", "4"},
+                {"sales_fact_monthly_mv", "4"}
+        };
+    }
+
+    @Test(dataProvider = "tableNamesProvider")
+    public void testGetSchema(String tableName, String expected) throws Exception {
+        JSONObject results = new JSONObject(hiveLineageService.getSchema(tableName));
+        Assert.assertNotNull(results);
+        System.out.println("columns = " + results);
+
+        JSONArray rows = results.getJSONArray("rows");
+        Assert.assertEquals(rows.length(), Integer.parseInt(expected));
+
+        for (int index = 0; index < rows.length(); index++) {
+            final JSONObject row = rows.getJSONObject(index);
+            Assert.assertNotNull(row.getString("name"));
+            Assert.assertNotNull(row.getString("comment"));
+            Assert.assertNotNull(row.getString("dataType"));
+            Assert.assertEquals(row.getString("$typeName$"), "hive_column");
+        }
+    }
+
     private void setUpTypes() throws Exception {
         TypesDef typesDef = createTypeDefinitions();
         String typesAsJSON = TypesSerialization.toJson(typesDef);
@@ -214,6 +217,8 @@ public class HiveLineageServiceTest {
     private static final String HIVE_TABLE_TYPE = "hive_table";
     private static final String COLUMN_TYPE = "hive_column";
     private static final String HIVE_PROCESS_TYPE = "hive_process";
+    private static final String STORAGE_DESC_TYPE = "StorageDesc";
+    private static final String VIEW_TYPE = "View";

     private TypesDef createTypeDefinitions() {
         HierarchicalTypeDefinition<ClassType> dbClsDef
@@ -225,6 +230,15 @@ public class HiveLineageServiceTest {
                 attrDef("createTime", DataTypes.INT_TYPE)
         );

+        HierarchicalTypeDefinition<ClassType> storageDescClsDef =
+                TypesUtil.createClassTypeDef(STORAGE_DESC_TYPE, null,
+                        attrDef("location", DataTypes.STRING_TYPE),
+                        attrDef("inputFormat", DataTypes.STRING_TYPE),
+                        attrDef("outputFormat", DataTypes.STRING_TYPE),
+                        attrDef("compressed", DataTypes.STRING_TYPE,
+                                Multiplicity.REQUIRED, false, null)
+                );
+
         HierarchicalTypeDefinition<ClassType> columnClsDef =
                 TypesUtil.createClassTypeDef(COLUMN_TYPE, null,
                         attrDef("name", DataTypes.STRING_TYPE),
@@ -241,6 +255,10 @@ public class HiveLineageServiceTest {
                         attrDef("lastAccessTime", DataTypes.INT_TYPE),
                         attrDef("tableType", DataTypes.STRING_TYPE),
+                        attrDef("temporary", DataTypes.BOOLEAN_TYPE),
                         new AttributeDefinition("db", DATABASE_TYPE,
                                 Multiplicity.REQUIRED, false, null),
+                        new AttributeDefinition("sd", STORAGE_DESC_TYPE,
+                                Multiplicity.REQUIRED, false, null),
                         new AttributeDefinition("columns", DataTypes.arrayTypeName(COLUMN_TYPE),
                                 Multiplicity.COLLECTION, true, null)
@@ -264,6 +282,16 @@ public class HiveLineageServiceTest {
                 attrDef("queryGraph", DataTypes.STRING_TYPE, Multiplicity.REQUIRED)
         );

+        HierarchicalTypeDefinition<ClassType> viewClsDef =
+                TypesUtil.createClassTypeDef(VIEW_TYPE, null,
+                        attrDef("name", DataTypes.STRING_TYPE),
+                        new AttributeDefinition("db", DATABASE_TYPE,
+                                Multiplicity.REQUIRED, false, null),
+                        new AttributeDefinition("inputTables", DataTypes.arrayTypeName(HIVE_TABLE_TYPE),
+                                Multiplicity.COLLECTION, false, null)
+                );
+
         HierarchicalTypeDefinition<TraitType> dimTraitDef =
                 TypesUtil.createTraitTypeDef("Dimension", null);
@@ -279,11 +307,16 @@ public class HiveLineageServiceTest {
         HierarchicalTypeDefinition<TraitType> piiTraitDef =
                 TypesUtil.createTraitTypeDef("PII", null);

+        HierarchicalTypeDefinition<TraitType> jdbcTraitDef =
+                TypesUtil.createTraitTypeDef("JdbcAccess", null);
+
         return TypeUtils.getTypesDef(
                 ImmutableList.<EnumTypeDefinition>of(),
                 ImmutableList.<StructTypeDefinition>of(),
-                ImmutableList.of(dimTraitDef, factTraitDef, metricTraitDef, etlTraitDef, piiTraitDef),
-                ImmutableList.of(dbClsDef, columnClsDef, tblClsDef, loadProcessClsDef)
+                ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef,
+                        metricTraitDef, etlTraitDef, jdbcTraitDef),
+                ImmutableList.of(dbClsDef, storageDescClsDef, columnClsDef,
+                        tblClsDef, loadProcessClsDef, viewClsDef)
         );
     }
@@ -306,45 +339,72 @@ public class HiveLineageServiceTest {
         Id salesDB = database("Sales", "Sales Database", "John ETL",
                 "hdfs://host:8000/apps/warehouse/sales");

-        ArrayList<Referenceable> salesFactColumns = new ArrayList<>();
-        salesFactColumns.add(column("time_id", "int", "time id"));
-        salesFactColumns.add(column("product_id", "int", "product id"));
-        salesFactColumns.add(column("customer_id", "int", "customer id", "PII"));
-        salesFactColumns.add(column("sales", "double", "product id", "Metric"));
+        Referenceable sd = storageDescriptor("hdfs://host:8000/apps/warehouse/sales",
+                "TextInputFormat", "TextOutputFormat", true);
+
+        List<Referenceable> salesFactColumns = ImmutableList.of(
+                column("time_id", "int", "time id"),
+                column("product_id", "int", "product id"),
+                column("customer_id", "int", "customer id", "PII"),
+                column("sales", "double", "product id", "Metric")
+        );

         Id salesFact = table("sales_fact", "sales fact table",
-                salesDB, "Joe", "Managed", salesFactColumns, "Fact");
+                salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact");

-        ArrayList<Referenceable> timeDimColumns = new ArrayList<>();
-        timeDimColumns.add(column("time_id", "int", "time id"));
-        timeDimColumns.add(column("dayOfYear", "int", "day Of Year"));
-        timeDimColumns.add(column("weekDay", "int", "week Day"));
+        List<Referenceable> timeDimColumns = ImmutableList.of(
+                column("time_id", "int", "time id"),
+                column("dayOfYear", "int", "day Of Year"),
+                column("weekDay", "int", "week Day")
+        );

         Id timeDim = table("time_dim", "time dimension table",
-                salesDB, "John Doe", "External", timeDimColumns, "Dimension");
+                salesDB, sd, "John Doe", "External", timeDimColumns, "Dimension");

         Id reportingDB = database("Reporting", "reporting database", "Jane BI",
                 "hdfs://host:8000/apps/warehouse/reporting");

         Id salesFactDaily = table("sales_fact_daily_mv",
                 "sales fact daily materialized view",
-                reportingDB, "Joe BI", "Managed", salesFactColumns, "Metric");
+                reportingDB, sd, "Joe BI", "Managed", salesFactColumns, "Metric");

         Id loadSalesFactDaily = loadProcess("loadSalesDaily", "John ETL",
                 ImmutableList.of(salesFact, timeDim), ImmutableList.of(salesFactDaily),
                 "create table as select ", "plan", "id", "graph",
                 "ETL");
         System.out.println("added loadSalesFactDaily = " + loadSalesFactDaily);

+        List<Referenceable> productDimColumns = ImmutableList.of(
+                column("product_id", "int", "product id"),
+                column("product_name", "string", "product name"),
+                column("brand_name", "int", "brand name")
+        );
+
+        Id productDim = table("product_dim", "product dimension table",
+                salesDB, sd, "John Doe", "Managed", productDimColumns, "Dimension");
+
+        view("product_dim_view", reportingDB,
+                ImmutableList.of(productDim), "Dimension", "JdbcAccess");
+
+        List<Referenceable> customerDimColumns = ImmutableList.of(
+                column("customer_id", "int", "customer id", "PII"),
+                column("name", "string", "customer name", "PII"),
+                column("address", "string", "customer address", "PII")
+        );
+
+        Id customerDim = table("customer_dim", "customer dimension table",
+                salesDB, sd, "fetl", "External", customerDimColumns, "Dimension");
+
+        view("customer_dim_view", reportingDB,
+                ImmutableList.of(customerDim), "Dimension", "JdbcAccess");
+
         Id salesFactMonthly = table("sales_fact_monthly_mv",
                 "sales fact monthly materialized view",
-                reportingDB, "Jane BI", "Managed", salesFactColumns, "Metric");
+                reportingDB, sd, "Jane BI", "Managed", salesFactColumns, "Metric");

         Id loadSalesFactMonthly = loadProcess("loadSalesMonthly", "John ETL",
                 ImmutableList.of(salesFactDaily), ImmutableList.of(salesFactMonthly),
                 "create table as select ", "plan", "id", "graph",
                 "ETL");
         System.out.println("added loadSalesFactMonthly = " + loadSalesFactMonthly);
     }

     Id database(String name, String description,
@@ -360,6 +420,18 @@ public class HiveLineageServiceTest {
         return createInstance(referenceable);
     }

+    Referenceable storageDescriptor(String location, String inputFormat,
+                                    String outputFormat, boolean compressed) throws Exception {
+        Referenceable referenceable = new Referenceable(STORAGE_DESC_TYPE);
+        referenceable.set("location", location);
+        referenceable.set("inputFormat", inputFormat);
+        referenceable.set("outputFormat", outputFormat);
+        referenceable.set("compressed", compressed);
+
+        return referenceable;
+    }
+
     Referenceable column(String name, String dataType, String comment,
                          String... traitNames) throws Exception {
         Referenceable referenceable = new Referenceable(COLUMN_TYPE, traitNames);
@@ -370,7 +442,8 @@ public class HiveLineageServiceTest {
         return referenceable;
     }

-    Id table(String name, String description, Id dbId,
+    Id table(String name, String description,
+             Id dbId, Referenceable sd,
              String owner, String tableType,
              List<Referenceable> columns,
              String... traitNames) throws Exception {
@@ -384,6 +457,9 @@ public class HiveLineageServiceTest {
         referenceable.set("retention", System.currentTimeMillis());
         referenceable.set("db", dbId);
+        // todo: fix this bug with object walker
+        // referenceable.set("sd", sd);
+        referenceable.set("sd", createInstance(sd));
         referenceable.set("columns", columns);

         return createInstance(referenceable);
@@ -412,6 +488,18 @@ public class HiveLineageServiceTest {
         return createInstance(referenceable);
     }

+    Id view(String name, Id dbId,
+            List<Id> inputTables,
+            String... traitNames) throws Exception {
+        Referenceable referenceable = new Referenceable(VIEW_TYPE, traitNames);
+        referenceable.set("name", name);
+        referenceable.set("db", dbId);
+
+        referenceable.set("inputTables", inputTables);
+
+        return createInstance(referenceable);
+    }
+
     private Id createInstance(Referenceable referenceable) throws Exception {
         String typeName = referenceable.getTypeName();
         System.out.println("creating instance of type " + typeName);
repository/src/test/resources/application.properties
@@ -27,9 +27,16 @@ metadata.graph.index.search.elasticsearch.client-only=false
 metadata.graph.index.search.elasticsearch.local-mode=true

+######### Hive Lineage Configs #########
+metadata.lineage.hive.table.type.name=hive_table
+metadata.lineage.hive.column.type.name=hive_column
+metadata.lineage.hive.table.column.name=columns
+metadata.lineage.hive.process.type.name=hive_process
+metadata.lineage.hive.process.inputs.name=inputTables
+metadata.lineage.hive.process.outputs.name=outputTables
+
 ######### Security Properties #########

 # SSL config
 metadata.enableTLS=false

 ######### Security Properties #########
src/conf/application.properties
@@ -28,6 +28,16 @@ metadata.graph.index.search.elasticsearch.client-only=false
 metadata.graph.index.search.elasticsearch.local-mode=true

+######### Hive Lineage Configs #########
+# This model follows the quick-start guide
+metadata.lineage.hive.table.type.name=Table
+metadata.lineage.hive.column.type.name=Column
+metadata.lineage.hive.table.column.name=columns
+metadata.lineage.hive.process.type.name=LoadProcess
+metadata.lineage.hive.process.inputs.name=inputTables
+metadata.lineage.hive.process.outputs.name=outputTables
+
 ######### Security Properties #########

 # SSL config
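
Because this file ships the quick-start type names rather than the hive_* defaults, the same getSchema("sales_fact") call resolves to a different DSL under this configuration (a sketch; it matches the schema query QuickStart itself issues):

    Table where name="sales_fact", columns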
webapp/src/main/java/org/apache/hadoop/metadata/examples/QuickStart.java
@@ -40,7 +40,6 @@ import org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil;
 import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONObject;

-import java.util.ArrayList;
 import java.util.List;

 /**
@@ -77,7 +76,7 @@ public class QuickStart {
     private static final String COLUMN_TYPE = "Column";
     private static final String TABLE_TYPE = "Table";
     private static final String VIEW_TYPE = "View";
-    private static final String LOAD_PROCESS_TYPE = "hive_process";
+    private static final String LOAD_PROCESS_TYPE = "LoadProcess";
     private static final String STORAGE_DESC_TYPE = "StorageDesc";

     private static final String[] TYPES = {
@@ -135,7 +134,7 @@ public class QuickStart {
                         new AttributeDefinition("db", DATABASE_TYPE,
                                 Multiplicity.REQUIRED, false, null),
                         new AttributeDefinition("sd", STORAGE_DESC_TYPE,
-                                Multiplicity.REQUIRED, false, null),
+                                Multiplicity.OPTIONAL, false, null),
                         attrDef("owner", DataTypes.STRING_TYPE),
                         attrDef("createTime", DataTypes.INT_TYPE),
                         attrDef("lastAccessTime", DataTypes.INT_TYPE),
@@ -228,36 +227,40 @@ public class QuickStart {
         Referenceable sd = rawStorageDescriptor("hdfs://host:8000/apps/warehouse/sales",
                 "TextInputFormat", "TextOutputFormat", true);

-        ArrayList<Referenceable> salesFactColumns = new ArrayList<>();
-        salesFactColumns.add(rawColumn("time_id", "int", "time id"));
-        salesFactColumns.add(rawColumn("product_id", "int", "product id"));
-        salesFactColumns.add(rawColumn("customer_id", "int", "customer id", "PII"));
-        salesFactColumns.add(rawColumn("sales", "double", "product id", "Metric"));
+        List<Referenceable> salesFactColumns = ImmutableList.of(
+                rawColumn("time_id", "int", "time id"),
+                rawColumn("product_id", "int", "product id"),
+                rawColumn("customer_id", "int", "customer id", "PII"),
+                rawColumn("sales", "double", "product id", "Metric")
+        );

         Id salesFact = table("sales_fact", "sales fact table", salesDB, sd,
                 "Joe", "Managed", salesFactColumns, "Fact");

-        ArrayList<Referenceable> productDimColumns = new ArrayList<>();
-        productDimColumns.add(rawColumn("product_id", "int", "product id"));
-        productDimColumns.add(rawColumn("product_name", "string", "product name"));
-        productDimColumns.add(rawColumn("brand_name", "int", "brand name"));
+        List<Referenceable> productDimColumns = ImmutableList.of(
+                rawColumn("product_id", "int", "product id"),
+                rawColumn("product_name", "string", "product name"),
+                rawColumn("brand_name", "int", "brand name")
+        );

         Id productDim = table("product_dim", "product dimension table", salesDB, sd,
                 "John Doe", "Managed", productDimColumns, "Dimension");

-        ArrayList<Referenceable> timeDimColumns = new ArrayList<>();
-        timeDimColumns.add(rawColumn("time_id", "int", "time id"));
-        timeDimColumns.add(rawColumn("dayOfYear", "int", "day Of Year"));
-        timeDimColumns.add(rawColumn("weekDay", "int", "week Day"));
+        List<Referenceable> timeDimColumns = ImmutableList.of(
+                rawColumn("time_id", "int", "time id"),
+                rawColumn("dayOfYear", "int", "day Of Year"),
+                rawColumn("weekDay", "int", "week Day")
+        );

         Id timeDim = table("time_dim", "time dimension table", salesDB, sd,
                 "John Doe", "External", timeDimColumns, "Dimension");

-        ArrayList<Referenceable> customerDimColumns = new ArrayList<>();
-        customerDimColumns.add(rawColumn("customer_id", "int", "customer id", "PII"));
-        customerDimColumns.add(rawColumn("name", "string", "customer name", "PII"));
-        customerDimColumns.add(rawColumn("address", "string", "customer address", "PII"));
+        List<Referenceable> customerDimColumns = ImmutableList.of(
+                rawColumn("customer_id", "int", "customer id", "PII"),
+                rawColumn("name", "string", "customer name", "PII"),
+                rawColumn("address", "string", "customer address", "PII")
+        );

         Id customerDim = table("customer_dim", "customer dimension table", salesDB, sd,
                 "fetl", "External", customerDimColumns, "Dimension");
@@ -270,29 +273,25 @@ public class QuickStart {
                 "sales fact daily materialized view", reportingDB, sd,
                 "Joe BI", "Managed", salesFactColumns, "Metric");

         Id loadSalesFactDaily = loadProcess("loadSalesDaily", "John ETL",
                 ImmutableList.of(salesFact, timeDim), ImmutableList.of(salesFactDaily),
                 "create table as select ", "plan", "id", "graph",
                 "ETL");
         System.out.println("added loadSalesFactDaily = " + loadSalesFactDaily);

         Id productDimView = view("product_dim_view", reportingDB,
                 ImmutableList.of(productDim), "Dimension", "JdbcAccess");
         System.out.println("added productDimView = " + productDimView);

         Id customerDimView = view("customer_dim_view", reportingDB,
                 ImmutableList.of(customerDim), "Dimension", "JdbcAccess");
         System.out.println("added customerDimView = " + customerDimView);

         Id salesFactMonthly = table("sales_fact_monthly_mv",
                 "sales fact monthly materialized view", reportingDB, sd,
                 "Jane BI", "Managed", salesFactColumns, "Metric");

         Id loadSalesFactMonthly = loadProcess("loadSalesMonthly", "John ETL",
                 ImmutableList.of(salesFactDaily), ImmutableList.of(salesFactMonthly),
                 "create table as select ", "plan", "id", "graph",
                 "ETL");
         System.out.println("added loadSalesFactMonthly = " + loadSalesFactMonthly);
     }

     private Id createInstance(Referenceable referenceable) throws Exception {
@@ -357,7 +356,8 @@ public class QuickStart {
         referenceable.set("lastAccessTime", System.currentTimeMillis());
         referenceable.set("retention", System.currentTimeMillis());
         referenceable.set("db", dbId);
-        referenceable.set("sd", sd);
+        // todo: fix this bug with object walker
+        // referenceable.set("sd", sd);
         referenceable.set("columns", columns);

         return createInstance(referenceable);
@@ -464,6 +464,7 @@ public class QuickStart {
             "Table as _loop0 loop (hive_process outputTables) withPath",
             "Table as src loop (hive_process outputTables) as dest select src.name as srcTable, dest.name as destTable withPath",
             */
+            "Table where name=\"sales_fact\", columns",
             "Table where name=\"sales_fact\", columns as column select column.name, column.dataType, column.comment",
     };
 }
webapp/src/main/java/org/apache/hadoop/metadata/web/resources/HiveLineageResource.java
@@ -130,4 +130,38 @@ public class HiveLineageResource {
                     Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR));
         }
     }
+
+    /**
+     * Return the schema for the given tableName.
+     *
+     * @param tableName table name
+     */
+    @GET
+    @Path("schema/{tableName}")
+    @Consumes(MediaType.APPLICATION_JSON)
+    @Produces(MediaType.APPLICATION_JSON)
+    public Response schema(@Context HttpServletRequest request,
+                           @PathParam("tableName") String tableName) {
+        Preconditions.checkNotNull(tableName, "table name cannot be null");
+        LOG.info("Fetching schema for tableName={}", tableName);
+
+        try {
+            final String jsonResult = lineageService.getSchema(tableName);
+
+            JSONObject response = new JSONObject();
+            response.put(MetadataServiceClient.REQUEST_ID, Servlets.getRequestId());
+            response.put("tableName", tableName);
+            response.put(MetadataServiceClient.RESULTS, new JSONObject(jsonResult));
+
+            return Response.ok(response).build();
+        } catch (DiscoveryException e) {
+            LOG.error("Unable to get schema for table {}", tableName, e);
+            throw new WebApplicationException(
+                    Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST));
+        } catch (JSONException e) {
+            LOG.error("Unable to get schema for table {}", tableName, e);
+            throw new WebApplicationException(
+                    Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR));
+        }
+    }
 }
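
A minimal Jersey-client sketch of exercising the new endpoint (modeled on the testSchema integration test below; http://localhost:21000/ is the test default base URL, not a guaranteed deployment address):

    import com.sun.jersey.api.client.Client;
    import com.sun.jersey.api.client.ClientResponse;
    import com.sun.jersey.api.client.WebResource;

    import javax.ws.rs.core.MediaType;

    public class SchemaApiExample {
        public static void main(String[] args) {
            WebResource service = Client.create().resource("http://localhost:21000/");
            ClientResponse clientResponse = service
                    .path("api/metadata/lineage/hive/schema")
                    .path("sales_fact")
                    .accept(MediaType.APPLICATION_JSON)
                    .get(ClientResponse.class);
            // Expected body shape: {"requestId": ..., "tableName": "sales_fact", "results": {...}}
            System.out.println(clientResponse.getEntity(String.class));
        }
    }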
webapp/src/main/resources/application.properties
@@ -25,9 +25,16 @@ metadata.graph.index.search.backend=lucene
 metadata.graph.index.search.directory=webapp/target/data/lucene

+######### Hive Lineage Configs #########
+metadata.lineage.hive.table.type.name=hive_table
+metadata.lineage.hive.column.type.name=hive_column
+metadata.lineage.hive.table.column.name=columns
+metadata.lineage.hive.process.type.name=hive_process
+metadata.lineage.hive.process.inputs.name=inputTables
+metadata.lineage.hive.process.outputs.name=outputTables
+
 ######### Security Properties #########

 # SSL config
 metadata.enableTLS=false

 ######### Security Properties #########
webapp/src/test/java/org/apache/hadoop/metadata/web/resources/BaseResourceIT.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.metadata.typesystem.Referenceable;
 import org.apache.hadoop.metadata.typesystem.TypesDef;
 import org.apache.hadoop.metadata.typesystem.json.InstanceSerialization;
 import org.apache.hadoop.metadata.typesystem.json.TypesSerialization;
+import org.apache.hadoop.metadata.typesystem.persistence.Id;
 import org.codehaus.jettison.json.JSONObject;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
@@ -44,7 +45,7 @@ public abstract class BaseResourceIT {
     protected WebResource service;
     protected MetadataServiceClient serviceClient;

-    public static String baseUrl = "http://localhost:21000/";;
+    public static String baseUrl = "http://localhost:21000/";

     @BeforeClass
     public void setUp() throws Exception {
@@ -80,7 +81,7 @@ public abstract class BaseResourceIT {
         Assert.assertNotNull(response.get(MetadataServiceClient.REQUEST_ID));
     }

-    protected Referenceable createInstance(Referenceable referenceable) throws Exception {
+    protected Id createInstance(Referenceable referenceable) throws Exception {
         String typeName = referenceable.getTypeName();
         System.out.println("creating instance of type " + typeName);
@@ -91,6 +92,6 @@ public abstract class BaseResourceIT {
         System.out.println("created instance for type " + typeName + ", guid: " + guid);

         // return the reference to created instance with guid
-        return new Referenceable(guid, referenceable.getTypeName(), referenceable.getValuesMap());
+        return new Id(guid, 0, referenceable.getTypeName());
     }
 }
webapp/src/test/java/org/apache/hadoop/metadata/web/resources/EntityJerseyResourceIT.java
@@ -68,6 +68,7 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
     private static final String TABLE_NAME = "bar";

     private Referenceable tableInstance;
+    private Id tableId;

     @BeforeClass
     public void setUp() throws Exception {
@@ -79,8 +80,9 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
     @Test
     public void testSubmitEntity() throws Exception {
         tableInstance = createHiveTableInstance();
+        tableId = createInstance(tableInstance);

-        String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         try {
             Assert.assertNotNull(UUID.fromString(guid));
         } catch (IllegalArgumentException e) {
@@ -88,18 +90,9 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
         }
     }

-    private String getGuid(Referenceable referenceable) throws Exception {
-        Id id = referenceable.getId();
-        Assert.assertNotNull(id);
-
-        String guid = id.id;
-        Assert.assertNotNull(guid);
-        return guid;
-    }
-
     @Test(dependsOnMethods = "testSubmitEntity")
     public void testAddProperty() throws Exception {
-        String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         //add property
         String description = "bar table - new desc";
         ClientResponse clientResponse = addProperty(guid, "description", description);
@@ -131,21 +124,18 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
         databaseInstance.set("name", "newdb");
         databaseInstance.set("description", "new database");

-        // ClassType classType = typeSystem.getDataType(ClassType.class, DATABASE_TYPE);
-        // ITypedReferenceableInstance dbInstance = classType.convert(databaseInstance, Multiplicity.REQUIRED);
-
-        Referenceable dbInstance = createInstance(databaseInstance);
-        String dbId = getGuid(dbInstance);
+        Id dbInstance = createInstance(databaseInstance);
+        String dbId = dbInstance._getId();

         //Add reference property
-        String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         ClientResponse clientResponse = addProperty(guid, "database", dbId);
         Assert.assertEquals(clientResponse.getStatus(), Response.Status.OK.getStatusCode());
     }

     @Test(dependsOnMethods = "testSubmitEntity")
     public void testGetEntityDefinition() throws Exception {
-        String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         ClientResponse clientResponse = getEntityDefinition(guid);
         Assert.assertEquals(clientResponse.getStatus(), Response.Status.OK.getStatusCode());
@@ -274,7 +264,7 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
     @Test(dependsOnMethods = "testSubmitEntity")
     public void testGetTraitNames() throws Exception {
-        String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         ClientResponse clientResponse = service
                 .path("api/metadata/entities/traits/list")
                 .path(guid)
@@ -307,7 +297,7 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
         String traitInstanceAsJSON = InstanceSerialization.toJson(traitInstance, true);
         LOG.debug("traitInstanceAsJSON = " + traitInstanceAsJSON);

-        String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         ClientResponse clientResponse = service
                 .path("api/metadata/entities/traits/add")
                 .path(guid)
@@ -350,7 +340,7 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
     @Test(dependsOnMethods = "testAddTrait")
     public void testDeleteTrait() throws Exception {
         final String traitName = "PII_Trait";
-        final String guid = getGuid(tableInstance);
+        final String guid = tableId._getId();
         ClientResponse clientResponse = service
                 .path("api/metadata/entities/traits/delete")
@@ -478,6 +468,6 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
         List<String> traits = tableInstance.getTraits();
         Assert.assertEquals(traits.size(), 7);

-        return createInstance(tableInstance);
+        return tableInstance;
     }
 }
webapp/src/test/java/org/apache/hadoop/metadata/web/resources/HiveLineageJerseyResourceIT.java
@@ -46,7 +46,6 @@ import org.testng.annotations.Test;
 import javax.ws.rs.HttpMethod;
 import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
-import java.util.ArrayList;
 import java.util.List;

 /**
@@ -122,15 +121,67 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
         Assert.assertTrue(paths.length() > 0);
     }

+    @Test
+    public void testSchema() throws Exception {
+        WebResource resource = service
+                .path("api/metadata/lineage/hive/schema")
+                .path("sales_fact");
+
+        ClientResponse clientResponse = resource
+                .accept(MediaType.APPLICATION_JSON)
+                .type(MediaType.APPLICATION_JSON)
+                .method(HttpMethod.GET, ClientResponse.class);
+        Assert.assertEquals(clientResponse.getStatus(), Response.Status.OK.getStatusCode());
+
+        String responseAsString = clientResponse.getEntity(String.class);
+        Assert.assertNotNull(responseAsString);
+        System.out.println("schema = " + responseAsString);
+
+        JSONObject response = new JSONObject(responseAsString);
+        Assert.assertNotNull(response.get(MetadataServiceClient.REQUEST_ID));
+
+        JSONObject results = response.getJSONObject(MetadataServiceClient.RESULTS);
+        Assert.assertNotNull(results);
+
+        JSONArray rows = results.getJSONArray("rows");
+        Assert.assertEquals(rows.length(), 4);
+
+        for (int index = 0; index < rows.length(); index++) {
+            final JSONObject row = rows.getJSONObject(index);
+            Assert.assertNotNull(row.getString("name"));
+            Assert.assertNotNull(row.getString("comment"));
+            Assert.assertNotNull(row.getString("dataType"));
+            Assert.assertEquals(row.getString("$typeName$"), "hive_column");
+        }
+    }
+
     private void setUpTypes() throws Exception {
         TypesDef typesDef = createTypeDefinitions();
         createType(typesDef);
     }

+    private static final String DATABASE_TYPE = "hive_db";
     private static final String HIVE_TABLE_TYPE = "hive_table";
+    private static final String COLUMN_TYPE = "hive_column";
     private static final String HIVE_PROCESS_TYPE = "hive_process";

     private TypesDef createTypeDefinitions() {
+        HierarchicalTypeDefinition<ClassType> dbClsDef =
+                TypesUtil.createClassTypeDef(DATABASE_TYPE, null,
+                        attrDef("name", DataTypes.STRING_TYPE),
+                        attrDef("description", DataTypes.STRING_TYPE),
+                        attrDef("locationUri", DataTypes.STRING_TYPE),
+                        attrDef("owner", DataTypes.STRING_TYPE),
+                        attrDef("createTime", DataTypes.INT_TYPE)
+                );
+
+        HierarchicalTypeDefinition<ClassType> columnClsDef =
+                TypesUtil.createClassTypeDef(COLUMN_TYPE, null,
+                        attrDef("name", DataTypes.STRING_TYPE),
+                        attrDef("dataType", DataTypes.STRING_TYPE),
+                        attrDef("comment", DataTypes.STRING_TYPE)
+                );
+
         HierarchicalTypeDefinition<ClassType> tblClsDef =
                 TypesUtil.createClassTypeDef(HIVE_TABLE_TYPE, null,
                         attrDef("name", DataTypes.STRING_TYPE),
@@ -139,7 +190,12 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
                         attrDef("createTime", DataTypes.INT_TYPE),
                         attrDef("lastAccessTime", DataTypes.INT_TYPE),
                         attrDef("tableType", DataTypes.STRING_TYPE),
-                        attrDef("temporary", DataTypes.BOOLEAN_TYPE)
+                        attrDef("temporary", DataTypes.BOOLEAN_TYPE),
+                        new AttributeDefinition("db", DATABASE_TYPE,
+                                Multiplicity.REQUIRED, false, null),
+                        new AttributeDefinition("columns", DataTypes.arrayTypeName(COLUMN_TYPE),
+                                Multiplicity.COLLECTION, true, null)
                 );

         HierarchicalTypeDefinition<ClassType> loadProcessClsDef =
@@ -172,11 +228,15 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
         HierarchicalTypeDefinition<TraitType> etlTraitDef =
                 TypesUtil.createTraitTypeDef("ETL", null);

+        HierarchicalTypeDefinition<TraitType> piiTraitDef =
+                TypesUtil.createTraitTypeDef("PII", null);
+
         return TypeUtils.getTypesDef(
                 ImmutableList.<EnumTypeDefinition>of(),
                 ImmutableList.<StructTypeDefinition>of(),
-                ImmutableList.of(dimTraitDef, factTraitDef, metricTraitDef, etlTraitDef),
-                ImmutableList.of(tblClsDef, loadProcessClsDef)
+                ImmutableList.of(dimTraitDef, factTraitDef,
+                        metricTraitDef, etlTraitDef, piiTraitDef),
+                ImmutableList.of(dbClsDef, columnClsDef, tblClsDef, loadProcessClsDef)
         );
     }
@@ -196,35 +256,76 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
     }

     private void setupInstances() throws Exception {
-        Referenceable salesFact = table("sales_fact", "sales fact table",
-                "Joe", "Managed", "Fact");
+        Id salesDB = database("Sales", "Sales Database", "John ETL",
+                "hdfs://host:8000/apps/warehouse/sales");
+
+        List<Referenceable> salesFactColumns = ImmutableList.of(
+                column("time_id", "int", "time id"),
+                column("product_id", "int", "product id"),
+                column("customer_id", "int", "customer id", "PII"),
+                column("sales", "double", "product id", "Metric")
+        );
+
+        Id salesFact = table("sales_fact", "sales fact table",
+                salesDB, "Joe", "Managed", salesFactColumns, "Fact");

-        Referenceable timeDim = table("time_dim", "time dimension table",
-                "John Doe", "External", "Dimension");
+        List<Referenceable> timeDimColumns = ImmutableList.of(
+                column("time_id", "int", "time id"),
+                column("dayOfYear", "int", "day Of Year"),
+                column("weekDay", "int", "week Day")
+        );
+
+        Id timeDim = table("time_dim", "time dimension table",
+                salesDB, "John Doe", "External", timeDimColumns, "Dimension");

-        Referenceable salesFactDaily = table("sales_fact_daily_mv",
-                "sales fact daily materialized view",
-                "Joe BI", "Managed", "Metric");
+        Id reportingDB = database("Reporting", "reporting database", "Jane BI",
+                "hdfs://host:8000/apps/warehouse/reporting");
+
+        Id salesFactDaily = table("sales_fact_daily_mv",
+                "sales fact daily materialized view",
+                reportingDB, "Joe BI", "Managed", salesFactColumns, "Metric");

-        Referenceable loadSalesFactDaily = loadProcess("loadSalesDaily", "John ETL",
+        Id loadSalesFactDaily = loadProcess("loadSalesDaily", "John ETL",
                 ImmutableList.of(salesFact, timeDim), ImmutableList.of(salesFactDaily),
                 "create table as select ", "plan", "id", "graph",
                 "ETL");
         System.out.println("added loadSalesFactDaily = " + loadSalesFactDaily);

-        Referenceable salesFactMonthly = table("sales_fact_monthly_mv",
-                "sales fact monthly materialized view",
-                "Jane BI", "Managed", "Metric");
+        Id salesFactMonthly = table("sales_fact_monthly_mv",
+                "sales fact monthly materialized view",
+                reportingDB, "Jane BI", "Managed", salesFactColumns, "Metric");

-        Referenceable loadSalesFactMonthly = loadProcess("loadSalesMonthly", "John ETL",
+        Id loadSalesFactMonthly = loadProcess("loadSalesMonthly", "John ETL",
                 ImmutableList.of(salesFactDaily), ImmutableList.of(salesFactMonthly),
                 "create table as select ", "plan", "id", "graph",
                 "ETL");
         System.out.println("added loadSalesFactMonthly = " + loadSalesFactMonthly);
     }

-    Referenceable table(String name, String description,
-                        String owner, String tableType,
-                        String... traitNames) throws Exception {
+    Id database(String name, String description, String owner, String locationUri,
+                String... traitNames) throws Exception {
+        Referenceable referenceable = new Referenceable(DATABASE_TYPE, traitNames);
+        referenceable.set("name", name);
+        referenceable.set("description", description);
+        referenceable.set("owner", owner);
+        referenceable.set("locationUri", locationUri);
+        referenceable.set("createTime", System.currentTimeMillis());
+
+        return createInstance(referenceable);
+    }
+
+    Referenceable column(String name, String dataType, String comment,
+                         String... traitNames) throws Exception {
+        Referenceable referenceable = new Referenceable(COLUMN_TYPE, traitNames);
+        referenceable.set("name", name);
+        referenceable.set("dataType", dataType);
+        referenceable.set("comment", comment);
+
+        return referenceable;
+    }
+
+    Id table(String name, String description,
+             Id dbId, String owner, String tableType,
+             List<Referenceable> columns,
+             String... traitNames) throws Exception {
         Referenceable referenceable = new Referenceable(HIVE_TABLE_TYPE, traitNames);
         referenceable.set("name", name);
@@ -235,12 +336,15 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
         referenceable.set("lastAccessTime", System.currentTimeMillis());
         referenceable.set("retention", System.currentTimeMillis());
+        referenceable.set("db", dbId);
+        referenceable.set("columns", columns);

         return createInstance(referenceable);
     }

-    Referenceable loadProcess(String name, String user,
-                              List<Referenceable> inputTables,
-                              List<Referenceable> outputTables,
+    Id loadProcess(String name, String user,
+                   List<Id> inputTables,
+                   List<Id> outputTables,
                   String queryText, String queryPlan,
                   String queryId, String queryGraph,
                   String... traitNames) throws Exception {
@@ -250,17 +354,8 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
         referenceable.set("startTime", System.currentTimeMillis());
         referenceable.set("endTime", System.currentTimeMillis() + 10000);

-        ArrayList<Id> inputTableIds = new ArrayList<>();
-        for (Referenceable inputTable : inputTables) {
-            inputTableIds.add(inputTable.getId());
-        }
-        referenceable.set("inputTables", inputTableIds);
-
-        ArrayList<Id> outputTableIds = new ArrayList<>();
-        for (Referenceable outputTable : outputTables) {
-            outputTableIds.add(outputTable.getId());
-        }
-        referenceable.set("outputTables", outputTableIds);
+        referenceable.set("inputTables", inputTables);
+        referenceable.set("outputTables", outputTables);

         referenceable.set("queryText", queryText);
         referenceable.set("queryPlan", queryPlan);
webapp/src/test/java/org/apache/hadoop/metadata/web/resources/MetadataDiscoveryJerseyResourceIT.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.metadata.MetadataServiceClient;
 import org.apache.hadoop.metadata.typesystem.Referenceable;
 import org.apache.hadoop.metadata.typesystem.Struct;
 import org.apache.hadoop.metadata.typesystem.TypesDef;
+import org.apache.hadoop.metadata.typesystem.persistence.Id;
 import org.apache.hadoop.metadata.typesystem.types.ClassType;
 import org.apache.hadoop.metadata.typesystem.types.DataTypes;
 import org.apache.hadoop.metadata.typesystem.types.EnumTypeDefinition;
@@ -169,7 +170,7 @@ public class MetadataDiscoveryJerseyResourceIT extends BaseResourceIT {
                 ImmutableList.<String>of(),
                 TypesUtil.createRequiredAttrDef("tag", DataTypes.STRING_TYPE));
         HierarchicalTypeDefinition<TraitType> piiTrait =
-                TypesUtil.createTraitTypeDef("PII", ImmutableList.<String>of());
+                TypesUtil.createTraitTypeDef("PII_TYPE", ImmutableList.<String>of());
         HierarchicalTypeDefinition<TraitType> phiTrait =
                 TypesUtil.createTraitTypeDef("PHI", ImmutableList.<String>of());
         HierarchicalTypeDefinition<TraitType> pciTrait =
@@ -190,9 +191,9 @@ public class MetadataDiscoveryJerseyResourceIT extends BaseResourceIT {
         createType(typesDef);
     }

-    private Referenceable createInstance() throws Exception {
+    private Id createInstance() throws Exception {
         Referenceable entityInstance = new Referenceable("dsl_test_type",
-                "Classification", "PII", "PHI", "PCI", "SOX", "SEC", "Finance");
+                "Classification", "PII_TYPE", "PHI", "PCI", "SOX", "SEC", "Finance");
         entityInstance.set("name", "foo name");
         entityInstance.set("description", "bar description");