Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
b65dd91c
Commit
b65dd91c
authored
May 18, 2016
by
Shwetha GS
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ATLAS-713 Entity lineage based on entity id (shwethags)
parent
857561a3
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
692 additions
and
284 deletions
+692
-284
AtlasClient.java
client/src/main/java/org/apache/atlas/AtlasClient.java
+37
-4
VLineage.js
dashboardv2/public/js/models/VLineage.js
+2
-2
VSchema.js
dashboardv2/public/js/models/VSchema.js
+2
-2
DetailPageLayoutView.js
...ardv2/public/js/views/detail_page/DetailPageLayoutView.js
+4
-4
LineageLayoutView.js
dashboardv2/public/js/views/graph/LineageLayoutView.js
+2
-2
SchemaLayoutView.js
dashboardv2/public/js/views/schema/SchemaLayoutView.js
+1
-1
atlas-application.properties
distro/src/conf/atlas-application.properties
+2
-8
release-log.txt
release-log.txt
+1
-0
RepositoryMetadataModule.java
.../main/java/org/apache/atlas/RepositoryMetadataModule.java
+2
-2
DataSetLineageService.java
...ava/org/apache/atlas/discovery/DataSetLineageService.java
+93
-100
ClosureQuery.scala
.../src/main/scala/org/apache/atlas/query/ClosureQuery.scala
+22
-22
BaseRepositoryTest.java
...ry/src/test/java/org/apache/atlas/BaseRepositoryTest.java
+12
-12
DataSetLineageServiceTest.java
...org/apache/atlas/discovery/DataSetLineageServiceTest.java
+268
-81
GraphBackedDiscoveryServiceTest.java
...ache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
+2
-2
GremlinTest2.scala
.../src/test/scala/org/apache/atlas/query/GremlinTest2.scala
+4
-4
LineageService.java
.../main/java/org/apache/atlas/discovery/LineageService.java
+26
-18
atlas-application.properties
typesystem/src/main/resources/atlas-application.properties
+1
-7
DataSetLineageResource.java
...rg/apache/atlas/web/resources/DataSetLineageResource.java
+4
-8
LineageResource.java
.../java/org/apache/atlas/web/resources/LineageResource.java
+153
-0
DataSetLineageJerseyResourceIT.java
...e/atlas/web/resources/DataSetLineageJerseyResourceIT.java
+54
-5
No files found.
client/src/main/java/org/apache/atlas/AtlasClient.java
View file @
b65dd91c
...
...
@@ -90,7 +90,8 @@ public class AtlasClient {
public
static
final
String
URI_ENTITY
=
"entities"
;
public
static
final
String
URI_ENTITY_AUDIT
=
"audit"
;
public
static
final
String
URI_SEARCH
=
"discovery/search"
;
public
static
final
String
URI_LINEAGE
=
"lineage/hive/table"
;
public
static
final
String
URI_NAME_LINEAGE
=
"lineage/hive/table"
;
public
static
final
String
URI_LINEAGE
=
"lineage/"
;
public
static
final
String
URI_TRAITS
=
"traits"
;
public
static
final
String
QUERY
=
"query"
;
...
...
@@ -416,7 +417,12 @@ public class AtlasClient {
SEARCH_GREMLIN
(
BASE_URI
+
URI_SEARCH
+
"/gremlin"
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
SEARCH_FULL_TEXT
(
BASE_URI
+
URI_SEARCH
+
"/fulltext"
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
//Lineage operations
//Lineage operations based on dataset name
NAME_LINEAGE_INPUTS_GRAPH
(
BASE_URI
+
URI_NAME_LINEAGE
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
NAME_LINEAGE_OUTPUTS_GRAPH
(
BASE_URI
+
URI_NAME_LINEAGE
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
NAME_LINEAGE_SCHEMA
(
BASE_URI
+
URI_NAME_LINEAGE
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
//Lineage operations based on entity id of the dataset
LINEAGE_INPUTS_GRAPH
(
BASE_URI
+
URI_LINEAGE
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
LINEAGE_OUTPUTS_GRAPH
(
BASE_URI
+
URI_LINEAGE
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
),
LINEAGE_SCHEMA
(
BASE_URI
+
URI_LINEAGE
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
);
...
...
@@ -988,7 +994,7 @@ public class AtlasClient {
}
public
JSONObject
getInputGraph
(
String
datasetName
)
throws
AtlasServiceException
{
JSONObject
response
=
callAPI
(
API
.
LINEAGE_INPUTS_GRAPH
,
null
,
datasetName
,
"/inputs/graph"
);
JSONObject
response
=
callAPI
(
API
.
NAME_
LINEAGE_INPUTS_GRAPH
,
null
,
datasetName
,
"/inputs/graph"
);
try
{
return
response
.
getJSONObject
(
AtlasClient
.
RESULTS
);
}
catch
(
JSONException
e
)
{
...
...
@@ -997,7 +1003,34 @@ public class AtlasClient {
}
public
JSONObject
getOutputGraph
(
String
datasetName
)
throws
AtlasServiceException
{
JSONObject
response
=
callAPI
(
API
.
LINEAGE_OUTPUTS_GRAPH
,
null
,
datasetName
,
"/outputs/graph"
);
JSONObject
response
=
callAPI
(
API
.
NAME_LINEAGE_OUTPUTS_GRAPH
,
null
,
datasetName
,
"/outputs/graph"
);
try
{
return
response
.
getJSONObject
(
AtlasClient
.
RESULTS
);
}
catch
(
JSONException
e
)
{
throw
new
AtlasServiceException
(
e
);
}
}
public
JSONObject
getInputGraphForEntity
(
String
entityId
)
throws
AtlasServiceException
{
JSONObject
response
=
callAPI
(
API
.
LINEAGE_INPUTS_GRAPH
,
null
,
entityId
,
"/inputs/graph"
);
try
{
return
response
.
getJSONObject
(
AtlasClient
.
RESULTS
);
}
catch
(
JSONException
e
)
{
throw
new
AtlasServiceException
(
e
);
}
}
public
JSONObject
getOutputGraphForEntity
(
String
datasetId
)
throws
AtlasServiceException
{
JSONObject
response
=
callAPI
(
API
.
LINEAGE_OUTPUTS_GRAPH
,
null
,
datasetId
,
"/outputs/graph"
);
try
{
return
response
.
getJSONObject
(
AtlasClient
.
RESULTS
);
}
catch
(
JSONException
e
)
{
throw
new
AtlasServiceException
(
e
);
}
}
public
JSONObject
getSchemaForEntity
(
String
datasetId
)
throws
AtlasServiceException
{
JSONObject
response
=
callAPI
(
API
.
LINEAGE_OUTPUTS_GRAPH
,
null
,
datasetId
,
"/schema"
);
try
{
return
response
.
getJSONObject
(
AtlasClient
.
RESULTS
);
}
catch
(
JSONException
e
)
{
...
...
dashboardv2/public/js/models/VLineage.js
View file @
b65dd91c
...
...
@@ -23,7 +23,7 @@ define(['require',
'use strict'
;
var
VLineage
=
VBaseModel
.
extend
({
urlRoot
:
Globals
.
baseURL
+
'api/atlas/lineage/
hive/table/
assetName/outputs/graph'
,
urlRoot
:
Globals
.
baseURL
+
'api/atlas/lineage/assetName/outputs/graph'
,
defaults
:
{},
...
...
@@ -36,7 +36,7 @@ define(['require',
this
.
bindErrorEvents
();
},
toString
:
function
()
{
return
this
.
get
(
'
name
'
);
return
this
.
get
(
'
id
'
);
},
},
{});
return
VLineage
;
...
...
dashboardv2/public/js/models/VSchema.js
View file @
b65dd91c
...
...
@@ -22,7 +22,7 @@ define(['require',
],
function
(
require
,
Globals
,
VBaseModel
)
{
'use strict'
;
var
VSchema
=
VBaseModel
.
extend
({
urlRoot
:
Globals
.
baseURL
+
'/api/atlas/lineage/
hive/table/
log_fact_daily_mv/schema'
,
urlRoot
:
Globals
.
baseURL
+
'/api/atlas/lineage/log_fact_daily_mv/schema'
,
defaults
:
{},
...
...
@@ -35,7 +35,7 @@ define(['require',
this
.
bindErrorEvents
();
},
toString
:
function
()
{
return
this
.
get
(
'
name
'
);
return
this
.
get
(
'
id
'
);
},
},
{});
return
VSchema
;
...
...
dashboardv2/public/js/views/detail_page/DetailPageLayoutView.js
View file @
b65dd91c
...
...
@@ -92,7 +92,7 @@ define(['require',
this
.
renderEntityDetailTableLayoutView
();
this
.
renderTagTableLayoutView
(
tagGuid
);
this
.
renderLineageLayoutView
(
tagGuid
);
this
.
renderSchemaLayoutView
();
this
.
renderSchemaLayoutView
(
tagGuid
);
},
this
);
},
onRender
:
function
()
{},
...
...
@@ -120,17 +120,17 @@ define(['require',
require
([
'views/graph/LineageLayoutView'
],
function
(
LineageLayoutView
)
{
that
.
RLineageLayoutView
.
show
(
new
LineageLayoutView
({
globalVent
:
that
.
globalVent
,
assetName
:
t
hat
.
name
,
assetName
:
t
agGuid
,
guid
:
tagGuid
}));
});
},
renderSchemaLayoutView
:
function
()
{
renderSchemaLayoutView
:
function
(
tagGuid
)
{
var
that
=
this
;
require
([
'views/schema/SchemaLayoutView'
],
function
(
SchemaLayoutView
)
{
that
.
RSchemaTableLayoutView
.
show
(
new
SchemaLayoutView
({
globalVent
:
that
.
globalVent
,
name
:
t
hat
.
name
,
name
:
t
agGuid
,
vent
:
that
.
vent
}));
});
...
...
dashboardv2/public/js/views/graph/LineageLayoutView.js
View file @
b65dd91c
...
...
@@ -56,8 +56,8 @@ define(['require',
this
.
inputCollection
=
new
VLineageList
();
this
.
outputCollection
=
new
VLineageList
();
this
.
entityModel
=
new
VEntity
();
this
.
inputCollection
.
url
=
"/api/atlas/lineage/
hive/table/
"
+
this
.
assetName
+
"/inputs/graph"
;
this
.
outputCollection
.
url
=
"/api/atlas/lineage/
hive/table/
"
+
this
.
assetName
+
"/outputs/graph"
;
this
.
inputCollection
.
url
=
"/api/atlas/lineage/"
+
this
.
assetName
+
"/inputs/graph"
;
this
.
outputCollection
.
url
=
"/api/atlas/lineage/"
+
this
.
assetName
+
"/outputs/graph"
;
this
.
bindEvents
();
this
.
fetchGraphData
();
this
.
data
=
{};
...
...
dashboardv2/public/js/views/schema/SchemaLayoutView.js
View file @
b65dd91c
...
...
@@ -73,7 +73,7 @@ define(['require',
initialize
:
function
(
options
)
{
_
.
extend
(
this
,
_
.
pick
(
options
,
'globalVent'
,
'name'
,
'vent'
));
this
.
schemaCollection
=
new
VSchemaList
([],
{});
this
.
schemaCollection
.
url
=
"/api/atlas/lineage/
hive/table/
"
+
this
.
name
+
"/schema"
;
this
.
schemaCollection
.
url
=
"/api/atlas/lineage/"
+
this
.
name
+
"/schema"
;
this
.
commonTableOptions
=
{
collection
:
this
.
schemaCollection
,
includeFilter
:
false
,
...
...
distro/src/conf/atlas-application.properties
View file @
b65dd91c
...
...
@@ -63,15 +63,9 @@ atlas.kafka.auto.commit.enable=false
######### Hive Lineage Configs #########
# This models reflects the base super types for Data and Process
#atlas.lineage.hive.table.type.name=DataSet
#atlas.lineage.hive.process.type.name=Process
#atlas.lineage.hive.process.inputs.name=inputs
#atlas.lineage.hive.process.outputs.name=outputs
## Schema
atlas.lineage.
hive.table.schema.query.hive_table
=
hive_table where name
='%s'
\,
columns
atlas.lineage.
hive.table.schema.query.Table
=
Table where name
='%s'
\,
columns
atlas.lineage.
schema.query.hive_table
=
hive_table where __guid
='%s'
\,
columns
atlas.lineage.
schema.query.Table
=
Table where __guid
='%s'
\,
columns
## Server port configuration
#atlas.server.http.port=21000
...
...
release-log.txt
View file @
b65dd91c
...
...
@@ -21,6 +21,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES:
ATLAS-713 Entity lineage based on entity id (shwethags)
ATLAS-736 UI - BUG :: displaying timestamp values for hive_db description (kevalbhatt18 via yhemanth)
ATLAS-784 Configure config.store.uri for Falcon hook IT (yhemanth)
ATLAS-645 FieldMapping.output() results in stack overflow when instances reference each other (dkantor via shwethags)
...
...
repository/src/main/java/org/apache/atlas/RepositoryMetadataModule.java
View file @
b65dd91c
...
...
@@ -26,7 +26,7 @@ import com.google.inject.throwingproviders.ThrowingProviderBinder;
import
com.thinkaurelius.titan.core.TitanGraph
;
import
org.aopalliance.intercept.MethodInterceptor
;
import
org.apache.atlas.discovery.DiscoveryService
;
import
org.apache.atlas.discovery.
Hive
LineageService
;
import
org.apache.atlas.discovery.
DataSet
LineageService
;
import
org.apache.atlas.discovery.LineageService
;
import
org.apache.atlas.discovery.graph.GraphBackedDiscoveryService
;
import
org.apache.atlas.listener.EntityChangeListener
;
...
...
@@ -83,7 +83,7 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
// bind the DiscoveryService interface to an implementation
bind
(
DiscoveryService
.
class
).
to
(
GraphBackedDiscoveryService
.
class
).
asEagerSingleton
();
bind
(
LineageService
.
class
).
to
(
Hive
LineageService
.
class
).
asEagerSingleton
();
bind
(
LineageService
.
class
).
to
(
DataSet
LineageService
.
class
).
asEagerSingleton
();
bindAuditRepository
(
binder
());
...
...
repository/src/main/java/org/apache/atlas/discovery/
Hive
LineageService.java
→
repository/src/main/java/org/apache/atlas/discovery/
DataSet
LineageService.java
View file @
b65dd91c
...
...
@@ -20,19 +20,19 @@ package org.apache.atlas.discovery;
import
com.thinkaurelius.titan.core.TitanGraph
;
import
org.apache.atlas.ApplicationProperties
;
import
org.apache.atlas.AtlasClient
;
import
org.apache.atlas.AtlasException
;
import
org.apache.atlas.GraphTransaction
;
import
org.apache.atlas.typesystem.exception.EntityNotFoundException
;
import
org.apache.atlas.utils.ParamChecker
;
import
org.apache.atlas.discovery.graph.DefaultGraphPersistenceStrategy
;
import
org.apache.atlas.discovery.graph.GraphBackedDiscoveryService
;
import
org.apache.atlas.query.Expressions
;
import
org.apache.atlas.query.GremlinQueryResult
;
import
org.apache.atlas.query.
HiveLineag
eQuery
;
import
org.apache.atlas.query.
HiveWhereUsed
Query
;
import
org.apache.atlas.query.
InputLineageClosur
eQuery
;
import
org.apache.atlas.query.
OutputLineageClosure
Query
;
import
org.apache.atlas.repository.MetadataRepository
;
import
org.apache.atlas.repository.graph.GraphProvider
;
import
org.apache.atlas.typesystem.exception.EntityNotFoundException
;
import
org.apache.atlas.typesystem.persistence.ReferenceableInstance
;
import
org.apache.atlas.utils.ParamChecker
;
import
org.apache.commons.configuration.Configuration
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -47,35 +47,29 @@ import javax.inject.Singleton;
* Hive implementation of Lineage service interface.
*/
@Singleton
public
class
Hive
LineageService
implements
LineageService
{
public
class
DataSet
LineageService
implements
LineageService
{
private
static
final
Logger
LOG
=
LoggerFactory
.
getLogger
(
Hive
LineageService
.
class
);
private
static
final
Logger
LOG
=
LoggerFactory
.
getLogger
(
DataSet
LineageService
.
class
);
private
static
final
Option
<
List
<
String
>>
SELECT_ATTRIBUTES
=
Some
.<
List
<
String
>>
apply
(
List
.<
String
>
fromArray
(
new
String
[]{
"name"
}));
public
static
final
String
SELECT_INSTANCE_GUID
=
"__guid"
;
public
static
final
String
HIVE_TABLE_SCHEMA_QUERY_PREFIX
=
"atlas.lineage.hive.tabl
e.schema.query."
;
public
static
final
String
DATASET_SCHEMA_QUERY_PREFIX
=
"atlas.lineag
e.schema.query."
;
private
static
final
String
HIVE_TABLE_TYPE_NAME
;
private
static
final
String
HIVE_PROCESS_TYPE_NAME
;
private
static
final
String
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
;
private
static
final
String
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
;
private
static
final
String
HIVE_PROCESS_TYPE_NAME
=
"Process"
;
private
static
final
String
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
=
"inputs"
;
private
static
final
String
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
=
"outputs"
;
private
static
final
String
HIVE_TABLE_EXISTS_QUERY
;
private
static
final
String
DATASET_EXISTS_QUERY
=
AtlasClient
.
DATA_SET_SUPER_TYPE
+
" where __guid = '%s'"
;
private
static
final
String
DATASET_NAME_EXISTS_QUERY
=
AtlasClient
.
DATA_SET_SUPER_TYPE
+
" where name = '%s' and __state = 'ACTIVE'"
;
private
static
final
Configuration
propertiesConf
;
static
{
// todo - externalize this using type system - dog food
try
{
propertiesConf
=
ApplicationProperties
.
get
();
HIVE_TABLE_TYPE_NAME
=
propertiesConf
.
getString
(
"atlas.lineage.hive.table.type.name"
,
"DataSet"
);
HIVE_PROCESS_TYPE_NAME
=
propertiesConf
.
getString
(
"atlas.lineage.hive.process.type.name"
,
"Process"
);
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
=
propertiesConf
.
getString
(
"atlas.lineage.hive.process.inputs.name"
,
"inputs"
);
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
=
propertiesConf
.
getString
(
"atlas.lineage.hive.process.outputs.name"
,
"outputs"
);
HIVE_TABLE_EXISTS_QUERY
=
propertiesConf
.
getString
(
"atlas.lineage.hive.table.exists.query"
,
"from "
+
HIVE_TABLE_TYPE_NAME
+
" where name=\"%s\""
);
}
catch
(
AtlasException
e
)
{
throw
new
RuntimeException
(
e
);
}
...
...
@@ -87,136 +81,135 @@ public class HiveLineageService implements LineageService {
private
final
GraphBackedDiscoveryService
discoveryService
;
@Inject
Hive
LineageService
(
GraphProvider
<
TitanGraph
>
graphProvider
,
MetadataRepository
metadataRepository
,
GraphBackedDiscoveryService
discoveryService
)
throws
DiscoveryException
{
DataSet
LineageService
(
GraphProvider
<
TitanGraph
>
graphProvider
,
MetadataRepository
metadataRepository
,
GraphBackedDiscoveryService
discoveryService
)
throws
DiscoveryException
{
this
.
titanGraph
=
graphProvider
.
get
();
this
.
graphPersistenceStrategy
=
new
DefaultGraphPersistenceStrategy
(
metadataRepository
);
this
.
discoveryService
=
discoveryService
;
}
/**
* Return the lineage outputs
for the given table
Name.
* Return the lineage outputs
graph for the given dataset
Name.
*
* @param
tableName table
Name
* @return
Lineage Outputs
as JSON
* @param
datasetName dataset
Name
* @return
Outputs Graph
as JSON
*/
@Override
@GraphTransaction
public
String
getOutputs
(
String
tableName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage outputs for tableName={}"
,
tableName
);
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
validateTableExists
(
tableName
);
HiveWhereUsedQuery
outputsQuery
=
new
HiveWhereUsedQuery
(
HIVE_TABLE_TYPE_NAME
,
tableName
,
HIVE_PROCESS_TYPE_NAME
,
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
,
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
,
Option
.
empty
(),
SELECT_ATTRIBUTES
,
true
,
graphPersistenceStrategy
,
titanGraph
);
Expressions
.
Expression
expression
=
outputsQuery
.
expr
();
LOG
.
debug
(
"Expression is ["
+
expression
.
toString
()
+
"]"
);
try
{
return
discoveryService
.
evaluate
(
expression
).
toJson
();
}
catch
(
Exception
e
)
{
// unable to catch ExpressionException
throw
new
DiscoveryException
(
"Invalid expression ["
+
expression
.
toString
()
+
"]"
,
e
);
}
public
String
getOutputsGraph
(
String
datasetName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage outputs graph for datasetName={}"
,
datasetName
);
ParamChecker
.
notEmpty
(
datasetName
,
"dataset name"
);
ReferenceableInstance
datasetInstance
=
validateDatasetNameExists
(
datasetName
);
return
getOutputsGraphForId
(
datasetInstance
.
getId
().
_getId
());
}
/**
* Return the lineage
out
puts graph for the given tableName.
* Return the lineage
in
puts graph for the given tableName.
*
* @param tableName tableName
* @return
Out
puts Graph as JSON
* @return
In
puts Graph as JSON
*/
@Override
@GraphTransaction
public
String
getOutputsGraph
(
String
tableName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage outputs graph for tableName={}"
,
tableName
);
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
validateTableExists
(
tableName
);
HiveWhereUsedQuery
outputsQuery
=
new
HiveWhereUsedQuery
(
HIVE_TABLE_TYPE_NAME
,
tableName
,
HIVE_PROCESS_TYPE_NAME
,
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
,
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
,
Option
.
empty
(),
SELECT_ATTRIBUTES
,
true
,
graphPersistenceStrategy
,
titanGraph
);
return
outputsQuery
.
graph
().
toInstanceJson
();
public
String
getInputsGraph
(
String
tableName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage inputs graph for tableName={}"
,
tableName
);
ParamChecker
.
notEmpty
(
tableName
,
"table name"
);
ReferenceableInstance
datasetInstance
=
validateDatasetNameExists
(
tableName
);
return
getInputsGraphForId
(
datasetInstance
.
getId
().
_getId
());
}
/**
* Return the lineage inputs for the given tableName.
*
* @param tableName tableName
* @return Lineage Inputs as JSON
*/
@Override
@GraphTransaction
public
String
getInputs
(
String
tableName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage inputs for tableName={}"
,
tableName
);
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
validateTableExists
(
tableName
);
public
String
getInputsGraphForEntity
(
String
guid
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage inputs graph for entity={}"
,
guid
);
ParamChecker
.
notEmpty
(
guid
,
"Entity id"
);
validateDatasetExists
(
guid
);
return
getInputsGraphForId
(
guid
);
}
HiveLineageQuery
inputsQuery
=
new
HiveLineageQuery
(
HIVE_TABLE_TYPE_NAME
,
tableName
,
HIVE_PROCESS_TYPE_NAME
,
private
String
getInputsGraphForId
(
String
guid
)
{
InputLineageClosureQuery
inputsQuery
=
new
InputLineageClosureQuery
(
AtlasClient
.
DATA_SET_SUPER_TYPE
,
SELECT_INSTANCE_GUID
,
guid
,
HIVE_PROCESS_TYPE_NAME
,
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
,
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
,
Option
.
empty
(),
SELECT_ATTRIBUTES
,
true
,
graphPersistenceStrategy
,
titanGraph
);
Expressions
.
Expression
expression
=
inputsQuery
.
expr
();
LOG
.
debug
(
"Expression is ["
+
expression
.
toString
()
+
"]"
);
try
{
return
discoveryService
.
evaluate
(
expression
).
toJson
();
}
catch
(
Exception
e
)
{
// unable to catch ExpressionException
throw
new
DiscoveryException
(
"Invalid expression ["
+
expression
.
toString
()
+
"]"
,
e
);
}
return
inputsQuery
.
graph
().
toInstanceJson
();
}
/**
* Return the lineage inputs graph for the given tableName.
*
* @param tableName tableName
* @return Inputs Graph as JSON
*/
@Override
@GraphTransaction
public
String
getInputsGraph
(
String
tableName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage inputs graph for tableName={}"
,
tableName
);
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
validateTableExists
(
tableName
);
public
String
getOutputsGraphForEntity
(
String
guid
)
throws
AtlasException
{
LOG
.
info
(
"Fetching lineage outputs graph for entity guid={}"
,
guid
);
ParamChecker
.
notEmpty
(
guid
,
"Entity id"
);
validateDatasetExists
(
guid
);
return
getOutputsGraphForId
(
guid
);
}
HiveLineageQuery
inputsQuery
=
new
HiveLineageQuery
(
HIVE_TABLE_TYPE_NAME
,
tableName
,
HIVE_PROCESS_TYPE_NAME
,
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
,
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
,
Option
.
empty
(),
SELECT_ATTRIBUTES
,
true
,
graphPersistenceStrategy
,
titanGraph
);
return
inputsQuery
.
graph
().
toInstanceJson
();
private
String
getOutputsGraphForId
(
String
guid
)
{
OutputLineageClosureQuery
outputsQuery
=
new
OutputLineageClosureQuery
(
AtlasClient
.
DATA_SET_SUPER_TYPE
,
SELECT_INSTANCE_GUID
,
guid
,
HIVE_PROCESS_TYPE_NAME
,
HIVE_PROCESS_INPUT_ATTRIBUTE_NAME
,
HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME
,
Option
.
empty
(),
SELECT_ATTRIBUTES
,
true
,
graphPersistenceStrategy
,
titanGraph
);
return
outputsQuery
.
graph
().
toInstanceJson
();
}
/**
* Return the schema for the given tableName.
*
* @param
table
Name tableName
* @param
dataset
Name tableName
* @return Schema as JSON
*/
@Override
@GraphTransaction
public
String
getSchema
(
String
tableName
)
throws
AtlasException
{
LOG
.
info
(
"Fetching schema for tableName={}"
,
tableName
);
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
String
typeName
=
validateTableExists
(
tableName
);
public
String
getSchema
(
String
datasetName
)
throws
AtlasException
{
ParamChecker
.
notEmpty
(
datasetName
,
"table name"
);
LOG
.
info
(
"Fetching schema for tableName={}"
,
datasetName
);
ReferenceableInstance
datasetInstance
=
validateDatasetNameExists
(
datasetName
);
return
getSchemaForId
(
datasetInstance
.
getTypeName
(),
datasetInstance
.
getId
().
_getId
());
}
private
String
getSchemaForId
(
String
typeName
,
String
guid
)
throws
DiscoveryException
{
final
String
schemaQuery
=
String
.
format
(
propertiesConf
.
getString
(
HIVE_TABLE_SCHEMA_QUERY_PREFIX
+
typeName
),
tableName
);
String
.
format
(
propertiesConf
.
getString
(
DATASET_SCHEMA_QUERY_PREFIX
+
typeName
),
guid
);
return
discoveryService
.
searchByDSL
(
schemaQuery
);
}
@Override
public
String
getSchemaForEntity
(
String
guid
)
throws
AtlasException
{
ParamChecker
.
notEmpty
(
guid
,
"Entity id"
);
LOG
.
info
(
"Fetching schema for entity guid={}"
,
guid
);
String
typeName
=
validateDatasetExists
(
guid
);
return
getSchemaForId
(
typeName
,
guid
);
}
/**
* Validate if indeed this is a table type and exists.
*
* @param
table
Name table name
* @param
dataset
Name table name
*/
private
String
validateTableExists
(
String
table
Name
)
throws
AtlasException
{
final
String
tableExistsQuery
=
String
.
format
(
HIVE_TABLE_EXISTS_QUERY
,
table
Name
);
private
ReferenceableInstance
validateDatasetNameExists
(
String
dataset
Name
)
throws
AtlasException
{
final
String
tableExistsQuery
=
String
.
format
(
DATASET_NAME_EXISTS_QUERY
,
dataset
Name
);
GremlinQueryResult
queryResult
=
discoveryService
.
evaluate
(
tableExistsQuery
);
if
(!(
queryResult
.
rows
().
length
()
>
0
))
{
throw
new
EntityNotFoundException
(
tableName
+
" does not exist"
);
throw
new
EntityNotFoundException
(
datasetName
+
" does not exist"
);
}
return
(
ReferenceableInstance
)
queryResult
.
rows
().
apply
(
0
);
}
/**
* Validate if indeed this is a table type and exists.
*
* @param guid entity id
*/
private
String
validateDatasetExists
(
String
guid
)
throws
AtlasException
{
final
String
datasetExistsQuery
=
String
.
format
(
DATASET_EXISTS_QUERY
,
guid
);
GremlinQueryResult
queryResult
=
discoveryService
.
evaluate
(
datasetExistsQuery
);
if
(!(
queryResult
.
rows
().
length
()
>
0
))
{
throw
new
EntityNotFoundException
(
"Dataset with guid = "
+
guid
+
" does not exist"
);
}
ReferenceableInstance
referenceable
=
(
ReferenceableInstance
)
queryResult
.
rows
().
apply
(
0
);
return
referenceable
.
getTypeName
();
}
}
repository/src/main/scala/org/apache/atlas/query/ClosureQuery.scala
View file @
b65dd91c
...
...
@@ -256,21 +256,21 @@ trait SingleInstanceClosureQuery[T] extends ClosureQuery {
* @param persistenceStrategy as needed to evaluate the Closure Query.
* @param g as needed to evaluate the Closure Query.
*/
case
class
HiveLineageQuery
(
tableTypeName
:
String
,
tableName
:
String
,
ctasTypeName
:
String
,
ctasInputTableAttribute
:
String
,
ctasOutputTableAttribute
:
String
,
depth
:
Option
[
Int
],
selectAttributes
:
Option
[
List
[
String
]],
withPath
:
Boolean
,
persistenceStrategy
:
GraphPersistenceStrategies
,
g
:
TitanGraph
case
class
InputLineageClosureQuery
(
tableTypeName
:
String
,
attributeToSelectInstance
:
String
,
tableName
:
String
,
ctasTypeName
:
String
,
ctasInputTableAttribute
:
String
,
ctasOutputTableAttribute
:
String
,
depth
:
Option
[
Int
],
selectAttributes
:
Option
[
List
[
String
]],
withPath
:
Boolean
,
persistenceStrategy
:
GraphPersistenceStrategies
,
g
:
TitanGraph
)
extends
SingleInstanceClosureQuery
[
String
]
{
val
closureType
:
String
=
tableTypeName
val
attributeToSelectInstance
=
"name"
val
attributeTyp
=
DataTypes
.
STRING_TYPE
val
instanceValue
=
tableName
...
...
@@ -296,21 +296,21 @@ case class HiveLineageQuery(tableTypeName : String,
* @param persistenceStrategy as needed to evaluate the Closure Query.
* @param g as needed to evaluate the Closure Query.
*/
case
class
HiveWhereUsedQuery
(
tableTypeName
:
String
,
tableName
:
String
,
ctasTypeName
:
String
,
ctasInputTableAttribute
:
String
,
ctasOutputTableAttribute
:
String
,
depth
:
Option
[
Int
],
selectAttributes
:
Option
[
List
[
String
]],
withPath
:
Boolean
,
persistenceStrategy
:
GraphPersistenceStrategies
,
g
:
TitanGraph
case
class
OutputLineageClosureQuery
(
tableTypeName
:
String
,
attributeToSelectInstance
:
String
,
tableName
:
String
,
ctasTypeName
:
String
,
ctasInputTableAttribute
:
String
,
ctasOutputTableAttribute
:
String
,
depth
:
Option
[
Int
],
selectAttributes
:
Option
[
List
[
String
]],
withPath
:
Boolean
,
persistenceStrategy
:
GraphPersistenceStrategies
,
g
:
TitanGraph
)
extends
SingleInstanceClosureQuery
[
String
]
{
val
closureType
:
String
=
tableTypeName
val
attributeToSelectInstance
=
"name"
val
attributeTyp
=
DataTypes
.
STRING_TYPE
val
instanceValue
=
tableName
...
...
repository/src/test/java/org/apache/atlas/Base
Hive
RepositoryTest.java
→
repository/src/test/java/org/apache/atlas/BaseRepositoryTest.java
View file @
b65dd91c
...
...
@@ -55,7 +55,7 @@ import java.util.List;
* Base Class to set up hive types and instances for tests
*/
@Guice
(
modules
=
RepositoryMetadataModule
.
class
)
public
class
Base
Hive
RepositoryTest
{
public
class
BaseRepositoryTest
{
@Inject
protected
MetadataService
metadataService
;
...
...
@@ -94,8 +94,8 @@ public class BaseHiveRepositoryTest {
metadataService
.
createType
(
typesAsJSON
);
}
pr
ivate
static
final
String
DATABASE_TYPE
=
"hive_db"
;
pr
ivate
static
final
String
HIVE_TABLE_TYPE
=
"hive_table"
;
pr
otected
static
final
String
DATABASE_TYPE
=
"hive_db"
;
pr
otected
static
final
String
HIVE_TABLE_TYPE
=
"hive_table"
;
private
static
final
String
COLUMN_TYPE
=
"hive_column"
;
private
static
final
String
HIVE_PROCESS_TYPE
=
"hive_process"
;
private
static
final
String
STORAGE_DESC_TYPE
=
"StorageDesc"
;
...
...
@@ -104,7 +104,8 @@ public class BaseHiveRepositoryTest {
TypesDef
createTypeDefinitions
()
{
HierarchicalTypeDefinition
<
ClassType
>
dbClsDef
=
TypesUtil
.
createClassTypeDef
(
DATABASE_TYPE
,
null
,
attrDef
(
"name"
,
DataTypes
.
STRING_TYPE
),
.
createClassTypeDef
(
DATABASE_TYPE
,
null
,
TypesUtil
.
createUniqueRequiredAttrDef
(
"name"
,
DataTypes
.
STRING_TYPE
),
attrDef
(
"description"
,
DataTypes
.
STRING_TYPE
),
attrDef
(
"locationUri"
,
DataTypes
.
STRING_TYPE
),
attrDef
(
"owner"
,
DataTypes
.
STRING_TYPE
),
attrDef
(
"createTime"
,
DataTypes
.
LONG_TYPE
));
...
...
@@ -127,8 +128,7 @@ public class BaseHiveRepositoryTest {
attrDef
(
"temporary"
,
DataTypes
.
BOOLEAN_TYPE
),
new
AttributeDefinition
(
"db"
,
DATABASE_TYPE
,
Multiplicity
.
REQUIRED
,
false
,
null
),
// todo - uncomment this, something is broken
new
AttributeDefinition
(
"sd"
,
STORAGE_DESC_TYPE
,
Multiplicity
.
REQUIRED
,
true
,
null
),
new
AttributeDefinition
(
"sd"
,
STORAGE_DESC_TYPE
,
Multiplicity
.
REQUIRED
,
true
,
null
),
new
AttributeDefinition
(
"columns"
,
DataTypes
.
arrayTypeName
(
COLUMN_TYPE
),
Multiplicity
.
COLLECTION
,
true
,
null
));
...
...
@@ -285,7 +285,7 @@ public class BaseHiveRepositoryTest {
return
createInstance
(
referenceable
,
clsType
);
}
Referenceable
storageDescriptor
(
String
location
,
String
inputFormat
,
String
outputFormat
,
boolean
compressed
,
List
<
Referenceable
>
columns
)
protected
Referenceable
storageDescriptor
(
String
location
,
String
inputFormat
,
String
outputFormat
,
boolean
compressed
,
List
<
Referenceable
>
columns
)
throws
Exception
{
Referenceable
referenceable
=
new
Referenceable
(
STORAGE_DESC_TYPE
);
referenceable
.
set
(
"location"
,
location
);
...
...
@@ -297,7 +297,7 @@ public class BaseHiveRepositoryTest {
return
referenceable
;
}
Referenceable
column
(
String
name
,
String
dataType
,
String
comment
,
String
...
traitNames
)
throws
Exception
{
protected
Referenceable
column
(
String
name
,
String
dataType
,
String
comment
,
String
...
traitNames
)
throws
Exception
{
Referenceable
referenceable
=
new
Referenceable
(
COLUMN_TYPE
,
traitNames
);
referenceable
.
set
(
"name"
,
name
);
referenceable
.
set
(
"dataType"
,
dataType
);
...
...
@@ -306,7 +306,7 @@ public class BaseHiveRepositoryTest {
return
referenceable
;
}
Id
table
(
String
name
,
String
description
,
Id
dbId
,
Referenceable
sd
,
String
owner
,
String
tableType
,
protected
Id
table
(
String
name
,
String
description
,
Id
dbId
,
Referenceable
sd
,
String
owner
,
String
tableType
,
List
<
Referenceable
>
columns
,
String
...
traitNames
)
throws
Exception
{
Referenceable
referenceable
=
new
Referenceable
(
HIVE_TABLE_TYPE
,
traitNames
);
referenceable
.
set
(
"name"
,
name
);
...
...
@@ -327,12 +327,12 @@ public class BaseHiveRepositoryTest {
return
createInstance
(
referenceable
,
clsType
);
}
Id
loadProcess
(
String
name
,
String
description
,
String
user
,
List
<
Id
>
inputTables
,
List
<
Id
>
outputTables
,
protected
Id
loadProcess
(
String
name
,
String
description
,
String
user
,
List
<
Id
>
inputTables
,
List
<
Id
>
outputTables
,
String
queryText
,
String
queryPlan
,
String
queryId
,
String
queryGraph
,
String
...
traitNames
)
throws
Exception
{
Referenceable
referenceable
=
new
Referenceable
(
HIVE_PROCESS_TYPE
,
traitNames
);
referenceable
.
set
(
AtlasClient
.
NAME
,
name
);
referenceable
.
set
(
AtlasClient
.
REFERENCEABLE_ATTRIBUTE_NAME
,
name
);
referenceable
.
set
(
"name"
,
name
);
referenceable
.
set
(
"qualifiedName"
,
name
);
referenceable
.
set
(
"description"
,
description
);
referenceable
.
set
(
"user"
,
user
);
referenceable
.
set
(
"startTime"
,
System
.
currentTimeMillis
());
...
...
repository/src/test/java/org/apache/atlas/discovery/
Hive
LineageServiceTest.java
→
repository/src/test/java/org/apache/atlas/discovery/
DataSet
LineageServiceTest.java
View file @
b65dd91c
...
...
@@ -18,9 +18,16 @@
package
org
.
apache
.
atlas
.
discovery
;
import
org.apache.atlas.BaseHiveRepositoryTest
;
import
com.google.common.collect.ImmutableList
;
import
org.apache.atlas.AtlasException
;
import
org.apache.atlas.BaseRepositoryTest
;
import
org.apache.atlas.RepositoryMetadataModule
;
import
org.apache.atlas.typesystem.ITypedReferenceableInstance
;
import
org.apache.atlas.typesystem.Referenceable
;
import
org.apache.atlas.typesystem.exception.EntityNotFoundException
;
import
org.apache.atlas.typesystem.persistence.Id
;
import
org.apache.commons.collections.ArrayStack
;
import
org.apache.commons.lang.RandomStringUtils
;
import
org.codehaus.jettison.json.JSONArray
;
import
org.codehaus.jettison.json.JSONObject
;
import
org.testng.Assert
;
...
...
@@ -31,18 +38,24 @@ import org.testng.annotations.Guice;
import
org.testng.annotations.Test
;
import
javax.inject.Inject
;
import
java.util.Arrays
;
import
java.util.List
;
import
static
org
.
testng
.
Assert
.
assertEquals
;
import
static
org
.
testng
.
Assert
.
assertNotNull
;
import
static
org
.
testng
.
Assert
.
fail
;
/**
* Unit tests for Hive LineageService.
*/
@Guice
(
modules
=
RepositoryMetadataModule
.
class
)
public
class
HiveLineageServiceTest
extends
BaseHiv
eRepositoryTest
{
public
class
DataSetLineageServiceTest
extends
Bas
eRepositoryTest
{
@Inject
private
DiscoveryService
discoveryService
;
@Inject
private
HiveLineageService
hiveL
ineageService
;
private
DataSetLineageService
l
ineageService
;
@BeforeClass
public
void
setUp
()
throws
Exception
{
...
...
@@ -100,66 +113,55 @@ public class HiveLineageServiceTest extends BaseHiveRepositoryTest {
public
void
testSearchByDSLQueries
(
String
dslQuery
)
throws
Exception
{
System
.
out
.
println
(
"Executing dslQuery = "
+
dslQuery
);
String
jsonResults
=
discoveryService
.
searchByDSL
(
dslQuery
);
Assert
.
assertNotNull
(
jsonResults
);
assertNotNull
(
jsonResults
);
JSONObject
results
=
new
JSONObject
(
jsonResults
);
Assert
.
assertEquals
(
results
.
length
(),
3
);
System
.
out
.
println
(
"results = "
+
results
);
Object
query
=
results
.
get
(
"query"
);
Assert
.
assertNotNull
(
query
);
assertNotNull
(
query
);
JSONObject
dataType
=
results
.
getJSONObject
(
"dataType"
);
Assert
.
assertNotNull
(
dataType
);
assertNotNull
(
dataType
);
String
typeName
=
dataType
.
getString
(
"typeName"
);
Assert
.
assertNotNull
(
typeName
);
assertNotNull
(
typeName
);
JSONArray
rows
=
results
.
getJSONArray
(
"rows"
);
Assert
.
assertNotNull
(
rows
);
assertNotNull
(
rows
);
Assert
.
assertTrue
(
rows
.
length
()
>=
0
);
// some queries may not have any results
System
.
out
.
println
(
"query ["
+
dslQuery
+
"] returned ["
+
rows
.
length
()
+
"] rows"
);
}
@Test
public
void
testGetInputs
()
throws
Exception
{
JSONObject
results
=
new
JSONObject
(
hiveLineageService
.
getInputs
(
"sales_fact_monthly_mv"
));
Assert
.
assertNotNull
(
results
);
System
.
out
.
println
(
"inputs = "
+
results
);
JSONArray
rows
=
results
.
getJSONArray
(
"rows"
);
Assert
.
assertTrue
(
rows
.
length
()
>
0
);
final
JSONObject
row
=
rows
.
getJSONObject
(
0
);
JSONArray
paths
=
row
.
getJSONArray
(
"path"
);
Assert
.
assertTrue
(
paths
.
length
()
>
0
);
@Test
(
dataProvider
=
"invalidArgumentsProvider"
)
public
void
testGetInputsGraphInvalidArguments
(
final
String
tableName
,
String
expectedException
)
throws
Exception
{
testInvalidArguments
(
expectedException
,
new
Invoker
()
{
@Override
void
run
()
throws
AtlasException
{
lineageService
.
getInputsGraph
(
tableName
);
}
});
}
@Test
(
expectedExceptions
=
IllegalArgumentException
.
class
)
public
void
testGetInputsTableNameNull
()
throws
Exception
{
hiveLineageService
.
getInputs
(
null
);
Assert
.
fail
();
}
@Test
(
expectedExceptions
=
IllegalArgumentException
.
class
)
public
void
testGetInputsTableNameEmpty
()
throws
Exception
{
hiveLineageService
.
getInputs
(
""
);
Assert
.
fail
();
}
@Test
(
expectedExceptions
=
EntityNotFoundException
.
class
)
public
void
testGetInputsBadTableName
()
throws
Exception
{
hiveLineageService
.
getInputs
(
"blah"
);
Assert
.
fail
();
@Test
(
dataProvider
=
"invalidArgumentsProvider"
)
public
void
testGetInputsGraphForEntityInvalidArguments
(
final
String
tableName
,
String
expectedException
)
throws
Exception
{
testInvalidArguments
(
expectedException
,
new
Invoker
()
{
@Override
void
run
()
throws
AtlasException
{
lineageService
.
getInputsGraphForEntity
(
tableName
);
}
});
}
@Test
public
void
testGetInputsGraph
()
throws
Exception
{
JSONObject
results
=
new
JSONObject
(
hiveL
ineageService
.
getInputsGraph
(
"sales_fact_monthly_mv"
));
Assert
.
assertNotNull
(
results
);
JSONObject
results
=
new
JSONObject
(
l
ineageService
.
getInputsGraph
(
"sales_fact_monthly_mv"
));
assertNotNull
(
results
);
System
.
out
.
println
(
"inputs graph = "
+
results
);
JSONObject
values
=
results
.
getJSONObject
(
"values"
);
Assert
.
assertNotNull
(
values
);
assertNotNull
(
values
);
final
JSONObject
vertices
=
values
.
getJSONObject
(
"vertices"
);
Assert
.
assertEquals
(
vertices
.
length
(),
4
);
...
...
@@ -169,45 +171,72 @@ public class HiveLineageServiceTest extends BaseHiveRepositoryTest {
}
@Test
public
void
testGetOutputs
()
throws
Exception
{
JSONObject
results
=
new
JSONObject
(
hiveLineageService
.
getOutputs
(
"sales_fact"
));
Assert
.
assertNotNull
(
results
);
System
.
out
.
println
(
"outputs = "
+
results
);
public
void
testGetInputsGraphForEntity
()
throws
Exception
{
ITypedReferenceableInstance
entity
=
repository
.
getEntityDefinition
(
HIVE_TABLE_TYPE
,
"name"
,
"sales_fact_monthly_mv"
);
JSONArray
rows
=
results
.
getJSONArray
(
"rows"
);
Assert
.
assertTrue
(
rows
.
length
()
>
0
);
JSONObject
results
=
new
JSONObject
(
lineageService
.
getInputsGraphForEntity
(
entity
.
getId
().
_getId
()));
assertNotNull
(
results
);
System
.
out
.
println
(
"inputs graph = "
+
results
);
final
JSONObject
row
=
rows
.
getJSONObject
(
0
);
JSONArray
paths
=
row
.
getJSONArray
(
"path"
);
Assert
.
assertTrue
(
paths
.
length
()
>
0
);
}
JSONObject
values
=
results
.
getJSONObject
(
"values"
);
assertNotNull
(
values
);
final
JSONObject
vertices
=
values
.
getJSONObject
(
"vertices"
);
Assert
.
assertEquals
(
vertices
.
length
(),
4
);
@Test
(
expectedExceptions
=
IllegalArgumentException
.
class
)
public
void
testGetOututsTableNameNull
()
throws
Exception
{
hiveLineageService
.
getOutputs
(
null
);
Assert
.
fail
();
final
JSONObject
edges
=
values
.
getJSONObject
(
"edges"
);
Assert
.
assertEquals
(
edges
.
length
(),
4
);
}
@Test
(
expectedExceptions
=
IllegalArgumentException
.
class
)
public
void
testGetOutputsTableNameEmpty
()
throws
Exception
{
hiveLineageService
.
getOutputs
(
""
);
Assert
.
fail
();
@Test
(
dataProvider
=
"invalidArgumentsProvider"
)
public
void
testGetOutputsGraphInvalidArguments
(
final
String
tableName
,
String
expectedException
)
throws
Exception
{
testInvalidArguments
(
expectedException
,
new
Invoker
()
{
@Override
void
run
()
throws
AtlasException
{
lineageService
.
getOutputsGraph
(
tableName
);
}
});
}
@Test
(
expectedExceptions
=
EntityNotFoundException
.
class
)
public
void
testGetOutputsBadTableName
()
throws
Exception
{
hiveLineageService
.
getOutputs
(
"blah"
);
Assert
.
fail
();
@Test
(
dataProvider
=
"invalidArgumentsProvider"
)
public
void
testGetOutputsGraphForEntityInvalidArguments
(
final
String
tableName
,
String
expectedException
)
throws
Exception
{
testInvalidArguments
(
expectedException
,
new
Invoker
()
{
@Override
void
run
()
throws
AtlasException
{
lineageService
.
getOutputsGraphForEntity
(
tableName
);
}
});
}
@Test
public
void
testGetOutputsGraph
()
throws
Exception
{
JSONObject
results
=
new
JSONObject
(
hiveL
ineageService
.
getOutputsGraph
(
"sales_fact"
));
Assert
.
assertNotNull
(
results
);
JSONObject
results
=
new
JSONObject
(
l
ineageService
.
getOutputsGraph
(
"sales_fact"
));
assertNotNull
(
results
);
System
.
out
.
println
(
"outputs graph = "
+
results
);
JSONObject
values
=
results
.
getJSONObject
(
"values"
);
Assert
.
assertNotNull
(
values
);
assertNotNull
(
values
);
final
JSONObject
vertices
=
values
.
getJSONObject
(
"vertices"
);
Assert
.
assertEquals
(
vertices
.
length
(),
3
);
final
JSONObject
edges
=
values
.
getJSONObject
(
"edges"
);
Assert
.
assertEquals
(
edges
.
length
(),
4
);
}
@Test
public
void
testGetOutputsGraphForEntity
()
throws
Exception
{
ITypedReferenceableInstance
entity
=
repository
.
getEntityDefinition
(
HIVE_TABLE_TYPE
,
"name"
,
"sales_fact"
);
JSONObject
results
=
new
JSONObject
(
lineageService
.
getOutputsGraphForEntity
(
entity
.
getId
().
_getId
()));
assertNotNull
(
results
);
System
.
out
.
println
(
"outputs graph = "
+
results
);
JSONObject
values
=
results
.
getJSONObject
(
"values"
);
assertNotNull
(
values
);
final
JSONObject
vertices
=
values
.
getJSONObject
(
"vertices"
);
Assert
.
assertEquals
(
vertices
.
length
(),
3
);
...
...
@@ -224,8 +253,29 @@ public class HiveLineageServiceTest extends BaseHiveRepositoryTest {
@Test
(
dataProvider
=
"tableNamesProvider"
)
public
void
testGetSchema
(
String
tableName
,
String
expected
)
throws
Exception
{
JSONObject
results
=
new
JSONObject
(
hiveLineageService
.
getSchema
(
tableName
));
Assert
.
assertNotNull
(
results
);
JSONObject
results
=
new
JSONObject
(
lineageService
.
getSchema
(
tableName
));
assertNotNull
(
results
);
System
.
out
.
println
(
"columns = "
+
results
);
JSONArray
rows
=
results
.
getJSONArray
(
"rows"
);
Assert
.
assertEquals
(
rows
.
length
(),
Integer
.
parseInt
(
expected
));
for
(
int
index
=
0
;
index
<
rows
.
length
();
index
++)
{
final
JSONObject
row
=
rows
.
getJSONObject
(
index
);
assertNotNull
(
row
.
getString
(
"name"
));
assertNotNull
(
row
.
getString
(
"comment"
));
assertNotNull
(
row
.
getString
(
"dataType"
));
Assert
.
assertEquals
(
row
.
getString
(
"$typeName$"
),
"hive_column"
);
}
}
@Test
(
dataProvider
=
"tableNamesProvider"
)
public
void
testGetSchemaForEntity
(
String
tableName
,
String
expected
)
throws
Exception
{
ITypedReferenceableInstance
entity
=
repository
.
getEntityDefinition
(
HIVE_TABLE_TYPE
,
"name"
,
tableName
);
JSONObject
results
=
new
JSONObject
(
lineageService
.
getSchemaForEntity
(
entity
.
getId
().
_getId
()));
assertNotNull
(
results
);
System
.
out
.
println
(
"columns = "
+
results
);
JSONArray
rows
=
results
.
getJSONArray
(
"rows"
);
...
...
@@ -233,28 +283,165 @@ public class HiveLineageServiceTest extends BaseHiveRepositoryTest {
for
(
int
index
=
0
;
index
<
rows
.
length
();
index
++)
{
final
JSONObject
row
=
rows
.
getJSONObject
(
index
);
Assert
.
assertNotNull
(
row
.
getString
(
"name"
));
Assert
.
assertNotNull
(
row
.
getString
(
"comment"
));
Assert
.
assertNotNull
(
row
.
getString
(
"dataType"
));
assertNotNull
(
row
.
getString
(
"name"
));
assertNotNull
(
row
.
getString
(
"comment"
));
assertNotNull
(
row
.
getString
(
"dataType"
));
Assert
.
assertEquals
(
row
.
getString
(
"$typeName$"
),
"hive_column"
);
}
}
@Test
(
expectedExceptions
=
IllegalArgumentException
.
class
)
public
void
testGetSchemaTableNameNull
()
throws
Exception
{
hiveLineageService
.
getSchema
(
null
);
Assert
.
fail
();
@DataProvider
(
name
=
"invalidArgumentsProvider"
)
private
Object
[][]
arguments
()
{
return
new
String
[][]{{
null
,
IllegalArgumentException
.
class
.
getName
()},
{
""
,
IllegalArgumentException
.
class
.
getName
()},
{
"blah"
,
EntityNotFoundException
.
class
.
getName
()}};
}
abstract
class
Invoker
{
abstract
void
run
()
throws
AtlasException
;
}
public
void
testInvalidArguments
(
String
expectedException
,
Invoker
invoker
)
throws
Exception
{
try
{
invoker
.
run
();
fail
(
"Expected "
+
expectedException
);
}
catch
(
Exception
e
)
{
assertEquals
(
e
.
getClass
().
getName
(),
expectedException
);
}
}
@Test
(
dataProvider
=
"invalidArgumentsProvider"
)
public
void
testGetSchemaInvalidArguments
(
final
String
tableName
,
String
expectedException
)
throws
Exception
{
testInvalidArguments
(
expectedException
,
new
Invoker
()
{
@Override
void
run
()
throws
AtlasException
{
lineageService
.
getSchema
(
tableName
);
}
});
}
@Test
(
dataProvider
=
"invalidArgumentsProvider"
)
public
void
testGetSchemaForEntityInvalidArguments
(
final
String
entityId
,
String
expectedException
)
throws
Exception
{
testInvalidArguments
(
expectedException
,
new
Invoker
()
{
@Override
void
run
()
throws
AtlasException
{
lineageService
.
getSchemaForEntity
(
entityId
);
}
});
}
@Test
public
void
testLineageWithDelete
()
throws
Exception
{
String
tableName
=
"table"
+
random
();
createTable
(
tableName
,
3
,
true
);
JSONObject
results
=
new
JSONObject
(
lineageService
.
getSchema
(
tableName
));
assertEquals
(
results
.
getJSONArray
(
"rows"
).
length
(),
3
);
results
=
new
JSONObject
(
lineageService
.
getInputsGraph
(
tableName
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
2
);
results
=
new
JSONObject
(
lineageService
.
getOutputsGraph
(
tableName
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
2
);
String
tableId
=
getEntityId
(
HIVE_TABLE_TYPE
,
"name"
,
tableName
);
results
=
new
JSONObject
(
lineageService
.
getSchemaForEntity
(
tableId
));
assertEquals
(
results
.
getJSONArray
(
"rows"
).
length
(),
3
);
results
=
new
JSONObject
(
lineageService
.
getInputsGraphForEntity
(
tableId
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
2
);
results
=
new
JSONObject
(
lineageService
.
getOutputsGraphForEntity
(
tableId
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
2
);
//Delete the entity. Lineage for entity returns the same results as before.
//Lineage for table name throws EntityNotFoundException
repository
.
deleteEntities
(
Arrays
.
asList
(
tableId
));
results
=
new
JSONObject
(
lineageService
.
getSchemaForEntity
(
tableId
));
assertEquals
(
results
.
getJSONArray
(
"rows"
).
length
(),
3
);
results
=
new
JSONObject
(
lineageService
.
getInputsGraphForEntity
(
tableId
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
2
);
results
=
new
JSONObject
(
lineageService
.
getOutputsGraphForEntity
(
tableId
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
2
);
try
{
lineageService
.
getSchema
(
tableName
);
fail
(
"Expected EntityNotFoundException"
);
}
catch
(
EntityNotFoundException
e
)
{
//expected
}
try
{
lineageService
.
getInputsGraph
(
tableName
);
fail
(
"Expected EntityNotFoundException"
);
}
catch
(
EntityNotFoundException
e
)
{
//expected
}
try
{
lineageService
.
getOutputsGraph
(
tableName
);
fail
(
"Expected EntityNotFoundException"
);
}
catch
(
EntityNotFoundException
e
)
{
//expected
}
//Create table again should show new lineage
createTable
(
tableName
,
2
,
false
);
results
=
new
JSONObject
(
lineageService
.
getSchema
(
tableName
));
assertEquals
(
results
.
getJSONArray
(
"rows"
).
length
(),
2
);
results
=
new
JSONObject
(
lineageService
.
getOutputsGraph
(
tableName
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
0
);
results
=
new
JSONObject
(
lineageService
.
getInputsGraph
(
tableName
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
0
);
tableId
=
getEntityId
(
HIVE_TABLE_TYPE
,
"name"
,
tableName
);
results
=
new
JSONObject
(
lineageService
.
getSchemaForEntity
(
tableId
));
assertEquals
(
results
.
getJSONArray
(
"rows"
).
length
(),
2
);
results
=
new
JSONObject
(
lineageService
.
getInputsGraphForEntity
(
tableId
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
0
);
results
=
new
JSONObject
(
lineageService
.
getOutputsGraphForEntity
(
tableId
));
assertEquals
(
results
.
getJSONObject
(
"values"
).
getJSONObject
(
"vertices"
).
length
(),
0
);
}
private
void
createTable
(
String
tableName
,
int
numCols
,
boolean
createLineage
)
throws
Exception
{
String
dbId
=
getEntityId
(
DATABASE_TYPE
,
"name"
,
"Sales"
);
Id
salesDB
=
new
Id
(
dbId
,
0
,
DATABASE_TYPE
);
//Create the entity again and schema should return the new schema
List
<
Referenceable
>
columns
=
new
ArrayStack
();
for
(
int
i
=
0
;
i
<
numCols
;
i
++)
{
columns
.
add
(
column
(
"col"
+
random
(),
"int"
,
"column descr"
));
}
Referenceable
sd
=
storageDescriptor
(
"hdfs://host:8000/apps/warehouse/sales"
,
"TextInputFormat"
,
"TextOutputFormat"
,
true
,
ImmutableList
.
of
(
column
(
"time_id"
,
"int"
,
"time id"
)));
Id
table
=
table
(
tableName
,
"test table"
,
salesDB
,
sd
,
"fetl"
,
"External"
,
columns
);
if
(
createLineage
)
{
Id
inTable
=
table
(
"table"
+
random
(),
"test table"
,
salesDB
,
sd
,
"fetl"
,
"External"
,
columns
);
Id
outTable
=
table
(
"table"
+
random
(),
"test table"
,
salesDB
,
sd
,
"fetl"
,
"External"
,
columns
);
loadProcess
(
"process"
+
random
(),
"hive query for monthly summary"
,
"Tim ETL"
,
ImmutableList
.
of
(
inTable
),
ImmutableList
.
of
(
table
),
"create table as select "
,
"plan"
,
"id"
,
"graph"
,
"ETL"
);
loadProcess
(
"process"
+
random
(),
"hive query for monthly summary"
,
"Tim ETL"
,
ImmutableList
.
of
(
table
),
ImmutableList
.
of
(
outTable
),
"create table as select "
,
"plan"
,
"id"
,
"graph"
,
"ETL"
);
}
}
@Test
(
expectedExceptions
=
IllegalArgumentException
.
class
)
public
void
testGetSchemaTableNameEmpty
()
throws
Exception
{
hiveLineageService
.
getSchema
(
""
);
Assert
.
fail
();
private
String
random
()
{
return
RandomStringUtils
.
randomAlphanumeric
(
5
);
}
@Test
(
expectedExceptions
=
EntityNotFoundException
.
class
)
public
void
testGetSchemaBadTableName
()
throws
Exception
{
hiveLineageService
.
getSchema
(
"blah"
);
Assert
.
fail
();
private
String
getEntityId
(
String
typeName
,
String
attributeName
,
String
attributeValue
)
throws
Exception
{
return
repository
.
getEntityDefinition
(
typeName
,
attributeName
,
attributeValue
).
getId
().
_getId
();
}
}
repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
View file @
b65dd91c
...
...
@@ -20,7 +20,7 @@ package org.apache.atlas.discovery;
import
com.google.common.collect.ImmutableSet
;
import
org.apache.atlas.Base
Hive
RepositoryTest
;
import
org.apache.atlas.BaseRepositoryTest
;
import
org.apache.atlas.RepositoryMetadataModule
;
import
org.apache.atlas.RequestContext
;
import
org.apache.atlas.TestUtils
;
...
...
@@ -60,7 +60,7 @@ import static org.testng.Assert.assertEquals;
import
static
org
.
testng
.
Assert
.
assertNotNull
;
@Guice
(
modules
=
RepositoryMetadataModule
.
class
)
public
class
GraphBackedDiscoveryServiceTest
extends
Base
Hive
RepositoryTest
{
public
class
GraphBackedDiscoveryServiceTest
extends
BaseRepositoryTest
{
@Inject
private
MetadataRepository
repositoryService
;
...
...
repository/src/test/scala/org/apache/atlas/query/GremlinTest2.scala
View file @
b65dd91c
...
...
@@ -107,7 +107,7 @@ class GremlinTest2 extends BaseGremlinTest {
}
@Test
def
testHighLevelLineage
{
val
r
=
HiveLineageQuery
(
"Tabl
e"
,
"sales_fact_monthly_mv"
,
val
r
=
InputLineageClosureQuery
(
"Table"
,
"nam
e"
,
"sales_fact_monthly_mv"
,
"LoadProcess"
,
"inputTables"
,
"outputTable"
,
...
...
@@ -116,7 +116,7 @@ class GremlinTest2 extends BaseGremlinTest {
}
@Test
def
testHighLevelLineageReturnGraph
{
val
r
=
HiveLineageQuery
(
"Tabl
e"
,
"sales_fact_monthly_mv"
,
val
r
=
InputLineageClosureQuery
(
"Table"
,
"nam
e"
,
"sales_fact_monthly_mv"
,
"LoadProcess"
,
"inputTables"
,
"outputTable"
,
...
...
@@ -127,7 +127,7 @@ class GremlinTest2 extends BaseGremlinTest {
}
@Test
def
testHighLevelWhereUsed
{
val
r
=
HiveWhereUsedQuery
(
"Tabl
e"
,
"sales_fact"
,
val
r
=
OutputLineageClosureQuery
(
"Table"
,
"nam
e"
,
"sales_fact"
,
"LoadProcess"
,
"inputTables"
,
"outputTable"
,
...
...
@@ -136,7 +136,7 @@ class GremlinTest2 extends BaseGremlinTest {
}
@Test
def
testHighLevelWhereUsedReturnGraph
{
val
r
=
HiveWhereUsedQuery
(
"Tabl
e"
,
"sales_fact"
,
val
r
=
OutputLineageClosureQuery
(
"Table"
,
"nam
e"
,
"sales_fact"
,
"LoadProcess"
,
"inputTables"
,
"outputTable"
,
...
...
server-api/src/main/java/org/apache/atlas/discovery/LineageService.java
View file @
b65dd91c
...
...
@@ -26,42 +26,50 @@ import org.apache.atlas.AtlasException;
public
interface
LineageService
{
/**
* Return the lineage outputs
for the given table
Name.
* Return the lineage outputs
graph for the given dataset
Name.
*
* @param
tableName table
Name
* @return Outputs as JSON
* @param
datasetName dataset
Name
* @return Outputs
Graph
as JSON
*/
String
getOutputs
(
String
table
Name
)
throws
AtlasException
;
String
getOutputs
Graph
(
String
dataset
Name
)
throws
AtlasException
;
/**
* Return the lineage
outputs graph for the given table
Name.
* Return the lineage
inputs graph for the given dataset
Name.
*
* @param
tableName table
Name
* @return
Out
puts Graph as JSON
* @param
datasetName dataset
Name
* @return
In
puts Graph as JSON
*/
String
get
OutputsGraph
(
String
table
Name
)
throws
AtlasException
;
String
get
InputsGraph
(
String
dataset
Name
)
throws
AtlasException
;
/**
* Return the lineage inputs
for the given tableName
.
* Return the lineage inputs
graph for the given entity id
.
*
* @param
tableName tableName
* @return Inputs as JSON
* @param
guid entity id
* @return Inputs
Graph
as JSON
*/
String
getInputs
(
String
tableName
)
throws
AtlasException
;
String
getInputs
GraphForEntity
(
String
guid
)
throws
AtlasException
;
/**
* Return the lineage inputs graph for the given
tableName
.
* Return the lineage inputs graph for the given
entity id
.
*
* @param
tableName tableName
* @param
guid entity id
* @return Inputs Graph as JSON
*/
String
getInputsGraph
(
String
tableName
)
throws
AtlasException
;
String
getOutputsGraphForEntity
(
String
guid
)
throws
AtlasException
;
/**
* Return the schema for the given datasetName.
*
* @param datasetName datasetName
* @return Schema as JSON
*/
String
getSchema
(
String
datasetName
)
throws
AtlasException
;
/**
* Return the schema for the given
tableName
.
* Return the schema for the given
entity id
.
*
* @param
tableName
tableName
* @param
guid
tableName
* @return Schema as JSON
*/
String
getSchema
(
String
tableName
)
throws
AtlasException
;
String
getSchema
ForEntity
(
String
guid
)
throws
AtlasException
;
}
typesystem/src/main/resources/atlas-application.properties
View file @
b65dd91c
...
...
@@ -49,14 +49,8 @@ atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address}
######### Hive Lineage Configs #########
# This models reflects the base super types for Data and Process
#atlas.lineage.hive.table.type.name=DataSet
#atlas.lineage.hive.process.type.name=Process
#atlas.lineage.hive.process.inputs.name=inputs
#atlas.lineage.hive.process.outputs.name=outputs
## Schema
atlas.lineage.
hive.table.schema.query.hive_table
=
hive_table where name
='%s'
\,
columns
atlas.lineage.
schema.query.hive_table
=
hive_table where __guid
='%s'
\,
columns
######### Notification Configs #########
atlas.notification.embedded
=
true
...
...
webapp/src/main/java/org/apache/atlas/web/resources/
Hive
LineageResource.java
→
webapp/src/main/java/org/apache/atlas/web/resources/
DataSet
LineageResource.java
View file @
b65dd91c
...
...
@@ -19,10 +19,9 @@
package
org
.
apache
.
atlas
.
web
.
resources
;
import
org.apache.atlas.AtlasClient
;
import
org.apache.atlas.typesystem.exception.EntityNotFoundException
;
import
org.apache.atlas.utils.ParamChecker
;
import
org.apache.atlas.discovery.DiscoveryException
;
import
org.apache.atlas.discovery.LineageService
;
import
org.apache.atlas.typesystem.exception.EntityNotFoundException
;
import
org.apache.atlas.web.util.Servlets
;
import
org.codehaus.jettison.json.JSONObject
;
import
org.slf4j.Logger
;
...
...
@@ -45,9 +44,9 @@ import javax.ws.rs.core.Response;
*/
@Path
(
"lineage/hive"
)
@Singleton
public
class
Hive
LineageResource
{
public
class
DataSet
LineageResource
{
private
static
final
Logger
LOG
=
LoggerFactory
.
getLogger
(
Hive
LineageResource
.
class
);
private
static
final
Logger
LOG
=
LoggerFactory
.
getLogger
(
DataSet
LineageResource
.
class
);
private
final
LineageService
lineageService
;
...
...
@@ -58,7 +57,7 @@ public class HiveLineageResource {
* @param lineageService lineage service handle
*/
@Inject
public
Hive
LineageResource
(
LineageService
lineageService
)
{
public
DataSet
LineageResource
(
LineageService
lineageService
)
{
this
.
lineageService
=
lineageService
;
}
...
...
@@ -75,7 +74,6 @@ public class HiveLineageResource {
LOG
.
info
(
"Fetching lineage inputs graph for tableName={}"
,
tableName
);
try
{
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
final
String
jsonResult
=
lineageService
.
getInputsGraph
(
tableName
);
JSONObject
response
=
new
JSONObject
();
...
...
@@ -109,7 +107,6 @@ public class HiveLineageResource {
LOG
.
info
(
"Fetching lineage outputs graph for tableName={}"
,
tableName
);
try
{
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
final
String
jsonResult
=
lineageService
.
getOutputsGraph
(
tableName
);
JSONObject
response
=
new
JSONObject
();
...
...
@@ -143,7 +140,6 @@ public class HiveLineageResource {
LOG
.
info
(
"Fetching schema for tableName={}"
,
tableName
);
try
{
ParamChecker
.
notEmpty
(
tableName
,
"table name cannot be null"
);
final
String
jsonResult
=
lineageService
.
getSchema
(
tableName
);
JSONObject
response
=
new
JSONObject
();
...
...
webapp/src/main/java/org/apache/atlas/web/resources/LineageResource.java
0 → 100644
View file @
b65dd91c
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package
org
.
apache
.
atlas
.
web
.
resources
;
import
org.apache.atlas.AtlasClient
;
import
org.apache.atlas.discovery.DiscoveryException
;
import
org.apache.atlas.discovery.LineageService
;
import
org.apache.atlas.typesystem.exception.EntityNotFoundException
;
import
org.apache.atlas.web.util.Servlets
;
import
org.codehaus.jettison.json.JSONObject
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
javax.inject.Inject
;
import
javax.inject.Singleton
;
import
javax.ws.rs.Consumes
;
import
javax.ws.rs.GET
;
import
javax.ws.rs.Path
;
import
javax.ws.rs.PathParam
;
import
javax.ws.rs.Produces
;
import
javax.ws.rs.WebApplicationException
;
import
javax.ws.rs.core.Response
;
@Path
(
"lineage"
)
@Singleton
public
class
LineageResource
{
private
static
final
Logger
LOG
=
LoggerFactory
.
getLogger
(
DataSetLineageResource
.
class
);
private
final
LineageService
lineageService
;
/**
* Created by the Guice ServletModule and injected with the
* configured LineageService.
*
* @param lineageService lineage service handle
*/
@Inject
public
LineageResource
(
LineageService
lineageService
)
{
this
.
lineageService
=
lineageService
;
}
/**
* Returns input lineage graph for the given entity id.
* @param guid dataset entity id
* @return
*/
@GET
@Path
(
"{guid}/inputs/graph"
)
@Consumes
(
Servlets
.
JSON_MEDIA_TYPE
)
@Produces
(
Servlets
.
JSON_MEDIA_TYPE
)
public
Response
inputsGraph
(
@PathParam
(
"guid"
)
String
guid
)
{
LOG
.
info
(
"Fetching lineage inputs graph for guid={}"
,
guid
);
try
{
final
String
jsonResult
=
lineageService
.
getInputsGraphForEntity
(
guid
);
JSONObject
response
=
new
JSONObject
();
response
.
put
(
AtlasClient
.
REQUEST_ID
,
Servlets
.
getRequestId
());
response
.
put
(
AtlasClient
.
RESULTS
,
new
JSONObject
(
jsonResult
));
return
Response
.
ok
(
response
).
build
();
}
catch
(
EntityNotFoundException
e
)
{
LOG
.
error
(
"entity not found for guid={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
NOT_FOUND
));
}
catch
(
DiscoveryException
|
IllegalArgumentException
e
)
{
LOG
.
error
(
"Unable to get lineage inputs graph for entity guid={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
BAD_REQUEST
));
}
catch
(
Throwable
e
)
{
LOG
.
error
(
"Unable to get lineage inputs graph for entity guid={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
INTERNAL_SERVER_ERROR
));
}
}
/**
* Returns the outputs graph for a given entity id.
*
* @param guid dataset entity id
*/
@GET
@Path
(
"{guid}/outputs/graph"
)
@Consumes
(
Servlets
.
JSON_MEDIA_TYPE
)
@Produces
(
Servlets
.
JSON_MEDIA_TYPE
)
public
Response
outputsGraph
(
@PathParam
(
"guid"
)
String
guid
)
{
LOG
.
info
(
"Fetching lineage outputs graph for entity guid={}"
,
guid
);
try
{
final
String
jsonResult
=
lineageService
.
getOutputsGraphForEntity
(
guid
);
JSONObject
response
=
new
JSONObject
();
response
.
put
(
AtlasClient
.
REQUEST_ID
,
Servlets
.
getRequestId
());
response
.
put
(
AtlasClient
.
RESULTS
,
new
JSONObject
(
jsonResult
));
return
Response
.
ok
(
response
).
build
();
}
catch
(
EntityNotFoundException
e
)
{
LOG
.
error
(
"table entity not found for {}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
NOT_FOUND
));
}
catch
(
DiscoveryException
|
IllegalArgumentException
e
)
{
LOG
.
error
(
"Unable to get lineage outputs graph for entity guid={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
BAD_REQUEST
));
}
catch
(
Throwable
e
)
{
LOG
.
error
(
"Unable to get lineage outputs graph for entity guid={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
INTERNAL_SERVER_ERROR
));
}
}
/**
* Returns the schema for the given dataset id.
*
* @param guid dataset entity id
*/
@GET
@Path
(
"{guid}/schema"
)
@Consumes
(
Servlets
.
JSON_MEDIA_TYPE
)
@Produces
(
Servlets
.
JSON_MEDIA_TYPE
)
public
Response
schema
(
@PathParam
(
"guid"
)
String
guid
)
{
LOG
.
info
(
"Fetching schema for entity guid={}"
,
guid
);
try
{
final
String
jsonResult
=
lineageService
.
getSchemaForEntity
(
guid
);
JSONObject
response
=
new
JSONObject
();
response
.
put
(
AtlasClient
.
REQUEST_ID
,
Servlets
.
getRequestId
());
response
.
put
(
AtlasClient
.
RESULTS
,
new
JSONObject
(
jsonResult
));
return
Response
.
ok
(
response
).
build
();
}
catch
(
EntityNotFoundException
e
)
{
LOG
.
error
(
"table entity not found for {}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
NOT_FOUND
));
}
catch
(
DiscoveryException
|
IllegalArgumentException
e
)
{
LOG
.
error
(
"Unable to get schema for entity guid={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
BAD_REQUEST
));
}
catch
(
Throwable
e
)
{
LOG
.
error
(
"Unable to get schema for entity={}"
,
guid
,
e
);
throw
new
WebApplicationException
(
Servlets
.
getErrorResponse
(
e
,
Response
.
Status
.
INTERNAL_SERVER_ERROR
));
}
}
}
webapp/src/test/java/org/apache/atlas/web/resources/
Hive
LineageJerseyResourceIT.java
→
webapp/src/test/java/org/apache/atlas/web/resources/
DataSet
LineageJerseyResourceIT.java
View file @
b65dd91c
...
...
@@ -38,7 +38,7 @@ import java.util.List;
/**
* Hive Lineage Integration Tests.
*/
public
class
Hive
LineageJerseyResourceIT
extends
BaseResourceIT
{
public
class
DataSet
LineageJerseyResourceIT
extends
BaseResourceIT
{
private
static
final
String
BASE_URI
=
"api/atlas/lineage/hive/table/"
;
private
String
salesFactTable
;
...
...
@@ -81,6 +81,22 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
}
@Test
public void testInputsGraphForEntity() throws Exception {
    // Resolve the monthly table's guid, then fetch its input lineage graph by id.
    String guid = serviceClient.getEntity(HIVE_TABLE_TYPE, "name", salesMonthlyTable).getId()._getId();
    JSONObject response = serviceClient.getInputGraphForEntity(guid);
    Assert.assertNotNull(response);

    JSONObject graph = response.getJSONObject("values");
    Assert.assertNotNull(graph);

    // Expected graph shape for the seeded lineage: 4 vertices connected by 4 edges.
    final JSONObject graphVertices = graph.getJSONObject("vertices");
    Assert.assertEquals(graphVertices.length(), 4);

    final JSONObject graphEdges = graph.getJSONObject("edges");
    Assert.assertEquals(graphEdges.length(), 4);
}
@Test
public
void
testOutputsGraph
()
throws
Exception
{
WebResource
resource
=
service
.
path
(
BASE_URI
).
path
(
salesFactTable
).
path
(
"outputs"
).
path
(
"graph"
);
...
...
@@ -109,6 +125,22 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
}
@Test
public void testOutputsGraphForEntity() throws Exception {
    // Resolve the fact table's guid, then fetch its output lineage graph by id.
    String guid = serviceClient.getEntity(HIVE_TABLE_TYPE, "name", salesFactTable).getId()._getId();
    JSONObject response = serviceClient.getOutputGraphForEntity(guid);
    Assert.assertNotNull(response);

    JSONObject graph = response.getJSONObject("values");
    Assert.assertNotNull(graph);

    // Expected graph shape for the seeded lineage: 3 vertices connected by 4 edges.
    final JSONObject graphVertices = graph.getJSONObject("vertices");
    Assert.assertEquals(graphVertices.length(), 3);

    final JSONObject graphEdges = graph.getJSONObject("edges");
    Assert.assertEquals(graphEdges.length(), 4);
}
@Test
public
void
testSchema
()
throws
Exception
{
WebResource
resource
=
service
.
path
(
BASE_URI
).
path
(
salesFactTable
).
path
(
"schema"
);
...
...
@@ -139,6 +171,24 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
}
@Test
public void testSchemaForEntity() throws Exception {
    // Resolve the fact table's guid, then fetch its column schema by id.
    String guid = serviceClient.getEntity(HIVE_TABLE_TYPE, "name", salesFactTable).getId()._getId();
    JSONObject response = serviceClient.getSchemaForEntity(guid);
    Assert.assertNotNull(response);

    // The seeded table has exactly 4 columns.
    JSONArray schemaRows = response.getJSONArray("rows");
    Assert.assertEquals(schemaRows.length(), 4);

    // Every row must be a hive_column with name/comment/dataType populated.
    for (int i = 0; i < schemaRows.length(); i++) {
        final JSONObject column = schemaRows.getJSONObject(i);
        Assert.assertNotNull(column.getString("name"));
        Assert.assertNotNull(column.getString("comment"));
        Assert.assertNotNull(column.getString("dataType"));
        Assert.assertEquals(column.getString("$typeName$"), "hive_column");
    }
}
@Test
public
void
testSchemaForEmptyTable
()
throws
Exception
{
WebResource
resource
=
service
.
path
(
BASE_URI
).
path
(
""
).
path
(
"schema"
);
...
...
@@ -184,8 +234,7 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
table
(
"sales_fact_daily_mv"
+
randomString
(),
"sales fact daily materialized view"
,
reportingDB
,
"Joe BI"
,
"MANAGED"
,
salesFactColumns
,
"Metric"
);
String
procName
=
"loadSalesDaily"
+
randomString
();
loadProcess
(
procName
,
"John ETL"
,
ImmutableList
.
of
(
salesFact
,
timeDim
),
loadProcess
(
"loadSalesDaily"
+
randomString
(),
"John ETL"
,
ImmutableList
.
of
(
salesFact
,
timeDim
),
ImmutableList
.
of
(
salesFactDaily
),
"create table as select "
,
"plan"
,
"id"
,
"graph"
,
"ETL"
);
salesMonthlyTable
=
"sales_fact_monthly_mv"
+
randomString
();
...
...
@@ -238,8 +287,8 @@ public class HiveLineageJerseyResourceIT extends BaseResourceIT {
Id
loadProcess
(
String
name
,
String
user
,
List
<
Id
>
inputTables
,
List
<
Id
>
outputTables
,
String
queryText
,
String
queryPlan
,
String
queryId
,
String
queryGraph
,
String
...
traitNames
)
throws
Exception
{
Referenceable
referenceable
=
new
Referenceable
(
HIVE_PROCESS_TYPE
,
traitNames
);
referenceable
.
set
(
AtlasClient
.
NAME
,
name
);
referenceable
.
set
(
AtlasClient
.
REFERENCEABLE_ATTRIBUTE_NAME
,
name
);
referenceable
.
set
(
"name"
,
name
);
referenceable
.
set
(
"qualifiedName"
,
name
);
referenceable
.
set
(
"user"
,
user
);
referenceable
.
set
(
"startTime"
,
System
.
currentTimeMillis
());
referenceable
.
set
(
"endTime"
,
System
.
currentTimeMillis
()
+
10000
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment