Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
9d1040b7
Commit
9d1040b7
authored
8 years ago
by
Suma Shivaprasad
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ATLAS-642 import-hive should create the lineage for external tables (svimal2106 via sumasai)
parent
4f681657
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
78 additions
and
5 deletions
+78
-5
HiveMetaStoreBridge.java
...ava/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
+64
-2
HiveHook.java
...ge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+4
-0
HiveMetaStoreBridgeTest.java
...org/apache/atlas/hive/bridge/HiveMetaStoreBridgeTest.java
+7
-1
HiveHookIT.java
.../src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
+2
-2
release-log.txt
release-log.txt
+1
-0
No files found.
addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
View file @
9d1040b7
...
...
@@ -27,6 +27,7 @@ import org.apache.atlas.fs.model.FSDataModel;
import
org.apache.atlas.fs.model.FSDataTypes
;
import
org.apache.atlas.hive.model.HiveDataModelGenerator
;
import
org.apache.atlas.hive.model.HiveDataTypes
;
import
org.apache.atlas.notification.hook.HookNotification
;
import
org.apache.atlas.typesystem.Referenceable
;
import
org.apache.atlas.typesystem.Struct
;
import
org.apache.atlas.typesystem.json.InstanceSerialization
;
...
...
@@ -37,6 +38,7 @@ import org.apache.commons.configuration.Configuration;
import
org.apache.commons.lang.RandomStringUtils
;
import
org.apache.hadoop.fs.Path
;
import
org.apache.hadoop.hive.conf.HiveConf
;
import
org.apache.hadoop.hive.metastore.TableType
;
import
org.apache.hadoop.hive.metastore.api.Database
;
import
org.apache.hadoop.hive.metastore.api.FieldSchema
;
import
org.apache.hadoop.hive.metastore.api.Order
;
...
...
@@ -239,6 +241,18 @@ public class HiveMetaStoreBridge {
return
String
.
format
(
"%s@%s"
,
dbName
.
toLowerCase
(),
clusterName
);
}
/**
 * Builds a synthetic "create external table" statement for the given table.
 *
 * The statement lists every column returned by {@code table.getAllCols()} as
 * {@code "<name> <type>"}, separated by commas, followed by the location clause.
 * A table without columns yields an empty column list ({@code "()"}) instead of
 * failing while trimming a trailing separator.
 *
 * @param table    table whose name and columns are rendered into the statement
 * @param location location string placed inside the quoted location clause
 * @return the generated statement text
 */
private String getCreateTableString(Table table, String location) {
    StringBuilder columns = new StringBuilder();
    for (FieldSchema col : table.getAllCols()) {
        // Emit the separator before every column except the first, so there is
        // never a trailing comma to strip (and nothing to strip when empty).
        if (columns.length() > 0) {
            columns.append(',');
        }
        columns.append(col.getName()).append(' ').append(col.getType());
    }
    return "create external table " + table.getTableName() + "(" + columns + ")"
            + " location '" + location + "'";
}
/**
* Imports all tables for the given db
* @param databaseName
...
...
@@ -247,10 +261,45 @@ public class HiveMetaStoreBridge {
*/
private void importTables(Referenceable databaseReferenceable, String databaseName) throws Exception {
    // Register every table of the database; for external tables additionally
    // register a "create table" process linking the data location to the table,
    // unless a process with the table's qualified name is already known.
    final List<String> tableNames = hiveClient.getAllTables(databaseName);
    LOG.info("Importing tables {} for db {}", tableNames.toString(), databaseName);

    for (final String currentName : tableNames) {
        final Table hiveTable = hiveClient.getTable(databaseName, currentName);
        final Referenceable registeredTable = registerTable(databaseReferenceable, hiveTable);

        // Only external tables get a lineage process.
        if (hiveTable.getTableType() != TableType.EXTERNAL_TABLE) {
            continue;
        }

        final String qualifiedName = getTableQualifiedName(clusterName, hiveTable);
        final Referenceable existingProcess = getProcessReference(qualifiedName);
        if (existingProcess != null) {
            LOG.info("Process {} is already registered", existingProcess.toString());
            continue;
        }

        LOG.info("Attempting to register create table process for {}", qualifiedName);
        final Referenceable createProcess = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());

        final String dataLocation = hiveTable.getDataLocation().toString();
        final Referenceable locationEntity = fillHDFSDataSet(dataLocation);
        final String createQuery = getCreateTableString(hiveTable, dataLocation);

        // Input is the data location, output is the table registered above.
        final List<Referenceable> processInputs = new ArrayList<>();
        processInputs.add(locationEntity);
        final List<Referenceable> processOutputs = new ArrayList<>();
        processOutputs.add(registeredTable);

        createProcess.set("inputs", processInputs);
        createProcess.set("outputs", processOutputs);
        createProcess.set("userName", hiveTable.getOwner());
        createProcess.set("startTime", new Date(System.currentTimeMillis()));
        createProcess.set("endTime", new Date(System.currentTimeMillis()));
        createProcess.set("operationType", "CREATETABLE");
        createProcess.set("queryText", createQuery);
        // The generated statement doubles as the query id.
        createProcess.set("queryId", createQuery);
        createProcess.set("queryPlan", "{}");
        createProcess.set("clusterName", clusterName);

        final List<String> queryHistory = new ArrayList<>(1);
        queryHistory.add(createQuery);
        createProcess.set("recentQueries", queryHistory);

        // The process is keyed by the table's qualified name so that
        // getProcessReference(qualifiedName) finds it on a later import.
        createProcess.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName);
        createProcess.set(AtlasClient.NAME, createQuery);

        registerInstance(createProcess);
    }
}
...
...
@@ -269,9 +318,21 @@ public class HiveMetaStoreBridge {
return
getEntityReferenceFromDSL
(
typeName
,
dslQuery
);
}
/**
 * Looks up the process entity whose qualified name equals the given value.
 *
 * @param qualifiedName qualified name to search for
 * @return whatever {@code getEntityReferenceFromDSL} returns for the process query;
 *         callers in this file treat {@code null} as "not registered"
 * @throws Exception propagated from the underlying lookup
 */
private Referenceable getProcessReference(String qualifiedName) throws Exception {
    LOG.debug("Getting reference for process {}", qualifiedName);
    final String processType = HiveDataTypes.HIVE_PROCESS.getName();
    return getEntityReferenceFromDSL(processType, getProcessDSLQuery(processType, qualifiedName));
}
/**
 * Builds the DSL query that selects entities of the given type by qualified name.
 *
 * @param typeName      entity type name placed at the start of the query
 * @param qualifiedName value compared against the {@code qualifiedName} attribute
 * @return the query text, of the form {@code "<typeName> as t where qualifiedName = '<qualifiedName>'"}
 */
static String getProcessDSLQuery(String typeName, String qualifiedName) throws Exception {
    return String.format("%s as t where qualifiedName = '%s'", typeName, qualifiedName);
}
static
String
getTableDSLQuery
(
String
clusterName
,
String
dbName
,
String
tableName
,
String
typeName
,
boolean
isTemporary
)
{
String
entityName
=
getTableQualifiedName
(
clusterName
,
dbName
,
tableName
,
isTemporary
);
return
String
.
format
(
"%s as t where
n
ame = '%s'"
,
typeName
,
entityName
);
return
String
.
format
(
"%s as t where
qualifiedN
ame = '%s'"
,
typeName
,
entityName
);
}
/**
...
...
@@ -398,6 +459,7 @@ public class HiveMetaStoreBridge {
String
tableName
=
table
.
getTableName
();
LOG
.
info
(
"Attempting to register table ["
+
tableName
+
"]"
);
Referenceable
tableReference
=
getTableReference
(
table
);
LOG
.
info
(
"Found result "
+
tableReference
);
if
(
tableReference
==
null
)
{
tableReference
=
createTableInstance
(
dbReference
,
table
);
tableReference
=
registerInstance
(
tableReference
);
...
...
This diff is collapsed.
Click to expand it.
addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
View file @
9d1040b7
...
...
@@ -678,6 +678,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
}};
Referenceable
processReferenceable
=
getProcessReferenceable
(
dgiBridge
,
event
,
inputs
,
outputs
);
String
tableQualifiedName
=
dgiBridge
.
getTableQualifiedName
(
dgiBridge
.
getClusterName
(),
hiveTable
);
if
(
isCreateOp
(
event
)){
processReferenceable
.
set
(
AtlasClient
.
REFERENCEABLE_ATTRIBUTE_NAME
,
tableQualifiedName
);
}
entities
.
addAll
(
tables
.
values
());
entities
.
add
(
processReferenceable
);
messages
.
add
(
new
HookNotification
.
EntityUpdateRequest
(
event
.
getUser
(),
entities
));
...
...
This diff is collapsed.
Click to expand it.
addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeTest.java
View file @
9d1040b7
...
...
@@ -90,7 +90,7 @@ public class HiveMetaStoreBridgeTest {
public
void
testImportThatUpdatesRegisteredTable
()
throws
Exception
{
setupDB
(
hiveClient
,
TEST_DB_NAME
);
setupTable
(
hiveClient
,
TEST_DB_NAME
,
TEST_TABLE_NAME
);
Table
hiveTable
=
setupTable
(
hiveClient
,
TEST_DB_NAME
,
TEST_TABLE_NAME
);
returnExistingDatabase
(
TEST_DB_NAME
,
atlasClient
,
CLUSTER_NAME
);
...
...
@@ -99,6 +99,9 @@ public class HiveMetaStoreBridgeTest {
HiveDataTypes
.
HIVE_TABLE
.
getName
(),
false
))).
thenReturn
(
getEntityReference
(
"82e06b34-9151-4023-aa9d-b82103a50e77"
));
when
(
atlasClient
.
getEntity
(
"82e06b34-9151-4023-aa9d-b82103a50e77"
)).
thenReturn
(
createTableReference
());
String
processQualifiedName
=
HiveMetaStoreBridge
.
getTableQualifiedName
(
CLUSTER_NAME
,
hiveTable
);
when
(
atlasClient
.
searchByDSL
(
HiveMetaStoreBridge
.
getProcessDSLQuery
(
HiveDataTypes
.
HIVE_PROCESS
.
getName
(),
processQualifiedName
))).
thenReturn
(
getEntityReference
(
"82e06b34-9151-4023-aa9d-b82103a50e77"
));
HiveMetaStoreBridge
bridge
=
new
HiveMetaStoreBridge
(
CLUSTER_NAME
,
hiveClient
,
atlasClient
);
bridge
.
importHiveMetadata
();
...
...
@@ -140,6 +143,9 @@ public class HiveMetaStoreBridgeTest {
TEST_TABLE_NAME
,
HiveDataTypes
.
HIVE_TABLE
.
getName
(),
false
))).
thenReturn
(
getEntityReference
(
"82e06b34-9151-4023-aa9d-b82103a50e77"
));
String
processQualifiedName
=
HiveMetaStoreBridge
.
getTableQualifiedName
(
CLUSTER_NAME
,
hiveTable
);
when
(
atlasClient
.
searchByDSL
(
HiveMetaStoreBridge
.
getProcessDSLQuery
(
HiveDataTypes
.
HIVE_PROCESS
.
getName
(),
processQualifiedName
))).
thenReturn
(
getEntityReference
(
"82e06b34-9151-4023-aa9d-b82103a50e77"
));
when
(
atlasClient
.
getEntity
(
"82e06b34-9151-4023-aa9d-b82103a50e77"
)).
thenReturn
(
createTableReference
());
Partition
partition
=
mock
(
Partition
.
class
);
...
...
This diff is collapsed.
Click to expand it.
addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
View file @
9d1040b7
...
...
@@ -240,8 +240,8 @@ public class HiveHookIT {
final
String
query
=
String
.
format
(
"create TEMPORARY EXTERNAL table %s.%s( %s, %s) location '%s'"
,
DEFAULT_DB
,
tableName
,
colName
+
" int"
,
"name string"
,
pFile
);
runCommand
(
query
);
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
,
null
,
true
);
String
processId
=
assertProcessIsRegistered
(
query
);
String
processId
=
assertEntityIsRegistered
(
HiveDataTypes
.
HIVE_PROCESS
.
getName
(),
AtlasClient
.
REFERENCEABLE_ATTRIBUTE_NAME
,
HiveMetaStoreBridge
.
getTableQualifiedName
(
CLUSTER_NAME
,
DEFAULT_DB
,
tableName
,
true
),
null
);
Referenceable
processReference
=
atlasClient
.
getEntity
(
processId
);
assertEquals
(
processReference
.
get
(
"userName"
),
UserGroupInformation
.
getCurrentUser
().
getShortUserName
());
...
...
This diff is collapsed.
Click to expand it.
release-log.txt
View file @
9d1040b7
...
...
@@ -24,6 +24,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES:
ATLAS-642 import-hive should create the lineage for external tables (svimal2106 via sumasai)
ATLAS-901 Log messages that cannot be sent to Kafka to a specific log configuration (yhemanth)
ATLAS-911 Get entity by unique attribute doesn't enforce type (shwethags)
ATLAS-899 Fix Hive Hook documentation (sumasai via yhemanth)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment