dataplatform / atlas · Commits · 5618ad4c

Commit 5618ad4c, authored Feb 01, 2018 by rdsolani; committed by Madhan Neethiraj, Feb 13, 2018
ATLAS-2439: updated Sqoop hook to use V2 notifications
Signed-off-by: Madhan Neethiraj <madhan@apache.org>
(cherry picked from commit e8908dbfe1d4dfe641cc7a802625f396aa0a399d)
parent 9cd28bbe
Showing 1 changed file with 113 additions and 99 deletions
addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java (+113, -99)
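In short, the commit swaps the hook's V1 notification path (Referenceable instances sent as an EntityCreateRequest) for the V2 path (AtlasEntity objects wrapped in an AtlasEntitiesWithExtInfo envelope and sent as an EntityUpdateRequestV2). A minimal sketch of the V2 pattern, using the same Atlas classes this diff touches; the type names and attribute values below are illustrative, not taken from the commit:

    import org.apache.atlas.model.instance.AtlasEntity;
    import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
    import org.apache.atlas.model.notification.HookNotification;
    import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
    import org.apache.atlas.type.AtlasTypeUtil;

    public class V2NotificationSketch {
        public static HookNotification buildMessage(String user) {
            // V2 style: plain AtlasEntity objects populated via setAttribute(...)
            AtlasEntity db = new AtlasEntity("hive_db");                  // illustrative type name
            db.setAttribute("name", "sales");                             // illustrative values
            db.setAttribute("qualifiedName", "sales@primary");

            AtlasEntity table = new AtlasEntity("hive_table");
            table.setAttribute("name", "orders");
            table.setAttribute("qualifiedName", "sales.orders@primary");
            // V2 references point at an object-id instead of nesting the entity itself
            table.setAttribute("db", AtlasTypeUtil.getAtlasObjectId(db));

            // the primary entity rides in the envelope; everything it references
            // is registered separately as a referred entity
            AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(table);
            entities.addReferredEntity(db);

            return new EntityUpdateRequestV2(user, entities);
        }
    }

That is the visible design shift in the diff below: where the V1 code nested one Referenceable inside another (tableRef.set(HiveMetaStoreBridge.DB, dbRef)), the V2 code stores an object-id and registers the referred entity on the envelope, so each entity is serialized once.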
@@ -26,10 +26,12 @@ import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
 import org.apache.atlas.hive.model.HiveDataTypes;
 import org.apache.atlas.hook.AtlasHook;
 import org.apache.atlas.hook.AtlasHookException;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.notification.HookNotification;
-import org.apache.atlas.v1.model.instance.Referenceable;
-import org.apache.atlas.v1.model.notification.HookNotificationV1.EntityCreateRequest;
+import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
 import org.apache.atlas.sqoop.model.SqoopDataTypes;
+import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.commons.configuration.Configuration;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.sqoop.SqoopJobDataPublisher;
@@ -47,151 +49,163 @@ import java.util.Properties;
  * AtlasHook sends lineage information to the AtlasSever.
  */
 public class SqoopHook extends SqoopJobDataPublisher {
     private static final Logger LOG = LoggerFactory.getLogger(SqoopHook.class);
 
-    public static final String CONF_PREFIX = "atlas.hook.sqoop.";
-    public static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";
-    public static final String ATLAS_CLUSTER_NAME = "atlas.cluster.name";
+    public static final String CONF_PREFIX          = "atlas.hook.sqoop.";
+    public static final String HOOK_NUM_RETRIES     = CONF_PREFIX + "numRetries";
+    public static final String ATLAS_CLUSTER_NAME   = "atlas.cluster.name";
     public static final String DEFAULT_CLUSTER_NAME = "primary";
-    public static final String USER = "userName";
-    public static final String DB_STORE_TYPE = "dbStoreType";
+    public static final String USER                 = "userName";
+    public static final String DB_STORE_TYPE        = "dbStoreType";
     public static final String DB_STORE_USAGE       = "storeUse";
-    public static final String SOURCE = "source";
-    public static final String DESCRIPTION = "description";
-    public static final String STORE_URI = "storeUri";
-    public static final String OPERATION = "operation";
-    public static final String START_TIME = "startTime";
-    public static final String END_TIME = "endTime";
-    public static final String CMD_LINE_OPTS = "commandlineOpts";
-    // multiple inputs and outputs for process
-    public static final String INPUTS = "inputs";
-    public static final String OUTPUTS = "outputs";
+    public static final String SOURCE               = "source";
+    public static final String DESCRIPTION          = "description";
+    public static final String STORE_URI            = "storeUri";
+    public static final String OPERATION            = "operation";
+    public static final String START_TIME           = "startTime";
+    public static final String END_TIME             = "endTime";
+    public static final String CMD_LINE_OPTS        = "commandlineOpts";
+    public static final String INPUTS               = "inputs";
+    public static final String OUTPUTS              = "outputs";
 
     static {
         org.apache.hadoop.conf.Configuration.addDefaultResource("sqoop-site.xml");
     }
 
-    public Referenceable createHiveDatabaseInstance(String clusterName, String dbName) {
-        Referenceable dbRef = new Referenceable(HiveDataTypes.HIVE_DB.getName());
-
-        dbRef.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
-        dbRef.set(AtlasClient.NAME, dbName);
-        dbRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getDBQualifiedName(clusterName, dbName));
-
-        return dbRef;
-    }
-
-    public Referenceable createHiveTableInstance(String clusterName, Referenceable dbRef, String tableName, String dbName) {
-        Referenceable tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
-
-        tableRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName));
-        tableRef.set(AtlasClient.NAME, tableName.toLowerCase());
-        tableRef.set(HiveMetaStoreBridge.DB, dbRef);
-
-        return tableRef;
-    }
+    @Override
+    public void publish(SqoopJobDataPublisher.Data data) throws AtlasHookException {
+        try {
+            Configuration atlasProperties = ApplicationProperties.get();
+            String        clusterName     = atlasProperties.getString(ATLAS_CLUSTER_NAME, DEFAULT_CLUSTER_NAME);
+            AtlasEntity   entDbStore      = createDBStoreInstance(data);
+            AtlasEntity   entHiveDb       = createHiveDatabaseInstance(clusterName, data.getHiveDB());
+            AtlasEntity   entHiveTable    = createHiveTableInstance(entHiveDb, data.getHiveTable());
+            AtlasEntity   entProcess      = createSqoopProcessInstance(entDbStore, entHiveTable, data, clusterName);
+
+            AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(entProcess);
+
+            entities.addReferredEntity(entDbStore);
+            entities.addReferredEntity(entHiveDb);
+            entities.addReferredEntity(entHiveTable);
+
+            HookNotification message = new EntityUpdateRequestV2(AtlasHook.getUser(), entities);
+
+            AtlasHook.notifyEntities(Arrays.asList(message), atlasProperties.getInt(HOOK_NUM_RETRIES, 3));
+        } catch (Exception e) {
+            throw new AtlasHookException("SqoopHook.publish() failed.", e);
+        }
+    }
 
-    private Referenceable createDBStoreInstance(SqoopJobDataPublisher.Data data) throws ImportException {
-        Referenceable storeRef = new Referenceable(SqoopDataTypes.SQOOP_DBDATASTORE.getName());
+    private AtlasEntity createHiveDatabaseInstance(String clusterName, String dbName) {
+        AtlasEntity entHiveDb     = new AtlasEntity(HiveDataTypes.HIVE_DB.getName());
+        String      qualifiedName = HiveMetaStoreBridge.getDBQualifiedName(clusterName, dbName);
+
+        entHiveDb.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
+        entHiveDb.setAttribute(AtlasClient.NAME, dbName);
+        entHiveDb.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName);
+
+        return entHiveDb;
+    }
+
+    private AtlasEntity createHiveTableInstance(AtlasEntity entHiveDb, String tableName) {
+        AtlasEntity entHiveTable  = new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName());
+        String      qualifiedName = HiveMetaStoreBridge.getTableQualifiedName((String) entHiveDb.getAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE),
+                                                                              (String) entHiveDb.getAttribute(AtlasClient.NAME), tableName);
+
+        entHiveTable.setAttribute(AtlasClient.NAME, tableName.toLowerCase());
+        entHiveTable.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName);
+        entHiveTable.setAttribute(HiveMetaStoreBridge.DB, AtlasTypeUtil.getAtlasObjectId(entHiveDb));
+
+        return entHiveTable;
+    }
+
+    private AtlasEntity createDBStoreInstance(SqoopJobDataPublisher.Data data) throws ImportException {
         String table = data.getStoreTable();
         String query = data.getStoreQuery();
 
         if (StringUtils.isBlank(table) && StringUtils.isBlank(query)) {
             throw new ImportException("Both table and query cannot be empty for DBStoreInstance");
         }
 
-        String usage = table != null ? "TABLE" : "QUERY";
-        String source = table != null ? table : query;
-        String name = getSqoopDBStoreName(data);
+        String usage  = table != null ? "TABLE" : "QUERY";
+        String source = table != null ? table : query;
+        String name   = getSqoopDBStoreName(data);
 
-        storeRef.set(AtlasClient.NAME, name);
-        storeRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, name);
-        storeRef.set(SqoopHook.DB_STORE_TYPE, data.getStoreType());
-        storeRef.set(SqoopHook.DB_STORE_USAGE, usage);
-        storeRef.set(SqoopHook.STORE_URI, data.getUrl());
-        storeRef.set(SqoopHook.SOURCE, source);
-        storeRef.set(SqoopHook.DESCRIPTION, "");
-        storeRef.set(AtlasClient.OWNER, data.getUser());
-
-        return storeRef;
+        AtlasEntity entDbStore = new AtlasEntity(SqoopDataTypes.SQOOP_DBDATASTORE.getName());
+
+        entDbStore.setAttribute(AtlasClient.NAME, name);
+        entDbStore.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, name);
+        entDbStore.setAttribute(SqoopHook.DB_STORE_TYPE, data.getStoreType());
+        entDbStore.setAttribute(SqoopHook.DB_STORE_USAGE, usage);
+        entDbStore.setAttribute(SqoopHook.STORE_URI, data.getUrl());
+        entDbStore.setAttribute(SqoopHook.SOURCE, source);
+        entDbStore.setAttribute(SqoopHook.DESCRIPTION, "");
+        entDbStore.setAttribute(AtlasClient.OWNER, data.getUser());
+
+        return entDbStore;
     }
 
-    private Referenceable createSqoopProcessInstance(Referenceable dbStoreRef, Referenceable hiveTableRef, SqoopJobDataPublisher.Data data, String clusterName) {
-        Referenceable procRef = new Referenceable(SqoopDataTypes.SQOOP_PROCESS.getName());
-        final String sqoopProcessName = getSqoopProcessName(data, clusterName);
-
-        procRef.set(AtlasClient.NAME, sqoopProcessName);
-        procRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sqoopProcessName);
-        procRef.set(SqoopHook.OPERATION, data.getOperation());
-
-        if (isImportOperation(data)) {
-            procRef.set(SqoopHook.INPUTS, dbStoreRef);
-            procRef.set(SqoopHook.OUTPUTS, hiveTableRef);
-        } else {
-            procRef.set(SqoopHook.INPUTS, hiveTableRef);
-            procRef.set(SqoopHook.OUTPUTS, dbStoreRef);
-        }
-
-        procRef.set(SqoopHook.USER, data.getUser());
-        procRef.set(SqoopHook.START_TIME, new Date(data.getStartTime()));
-        procRef.set(SqoopHook.END_TIME, new Date(data.getEndTime()));
-
-        Map<String, String> sqoopOptionsMap = new HashMap<>();
-        Properties options = data.getOptions();
-
-        for (Object k : options.keySet()) {
-            sqoopOptionsMap.put((String) k, (String) options.get(k));
-        }
-
-        procRef.set(SqoopHook.CMD_LINE_OPTS, sqoopOptionsMap);
-
-        return procRef;
-    }
-
-    private boolean isImportOperation(SqoopJobDataPublisher.Data data) {
-        return data.getOperation().toLowerCase().equals("import");
-    }
+    private AtlasEntity createSqoopProcessInstance(AtlasEntity entDbStore, AtlasEntity entHiveTable, SqoopJobDataPublisher.Data data, String clusterName) {
+        AtlasEntity         entProcess       = new AtlasEntity(SqoopDataTypes.SQOOP_PROCESS.getName());
+        String              sqoopProcessName = getSqoopProcessName(data, clusterName);
+        Map<String, String> sqoopOptionsMap  = new HashMap<>();
+        Properties          options          = data.getOptions();
+
+        for (Object k : options.keySet()) {
+            sqoopOptionsMap.put((String) k, (String) options.get(k));
+        }
+
+        entProcess.setAttribute(AtlasClient.NAME, sqoopProcessName);
+        entProcess.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sqoopProcessName);
+        entProcess.setAttribute(SqoopHook.OPERATION, data.getOperation());
+
+        if (isImportOperation(data)) {
+            entProcess.setAttribute(SqoopHook.INPUTS, AtlasTypeUtil.getAtlasObjectId(entDbStore));
+            entProcess.setAttribute(SqoopHook.OUTPUTS, AtlasTypeUtil.getAtlasObjectId(entHiveTable));
+        } else {
+            entProcess.setAttribute(SqoopHook.INPUTS, AtlasTypeUtil.getAtlasObjectId(entHiveTable));
+            entProcess.setAttribute(SqoopHook.OUTPUTS, AtlasTypeUtil.getAtlasObjectId(entDbStore));
+        }
+
+        entProcess.setAttribute(SqoopHook.USER, data.getUser());
+        entProcess.setAttribute(SqoopHook.START_TIME, new Date(data.getStartTime()));
+        entProcess.setAttribute(SqoopHook.END_TIME, new Date(data.getEndTime()));
+        entProcess.setAttribute(SqoopHook.CMD_LINE_OPTS, sqoopOptionsMap);
+
+        return entProcess;
+    }
 
     static String getSqoopProcessName(Data data, String clusterName) {
         StringBuilder name = new StringBuilder(String.format("sqoop %s --connect %s", data.getOperation(), data.getUrl()));
 
         if (StringUtils.isNotEmpty(data.getStoreTable())) {
             name.append(" --table ").append(data.getStoreTable());
         }
 
         if (StringUtils.isNotEmpty(data.getStoreQuery())) {
             name.append(" --query ").append(data.getStoreQuery());
         }
 
         name.append(String.format(" --hive-%s --hive-database %s --hive-table %s --hive-cluster %s",
                                   data.getOperation(), data.getHiveDB().toLowerCase(), data.getHiveTable().toLowerCase(), clusterName));
 
         return name.toString();
     }
 
     static String getSqoopDBStoreName(SqoopJobDataPublisher.Data data) {
         StringBuilder name = new StringBuilder(String.format("%s --url %s", data.getStoreType(), data.getUrl()));
 
         if (StringUtils.isNotEmpty(data.getStoreTable())) {
             name.append(" --table ").append(data.getStoreTable());
         }
 
         if (StringUtils.isNotEmpty(data.getStoreQuery())) {
             name.append(" --query ").append(data.getStoreQuery());
         }
 
         return name.toString();
     }
 
-    @Override
-    public void publish(SqoopJobDataPublisher.Data data) throws AtlasHookException {
-        try {
-            Configuration atlasProperties = ApplicationProperties.get();
-            String clusterName = atlasProperties.getString(ATLAS_CLUSTER_NAME, DEFAULT_CLUSTER_NAME);
-
-            Referenceable dbStoreRef = createDBStoreInstance(data);
-            Referenceable dbRef = createHiveDatabaseInstance(clusterName, data.getHiveDB());
-            Referenceable hiveTableRef = createHiveTableInstance(clusterName, dbRef, data.getHiveTable(), data.getHiveDB());
-            Referenceable procRef = createSqoopProcessInstance(dbStoreRef, hiveTableRef, data, clusterName);
-
-            int maxRetries = atlasProperties.getInt(HOOK_NUM_RETRIES, 3);
-            HookNotification message = new EntityCreateRequest(AtlasHook.getUser(), dbStoreRef, dbRef, hiveTableRef, procRef);
-
-            AtlasHook.notifyEntities(Arrays.asList(message), maxRetries);
-        } catch (Exception e) {
-            throw new AtlasHookException("SqoopHook.publish() failed.", e);
-        }
-    }
+    static boolean isImportOperation(SqoopJobDataPublisher.Data data) {
+        return data.getOperation().toLowerCase().equals("import");
+    }
 }
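For context: Sqoop invokes publish() through its job-data publisher mechanism. Per the Atlas Sqoop bridge documentation, the hook is enabled by setting sqoop.job.data.publish.class to org.apache.atlas.sqoop.hook.SqoopHook in sqoop-site.xml, which the static initializer above loads as a default Hadoop configuration resource.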