Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
b4e4f604
Commit
b4e4f604
authored
4 years ago
by
Pinal
Committed by
nixonrodrigues
4 years ago
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ATLAS-3938 : Import Hive Script: Support deletion of non existing database and table entities
parent
d0de3897
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
246 additions
and
10 deletions
+246
-10
import-hive.sh
addons/hive-bridge/src/bin/import-hive.sh
+1
-0
HiveMetaStoreBridge.java
...ava/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
+228
-1
AtlasClientV2.java
...ient-v2/src/main/java/org/apache/atlas/AtlasClientV2.java
+17
-9
No files found.
addons/hive-bridge/src/bin/import-hive.sh
View file @
b4e4f604
...
...
@@ -139,6 +139,7 @@ do
--database
)
IMPORT_ARGS
=
"
$IMPORT_ARGS
--database
$1
"
;
shift
;;
--table
)
IMPORT_ARGS
=
"
$IMPORT_ARGS
--table
$1
"
;
shift
;;
--filename
)
IMPORT_ARGS
=
"
$IMPORT_ARGS
--filename
$1
"
;
shift
;;
-deleteNonExisting
)
IMPORT_ARGS
=
"
$IMPORT_ARGS
-deleteNonExisting"
;;
""
)
break
;;
*
)
JVM_ARGS
=
"
$JVM_ARGS
$option
"
esac
...
...
This diff is collapsed.
Click to expand it.
addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
View file @
b4e4f604
...
...
@@ -28,6 +28,8 @@ import org.apache.atlas.AtlasServiceException;
import
org.apache.atlas.hive.hook.events.BaseHiveEvent
;
import
org.apache.atlas.hive.model.HiveDataTypes
;
import
org.apache.atlas.hook.AtlasHookException
;
import
org.apache.atlas.model.discovery.AtlasSearchResult
;
import
org.apache.atlas.model.discovery.SearchParameters
;
import
org.apache.atlas.model.instance.AtlasEntityHeader
;
import
org.apache.atlas.model.instance.EntityMutationResponse
;
import
org.apache.atlas.model.instance.EntityMutations
;
...
...
@@ -61,6 +63,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import
org.apache.hadoop.hive.metastore.api.StorageDescriptor
;
import
org.apache.hadoop.hive.ql.metadata.Hive
;
import
org.apache.hadoop.hive.ql.metadata.HiveException
;
import
org.apache.hadoop.hive.ql.metadata.InvalidTableException
;
import
org.apache.hadoop.hive.ql.metadata.Table
;
import
org.apache.hadoop.hive.ql.session.SessionState
;
import
org.apache.hadoop.security.UserGroupInformation
;
...
...
@@ -97,12 +100,15 @@ public class HiveMetaStoreBridge {
public
static
final
String
SEP
=
":"
.
intern
();
public
static
final
String
HDFS_PATH
=
"hdfs_path"
;
public
static
final
String
DEFAULT_METASTORE_CATALOG
=
"hive"
;
public
static
final
String
HIVE_TABLE_DB_EDGE_LABEL
=
"__hive_table.db"
;
public
static
final
String
HOOK_HIVE_PAGE_LIMIT
=
CONF_PREFIX
+
"page.limit"
;
public
static
final
String
HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2
=
"v2"
;
private
static
final
int
EXIT_CODE_SUCCESS
=
0
;
private
static
final
int
EXIT_CODE_FAILED
=
1
;
private
static
final
String
DEFAULT_ATLAS_URL
=
"http://localhost:21000/"
;
private
static
int
pageLimit
=
10000
;
private
final
String
metadataNamespace
;
private
final
Hive
hiveClient
;
...
...
@@ -122,9 +128,13 @@ public class HiveMetaStoreBridge {
options
.
addOption
(
"t"
,
"table"
,
true
,
"Table name"
);
options
.
addOption
(
"f"
,
"filename"
,
true
,
"Filename"
);
options
.
addOption
(
"failOnError"
,
false
,
"failOnError"
);
options
.
addOption
(
"deleteNonExisting"
,
false
,
"Delete database and table entities in Atlas if not present in Hive"
);
CommandLine
cmd
=
new
BasicParser
().
parse
(
options
,
args
);
boolean
failOnError
=
cmd
.
hasOption
(
"failOnError"
);
boolean
deleteNonExisting
=
cmd
.
hasOption
(
"deleteNonExisting"
);
LOG
.
info
(
"delete non existing flag : {} "
,
deleteNonExisting
);
String
databaseToImport
=
cmd
.
getOptionValue
(
"d"
);
String
tableToImport
=
cmd
.
getOptionValue
(
"t"
);
String
fileToImport
=
cmd
.
getOptionValue
(
"f"
);
...
...
@@ -148,7 +158,9 @@ public class HiveMetaStoreBridge {
HiveMetaStoreBridge
hiveMetaStoreBridge
=
new
HiveMetaStoreBridge
(
atlasConf
,
new
HiveConf
(),
atlasClientV2
);
if
(
StringUtils
.
isNotEmpty
(
fileToImport
))
{
if
(
deleteNonExisting
)
{
hiveMetaStoreBridge
.
deleteEntitiesForNonExistingHiveMetadata
(
failOnError
);
}
else
if
(
StringUtils
.
isNotEmpty
(
fileToImport
))
{
File
f
=
new
File
(
fileToImport
);
if
(
f
.
exists
()
&&
f
.
canRead
())
{
...
...
@@ -212,6 +224,8 @@ public class HiveMetaStoreBridge {
System
.
out
.
println
(
" database1:tbl1"
);
System
.
out
.
println
(
" database1:tbl2"
);
System
.
out
.
println
(
" database2:tbl2"
);
System
.
out
.
println
(
"Usage 5: import-hive.sh [-deleteNonExisting] "
);
System
.
out
.
println
(
" Deletes databases and tables which are not in Hive ..."
);
System
.
out
.
println
();
}
...
...
@@ -225,6 +239,9 @@ public class HiveMetaStoreBridge {
this
.
atlasClientV2
=
atlasClientV2
;
this
.
convertHdfsPathToLowerCase
=
atlasProperties
.
getBoolean
(
HDFS_PATH_CONVERT_TO_LOWER_CASE
,
false
);
this
.
awsS3AtlasModelVersion
=
atlasProperties
.
getString
(
HOOK_AWS_S3_ATLAS_MODEL_VERSION
,
HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2
);
if
(
atlasProperties
!=
null
)
{
pageLimit
=
atlasProperties
.
getInteger
(
HOOK_HIVE_PAGE_LIMIT
,
10000
);
}
}
/**
...
...
@@ -959,4 +976,214 @@ public class HiveMetaStoreBridge {
}
return
ret
;
}
/**
 * Retrieves every hive_db entity header registered in Atlas for this
 * metadata namespace (cluster), paging through basic-search results.
 *
 * @return all hive_db entity headers of the cluster; empty list when none
 * @throws AtlasServiceException if an Atlas search call fails
 */
private List<AtlasEntityHeader> getAllDatabaseInCluster() throws AtlasServiceException {
    List<AtlasEntityHeader> ret      = new ArrayList<>();
    final int               pageSize = pageLimit;

    // restrict the search to databases belonging to this cluster/namespace
    SearchParameters.FilterCriteria fc = new SearchParameters.FilterCriteria();

    fc.setAttributeName(ATTRIBUTE_CLUSTER_NAME);
    fc.setAttributeValue(metadataNamespace);
    fc.setOperator(SearchParameters.Operator.EQ);

    for (int pageIdx = 0; ; pageIdx++) {
        int offset = pageSize * pageIdx;

        LOG.info("Retrieving databases: offset={}, pageSize={}", offset, pageSize);

        AtlasSearchResult       searchResult = atlasClientV2.basicSearch(HIVE_TYPE_DB, fc, null, null, true, pageSize, offset);
        List<AtlasEntityHeader> headers      = (searchResult == null) ? null : searchResult.getEntities();
        int                     dbCount      = (headers == null) ? 0 : headers.size();

        LOG.info("Retrieved {} databases of {} cluster", dbCount, metadataNamespace);

        if (dbCount > 0) {
            ret.addAll(headers);
        }

        if (dbCount < pageSize) { // short (or empty) page => no more results
            break;
        }
    }

    return ret;
}
/**
 * Retrieves every hive_table entity header related to the given hive_db
 * entity via the {@code __hive_table.db} relationship, paging through
 * relationship-search results.
 *
 * @param databaseGuid Atlas GUID of the hive_db entity
 * @return all table entity headers of the database; empty list when none
 * @throws AtlasServiceException if an Atlas search call fails
 */
private List<AtlasEntityHeader> getAllTablesInDb(String databaseGuid) throws AtlasServiceException {
    List<AtlasEntityHeader> ret      = new ArrayList<>();
    final int               pageSize = pageLimit;

    for (int pageIdx = 0; ; pageIdx++) {
        int offset = pageSize * pageIdx;

        LOG.info("Retrieving tables: offset={}, pageSize={}", offset, pageSize);

        AtlasSearchResult       searchResult = atlasClientV2.relationshipSearch(databaseGuid, HIVE_TABLE_DB_EDGE_LABEL, null, null, true, pageSize, offset);
        List<AtlasEntityHeader> headers      = (searchResult == null) ? null : searchResult.getEntities();
        int                     tableCount   = (headers == null) ? 0 : headers.size();

        LOG.info("Retrieved {} tables of {} database", tableCount, databaseGuid);

        if (tableCount > 0) {
            ret.addAll(headers);
        }

        if (tableCount < pageSize) { // short (or empty) page => no more results
            break;
        }
    }

    return ret;
}
/**
 * Extracts the Hive database name from an Atlas qualifiedName of the
 * form {@code dbName@clusterName}.
 *
 * @param qualifiedName Atlas qualified name of a hive_db entity
 * @return the database name, or null when it cannot be derived
 */
public String getHiveDatabaseName(String qualifiedName) {
    if (StringUtils.isNotEmpty(qualifiedName)) {
        String[] parts = qualifiedName.split("@");

        if (parts.length > 0) {
            return parts[0];
        }
    }

    return null;
}
/**
 * Extracts the Hive table name from an Atlas qualifiedName of the form
 * {@code dbName.tableName@clusterName}.
 *
 * @param qualifiedName Atlas qualified name of a hive_table entity
 * @param isTemporary   when true, strips the temp-table suffix
 *                      (everything from {@code TEMP_TABLE_PREFIX} onwards)
 * @return the table name, or null when it cannot be derived
 */
public String getHiveTableName(String qualifiedName, boolean isTemporary) {
    if (StringUtils.isEmpty(qualifiedName)) {
        return null;
    }

    String tableName = StringUtils.substringBetween(qualifiedName, ".", "@");

    if (!isTemporary) {
        return tableName;
    }

    if (StringUtils.isNotEmpty(tableName)) {
        String[] tempParts = tableName.split(TEMP_TABLE_PREFIX);

        if (tempParts.length > 0) {
            return tempParts[0];
        }
    }

    return null;
}
/**
 * Deletes the given Atlas entities by GUID, logging the outcome of each
 * individual delete request.
 *
 * @param guidsToDelete GUIDs of entities to delete; may be null or empty
 * @throws AtlasServiceException if a delete call to Atlas fails
 */
private void deleteByGuid(List<String> guidsToDelete) throws AtlasServiceException {
    if (CollectionUtils.isNotEmpty(guidsToDelete)) {
        for (String guid : guidsToDelete) {
            EntityMutationResponse response = atlasClientV2.deleteEntityByGuid(guid);

            // getDeletedEntities() can be null when nothing was deleted;
            // guard before inspecting it to avoid an NPE
            if (response == null || CollectionUtils.isEmpty(response.getDeletedEntities())) {
                LOG.info("Entity with guid : {} is not deleted", guid);
            } else {
                LOG.info("Entity with guid : {} is deleted", guid);
            }
        }
    } else {
        LOG.info("No Entity to delete from Atlas");
    }
}
/**
 * Deletes database and table entities from Atlas that no longer exist in
 * Hive: fetches every hive_db of this cluster from Atlas, and for each one
 * collects the GUIDs of entities whose Hive counterpart is gone, then
 * deletes them.
 *
 * @param failOnError when true, the first Atlas/Hive failure is rethrown;
 *                    when false, failures are logged and processing continues
 * @throws Exception the propagated Atlas/Hive failure when failOnError is set
 */
public void deleteEntitiesForNonExistingHiveMetadata(boolean failOnError) throws Exception {
    // fetch databases from Atlas
    List<AtlasEntityHeader> dbs = null;

    try {
        dbs = getAllDatabaseInCluster();

        LOG.info("Total Databases in cluster {} : {} ", metadataNamespace, dbs.size());
    } catch (AtlasServiceException e) {
        LOG.error("Failed to retrieve database entities for cluster {} from Atlas", metadataNamespace, e);

        if (failOnError) {
            throw e;
        }
    }

    if (CollectionUtils.isEmpty(dbs)) {
        LOG.info("No database found in service.");

        return;
    }

    // iterate all dbs to check if they still exist in Hive
    for (AtlasEntityHeader db : dbs) {
        String dbGuid     = db.getGuid();
        String hiveDbName = getHiveDatabaseName((String) db.getAttribute(ATTRIBUTE_QUALIFIED_NAME));

        if (StringUtils.isEmpty(hiveDbName)) {
            LOG.error("Failed to get database from qualifiedName: {}, guid: {} ", db.getAttribute(ATTRIBUTE_QUALIFIED_NAME), dbGuid);

            continue;
        }

        List<AtlasEntityHeader> tables;

        try {
            tables = getAllTablesInDb(dbGuid);

            LOG.info("Total Tables in database {} : {} ", hiveDbName, tables.size());
        } catch (AtlasServiceException e) {
            LOG.error("Failed to retrieve table entities for database {} from Atlas", hiveDbName, e);

            if (failOnError) {
                throw e;
            }

            continue;
        }

        List<String> guidsToDelete = collectGuidsToDelete(db, hiveDbName, tables, failOnError);

        // delete entities
        if (CollectionUtils.isNotEmpty(guidsToDelete)) {
            try {
                deleteByGuid(guidsToDelete);
            } catch (AtlasServiceException e) {
                LOG.error("Failed to delete Atlas entities for database {}", hiveDbName, e);

                if (failOnError) {
                    throw e;
                }
            }
        }
    }
}

/**
 * Collects GUIDs of Atlas entities under the given database that no longer
 * exist in Hive: when the database itself is gone, all of its tables plus
 * the database entity are returned; otherwise only the missing tables are.
 */
private List<String> collectGuidsToDelete(AtlasEntityHeader db, String hiveDbName, List<AtlasEntityHeader> tables, boolean failOnError) throws Exception {
    List<String> ret = new ArrayList<>();

    if (!hiveClient.databaseExists(hiveDbName)) {
        // database is gone from Hive: mark all of its table entities ...
        if (CollectionUtils.isNotEmpty(tables)) {
            for (AtlasEntityHeader table : tables) {
                ret.add(table.getGuid());
            }
        }

        // ... and the database entity itself
        ret.add(db.getGuid());

        LOG.info("Added database {}.{} and its {} tables to delete", metadataNamespace, hiveDbName, tables.size());
    } else if (CollectionUtils.isNotEmpty(tables)) {
        // database still exists: check each table individually
        for (AtlasEntityHeader table : tables) {
            String hiveTableName = getHiveTableName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), true);

            if (StringUtils.isEmpty(hiveTableName)) {
                LOG.error("Failed to get table from qualifiedName: {}, guid: {} ", table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), table.getGuid());

                continue;
            }

            try {
                hiveClient.getTable(hiveDbName, hiveTableName, true);
            } catch (InvalidTableException e) {
                // table doesn't exist in Hive any more
                LOG.info("Added table {}.{} to delete", hiveDbName, hiveTableName);

                ret.add(table.getGuid());
            } catch (HiveException e) {
                LOG.error("Failed to get table {}.{} from Hive", hiveDbName, hiveTableName, e);

                if (failOnError) {
                    throw e;
                }
            }
        }
    }

    return ret;
}
}
This diff is collapsed.
Click to expand it.
client/client-v2/src/main/java/org/apache/atlas/AtlasClientV2.java
View file @
b4e4f604
...
...
@@ -123,6 +123,8 @@ public class AtlasClientV2 extends AtlasBaseClient {
private static final String RELATIONSHIPS_URI        = BASE_URI + "v2/relationship/";
private static final String BULK_HEADERS             = "bulk/headers";
private static final String BULK_SET_CLASSIFICATIONS = "bulk/setClassifications";
private static final String RELATIONSHIP_URI         = DISCOVERY_URI + "/relationship";

//Glossary APIs
private static final String GLOSSARY_URI = BASE_URI + "v2/glossary";
...
...
@@ -664,16 +666,22 @@ public class AtlasClientV2 extends AtlasBaseClient {
}
public
AtlasSearchResult
basicSearch
(
String
typeName
,
String
classification
,
String
query
,
boolean
excludeDeletedEntities
,
int
limit
,
int
offset
)
throws
AtlasServiceException
{
MultivaluedMap
<
String
,
String
>
queryParams
=
new
MultivaluedMapImpl
();
return
this
.
basicSearch
(
typeName
,
null
,
classification
,
query
,
excludeDeletedEntities
,
limit
,
offset
);
}
queryParams
.
add
(
"typeName"
,
typeName
);
queryParams
.
add
(
"classification"
,
classification
);
queryParams
.
add
(
QUERY
,
query
);
queryParams
.
add
(
"excludeDeletedEntities"
,
String
.
valueOf
(
excludeDeletedEntities
));
queryParams
.
add
(
LIMIT
,
String
.
valueOf
(
limit
));
queryParams
.
add
(
OFFSET
,
String
.
valueOf
(
offset
));
public
AtlasSearchResult
basicSearch
(
String
typeName
,
SearchParameters
.
FilterCriteria
entityFilters
,
String
classification
,
String
query
,
boolean
excludeDeletedEntities
,
int
limit
,
int
offset
)
throws
AtlasServiceException
{
SearchParameters
parameters
=
new
SearchParameters
();
parameters
.
setTypeName
(
typeName
);
parameters
.
setClassification
(
classification
);
parameters
.
setQuery
(
query
);
parameters
.
setExcludeDeletedEntities
(
excludeDeletedEntities
);
parameters
.
setLimit
(
limit
);
parameters
.
setOffset
(
offset
);
if
(
entityFilters
!=
null
){
parameters
.
setEntityFilters
(
entityFilters
);
}
return
callAPI
(
API_V2
.
BASIC_SEARCH
,
AtlasSearchResult
.
class
,
queryParam
s
);
return
callAPI
(
API_V2
.
BASIC_SEARCH
,
AtlasSearchResult
.
class
,
parameter
s
);
}
public
AtlasSearchResult
facetedSearch
(
SearchParameters
searchParameters
)
throws
AtlasServiceException
{
...
...
@@ -1202,7 +1210,7 @@ public class AtlasClientV2 extends AtlasBaseClient {
// Discovery APIs
public
static
final
API_V2
DSL_SEARCH
=
new
API_V2
(
DSL_SEARCH_URI
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
);
public
static
final
API_V2
FULL_TEXT_SEARCH
=
new
API_V2
(
FULL_TEXT_SEARCH_URI
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
);
public
static
final
API_V2
BASIC_SEARCH
=
new
API_V2
(
BASIC_SEARCH_URI
,
HttpMethod
.
GE
T
,
Response
.
Status
.
OK
);
public
static
final
API_V2
BASIC_SEARCH
=
new
API_V2
(
BASIC_SEARCH_URI
,
HttpMethod
.
POS
T
,
Response
.
Status
.
OK
);
public
static
final
API_V2
FACETED_SEARCH
=
new
API_V2
(
FACETED_SEARCH_URI
,
HttpMethod
.
POST
,
Response
.
Status
.
OK
);
public
static
final
API_V2
ATTRIBUTE_SEARCH
=
new
API_V2
(
DISCOVERY_URI
+
"/attribute"
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
);
public
static
final
API_V2
RELATIONSHIP_SEARCH
=
new
API_V2
(
DISCOVERY_URI
+
"/relationship"
,
HttpMethod
.
GET
,
Response
.
Status
.
OK
);
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment