Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
atlas
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
atlas
Commits
5506e778
Commit
5506e778
authored
Jun 02, 2015
by
arpitgupta
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into dal
parents
cd0e3950
74c54aa9
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
62 additions
and
30 deletions
+62
-30
HiveMetaStoreBridge.java
...ache/hadoop/metadata/hive/bridge/HiveMetaStoreBridge.java
+25
-12
HiveDataModelGenerator.java
...he/hadoop/metadata/hive/model/HiveDataModelGenerator.java
+9
-3
HiveHookIT.java
...java/org/apache/hadoop/metadata/hive/hook/HiveHookIT.java
+19
-2
RepositoryMetadataModule.java
.../org/apache/hadoop/metadata/RepositoryMetadataModule.java
+7
-7
DefaultMetadataService.java
...ache/hadoop/metadata/services/DefaultMetadataService.java
+2
-1
HiveLineageServiceTest.java
...che/hadoop/metadata/discovery/HiveLineageServiceTest.java
+0
-5
No files found.
addons/hive-bridge/src/main/java/org/apache/hadoop/metadata/hive/bridge/HiveMetaStoreBridge.java
View file @
5506e778
...
...
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.api.Index;
import
org.apache.hadoop.hive.metastore.api.Order
;
import
org.apache.hadoop.hive.metastore.api.SerDeInfo
;
import
org.apache.hadoop.hive.metastore.api.StorageDescriptor
;
import
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants
;
import
org.apache.hadoop.hive.ql.metadata.Hive
;
import
org.apache.hadoop.hive.ql.metadata.Partition
;
import
org.apache.hadoop.hive.ql.metadata.Table
;
...
...
@@ -228,8 +229,8 @@ public class HiveMetaStoreBridge {
String
dbType
=
HiveDataTypes
.
HIVE_DB
.
getName
();
String
tableType
=
HiveDataTypes
.
HIVE_TABLE
.
getName
();
String
gremlinQuery
=
String
.
format
(
"g.V.has('__typeName', '%s').has('%s.values', %s).as('p')."
+
"out('__%s.tableName').has('%s.name', '%s').out('__%s.dbName').has('%s.name', '%s')"
+
".has('%s.clusterName', '%s').back('p').toList()"
,
typeName
,
typeName
,
valuesStr
,
typeName
,
+
"out('__%s.tableName').has('%s.name', '%s').out('__%s.dbName').has('%s.name', '%s')"
+
".has('%s.clusterName', '%s').back('p').toList()"
,
typeName
,
typeName
,
valuesStr
,
typeName
,
tableType
,
tableName
.
toLowerCase
(),
tableType
,
dbType
,
dbName
.
toLowerCase
(),
dbType
,
clusterName
);
return
getEntityReferenceFromGremlin
(
typeName
,
gremlinQuery
);
...
...
@@ -263,17 +264,22 @@ public class HiveMetaStoreBridge {
tableRef
=
new
Referenceable
(
HiveDataTypes
.
HIVE_TABLE
.
getName
());
tableRef
.
set
(
"name"
,
hiveTable
.
getTableName
());
tableRef
.
set
(
"owner"
,
hiveTable
.
getOwner
());
//todo fix
tableRef
.
set
(
"createTime"
,
hiveTable
.
get
LastAccessTime
(
));
tableRef
.
set
(
"createTime"
,
hiveTable
.
get
Metadata
().
getProperty
(
hive_metastoreConstants
.
DDL_TIME
));
tableRef
.
set
(
"lastAccessTime"
,
hiveTable
.
getLastAccessTime
());
tableRef
.
set
(
"retention"
,
hiveTable
.
getRetention
());
tableRef
.
set
(
HiveDataModelGenerator
.
COMMENT
,
hiveTable
.
getParameters
().
get
(
HiveDataModelGenerator
.
COMMENT
));
// add reference to the database
tableRef
.
set
(
"dbName"
,
dbReference
);
List
<
Referenceable
>
colList
=
getColumns
(
hiveTable
.
getCols
());
tableRef
.
set
(
"columns"
,
colList
);
// add reference to the StorageDescriptor
StorageDescriptor
storageDesc
=
hiveTable
.
getSd
();
Referenceable
sdReferenceable
=
fillStorageDescStruct
(
storageDesc
);
Referenceable
sdReferenceable
=
fillStorageDescStruct
(
storageDesc
,
colList
);
tableRef
.
set
(
"sd"
,
sdReferenceable
);
// add reference to the Partition Keys
...
...
@@ -293,8 +299,6 @@ public class HiveMetaStoreBridge {
tableRef
.
set
(
"tableType"
,
hiveTable
.
getTableType
().
name
());
tableRef
.
set
(
"temporary"
,
hiveTable
.
isTemporary
());
List
<
Referenceable
>
colList
=
getColumns
(
hiveTable
.
getAllCols
());
tableRef
.
set
(
"columns"
,
colList
);
tableRef
=
createInstance
(
tableRef
);
}
else
{
...
...
@@ -388,7 +392,7 @@ public class HiveMetaStoreBridge {
indexRef
.
set
(
"origTableName"
,
index
.
getOrigTableName
());
indexRef
.
set
(
"indexTableName"
,
index
.
getIndexTableName
());
Referenceable
sdReferenceable
=
fillStorageDescStruct
(
index
.
getSd
());
Referenceable
sdReferenceable
=
fillStorageDescStruct
(
index
.
getSd
()
,
null
);
indexRef
.
set
(
"sd"
,
sdReferenceable
);
indexRef
.
set
(
"parameters"
,
index
.
getParameters
());
...
...
@@ -398,7 +402,7 @@ public class HiveMetaStoreBridge {
createInstance
(
indexRef
);
}
private
Referenceable
fillStorageDescStruct
(
StorageDescriptor
storageDesc
)
throws
Exception
{
private
Referenceable
fillStorageDescStruct
(
StorageDescriptor
storageDesc
,
List
<
Referenceable
>
colList
)
throws
Exception
{
LOG
.
debug
(
"Filling storage descriptor information for "
+
storageDesc
);
Referenceable
sdReferenceable
=
new
Referenceable
(
HiveDataTypes
.
HIVE_STORAGEDESC
.
getName
());
...
...
@@ -415,6 +419,8 @@ public class HiveMetaStoreBridge {
serdeInfoStruct
.
set
(
"parameters"
,
serdeInfo
.
getParameters
());
sdReferenceable
.
set
(
"serdeInfo"
,
serdeInfoStruct
);
sdReferenceable
.
set
(
HiveDataModelGenerator
.
STORAGE_NUM_BUCKETS
,
storageDesc
.
getNumBuckets
());
sdReferenceable
.
set
(
HiveDataModelGenerator
.
STORAGE_IS_STORED_AS_SUB_DIRS
,
storageDesc
.
isStoredAsSubDirectories
());
// Will need to revisit this after we fix typesystem.
/*
...
...
@@ -433,8 +439,15 @@ public class HiveMetaStoreBridge {
}
*/
List
<
Referenceable
>
fieldsList
=
getColumns
(
storageDesc
.
getCols
());
sdReferenceable
.
set
(
"cols"
,
fieldsList
);
//Use the passed column list if not null, ex: use same references for table and SD
List
<
FieldSchema
>
columns
=
storageDesc
.
getCols
();
if
(
columns
!=
null
&&
!
columns
.
isEmpty
())
{
if
(
colList
!=
null
)
{
sdReferenceable
.
set
(
"cols"
,
colList
);
}
else
{
sdReferenceable
.
set
(
"cols"
,
getColumns
(
columns
));
}
}
List
<
Struct
>
sortColsStruct
=
new
ArrayList
<>();
for
(
Order
sortcol
:
storageDesc
.
getSortCols
())
{
...
...
@@ -472,7 +485,7 @@ public class HiveMetaStoreBridge {
Referenceable
colReferenceable
=
new
Referenceable
(
HiveDataTypes
.
HIVE_COLUMN
.
getName
());
colReferenceable
.
set
(
"name"
,
fs
.
getName
());
colReferenceable
.
set
(
"type"
,
fs
.
getType
());
colReferenceable
.
set
(
"comment"
,
fs
.
getComment
());
colReferenceable
.
set
(
HiveDataModelGenerator
.
COMMENT
,
fs
.
getComment
());
colList
.
add
(
createInstance
(
colReferenceable
));
}
...
...
addons/hive-bridge/src/main/java/org/apache/hadoop/metadata/hive/model/HiveDataModelGenerator.java
View file @
5506e778
...
...
@@ -55,6 +55,10 @@ public class HiveDataModelGenerator {
private
final
Map
<
String
,
EnumTypeDefinition
>
enumTypeDefinitionMap
;
private
final
Map
<
String
,
StructTypeDefinition
>
structTypeDefinitionMap
;
public
static
final
String
COMMENT
=
"comment"
;
public
static
final
String
STORAGE_NUM_BUCKETS
=
"numBuckets"
;
public
static
final
String
STORAGE_IS_STORED_AS_SUB_DIRS
=
"storedAsSubDirectories"
;
public
HiveDataModelGenerator
()
{
classTypeDefinitions
=
new
HashMap
<>();
enumTypeDefinitionMap
=
new
HashMap
<>();
...
...
@@ -237,7 +241,7 @@ public class HiveDataModelGenerator {
Multiplicity
.
OPTIONAL
,
false
,
null
),
new
AttributeDefinition
(
"compressed"
,
DataTypes
.
BOOLEAN_TYPE
.
getName
(),
Multiplicity
.
REQUIRED
,
false
,
null
),
new
AttributeDefinition
(
"numBuckets"
,
DataTypes
.
INT_TYPE
.
getName
(),
new
AttributeDefinition
(
STORAGE_NUM_BUCKETS
,
DataTypes
.
INT_TYPE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
new
AttributeDefinition
(
"serdeInfo"
,
HiveDataTypes
.
HIVE_SERDE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
...
...
@@ -251,7 +255,7 @@ public class HiveDataModelGenerator {
Multiplicity
.
OPTIONAL
,
false
,
null
),
//new AttributeDefinition("skewedInfo", DefinedTypes.HIVE_SKEWEDINFO.getName(),
// Multiplicity.OPTIONAL, false, null),
new
AttributeDefinition
(
"storedAsSubDirectories"
,
DataTypes
.
BOOLEAN_TYPE
.
getName
(),
new
AttributeDefinition
(
STORAGE_IS_STORED_AS_SUB_DIRS
,
DataTypes
.
BOOLEAN_TYPE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
};
...
...
@@ -326,7 +330,7 @@ public class HiveDataModelGenerator {
Multiplicity
.
REQUIRED
,
false
,
null
),
new
AttributeDefinition
(
"type"
,
DataTypes
.
STRING_TYPE
.
getName
(),
Multiplicity
.
REQUIRED
,
false
,
null
),
new
AttributeDefinition
(
"comment"
,
DataTypes
.
STRING_TYPE
.
getName
(),
new
AttributeDefinition
(
COMMENT
,
DataTypes
.
STRING_TYPE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
};
HierarchicalTypeDefinition
<
ClassType
>
definition
=
...
...
@@ -377,6 +381,8 @@ public class HiveDataModelGenerator {
Multiplicity
.
OPTIONAL
,
false
,
null
),
new
AttributeDefinition
(
"lastAccessTime"
,
DataTypes
.
INT_TYPE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
new
AttributeDefinition
(
COMMENT
,
DataTypes
.
STRING_TYPE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
new
AttributeDefinition
(
"retention"
,
DataTypes
.
INT_TYPE
.
getName
(),
Multiplicity
.
OPTIONAL
,
false
,
null
),
new
AttributeDefinition
(
"sd"
,
HiveDataTypes
.
HIVE_STORAGEDESC
.
getName
(),
...
...
addons/hive-bridge/src/test/java/org/apache/hadoop/metadata/hive/hook/HiveHookIT.java
View file @
5506e778
...
...
@@ -21,12 +21,15 @@ package org.apache.hadoop.metadata.hive.hook;
import
org.apache.commons.lang.RandomStringUtils
;
import
org.apache.hadoop.hive.conf.HiveConf
;
import
org.apache.hadoop.hive.metastore.TableType
;
import
org.apache.hadoop.hive.metastore.api.StorageDescriptor
;
import
org.apache.hadoop.hive.ql.Driver
;
import
org.apache.hadoop.hive.ql.session.SessionState
;
import
org.apache.hadoop.metadata.MetadataServiceClient
;
import
org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge
;
import
org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator
;
import
org.apache.hadoop.metadata.hive.model.HiveDataTypes
;
import
org.apache.hadoop.metadata.typesystem.Referenceable
;
import
org.apache.hadoop.metadata.typesystem.persistence.Id
;
import
org.apache.log4j.spi.LoggerFactory
;
import
org.codehaus.jettison.json.JSONArray
;
import
org.codehaus.jettison.json.JSONObject
;
...
...
@@ -118,7 +121,7 @@ public class HiveHookIT {
private
String
createTable
(
boolean
partition
)
throws
Exception
{
String
tableName
=
tableName
();
runCommand
(
"create table "
+
tableName
+
"(id int, name string)"
+
(
partition
?
" partitioned by(dt string)"
runCommand
(
"create table "
+
tableName
+
"(id int, name string)
comment 'table comment'
"
+
(
partition
?
" partitioned by(dt string)"
:
""
));
return
tableName
;
}
...
...
@@ -127,18 +130,32 @@ public class HiveHookIT {
public
void
testCreateTable
()
throws
Exception
{
String
tableName
=
tableName
();
String
dbName
=
createDatabase
();
runCommand
(
"create table "
+
dbName
+
"."
+
tableName
+
"(id int, name string)"
);
String
colName
=
"col"
+
random
();
runCommand
(
"create table "
+
dbName
+
"."
+
tableName
+
"("
+
colName
+
" int, name string)"
);
assertTableIsRegistered
(
dbName
,
tableName
);
//there is only one instance of column registered
assertColumnIsRegistered
(
colName
);
tableName
=
createTable
();
String
tableId
=
assertTableIsRegistered
(
DEFAULT_DB
,
tableName
);
Referenceable
tableRef
=
dgiCLient
.
getEntity
(
tableId
);
Assert
.
assertEquals
(
tableRef
.
get
(
"tableType"
),
TableType
.
MANAGED_TABLE
.
name
());
Assert
.
assertEquals
(
tableRef
.
get
(
HiveDataModelGenerator
.
COMMENT
),
"table comment"
);
final
Id
sdId
=
(
Id
)
tableRef
.
get
(
"sd"
);
Referenceable
sdRef
=
dgiCLient
.
getEntity
(
sdId
.
id
);
Assert
.
assertEquals
(
sdRef
.
get
(
HiveDataModelGenerator
.
STORAGE_IS_STORED_AS_SUB_DIRS
),
false
);
//Create table where database doesn't exist, will create database instance as well
assertDatabaseIsRegistered
(
DEFAULT_DB
);
}
private
String
assertColumnIsRegistered
(
String
colName
)
throws
Exception
{
LOG
.
debug
(
"Searching for column {}"
,
colName
);
String
query
=
String
.
format
(
"%s where name = '%s'"
,
HiveDataTypes
.
HIVE_COLUMN
.
getName
(),
colName
.
toLowerCase
());
return
assertEntityIsRegistered
(
query
,
true
);
}
@Test
public
void
testCTAS
()
throws
Exception
{
String
tableName
=
createTable
();
...
...
repository/src/main/java/org/apache/hadoop/metadata/RepositoryMetadataModule.java
View file @
5506e778
...
...
@@ -48,28 +48,28 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
ThrowingProviderBinder
.
create
(
binder
())
.
bind
(
GraphProvider
.
class
,
TitanGraph
.
class
)
.
to
(
TitanGraphProvider
.
class
)
.
in
(
Scopes
.
SINGLETON
);
.
asEagerSingleton
(
);
// allow for dynamic binding of the metadata repo & graph service
// bind the MetadataRepositoryService interface to an implementation
bind
(
MetadataRepository
.
class
).
to
(
GraphBackedMetadataRepository
.
class
);
bind
(
MetadataRepository
.
class
).
to
(
GraphBackedMetadataRepository
.
class
)
.
asEagerSingleton
()
;
// bind the ITypeStore interface to an implementation
bind
(
ITypeStore
.
class
).
to
(
GraphBackedTypeStore
.
class
);
bind
(
ITypeStore
.
class
).
to
(
GraphBackedTypeStore
.
class
)
.
asEagerSingleton
()
;
// bind the GraphService interface to an implementation
// bind(GraphService.class).to(graphServiceClass);
// bind the MetadataService interface to an implementation
bind
(
MetadataService
.
class
).
to
(
DefaultMetadataService
.
class
);
bind
(
MetadataService
.
class
).
to
(
DefaultMetadataService
.
class
)
.
asEagerSingleton
()
;
// bind the DiscoveryService interface to an implementation
bind
(
DiscoveryService
.
class
).
to
(
GraphBackedDiscoveryService
.
class
);
bind
(
DiscoveryService
.
class
).
to
(
GraphBackedDiscoveryService
.
class
)
.
asEagerSingleton
()
;
bind
(
SearchIndexer
.
class
).
to
(
GraphBackedSearchIndexer
.
class
);
bind
(
SearchIndexer
.
class
).
to
(
GraphBackedSearchIndexer
.
class
)
.
asEagerSingleton
()
;
bind
(
LineageService
.
class
).
to
(
HiveLineageService
.
class
);
bind
(
LineageService
.
class
).
to
(
HiveLineageService
.
class
)
.
asEagerSingleton
()
;
MethodInterceptor
interceptor
=
new
GraphTransactionInterceptor
();
requestInjection
(
interceptor
);
...
...
repository/src/main/java/org/apache/hadoop/metadata/services/DefaultMetadataService.java
View file @
5506e778
...
...
@@ -89,10 +89,11 @@ public class DefaultMetadataService implements MetadataService {
private
void
restoreTypeSystem
()
{
LOG
.
info
(
"Restoring type system from the store"
);
try
{
createSuperTypes
();
TypesDef
typesDef
=
typeStore
.
restore
();
typeSystem
.
defineTypes
(
typesDef
);
createSuperTypes
();
}
catch
(
MetadataException
e
)
{
throw
new
RuntimeException
(
e
);
}
...
...
repository/src/test/java/org/apache/hadoop/metadata/discovery/HiveLineageServiceTest.java
View file @
5506e778
...
...
@@ -57,11 +57,6 @@ import java.util.List;
@Guice
(
modules
=
RepositoryMetadataModule
.
class
)
public
class
HiveLineageServiceTest
{
static
{
// this would override super types creation if not first thing
TypeSystem
.
getInstance
().
reset
();
}
@Inject
private
DefaultMetadataService
metadataService
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment