dataplatform / atlas

Commit 21109f1e
Authored Jun 04, 2015 by Shwetha GS

de-duping on query string in hive hook
Parent: 2270d05f
Showing 4 changed files with 98 additions and 46 deletions:
+15 -1    ...ache/hadoop/metadata/hive/bridge/HiveMetaStoreBridge.java
+50 -36   ...n/java/org/apache/hadoop/metadata/hive/hook/HiveHook.java
+25 -3    ...java/org/apache/hadoop/metadata/hive/hook/HiveHookIT.java
+8 -6     ...p/metadata/repository/typestore/GraphBackedTypeStore.java
addons/hive-bridge/src/main/java/org/apache/hadoop/metadata/hive/bridge/HiveMetaStoreBridge.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.metadata.hive.bridge;

+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -158,11 +159,24 @@ public class HiveMetaStoreBridge {
         LOG.debug("Getting reference for database {}", databaseName);
         String typeName = HiveDataTypes.HIVE_DB.getName();
-        String dslQuery = String.format("%s where name = '%s' and clusterName = '%s'", HiveDataTypes.HIVE_DB.getName(),
+        String dslQuery = String.format("%s where name = '%s' and clusterName = '%s'", typeName,
                 databaseName.toLowerCase(), clusterName);
         return getEntityReferenceFromDSL(typeName, dslQuery);
     }

+    public Referenceable getProcessReference(String queryStr) throws Exception {
+        LOG.debug("Getting reference for process with query {}", queryStr);
+        String typeName = HiveDataTypes.HIVE_PROCESS.getName();
+        //todo enable DSL
+        // String dslQuery = String.format("%s where queryText = \"%s\"", typeName, queryStr);
+        // return getEntityReferenceFromDSL(typeName, dslQuery);
+        String gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.queryText', \"%s\").toList()",
+                typeName, typeName, StringEscapeUtils.escapeJava(queryStr));
+        return getEntityReferenceFromGremlin(typeName, gremlinQuery);
+    }
+
     private Referenceable getEntityReferenceFromDSL(String typeName, String dslQuery) throws Exception {
         MetadataServiceClient dgiClient = getMetadataServiceClient();
         JSONArray results = dgiClient.searchByDSL(dslQuery);
...
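The new getProcessReference() embeds the HQL text inside a Gremlin string literal, so it first escapes it with StringEscapeUtils.escapeJava; otherwise quotes or newlines in the query would break out of the embedded literal. A small standalone demonstration of the escaping (assumes commons-lang on the classpath, as in this module; the type name is a placeholder for HiveDataTypes.HIVE_PROCESS.getName()):

import org.apache.commons.lang.StringEscapeUtils;

public class EscapeDemo {
    public static void main(String[] args) {
        String queryStr = "select * from t where name = \"x\"";
        String typeName = "hive_process"; // placeholder type name
        String gremlin = String.format(
                "g.V.has('__typeName', '%s').has('%s.queryText', \"%s\").toList()",
                typeName, typeName, StringEscapeUtils.escapeJava(queryStr));
        // The embedded quotes come out as \" so the Gremlin literal stays intact.
        System.out.println(gremlin);
    }
}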
addons/hive-bridge/src/main/java/org/apache/hadoop/metadata/hive/hook/HiveHook.java
@@ -37,6 +37,8 @@ package org.apache.hadoop.metadata.hive.hook;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.ExplainTask;
@@ -271,6 +273,13 @@ public class HiveHook implements ExecuteWithHookContext {
         }
     }

+    private String normalize(String str) {
+        if (StringUtils.isEmpty(str)) {
+            return null;
+        }
+        return str.toLowerCase().trim();
+    }
+
     private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEvent event) throws Exception {
         Set<ReadEntity> inputs = event.inputs;
         Set<WriteEntity> outputs = event.outputs;
@@ -285,48 +294,53 @@ public class HiveHook implements ExecuteWithHookContext {
         }
         String queryId = event.queryPlan.getQueryId();
-        String queryStr = event.queryPlan.getQueryStr();
+        String queryStr = normalize(event.queryPlan.getQueryStr());
         long queryStartTime = event.queryPlan.getQueryStartTime();

         LOG.debug("Registering CTAS query: {}", queryStr);
-        Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
-        processReferenceable.set("name", event.operation.getOperationName());
-        processReferenceable.set("startTime", queryStartTime);
-        processReferenceable.set("userName", event.user);
-        List<Referenceable> source = new ArrayList<>();
-        for (ReadEntity readEntity : inputs) {
-            if (readEntity.getType() == Entity.Type.TABLE) {
-                Table table = readEntity.getTable();
-                String dbName = table.getDbName();
-                source.add(dgiBridge.registerTable(dbName, table.getTableName()));
-            }
-            if (readEntity.getType() == Entity.Type.PARTITION) {
-                dgiBridge.registerPartition(readEntity.getPartition());
-            }
-        }
-        processReferenceable.set("inputs", source);
-        List<Referenceable> target = new ArrayList<>();
-        for (WriteEntity writeEntity : outputs) {
-            if (writeEntity.getType() == Entity.Type.TABLE || writeEntity.getType() == Entity.Type.PARTITION) {
-                Table table = writeEntity.getTable();
-                String dbName = table.getDbName();
-                target.add(dgiBridge.registerTable(dbName, table.getTableName()));
-            }
-            if (writeEntity.getType() == Entity.Type.PARTITION) {
-                dgiBridge.registerPartition(writeEntity.getPartition());
-            }
-        }
-        processReferenceable.set("outputs", target);
-        processReferenceable.set("queryText", queryStr);
-        processReferenceable.set("queryId", queryId);
-        processReferenceable.set("queryPlan", event.jsonPlan.toString());
-        processReferenceable.set("endTime", System.currentTimeMillis());
-        //TODO set
-        processReferenceable.set("queryGraph", "queryGraph");
-        dgiBridge.createInstance(processReferenceable);
+        Referenceable processReferenceable = dgiBridge.getProcessReference(queryStr);
+        if (processReferenceable == null) {
+            processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
+            processReferenceable.set("name", event.operation.getOperationName());
+            processReferenceable.set("startTime", queryStartTime);
+            processReferenceable.set("userName", event.user);
+            List<Referenceable> source = new ArrayList<>();
+            for (ReadEntity readEntity : inputs) {
+                if (readEntity.getType() == Entity.Type.TABLE) {
+                    Table table = readEntity.getTable();
+                    String dbName = table.getDbName();
+                    source.add(dgiBridge.registerTable(dbName, table.getTableName()));
+                }
+                if (readEntity.getType() == Entity.Type.PARTITION) {
+                    dgiBridge.registerPartition(readEntity.getPartition());
+                }
+            }
+            processReferenceable.set("inputs", source);
+            List<Referenceable> target = new ArrayList<>();
+            for (WriteEntity writeEntity : outputs) {
+                if (writeEntity.getType() == Entity.Type.TABLE || writeEntity.getType() == Entity.Type.PARTITION) {
+                    Table table = writeEntity.getTable();
+                    String dbName = table.getDbName();
+                    target.add(dgiBridge.registerTable(dbName, table.getTableName()));
+                }
+                if (writeEntity.getType() == Entity.Type.PARTITION) {
+                    dgiBridge.registerPartition(writeEntity.getPartition());
+                }
+            }
+            processReferenceable.set("outputs", target);
+            processReferenceable.set("queryText", queryStr);
+            processReferenceable.set("queryId", queryId);
+            processReferenceable.set("queryPlan", event.jsonPlan.toString());
+            processReferenceable.set("endTime", System.currentTimeMillis());
+            //TODO set
+            processReferenceable.set("queryGraph", "queryGraph");
+            dgiBridge.createInstance(processReferenceable);
+        } else {
+            LOG.debug("Query {} is already registered", queryStr);
+        }
     }
...
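The normalize() helper added above is what makes two textually different spellings of the same query collide on queryText. A quick standalone check of its semantics (a sketch; the real method is private to HiveHook and uses the same commons-lang StringUtils):

import org.apache.commons.lang.StringUtils;

public class NormalizeCheck {
    // Same logic as the helper added in this commit.
    static String normalize(String str) {
        if (StringUtils.isEmpty(str)) {
            return null;
        }
        return str.toLowerCase().trim();
    }

    public static void main(String[] args) {
        System.out.println(normalize(null));                   // null
        System.out.println(normalize(""));                     // null
        System.out.println(normalize("  SELECT * FROM T  "));  // select * from t
    }
}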
addons/hive-bridge/src/test/java/org/apache/hadoop/metadata/hive/hook/HiveHookIT.java
@@ -19,6 +19,8 @@
 package org.apache.hadoop.metadata.hive.hook;

 import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.ql.Driver;
@@ -222,7 +224,7 @@ public class HiveHookIT {
         String tableName = createTable(false);
         String filename = "pfile://" + mkdir("export");
-        String query = "export table " + tableName + " to '" + filename + "'";
+        String query = "export table " + tableName + " to \"" + filename + "\"";
         runCommand(query);
         assertProcessIsRegistered(query);
@@ -239,6 +241,11 @@ public class HiveHookIT {
         String query = "select * from " + tableName;
         runCommand(query);
         assertProcessIsRegistered(query);
+
+        //single entity per query
+        query = "SELECT * from " + tableName.toUpperCase();
+        runCommand(query);
+        assertProcessIsRegistered(query);
     }

     @Test
@@ -268,8 +275,23 @@ public class HiveHookIT {
     }

     private void assertProcessIsRegistered(String queryStr) throws Exception {
-        String dslQuery = String.format("%s where queryText = \"%s\"", HiveDataTypes.HIVE_PROCESS.getName(), queryStr);
-        assertEntityIsRegistered(dslQuery, true);
+        // String dslQuery = String.format("%s where queryText = \"%s\"", HiveDataTypes.HIVE_PROCESS.getName(),
+        //     normalize(queryStr));
+        // assertEntityIsRegistered(dslQuery, true);
+        //todo replace with DSL
+        String typeName = HiveDataTypes.HIVE_PROCESS.getName();
+        String gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.queryText', \"%s\").toList()",
+                typeName, typeName, normalize(queryStr));
+        JSONObject response = dgiCLient.searchByGremlin(gremlinQuery);
+        JSONArray results = response.getJSONArray(MetadataServiceClient.RESULTS);
+        Assert.assertEquals(results.length(), 1);
+    }
+
+    private String normalize(String str) {
+        if (StringUtils.isEmpty(str)) {
+            return null;
+        }
+        return StringEscapeUtils.escapeJava(str.toLowerCase());
+    }

     private String assertTableIsRegistered(String dbName, String tableName) throws Exception {
...
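assertProcessIsRegistered() now asserts that exactly one process entity matches the normalized query text, which is the de-dup guarantee under test. A sketch of that exactly-one assertion on a search response (org.json stands in for the client response; the "results" key mirroring MetadataServiceClient.RESULTS is an assumption here):

import org.json.JSONArray;
import org.json.JSONObject;

public class ExactlyOneSketch {
    public static void main(String[] args) throws Exception {
        // Assumed shape of a search response with one matching entity.
        JSONObject response = new JSONObject(
                "{\"results\": [{\"queryText\": \"select * from t\"}]}");
        JSONArray results = response.getJSONArray("results");
        if (results.length() != 1) {
            throw new AssertionError("expected exactly one process, got " + results.length());
        }
        System.out.println("exactly one process entity registered");
    }
}

Note that the test-side normalize() additionally applies StringEscapeUtils.escapeJava(), unlike the hook-side helper: the test embeds the expected text in a Gremlin literal, just as the bridge's getProcessReference() does.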
repository/src/main/java/org/apache/hadoop/metadata/repository/typestore/GraphBackedTypeStore.java
@@ -281,12 +281,14 @@ public class GraphBackedTypeStore implements ITypeStore {
     private AttributeDefinition[] getAttributes(Vertex vertex, String typeName) throws MetadataException {
         List<AttributeDefinition> attributes = new ArrayList<>();
         List<String> attrNames = vertex.getProperty(getPropertyKey(typeName));
-        for (String attrName : attrNames) {
-            String propertyKey = getPropertyKey(typeName, attrName);
-            try {
-                attributes.add(AttributeInfo.fromJson((String) vertex.getProperty(propertyKey)));
-            } catch (JSONException e) {
-                throw new MetadataException(e);
+        if (attrNames != null) {
+            for (String attrName : attrNames) {
+                String propertyKey = getPropertyKey(typeName, attrName);
+                try {
+                    attributes.add(AttributeInfo.fromJson((String) vertex.getProperty(propertyKey)));
+                } catch (JSONException e) {
+                    throw new MetadataException(e);
+                }
             }
         }
         return attributes.toArray(new AttributeDefinition[attributes.size()]);
...
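The GraphBackedTypeStore change is a defensive null guard: a vertex with no attribute-name list now yields an empty array instead of a NullPointerException. A minimal sketch of the behavior (hypothetical names; a plain List stands in for the vertex property and for the per-attribute JSON parsing):

import java.util.ArrayList;
import java.util.List;

public class NullGuardSketch {
    static String[] getAttributes(List<String> attrNames) {
        List<String> attributes = new ArrayList<>();
        if (attrNames != null) {          // the guard this commit adds
            attributes.addAll(attrNames); // stands in for AttributeInfo.fromJson(...)
        }
        return attributes.toArray(new String[attributes.size()]);
    }

    public static void main(String[] args) {
        System.out.println(getAttributes(null).length); // 0, not a NullPointerException
    }
}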