Commit fb3f6b46 by Shwetha GS

hive bridge and hook packaging

parent 6e212def
@@ -33,7 +33,8 @@
     <packaging>jar</packaging>
     <properties>
-        <hive.version>0.14.0</hive.version>
+        <hive.version>1.1.0</hive.version>
+        <calcite.version>0.9.2-incubating</calcite.version>
         <hadoop.version>2.5.0</hadoop.version>
     </properties>
@@ -41,6 +42,17 @@
         <dependency>
             <groupId>org.apache.hadoop.metadata</groupId>
             <artifactId>metadata-client</artifactId>
+            <exclusions>
+                <exclusion>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>guava</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop.metadata</groupId>
+            <artifactId>metadata-typesystem</artifactId>
         </dependency>
         <dependency>
@@ -48,16 +60,6 @@
             <artifactId>hive-metastore</artifactId>
             <version>${hive.version}</version>
             <scope>provided</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.calcite</groupId>
-                    <artifactId>calcite-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.calcite</groupId>
-                    <artifactId>calcite-avatica</artifactId>
-                </exclusion>
-            </exclusions>
         </dependency>
         <dependency>
@@ -65,18 +67,9 @@
             <artifactId>hive-exec</artifactId>
             <version>${hive.version}</version>
             <scope>provided</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.calcite</groupId>
-                    <artifactId>calcite-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.calcite</groupId>
-                    <artifactId>calcite-avatica</artifactId>
-                </exclusion>
-            </exclusions>
         </dependency>
         <dependency>
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-cli</artifactId>
@@ -84,25 +77,10 @@
             <scope>test</scope>
         </dependency>
-        <dependency>
-            <groupId>com.google.code.gson</groupId>
-            <artifactId>gson</artifactId>
-            <version>2.2.2</version>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.hadoop.metadata</groupId>
-            <artifactId>metadata-typesystem</artifactId>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.hadoop.metadata</groupId>
-            <artifactId>metadata-client</artifactId>
-        </dependency>
         <dependency>
             <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-client</artifactId>
+            <artifactId>hadoop-common</artifactId>
             <version>${hadoop.version}</version>
             <scope>provided</scope>
         </dependency>
@@ -141,7 +119,21 @@
                         <goal>copy-dependencies</goal>
                     </goals>
                     <configuration>
-                        <outputDirectory>${project.build.directory}/dependency</outputDirectory>
+                        <outputDirectory>${project.build.directory}/dependency/bridge/hive</outputDirectory>
+                        <includeScope>compile</includeScope>
+                        <overWriteReleases>false</overWriteReleases>
+                        <overWriteSnapshots>false</overWriteSnapshots>
+                        <overWriteIfNewer>true</overWriteIfNewer>
+                    </configuration>
+                </execution>
+                <execution>
+                    <id>copy-hook-dependencies</id>
+                    <phase>package</phase>
+                    <goals>
+                        <goal>copy-dependencies</goal>
+                    </goals>
+                    <configuration>
+                        <outputDirectory>${project.build.directory}/dependency/hook/hive</outputDirectory>
                         <includeScope>runtime</includeScope>
                         <overWriteReleases>false</overWriteReleases>
                         <overWriteSnapshots>false</overWriteSnapshots>
@@ -151,6 +143,38 @@
             </executions>
         </plugin>
+        <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-dependency-plugin</artifactId>
+            <version>2.10</version>
+            <executions>
+                <execution>
+                    <id>copy</id>
+                    <phase>package</phase>
+                    <goals>
+                        <goal>copy</goal>
+                    </goals>
+                    <configuration>
+                        <artifactItems>
+                            <artifactItem>
+                                <groupId>${project.groupId}</groupId>
+                                <artifactId>${project.artifactId}</artifactId>
+                                <version>${project.version}</version>
+                                <overWrite>true</overWrite>
+                                <outputDirectory>${project.build.directory}/dependency/bridge/hive</outputDirectory>
+                            </artifactItem>
+                            <artifactItem>
+                                <groupId>${project.groupId}</groupId>
+                                <artifactId>${project.artifactId}</artifactId>
+                                <version>${project.version}</version>
+                                <overWrite>true</overWrite>
+                                <outputDirectory>${project.build.directory}/dependency/hook/hive</outputDirectory>
+                            </artifactItem>
+                        </artifactItems>
+                    </configuration>
+                </execution>
+            </executions>
+        </plugin>
         <plugin>
             <groupId>org.mortbay.jetty</groupId>
...
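Taken together, the pom changes split the packaged classpaths in two: the original copy-dependencies execution now stages compile-scope dependencies under target/dependency/bridge/hive, the new copy-hook-dependencies execution stages runtime-scope dependencies under target/dependency/hook/hive, and, since copy-dependencies never copies the module's own artifact, the added copy execution drops the hive-bridge jar itself into both directories.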
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
# resolve links - $0 may be a softlink
PRG="${0}"
while [ -h "${PRG}" ]; do
  ls=`ls -ld "${PRG}"`
  link=`expr "$ls" : '.*-> \(.*\)$'`
  if expr "$link" : '/.*' > /dev/null; then
    PRG="$link"
  else
    PRG=`dirname "${PRG}"`/"$link"
  fi
done
BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
if [ -z "$METADATA_CONF" ]; then
METADATA_CONF=${BASEDIR}/conf
fi
export METADATA_CONF
if [ -f "${METADATA_CONF}/metadata-env.sh" ]; then
. "${METADATA_CONF}/metadata-env.sh"
fi
if test -z ${JAVA_HOME}
then
  JAVA_BIN=`which java`
  JAR_BIN=`which jar`
else
  JAVA_BIN=${JAVA_HOME}/bin/java
  JAR_BIN=${JAVA_HOME}/bin/jar
fi
export JAVA_BIN
if [ ! -e "$JAVA_BIN" ] || [ ! -e "$JAR_BIN" ]; then
  echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available."
  exit 1
fi
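
# Build the classpath: the config directory first, then every jar under lib-bridge.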
METADATACPPATH="$METADATA_CONF"
for i in "${BASEDIR}/lib-bridge/"*.jar; do
  METADATACPPATH="${METADATACPPATH}:$i"
done
echo $METADATACPPATH
JAVA_PROPERTIES="$METADATA_OPTS"
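
# Skip the first command-line argument, then forward any leading -D options to the JVM.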
shift
while [[ ${1} =~ ^\-D ]]; do
  JAVA_PROPERTIES="${JAVA_PROPERTIES} ${1}"
  shift
done
TIME=`date +%Y%m%d%H%M%s`   # currently unused; note %s expands to epoch seconds
${JAVA_BIN} ${JAVA_PROPERTIES} -cp ${METADATACPPATH} org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge
RETVAL=$?
[ $RETVAL -eq 0 ] && echo Hive Data Model Imported!!!
[ $RETVAL -ne 0 ] && echo Failure in Hive Data Model import!!!
@@ -29,6 +29,7 @@
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.metadata.MetadataServiceClient;
+import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
 import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
 import org.apache.hadoop.metadata.typesystem.Referenceable;
 import org.apache.hadoop.metadata.typesystem.Struct;
@@ -45,6 +46,8 @@
  * and registers them in DGI.
  */
 public class HiveMetaStoreBridge {
+    private static final String DEFAULT_DGI_URL = "http://localhost:21000/";
+
     public static class Pair<S, T> {
         public S first;
         public T second;
@@ -72,7 +75,7 @@
      */
     public HiveMetaStoreBridge(HiveConf hiveConf) throws Exception {
         hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
-        metadataServiceClient = new MetadataServiceClient(hiveConf.get(DGI_URL_PROPERTY));
+        metadataServiceClient = new MetadataServiceClient(hiveConf.get(DGI_URL_PROPERTY, DEFAULT_DGI_URL));
     }

     public MetadataServiceClient getMetadataServiceClient() {
...@@ -104,7 +107,9 @@ public class HiveMetaStoreBridge { ...@@ -104,7 +107,9 @@ public class HiveMetaStoreBridge {
dbRef.set("locationUri", hiveDB.getLocationUri()); dbRef.set("locationUri", hiveDB.getLocationUri());
dbRef.set("parameters", hiveDB.getParameters()); dbRef.set("parameters", hiveDB.getParameters());
dbRef.set("ownerName", hiveDB.getOwnerName()); dbRef.set("ownerName", hiveDB.getOwnerName());
dbRef.set("ownerType", hiveDB.getOwnerType().getValue()); if (hiveDB.getOwnerType() != null) {
dbRef.set("ownerType", hiveDB.getOwnerType().getValue());
}
return createInstance(dbRef); return createInstance(dbRef);
} }
@@ -114,7 +119,7 @@
         LOG.debug("creating instance of type " + typeName);
         String entityJSON = InstanceSerialization.toJson(referenceable, true);
-        LOG.debug("Submitting new entity= " + entityJSON);
+        LOG.debug("Submitting new entity {} = {}", referenceable.getTypeName(), entityJSON);
         JSONObject jsonObject = metadataServiceClient.createEntity(entityJSON);
         String guid = jsonObject.getString(MetadataServiceClient.RESULTS);
         LOG.debug("created instance for type " + typeName + ", guid: " + guid);
@@ -338,8 +343,19 @@
         return createInstance(sdReferenceable);
     }

+    private void registerHiveDataModel() throws Exception {
+        HiveDataModelGenerator dataModelGenerator = new HiveDataModelGenerator();
+        try {
+            getMetadataServiceClient().createType(dataModelGenerator.getModelAsJson());
+        } catch (Exception e) {
+            //Ignore if type is already registered
+            //TODO make createType idempotent
+        }
+    }
+
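The empty catch above swallows every failure, not only the "type already registered" case it targets. Until createType becomes idempotent, a hedged alternative, sketched using only names already visible in this file (LOG is the class's existing logger), would at least record the cause:

    private void registerHiveDataModel() throws Exception {
        HiveDataModelGenerator dataModelGenerator = new HiveDataModelGenerator();
        try {
            getMetadataServiceClient().createType(dataModelGenerator.getModelAsJson());
        } catch (Exception e) {
            // Cannot yet tell "already registered" apart from a real failure,
            // so log why registration failed rather than dropping the exception.
            LOG.info("Hive data model registration failed; assuming the types already exist", e);
        }
    }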
     public static void main(String[] argv) throws Exception {
         HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(new HiveConf());
+        hiveMetaStoreBridge.registerHiveDataModel();
         hiveMetaStoreBridge.importHiveMetadata();
     }
 }
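With the constructor now defaulting to DEFAULT_DGI_URL, a bare new HiveConf() works against a local server, and a remote endpoint only needs the property set before construction. A minimal sketch (the property's key string is not shown in this diff, and the DGI_URL_PROPERTY constant is assumed to be accessible to the caller):

    HiveConf hiveConf = new HiveConf();
    hiveConf.set(HiveMetaStoreBridge.DGI_URL_PROPERTY, "http://dgi-host:21000/"); // assumes the constant is visible
    new HiveMetaStoreBridge(hiveConf).importHiveMetadata();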
@@ -42,6 +42,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.exec.ExplainTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.Entity;
 import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
@@ -56,6 +57,7 @@
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExplainWork;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
 import org.apache.hadoop.metadata.MetadataServiceClient;
 import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
@@ -124,7 +126,6 @@
     }

     LOG.info("Created DGI Hook");
-    executor.shutdown();
 }

 @Override
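Removing executor.shutdown() from the constructor is the substantive fix here: a shut-down ExecutorService throws RejectedExecutionException on every later submit(), so the async path below could never have run. For reference, a sketch of the kind of executor a fire-and-forget hook wants; the pool size, field placement, and thread name are illustrative, not taken from this code:

    // requires java.util.concurrent.{ExecutorService, Executors, ThreadFactory}
    private static final ExecutorService executor = Executors.newFixedThreadPool(1,
            new ThreadFactory() {
                @Override
                public Thread newThread(Runnable runnable) {
                    Thread thread = new Thread(runnable, "DGI hook");
                    thread.setDaemon(true); // hook work should not keep the client JVM alive
                    return thread;
                }
            });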
@@ -142,16 +143,16 @@
             fireAndForget(hookContext, conf);
         } else {
             executor.submit(
                     new Runnable() {
                         @Override
                         public void run() {
                             try {
                                 fireAndForget(hookContext, conf);
                             } catch (Throwable e) {
                                 LOG.info("DGI hook failed", e);
                             }
                         }
                     }
             );
         }
     }
@@ -190,14 +191,14 @@
             break;

         case CREATETABLE_AS_SELECT:
-            registerCTAS(dgiBridge, hookContext);
+            registerCTAS(dgiBridge, hookContext, conf);
             break;

         default:
         }
     }

-    private void registerCTAS(HiveMetaStoreBridge dgiBridge, HookContext hookContext) throws Exception {
+    private void registerCTAS(HiveMetaStoreBridge dgiBridge, HookContext hookContext, HiveConf conf) throws Exception {
         Set<ReadEntity> inputs = hookContext.getInputs();
         Set<WriteEntity> outputs = hookContext.getOutputs();
         String user = hookContext.getUserName();
@@ -238,17 +239,19 @@
         processReferenceable.set("targetTableNames", target);
         processReferenceable.set("queryText", queryStr);
         processReferenceable.set("queryId", queryId);
+        processReferenceable.set("queryPlan", getQueryPlan(hookContext, conf));
+        processReferenceable.set("endTime", System.currentTimeMillis());

         //TODO set
-        processReferenceable.set("endTime", queryStartTime);
-        processReferenceable.set("queryPlan", "queryPlan");
         processReferenceable.set("queryGraph", "queryGraph");
         dgiBridge.createInstance(processReferenceable);
     }

     /**
      * Gets reference for the database. Creates new instance if it doesn't exist
+     *
      * @param dgiBridge
      * @param dbName database name
      * @return Reference for database
      * @throws Exception
      */
@@ -271,6 +274,7 @@
     /**
      * Gets reference for the table. Creates new instance if it doesn't exist
+     *
      * @param dgiBridge
      * @param dbName
      * @param tableName table name
@@ -296,16 +300,20 @@
     }

-    //TODO Do we need this??
-    //We need to somehow get the sem associated with the plan and
-    // use it here.
-    //MySemanticAnaylzer sem = new MySemanticAnaylzer(conf);
-    //sem.setInputs(plan.getInputs());
-    //ExplainWork ew = new ExplainWork(null, null, rootTasks,
-    //  plan.getFetchTask(), null, sem,
-    //  false, true, false, false, false);
-    //JSONObject explainPlan =
-    //  explain.getJSONLogicalPlan(null, ew);
+    private String getQueryPlan(HookContext hookContext, HiveConf conf) throws Exception {
+        //We need to somehow get the sem associated with the plan and use it here.
+        MySemanticAnaylzer sem = new MySemanticAnaylzer(conf);
+        QueryPlan queryPlan = hookContext.getQueryPlan();
+        sem.setInputs(queryPlan.getInputs());
+
+        ExplainWork ew = new ExplainWork(null, null, queryPlan.getRootTasks(), queryPlan.getFetchTask(), null, sem,
+                false, true, false, false, false);
+
+        ExplainTask explain = new ExplainTask();
+        explain.initialize(conf, queryPlan, null);
+        org.json.JSONObject explainPlan = explain.getJSONLogicalPlan(null, ew);
+        return explainPlan.toString();
+    }
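Both the ExplainWork constructor and ExplainTask.getJSONLogicalPlan are Hive-internal APIs whose signatures shift between releases, and any failure here propagates out of registerCTAS. One option is a defensive wrapper, sketched under the assumption that an empty plan string is acceptable on the entity (the method name is hypothetical; LOG is the hook's existing logger):

    private String getQueryPlanSafely(HookContext hookContext, HiveConf conf) {
        try {
            return getQueryPlan(hookContext, conf);
        } catch (Exception e) {
            // Degrade to an empty plan rather than failing the entire hook invocation.
            LOG.info("Could not serialize the query plan", e);
            return "";
        }
    }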
     private void analyzeHiveParseTree(ASTNode ast) {
         String astStr = ast.dump();
@@ -486,7 +494,7 @@
     /**
      * This is an attempt to use the parser. Semantic issues are not handled here.
-     *
+     * <p/>
      * Trying to recompile the query runs into some issues in the preExec
      * hook but we need to make sure all the semantic issues are handled. Maybe we should save the AST in the
      * Semantic analyzer and have it available in the preExec hook so that we walk with it freely.
...
@@ -44,9 +44,7 @@
     public void setUp() throws Exception {
         //Register hive types
         HiveDataModelGenerator hiveModel = new HiveDataModelGenerator();
-        hiveModel.createDataModel();
-        TypesDef typesDef = hiveModel.getTypesDef();
-        String typesAsJson = TypesSerialization.toJson(typesDef);
+        String typesAsJson = hiveModel.getModelAsJson();
         MetadataServiceClient dgiClient = new MetadataServiceClient(DGI_URL);
         try {
             dgiClient.createType(typesAsJson);
...
@@ -48,6 +48,13 @@
         </fileSet>
         <fileSet>
+            <directory>addons/hive-bridge/src/main/bin</directory>
+            <outputDirectory>bin</outputDirectory>
+            <fileMode>0755</fileMode>
+            <directoryMode>0755</directoryMode>
+        </fileSet>
+
+        <fileSet>
             <directory>logs</directory>
             <outputDirectory>logs</outputDirectory>
             <directoryMode>0777</directoryMode>
@@ -73,6 +80,16 @@
             <directory>src/main/examples</directory>
             <outputDirectory>examples</outputDirectory>
         </fileSet>
+
+        <fileSet>
+            <directory>addons/hive-bridge/target/dependency/bridge</directory>
+            <outputDirectory>bridge</outputDirectory>
+        </fileSet>
+
+        <fileSet>
+            <directory>addons/hive-bridge/target/dependency/hook</directory>
+            <outputDirectory>hook</outputDirectory>
+        </fileSet>
     </fileSets>
     <files>
...
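Combined with the hive-bridge pom changes above, the assembly should stage the distribution roughly as follows (a sketch of the layout, not an authoritative listing):

    bin/           <- addons/hive-bridge/src/main/bin, mode 0755
    bridge/hive/   <- compile-scope bridge dependencies plus the hive-bridge jar
    hook/hive/     <- runtime-scope hook dependencies plus the hive-bridge jar
    logs/
    examples/

The import script above builds its classpath from ${BASEDIR}/lib-bridge, so either that directory is populated elsewhere or the script and the assembly still need to agree on a name.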