Commit fb3f6b46 by Shwetha GS

hive bridge and hook packaging

parent 6e212def
......@@ -33,7 +33,8 @@
<packaging>jar</packaging>
<properties>
<hive.version>0.14.0</hive.version>
<hive.version>1.1.0</hive.version>
<calcite.version>0.9.2-incubating</calcite.version>
<hadoop.version>2.5.0</hadoop.version>
</properties>
......@@ -41,6 +42,17 @@
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-client</artifactId>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-typesystem</artifactId>
</dependency>
<dependency>
......@@ -48,16 +60,6 @@
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
......@@ -65,18 +67,9 @@
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
......@@ -84,25 +77,10 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-typesystem</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
......@@ -141,7 +119,21 @@
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/dependency</outputDirectory>
<outputDirectory>${project.build.directory}/dependency/bridge/hive</outputDirectory>
<includeScope>compile</includeScope>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
</configuration>
</execution>
<execution>
<id>copy-hook-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/dependency/hook/hive</outputDirectory>
<includeScope>runtime</includeScope>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
......@@ -151,6 +143,38 @@
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/dependency/bridge/hive</outputDirectory>
</artifactItem>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/dependency/hook/hive</outputDirectory>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.mortbay.jetty</groupId>
......
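Taken together, the two copy-dependencies executions and the new copy execution stage two self-contained directories at package time: compile-scope dependencies plus the module's own jar under dependency/bridge/hive, and runtime-scope dependencies plus the same jar under dependency/hook/hive. A quick way to verify, sketched under the assumption that the module lives at addons/hive-bridge (as the assembly descriptor below suggests):
# Sketch only; jar names depend on the project version.
cd addons/hive-bridge && mvn package
ls target/dependency/bridge/hive   # compile-scope deps + the hive-bridge jar
ls target/dependency/hook/hive     # runtime-scope deps + the hive-bridge jar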
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
# resolve links - $0 may be a softlink
PRG="${0}"
while [ -h "${PRG}" ]; do
ls=`ls -ld "${PRG}"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "${PRG}"`/"$link"
fi
done
BASEDIR=`dirname "${PRG}"`
BASEDIR=`cd "${BASEDIR}/.." && pwd`
if [ -z "$METADATA_CONF" ]; then
METADATA_CONF=${BASEDIR}/conf
fi
export METADATA_CONF
if [ -f "${METADATA_CONF}/metadata-env.sh" ]; then
. "${METADATA_CONF}/metadata-env.sh"
fi
if test -z "${JAVA_HOME}"
then
JAVA_BIN=`which java`
JAR_BIN=`which jar`
else
JAVA_BIN=${JAVA_HOME}/bin/java
JAR_BIN=${JAVA_HOME}/bin/jar
fi
export JAVA_BIN
if [ ! -e "$JAVA_BIN" ] || [ ! -e "$JAR_BIN" ]; then
echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available."
exit 1
fi
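# Build the classpath from the conf directory plus every jar packaged under lib-bridge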
METADATACPPATH="$METADATA_CONF"
for i in "${BASEDIR}/lib-bridge/"*.jar; do
METADATACPPATH="${METADATACPPATH}:$i"
done
echo $METADATACPPATH
JAVA_PROPERTIES="$METADATA_OPTS"
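# Drop the first CLI argument, then fold any leading -D options into JAVA_PROPERTIES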
shift
while [[ ${1} =~ ^\-D ]]; do
JAVA_PROPERTIES="${JAVA_PROPERTIES} ${1}"
shift
done
TIME=`date +%Y%m%d%H%M%S`
${JAVA_BIN} ${JAVA_PROPERTIES} -cp ${METADATACPPATH} org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge
RETVAL=$?
[ $RETVAL -eq 0 ] && echo Hive Data Model Imported!!!
[ $RETVAL -ne 0 ] && echo Failure in Hive Data Model import!!!
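A hedged invocation sketch for the script above (the installed script name and memory setting are illustrative, not taken from this commit; note the script discards its first argument before scanning for -D options, hence the placeholder):
# Illustrative invocation only.
export METADATA_OPTS="-Xmx512m"
bin/import-hive.sh hive -Dhive.metastore.uris=thrift://localhost:9083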
......@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.apache.hadoop.metadata.typesystem.Struct;
......@@ -45,6 +46,8 @@ import java.util.List;
* and registers them in DGI.
*/
public class HiveMetaStoreBridge {
private static final String DEFAULT_DGI_URL = "http://localhost:21000/";
public static class Pair<S, T> {
public S first;
public T second;
......@@ -72,7 +75,7 @@ public class HiveMetaStoreBridge {
*/
public HiveMetaStoreBridge(HiveConf hiveConf) throws Exception {
hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
metadataServiceClient = new MetadataServiceClient(hiveConf.get(DGI_URL_PROPERTY));
metadataServiceClient = new MetadataServiceClient(hiveConf.get(DGI_URL_PROPERTY, DEFAULT_DGI_URL));
}
public MetadataServiceClient getMetadataServiceClient() {
......@@ -104,7 +107,9 @@ public class HiveMetaStoreBridge {
dbRef.set("locationUri", hiveDB.getLocationUri());
dbRef.set("parameters", hiveDB.getParameters());
dbRef.set("ownerName", hiveDB.getOwnerName());
dbRef.set("ownerType", hiveDB.getOwnerType().getValue());
if (hiveDB.getOwnerType() != null) {
dbRef.set("ownerType", hiveDB.getOwnerType().getValue());
}
return createInstance(dbRef);
}
......@@ -114,7 +119,7 @@ public class HiveMetaStoreBridge {
LOG.debug("creating instance of type " + typeName);
String entityJSON = InstanceSerialization.toJson(referenceable, true);
LOG.debug("Submitting new entity= " + entityJSON);
LOG.debug("Submitting new entity {} = {}", referenceable.getTypeName(), entityJSON);
JSONObject jsonObject = metadataServiceClient.createEntity(entityJSON);
String guid = jsonObject.getString(MetadataServiceClient.RESULTS);
LOG.debug("created instance for type " + typeName + ", guid: " + guid);
......@@ -338,8 +343,19 @@ public class HiveMetaStoreBridge {
return createInstance(sdReferenceable);
}
private void registerHiveDataModel() throws Exception {
HiveDataModelGenerator dataModelGenerator = new HiveDataModelGenerator();
try {
getMetadataServiceClient().createType(dataModelGenerator.getModelAsJson());
} catch (Exception e) {
//Ignore if type is already registered
//TODO make createType idempotent
}
}
public static void main(String[] argv) throws Exception {
HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(new HiveConf());
hiveMetaStoreBridge.registerHiveDataModel();
hiveMetaStoreBridge.importHiveMetadata();
}
}
......@@ -42,6 +42,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
......@@ -56,6 +57,7 @@ import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExplainWork;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
......@@ -124,7 +126,6 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
}
LOG.info("Created DGI Hook");
executor.shutdown();
}
@Override
......@@ -142,16 +143,16 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
fireAndForget(hookContext, conf);
} else {
executor.submit(
new Runnable() {
    @Override
    public void run() {
        try {
            fireAndForget(hookContext, conf);
        } catch (Throwable e) {
            LOG.info("DGI hook failed", e);
        }
    }
}
);
}
}
......@@ -190,14 +191,14 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
break;
case CREATETABLE_AS_SELECT:
registerCTAS(dgiBridge, hookContext);
registerCTAS(dgiBridge, hookContext, conf);
break;
default:
}
}
private void registerCTAS(HiveMetaStoreBridge dgiBridge, HookContext hookContext) throws Exception {
private void registerCTAS(HiveMetaStoreBridge dgiBridge, HookContext hookContext, HiveConf conf) throws Exception {
Set<ReadEntity> inputs = hookContext.getInputs();
Set<WriteEntity> outputs = hookContext.getOutputs();
String user = hookContext.getUserName();
......@@ -238,17 +239,19 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
processReferenceable.set("targetTableNames", target);
processReferenceable.set("queryText", queryStr);
processReferenceable.set("queryId", queryId);
processReferenceable.set("queryPlan", getQueryPlan(hookContext, conf));
processReferenceable.set("endTime", System.currentTimeMillis());
//TODO set
processReferenceable.set("endTime", queryStartTime);
processReferenceable.set("queryPlan", "queryPlan");
processReferenceable.set("queryGraph", "queryGraph");
dgiBridge.createInstance(processReferenceable);
}
/**
* Gets reference for the database. Creates new instance if it doesn't exist
*
* @param dgiBridge
* @param dbName database name
* @return Reference for database
* @throws Exception
*/
......@@ -271,6 +274,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
/**
* Gets reference for the table. Creates new instance if it doesn't exist
*
* @param dgiBridge
* @param dbName
* @param tableName table name
......@@ -296,16 +300,20 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
}
//TODO Do we need this??
//We need to somehow get the sem associated with the plan and
// use it here.
//MySemanticAnaylzer sem = new MySemanticAnaylzer(conf);
//sem.setInputs(plan.getInputs());
//ExplainWork ew = new ExplainWork(null, null, rootTasks,
// plan.getFetchTask(), null, sem,
// false, true, false, false, false);
//JSONObject explainPlan =
// explain.getJSONLogicalPlan(null, ew);
private String getQueryPlan(HookContext hookContext, HiveConf conf) throws Exception {
//We need to somehow get the sem associated with the plan and use it here.
MySemanticAnaylzer sem = new MySemanticAnaylzer(conf);
QueryPlan queryPlan = hookContext.getQueryPlan();
sem.setInputs(queryPlan.getInputs());
ExplainWork ew = new ExplainWork(null, null, queryPlan.getRootTasks(), queryPlan.getFetchTask(), null, sem,
false, true, false, false, false);
ExplainTask explain = new ExplainTask();
explain.initialize(conf, queryPlan, null);
org.json.JSONObject explainPlan = explain.getJSONLogicalPlan(null, ew);
return explainPlan.toString();
}
private void analyzeHiveParseTree(ASTNode ast) {
String astStr = ast.dump();
......@@ -486,7 +494,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
/**
* This is an attempt to use the parser. Semantic issues are not handled here.
*
* <p/>
* Trying to recompile the query runs into some issues in the preExec
* hook, but we need to make sure all the semantic issues are handled. Maybe we should save the AST in the
* Semantic analyzer and have it available in the preExec hook so that we can walk it freely.
......
......@@ -44,9 +44,7 @@ public class HiveHookIT {
public void setUp() throws Exception {
//Register hive types
HiveDataModelGenerator hiveModel = new HiveDataModelGenerator();
hiveModel.createDataModel();
TypesDef typesDef = hiveModel.getTypesDef();
String typesAsJson = TypesSerialization.toJson(typesDef);
String typesAsJson = hiveModel.getModelAsJson();
MetadataServiceClient dgiClient = new MetadataServiceClient(DGI_URL);
try {
dgiClient.createType(typesAsJson);
......
......@@ -48,6 +48,13 @@
</fileSet>
<fileSet>
<directory>addons/hive-bridge/src/main/bin</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
<directoryMode>0755</directoryMode>
</fileSet>
<fileSet>
<directory>logs</directory>
<outputDirectory>logs</outputDirectory>
<directoryMode>0777</directoryMode>
......@@ -73,6 +80,16 @@
<directory>src/main/examples</directory>
<outputDirectory>examples</outputDirectory>
</fileSet>
<fileSet>
<directory>addons/hive-bridge/target/dependency/bridge</directory>
<outputDirectory>bridge</outputDirectory>
</fileSet>
<fileSet>
<directory>addons/hive-bridge/target/dependency/hook</directory>
<outputDirectory>hook</outputDirectory>
</fileSet>
</fileSets>
<files>
......
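For reference, the new filesets map into the distribution roughly as follows (a sketch of the resulting layout, not part of the commit):
# bin/     <- addons/hive-bridge/src/main/bin                  (mode 0755)
# bridge/  <- addons/hive-bridge/target/dependency/bridge
# hook/    <- addons/hive-bridge/target/dependency/hook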