Commit 7a8e847a by Venkat Ranganathan

HiveTypeSystem process changes and HiveHook using semantic analyzer hooks

parent 1d850208
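For orientation on the commit title: Hive lets external code observe query compilation through semantic analyzer hooks, registered with the hive.semantic.analyzer.hook property. The sketch below only shows the general shape of such a hook against the Hive 0.14 API; the class name is hypothetical, and the actual HiveHook introduced by this commit is not part of the hunks shown below.

// Minimal sketch of a Hive 0.14 semantic analyzer hook, for orientation only.
// The class name is hypothetical; this is not the HiveHook added by this commit.
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

import java.io.Serializable;
import java.util.List;

public class ExampleSemanticAnalyzerHook extends AbstractSemanticAnalyzerHook {

    @Override
    public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast)
            throws SemanticException {
        // Called before semantic analysis; the query AST is available here.
        return ast;
    }

    @Override
    public void postAnalyze(HiveSemanticAnalyzerHookContext context,
                            List<Task<? extends Serializable>> rootTasks)
            throws SemanticException {
        // Called after semantic analysis; the entities resolved by the analyzer
        // can be inspected here to capture lineage.
        context.getInputs();   // read entities (e.g. source tables)
        context.getOutputs();  // write entities (e.g. target tables)
    }
}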
......@@ -58,6 +58,26 @@
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
......@@ -92,4 +112,4 @@
</dependency>
</dependencies>
</project>
\ No newline at end of file
</project>
......@@ -511,7 +511,10 @@ public class HiveTypeSystem {
new AttributeDefinition("userName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("sourceTableNames", String.format("array<%s>", DefinedTypes.HIVE_TABLE.name()), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("targetTableNames", String.format("array<%s>", DefinedTypes.HIVE_TABLE.name()), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("jobDefinition", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryText", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryPlan", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryId", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryGraph", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_PROCESS.name(),
......
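In the HiveTypeSystem hunk above, the single jobDefinition attribute of the HIVE_PROCESS class type is replaced by queryText, queryPlan and queryId (all required) plus an optional queryGraph. At query time these values come from Hive's hook interfaces; the sketch below is a hypothetical illustration using a post-execution hook (ExecuteWithHookContext), not the commit's HiveHook, and only shows which Hive 0.14 APIs expose each value.

// Hypothetical sketch: filling the new HIVE_PROCESS attributes from a Hive
// post-execution hook. Not the commit's HiveHook; illustrative only.
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

import java.util.HashMap;
import java.util.Map;

public class ProcessAttributeSketch implements ExecuteWithHookContext {

    @Override
    public void run(HookContext hookContext) throws Exception {
        QueryPlan plan = hookContext.getQueryPlan();

        Map<String, String> processAttributes = new HashMap<>();
        processAttributes.put("userName",
                hookContext.getUgi().getShortUserName());       // REQUIRED
        processAttributes.put("queryText", plan.getQueryStr()); // REQUIRED
        processAttributes.put("queryId", plan.getQueryId());    // REQUIRED
        // queryPlan (REQUIRED) and queryGraph (OPTIONAL) would hold serialized
        // forms of the plan/graph; the serialization is not shown here.
    }
}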
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://c6501.ambari.apache.org:8020</value>
<final>true</final>
</property>
<property>
<name>fs.trash.interval</name>
<value>360</value>
</property>
<property>
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
<value>true</value>
</property>
<property>
<name>hadoop.proxyuser.hcat.groups</name>
<value>users</value>
</property>
<property>
<name>hadoop.proxyuser.hcat.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>users</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.security.auth_to_local</name>
<value>
RULE:[2:$1@$0]([rn]m@.*)s/.*/yarn/
RULE:[2:$1@$0](jhs@.*)s/.*/mapred/
RULE:[2:$1@$0]([nd]n@.*)s/.*/hdfs/
RULE:[2:$1@$0](hm@.*)s/.*/hbase/
RULE:[2:$1@$0](rs@.*)s/.*/hbase/
DEFAULT</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>io.serializations</name>
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>50</value>
</property>
<property>
<name>ipc.client.connection.maxidletime</name>
<value>30000</value>
</property>
<property>
<name>ipc.client.idlethreshold</name>
<value>8000</value>
</property>
<property>
<name>ipc.server.tcpnodelay</name>
<value>true</value>
</property>
<property>
<name>mapreduce.jobtracker.webinterface.trusted</name>
<value>false</value>
</property>
<property>
<name>proxyuser_group</name>
<value>users</value>
</property>
</configuration>
<!--Wed Feb 4 03:23:35 2015-->
<configuration>
<property>
<name>dfs.block.access.token.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.blockreport.initialDelay</name>
<value>120</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.size</name>
<value>4096</value>
</property>
<property>
<name>dfs.cluster.administrators</name>
<value> hdfs</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:50010</value>
</property>
<property>
<name>dfs.datanode.balance.bandwidthPerSec</name>
<value>6250000</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>750</value>
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>1073741824</value>
</property>
<property>
<name>dfs.datanode.failed.volumes.tolerated</name>
<value>0</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
</property>
<property>
<name>dfs.datanode.https.address</name>
<value>0.0.0.0:50475</value>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:8010</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>4096</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
<property>
<name>dfs.heartbeat.interval</name>
<value>3</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/etc/hadoop/conf/dfs.exclude</value>
</property>
<property>
<name>dfs.http.policy</name>
<value>HTTP_ONLY</value>
</property>
<property>
<name>dfs.https.port</name>
<value>50470</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/hadoop/hdfs/journalnode</value>
</property>
<property>
<name>dfs.journalnode.http-address</name>
<value>0.0.0.0:8480</value>
</property>
<property>
<name>dfs.namenode.accesstime.precision</name>
<value>0</value>
</property>
<property>
<name>dfs.namenode.avoid.read.stale.datanode</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.avoid.write.stale.datanode</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/hadoop/hdfs/namesecondary</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>${dfs.namenode.checkpoint.dir}</value>
</property>
<property>
<name>dfs.namenode.checkpoint.period</name>
<value>21600</value>
</property>
<property>
<name>dfs.namenode.checkpoint.txns</name>
<value>1000000</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>40</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>c6501.ambari.apache.org:50070</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.https-address</name>
<value>c6501.ambari.apache.org:50470</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/hadoop/hdfs/namenode</value>
</property>
<property>
<name>dfs.namenode.name.dir.restore</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.safemode.threshold-pct</name>
<value>1.0f</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>c6501.ambari.apache.org:50090</value>
</property>
<property>
<name>dfs.namenode.stale.datanode.interval</name>
<value>30000</value>
</property>
<property>
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
<value>3600</value>
</property>
<property>
<name>dfs.namenode.write.stale.datanode.ratio</name>
<value>1.0f</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hdfs</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.replication.max</name>
<value>50</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
<final>true</final>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
</configuration>
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
org.apache.hadoop.metadata=DEBUG, console
org.apache.hadoop=DEBUG, console
org.apache.hive=INFO, console
org.apache.hcatalog=INFO, console
metadata.root.logger=DEBUG,console,DRFA
hive.root.logger=INFO,console,DRFA
hcatalog.root.logger=INFO,console,DRFA
metadata.log.dir=${user.dir}/metadata/logs
metadata.log.file=metadata.log
log4j.rootLogger=${metadata.root.logger}
#
# DRFA
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${metadata.log.dir}/${metadata.log.file}
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFA.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
#
# console
# Add "console" to rootlogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>mapreduce.admin.map.child.java.opts</name>
<value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>mapreduce.admin.reduce.child.java.opts</name>
<value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>mapreduce.admin.user.env</name>
<value>LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64</value>
</property>
<property>
<name>mapreduce.am.max-attempts</name>
<value>2</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure</value>
</property>
<property>
<name>mapreduce.application.framework.path</name>
<value>/hdp/apps/${hdp.version}/mapreduce/mapreduce.tar.gz#mr-framework</value>
</property>
<property>
<name>mapreduce.cluster.administrators</name>
<value> hadoop</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.job.emit-timeline-data</name>
<value>false</value>
</property>
<property>
<name>mapreduce.job.reduce.slowstart.completedmaps</name>
<value>0.05</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>c6501.ambari.apache.org:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/mr-history/done</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/mr-history/tmp</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>c6501.ambari.apache.org:19888</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx546m</value>
</property>
<property>
<name>mapreduce.map.log.level</name>
<value>INFO</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>682</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>false</value>
</property>
<property>
<name>mapreduce.map.sort.spill.percent</name>
<value>0.7</value>
</property>
<property>
<name>mapreduce.map.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress</name>
<value>false</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress.type</name>
<value>BLOCK</value>
</property>
<property>
<name>mapreduce.reduce.input.buffer.percent</name>
<value>0.0</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx546m</value>
</property>
<property>
<name>mapreduce.reduce.log.level</name>
<value>INFO</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>682</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.enabled</name>
<value>1</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.interval-ms</name>
<value>1000</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.timeout-ms</name>
<value>30000</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.input.buffer.percent</name>
<value>0.7</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.merge.percent</name>
<value>0.66</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>30</value>
</property>
<property>
<name>mapreduce.reduce.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>13562</value>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>273</value>
</property>
<property>
<name>mapreduce.task.timeout</name>
<value>300000</value>
</property>
<property>
<name>yarn.app.mapreduce.am.admin-command-opts</name>
<value>-Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx546m -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>yarn.app.mapreduce.am.log.level</name>
<value>INFO</value>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>682</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
</configuration>
......@@ -23,8 +23,11 @@ import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.cli.CliDriver;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.service.HiveClient;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.repository.graph.GraphBackedMetadataRepository;
......@@ -40,8 +43,13 @@ import org.testng.annotations.Test;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.util.List;
import java.util.Properties;
@Test (enabled = false)
public class HiveGraphRepositoryTest {
......@@ -49,6 +57,8 @@ public class HiveGraphRepositoryTest {
protected HiveTypeSystem hts;
private GraphBackedMetadataRepository repository;
private GraphService gs;
public static final String HIVE_L4J_PROPS = "target/hive-log4j.properties";
public static final String HIVE_EXEC_L4J_PROPS = "target/hive-exec-log4j.properties";
private static final Logger LOG =
LoggerFactory.getLogger(HiveGraphRepositoryTest.class);
......@@ -79,8 +89,10 @@ public class HiveGraphRepositoryTest {
@Test (enabled = false)
public void testHiveImport() throws Exception {
HiveImporter hImporter = new HiveImporter(repository, hts, new HiveMetaStoreClient(new HiveConf()));
HiveConf conf = new HiveConf();
HiveMetaStoreClient hiveMetaStoreClient;
hiveMetaStoreClient = new HiveMetaStoreClient(conf);
HiveImporter hImporter = new HiveImporter(repository, hts, hiveMetaStoreClient);
hImporter.importHiveMetadata();
LOG.info("Defined DB instances");
File f = new File("./target/logs/hiveobjs.txt");
......@@ -136,4 +148,5 @@ public class HiveGraphRepositoryTest {
bw.flush();
bw.close();
}
}
......@@ -21,11 +21,15 @@ package org.apache.hadoop.metadata.hivetypes;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.plan.api.QueryPlan;
import org.apache.hadoop.hive.service.HiveClient;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.storage.Id;
import org.apache.hadoop.metadata.storage.memory.MemRepository;
import org.apache.hadoop.metadata.types.TypeSystem;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.BeforeClass;
......@@ -35,13 +39,17 @@ import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
@Test (enabled = false)
@Test (enabled = true)
public class HiveTypeSystemTest {
protected MemRepository mr;
protected HiveTypeSystem hts;
private static final String hiveHost = "c6501.ambari.apache.org";
private static final short hivePort = 10000;
private static final Logger LOG =
LoggerFactory.getLogger(HiveTypeSystemTest.class);
......@@ -54,11 +62,14 @@ public class HiveTypeSystemTest {
hts = HiveTypeSystem.getInstance();
}
@Test (enabled = false)
@Test (enabled = true)
public void testHiveImport() throws MetaException, MetadataException, IOException {
HiveImporter hImporter = new HiveImporter(mr, hts, new HiveMetaStoreClient(new HiveConf()));
HiveConf conf = new HiveConf();
HiveMetaStoreClient hiveMetaStoreClient;
hiveMetaStoreClient = new HiveMetaStoreClient(conf);
HiveImporter hImporter = new HiveImporter(mr, hts, hiveMetaStoreClient);
hImporter.importHiveMetadata();
LOG.info("Defined DB instances");
File f = new File("./target/logs/hiveobjs.txt");
f.getParentFile().mkdirs();
......@@ -95,4 +106,20 @@ public class HiveTypeSystemTest {
bw.flush();
bw.close();
}
@Test (enabled = true)
public void testHiveLineage() throws Exception {
// Drive a CTAS through HiveServer2 so lineage can be captured:
// table t is the source, t2 is the target.
Class.forName("org.apache.hive.jdbc.HiveDriver");
String url = "jdbc:hive2://" + hiveHost + ":" + hivePort;
Connection con = DriverManager.getConnection(url, "ambari-qa", "");
Statement stmt = con.createStatement();
stmt.execute("drop table if exists t");
stmt.execute("create table t(a int, b string)");
stmt.execute("drop table if exists t2");
stmt.execute("create table t2 as select * from t");
stmt.close();
con.close();
}
}
\ No newline at end of file
drop table if exists t;
create table t(a int, b string);
drop table if exists t2;
create table t2 as select * from t;