Commit 898e7317 by Venkat

HiveImporter and HiveTypeSystem - work in progress - still some rough edges

parent 6e506d73
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven module descriptor for metadata-hivetypes.
  Inherits groupId and version from the metadata-governance parent and pulls in
  the Hive metastore client, Gson, the sibling metadata-typesystem module, and
  the Hadoop client libraries.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>metadata-governance</artifactId>
        <groupId>org.apache.hadoop.metadata</groupId>
        <version>0.1-incubating-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>metadata-hivetypes</artifactId>
    <dependencies>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>0.14.0</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.2.2</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop.metadata</groupId>
            <artifactId>metadata-typesystem</artifactId>
            <!-- Sibling module: track the version inherited from the parent
                 instead of repeating the literal. -->
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <!-- hadoop-mapreduce-client is a pom-packaged aggregator and
                 publishes no jar; depend on the concrete core artifact. -->
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <!-- NOTE(review): no version here; presumably supplied by the
                 parent's dependencyManagement. Confirm. -->
        </dependency>
    </dependencies>
</project>
\ No newline at end of file
<!--Mon Apr 21 07:04:34 2014-->
<!--
  Hive client/metastore settings used by the hivetypes tests.
  Property order is kept as generated. Numeric values are written as plain
  numbers (no Java literal suffixes such as L or f) so they parse as
  long, float, or time values.
-->
<configuration>
    <property>
        <name>hive.enforce.sorting</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.tez.container.size</name>
        <value>250</value>
    </property>
    <property>
        <name>hive.optimize.bucketmapjoin.sortedmerge</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.compactor.worker.threads</name>
        <value>0</value>
    </property>
    <property>
        <name>hive.tez.java.opts</name>
        <value>-server -Xmx200m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC</value>
    </property>
    <property>
        <name>hive.orc.splits.include.file.footer</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.exec.pre.hooks</name>
        <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
    </property>
    <property>
        <name>fs.file.impl.disable.cache</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.compute.query.using.stats</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.security.authorization.manager</name>
        <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
    </property>
    <property>
        <name>hive.auto.convert.join.noconditionaltask</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.compactor.worker.timeout</name>
        <!-- Was "86400L"; the trailing L is a Java literal suffix, not part of the value. -->
        <value>86400</value>
    </property>
    <property>
        <name>hive.exec.failure.hooks</name>
        <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
    </property>
    <property>
        <name>ambari.hive.db.schema.name</name>
        <value>hive</value>
    </property>
    <property>
        <name>hive.server2.tez.initialize.default.sessions</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.security.authorization.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.exec.post.hooks</name>
        <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
    </property>
    <property>
        <name>hive.stats.autogather</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.optimize.reducededuplication</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.optimize.bucketmapjoin</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.tez.default.queues</name>
        <value>default</value>
    </property>
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://localhost:9083</value>
    </property>
    <property>
        <name>hive.enforce.sortmergebucketmapjoin</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.mapjoin.bucket.cache.size</name>
        <value>10000</value>
    </property>
    <property>
        <name>hive.auto.convert.join.noconditionaltask.size</name>
        <value>357564416</value>
    </property>
    <property>
        <name>hive.vectorized.execution.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>hive.execution.engine</name>
        <value>mr</value>
    </property>
    <property>
        <name>hive.compactor.delta.num.threshold</name>
        <value>10</value>
    </property>
    <property>
        <name>hive.optimize.index.filter</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.optimize.reducededuplication.min.reducer</name>
        <value>4</value>
    </property>
    <property>
        <name>hive.txn.manager</name>
        <value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
    </property>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/apps/hive/warehouse</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://sandbox.hortonworks.com/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>hive.metastore.client.socket.timeout</name>
        <value>60</value>
    </property>
    <property>
        <name>hive.vectorized.groupby.flush.percent</name>
        <value>1.0</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>hive.txn.timeout</name>
        <value>300</value>
    </property>
    <property>
        <name>hive.semantic.analyzer.factory.impl</name>
        <!-- Was "org.apache.hivealog.cli...", a search-and-replace mangle of the
             hcatalog package name. NOTE(review): confirm the class exists in
             the deployed HCatalog version. -->
        <value>org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory</value>
    </property>
    <property>
        <name>hive.auto.convert.join</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.tez.input.format</name>
        <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
    </property>
    <property>
        <name>hive.enforce.bucketing</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.mapred.reduce.tasks.speculative.execution</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.security.authenticator.manager</name>
        <value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
    </property>
    <property>
        <name>hive.compactor.abortedtxn.threshold</name>
        <value>1000</value>
    </property>
    <property>
        <name>hive.compactor.initiator.on</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.metastore.cache.pinobjtypes</name>
        <value>Table,Database,Type,FieldSchema,Order</value>
    </property>
    <property>
        <name>hive.auto.convert.sortmerge.join</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.limit.pushdown.memory.usage</name>
        <value>0.04</value>
    </property>
    <property>
        <name>fs.hdfs.impl.disable.cache</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.compactor.check.interval</name>
        <!-- Was "300L"; the trailing L is a Java literal suffix, not part of the value. -->
        <value>300</value>
    </property>
    <property>
        <name>hive.map.aggr</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.compactor.delta.pct.threshold</name>
        <!-- Was "0.1f"; written as a plain decimal, same numeric value. -->
        <value>0.1</value>
    </property>
    <property>
        <name>hive.vectorized.groupby.checkinterval</name>
        <value>1024</value>
    </property>
    <property>
        <name>hive.metastore.execute.setugi</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.auto.convert.sortmerge.join.noconditionaltask</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.vectorized.groupby.maxentries</name>
        <value>1024</value>
    </property>
    <property>
        <name>hive.security.metastore.authorization.manager</name>
        <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
    </property>
    <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.optimize.mapjoin.mapreduce</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.tez.sessions.per.default.queue</name>
        <value>1</value>
    </property>
    <property>
        <name>hive.txn.max.open.batch</name>
        <value>1000</value>
    </property>
</configuration>
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Per-package log levels.
# log4j only treats keys of the form log4j.logger.<name> as logger
# definitions; without that prefix these lines are plain properties and have
# no effect. No appender is listed here: events propagate to the root
# logger's appenders, and naming "console" again would print every message
# twice.
#
log4j.logger.org.apache.hadoop.metadata=DEBUG
log4j.logger.org.apache.hadoop=INFO
log4j.logger.org.apache.hive=INFO
log4j.logger.org.apache.hcatalog=INFO

# Substitution variables. Only metadata.root.logger is referenced below;
# hive.root.logger and hcatalog.root.logger are defined but not used in
# this file.
metadata.root.logger=DEBUG,console,DRFA
hive.root.logger=INFO,console,DRFA
hcatalog.root.logger=INFO,console,DRFA
metadata.log.dir=${user.dir}/metadata/logs
metadata.log.file=metadata.log

# Root logger: level and appenders come from metadata.root.logger.
log4j.rootLogger=${metadata.root.logger}

#
# DRFA
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${metadata.log.dir}/${metadata.log.file}
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFA.layout.ConversionPattern=%d (%t) [%p - %l] %m%n

#
# console
# Writes to System.err. Enabled through metadata.root.logger above;
# remove "console" from that list to silence it.
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.storage.Id;
import org.apache.hadoop.metadata.storage.memory.MemRepository;
import org.apache.hadoop.metadata.types.TypeSystem;
import org.junit.Before;
import org.junit.Test;
/**
 * Exercises {@link HiveImporter} against a Hive metastore and logs every
 * instance it stores in an in-memory repository.
 *
 * NOTE(review): the test builds a {@link HiveMetaStoreClient} from a default
 * {@link HiveConf}, so it presumably needs a reachable metastore configured on
 * the classpath. Confirm before running it in CI.
 */
public class HiveTypeSystemTest {

    public static final Log LOG = LogFactory.getLog(HiveTypeSystemTest.class);

    /** In-memory repository the importer writes into; recreated for each test. */
    protected MemRepository mr;

    /** Hive type system handed to the importer. */
    protected HiveTypeSystem hts;

    /**
     * Resets the shared {@link TypeSystem} singleton and builds a fresh
     * repository on top of it so tests do not see each other's types.
     *
     * @throws MetadataException if the type system or repository cannot be set up
     */
    @Before
    public void setup() throws MetadataException {
        TypeSystem ts = TypeSystem.getInstance();
        ts.reset();
        mr = new MemRepository(ts);
        hts = HiveTypeSystem.getInstance();
    }

    /**
     * Runs a full import and logs each imported instance by looking it up in
     * the repository by id.
     *
     * @throws MetaException     if the metastore client cannot be created
     * @throws MetadataException if the import or a repository lookup fails
     */
    @Test
    public void testHiveImport() throws MetaException, MetadataException {
        HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
        try {
            HiveImporter himport = new HiveImporter(mr, hts, client);
            himport.importHiveMetadata();
            LOG.info("Defined instances");
            for (Id id : himport.getInstances()) {
                ITypedReferenceableInstance instance = mr.get(id);
                LOG.info(instance.toString());
            }
        } finally {
            // Release the metastore connection even when the import fails.
            client.close();
        }
    }
}
\ No newline at end of file
......@@ -315,6 +315,7 @@
<module>repository</module>
<module>webapp</module>
<module>docs</module>
<module>hivetypes</module>
<module>metadata-bridge-parent</module>
</modules>
......
......@@ -156,7 +156,14 @@ public class AttributeStores {
String attrName = attrNames.get(0);
int nullPos = instance.fieldMapping().fieldNullPos.get(attrName);
int colPos = instance.fieldMapping().fieldPos.get(attrName);
System.out.println("Storing attribute " + attrName + " at pos " + pos + " colPos = " + colPos +
"nullPos = " + nullPos);
if (pos == nullList.size()) {
nullList.add(instance.nullFlags[nullPos]);
} else {
nullList.set(pos, instance.nullFlags[nullPos]);
}
//list.set(pos, instance.bools[colPos]);
store(instance, colPos, pos);
......@@ -169,6 +176,7 @@ public class AttributeStores {
public void load(int pos, IConstructableType type, StructInstance instance) throws RepositoryException {
List<String> attrNames = type.getNames(attrInfo);
String attrName = attrNames.get(0);
System.out.println("Loading attribute " + attrName);
int nullPos = instance.fieldMapping().fieldNullPos.get(attrName);
int colPos = instance.fieldMapping().fieldPos.get(attrName);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment