Commit 898e7317 by Venkat

HiveImporter and HiveTypeSystem - work in progress - still some rough edges
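A minimal usage sketch, mirroring the test added in this commit (assumes a metastore reachable through the hive-site.xml on the classpath, plus the in-memory repository):

    TypeSystem ts = TypeSystem.getInstance();
    ts.reset();
    MemRepository repo = new MemRepository(ts);
    HiveTypeSystem hts = HiveTypeSystem.getInstance();
    HiveImporter importer = new HiveImporter(repo, hts, new HiveMetaStoreClient(new HiveConf()));
    importer.importHiveMetadata();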

parent 6e506d73
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>metadata-governance</artifactId>
<groupId>org.apache.hadoop.metadata</groupId>
<version>0.1-incubating-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>metadata-hivetypes</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-typesystem</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</dependency>
</dependencies>
</project>
\ No newline at end of file
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.metadata.*;
import org.apache.hadoop.metadata.storage.IRepository;
import org.apache.hadoop.metadata.storage.Id;
import org.apache.hadoop.metadata.storage.RepositoryException;
import org.apache.hadoop.metadata.types.IDataType;
import org.apache.hadoop.metadata.types.Multiplicity;
import org.apache.hadoop.metadata.types.StructType;
import org.apache.hadoop.metadata.types.TypeSystem;
import org.apache.thrift.TException;
import java.util.ArrayList;
import java.util.List;
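/**
 * Walks a Hive metastore over Thrift and writes each database, table,
 * partition and column it finds into the metadata repository as typed
 * instances of the HiveTypeSystem classes. Work in progress.
 */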
public class HiveImporter {
private final HiveMetaStoreClient hiveMetastoreClient;
public static final Log LOG = LogFactory.getLog(HiveImporter.class);
private TypeSystem typeSystem;
private IRepository repository;
private HiveTypeSystem hiveTypeSystem;
private List<Id> instances;
public HiveImporter(IRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc) throws RepositoryException {
this.repository = repo;
this.hiveMetastoreClient = hmc;
this.hiveTypeSystem = hts;
typeSystem = TypeSystem.getInstance();
instances = new ArrayList<>();
if (repository == null) {
LOG.error("repository is null");
throw new RuntimeException("repository is null");
}
repository.defineTypes(hts.getHierarchicalTypeDefinitions());
}
public List<Id> getInstances() {
return instances;
}
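/**
 * Entry point: enumerates all databases in the metastore and imports
 * each one, wrapping metastore failures in MetadataException.
 */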
public void importHiveMetadata() throws MetadataException {
LOG.info("Importing hive metadata");
try {
List<String> dbs = hiveMetastoreClient.getAllDatabases();
for (String db : dbs) {
importDatabase(db);
}
} catch (MetaException me) {
throw new MetadataException(me);
}
}
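/**
 * Imports a single database as a HIVE_DB instance, then recurses into its tables.
 */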
private void importDatabase(String db) throws MetadataException {
try {
LOG.info("Importing objects from database : " + db);
Database hiveDB = hiveMetastoreClient.getDatabase(db);
Referenceable dbRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_DB.name());
dbRef.set("name", hiveDB.getName());
dbRef.set("description", hiveDB.getDescription());
dbRef.set("locationUri", hiveDB.getLocationUri());
dbRef.set("parameters", hiveDB.getParameters());
dbRef.set("ownerName", hiveDB.getOwnerName());
dbRef.set("ownerType", hiveDB.getOwnerType().toString());
ITypedReferenceableInstance dbRefTyped = repository.create(dbRef);
instances.add(dbRefTyped.getId());
importTables(db, dbRefTyped);
} catch (NoSuchObjectException nsoe) {
throw new MetadataException(nsoe);
} catch (TException te) {
throw new MetadataException(te);
}
}
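/**
 * Imports every table of the given database as a HIVE_TABLE instance,
 * including partition keys, storage descriptor and up to Short.MAX_VALUE
 * partitions, each linked back to the owning database instance.
 */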
private void importTables(String db, ITypedReferenceableInstance dbRefTyped) throws MetadataException {
try {
List<String> hiveTables = hiveMetastoreClient.getAllTables(db);
for (String table : hiveTables) {
LOG.info("Importing objects from " + db + "." + table);
Table hiveTable = hiveMetastoreClient.getTable(db, table);
Referenceable tableRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_TABLE.name());
tableRef.set("dbName", dbRefTyped);
tableRef.set("tableName", hiveTable.getTableName());
tableRef.set("owner", hiveTable.getOwner());
tableRef.set("createTime", hiveTable.getCreateTime());
tableRef.set("lastAccessTime", hiveTable.getLastAccessTime());
tableRef.set("retention", hiveTable.getRetention());
StorageDescriptor storageDesc = hiveTable.getSd();
ITypedStruct sdStruct = fillStorageDescStruct(storageDesc);
tableRef.set("sd", sdStruct);
List<ITypedReferenceableInstance> partKeys = new ArrayList<>();
Referenceable colRef;
if (hiveTable.getPartitionKeysSize() > 0) {
for (FieldSchema fs : hiveTable.getPartitionKeys()) {
colRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
ITypedReferenceableInstance colRefTyped = repository.create(colRef);
partKeys.add(colRefTyped);
}
tableRef.set("partitionKeys", partKeys);
}
tableRef.set("parameters", hiveTable.getParameters());
if (hiveTable.isSetViewOriginalText()) {
tableRef.set("viewOriginalText", hiveTable.getViewOriginalText());
}
if (hiveTable.isSetViewExpandedText()) {
tableRef.set("viewExpandedText", hiveTable.getViewExpandedText());
}
tableRef.set("tableType", hiveTable.getTableType());
tableRef.set("temporary", hiveTable.isTemporary());
ITypedReferenceableInstance tableRefTyped = repository.create(tableRef);
instances.add(tableRefTyped.getId());
List<Partition> tableParts = hiveMetastoreClient.listPartitions(db, table, Short.MAX_VALUE);
if (tableParts.size() > 0) {
for (Partition hivePart : tableParts) {
Referenceable partRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_PARTITION.name());
partRef.set("values", hivePart.getValues());
partRef.set("dbName", dbRefTyped);
partRef.set("tableName", tableRefTyped);
partRef.set("createTime", hivePart.getCreateTime());
partRef.set("lastAccessTime", hivePart.getLastAccessTime());
sdStruct = fillStorageDescStruct(hivePart.getSd());
partRef.set("sd", sdStruct);
partRef.set("parameters", hivePart.getParameters());
ITypedReferenceableInstance partRefTyped = repository.create(partRef);
instances.add(partRefTyped.getId());
}
}
}
} catch (NoSuchObjectException nsoe) {
throw new MetadataException(nsoe);
} catch (TException te) {
throw new MetadataException(te);
}
}
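/**
 * Converts a metastore StorageDescriptor into a typed HIVE_STORAGEDESC
 * struct: serde info, columns, sort order and the scalar fields.
 * Skewed-column info is skipped until the typesystem handles nested arrays.
 */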
private ITypedStruct fillStorageDescStruct(StorageDescriptor storageDesc) throws MetadataException {
String storageDescName = HiveTypeSystem.DefinedTypes.HIVE_STORAGEDESC.name();
SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
//SkewedInfo skewedInfo = storageDesc.getSkewedInfo(); // unused until the skewedInfo block below is re-enabled
Struct sdStruct = new Struct(storageDescName);
LOG.info("Filling storage descriptor information for " + storageDesc);
String serdeInfoName = HiveTypeSystem.DefinedTypes.HIVE_SERDE.name();
Struct serdeInfoStruct = new Struct(serdeInfoName);
serdeInfoStruct.set("name", serdeInfo.getName());
serdeInfoStruct.set("serializationLib", serdeInfo.getSerializationLib());
serdeInfoStruct.set("parameters", serdeInfo.getParameters());
LOG.info("serdeInfo = " + serdeInfo);
StructType serdeInfotype = (StructType) hiveTypeSystem.getDataType(serdeInfoName);
ITypedStruct serdeInfoStructTyped =
serdeInfotype.convert(serdeInfoStruct, Multiplicity.OPTIONAL);
sdStruct.set("serdeInfo", serdeInfoStructTyped);
// Will need to revisit this after we fix typesystem.
//LOG.info("skewedInfo = " + skewedInfo);
//String skewedInfoName = HiveTypeSystem.DefinedTypes.HIVE_SKEWEDINFO.name();
//Struct skewedInfoStruct = new Struct(skewedInfoName);
//if (skewedInfo.getSkewedColNames().size() > 0) {
// skewedInfoStruct.set("skewedColNames", skewedInfo.getSkewedColNames());
// skewedInfoStruct.set("skewedColValues", skewedInfo.getSkewedColValues());
// skewedInfoStruct.set("skewedColValueLocationMaps", skewedInfo.getSkewedColValueLocationMaps());
// StructType skewedInfotype = (StructType) hiveTypeSystem.getDataType(skewedInfoName);
// ITypedStruct skewedInfoStructTyped =
// skewedInfotype.convert(skewedInfoStruct, Multiplicity.OPTIONAL);
// sdStruct.set("skewedInfo", skewedInfoStructTyped);
//}
List<ITypedReferenceableInstance> fieldsList = new ArrayList<>();
Referenceable colRef;
for (FieldSchema fs : storageDesc.getCols()) {
LOG.debug("Processing field " + fs);
colRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
ITypedReferenceableInstance colRefTyped = repository.create(colRef);
fieldsList.add(colRefTyped);
}
sdStruct.set("cols", fieldsList);
List<ITypedStruct> sortColsStruct = new ArrayList<>();
for (Order sortcol : storageDesc.getSortCols()) {
String hiveOrderName = HiveTypeSystem.DefinedTypes.HIVE_ORDER.name();
Struct colStruct = new Struct(hiveOrderName);
colStruct.set("col", sortcol.getCol());
colStruct.set("order", sortcol.getOrder());
StructType sortColType = (StructType) hiveTypeSystem.getDataType(hiveOrderName);
ITypedStruct sortColTyped =
sortColType.convert(colStruct, Multiplicity.OPTIONAL);
sortColsStruct.add(sortColTyped);
}
sdStruct.set("location", storageDesc.getLocation());
sdStruct.set("inputFormat", storageDesc.getInputFormat());
sdStruct.set("outputFormat", storageDesc.getOutputFormat());
sdStruct.set("compressed", storageDesc.isCompressed());
if (storageDesc.getBucketCols().size() > 0) {
sdStruct.set("bucketCols", storageDesc.getBucketCols());
}
if (sortColsStruct.size() > 0) {
sdStruct.set("sortCols", sortColsStruct);
}
sdStruct.set("parameters", storageDesc.getParameters());
sdStruct.set("storedAsSubDirectories", storageDesc.isStoredAsSubDirectories());
StructType storageDesctype = (StructType) hiveTypeSystem.getDataType(storageDescName);
ITypedStruct sdStructTyped =
storageDesctype.convert(sdStruct, Multiplicity.OPTIONAL);
return sdStructTyped;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import com.google.common.collect.ImmutableList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.types.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
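/**
 * Lazily initialized singleton that registers the Hive enum, struct and
 * class types (see DefinedTypes) with the metadata TypeSystem.
 */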
public class HiveTypeSystem {
public static final Log LOG = LogFactory.getLog(HiveTypeSystem.class);
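// Initialization-on-demand holder: the JVM creates the instance on first
// access to Holder.instance; getInstance() then runs initialize() once.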
public static final class Holder {
public static final HiveTypeSystem instance = new HiveTypeSystem();
}
private TypeSystem typeSystem;
private boolean valid = false;
public enum DefinedTypes {
// Enums
HIVE_OBJECTTYPE,
HIVE_PRINCIPALTYPE,
HIVE_RESOURCETYPE,
HIVE_FUNCTIONTYPE,
// Structs
HIVE_SERDE,
HIVE_STORAGEDESC,
HIVE_SKEWEDINFO,
HIVE_ORDER,
HIVE_RESOURCEURI,
// Classes
HIVE_DB,
HIVE_TABLE,
HIVE_COLUMN,
HIVE_PARTITION,
HIVE_INDEX,
HIVE_FUNCTION,
HIVE_ROLE,
HIVE_TYPE,
//HIVE_VIEW,
}
private Map<String, HierarchicalTypeDefinition<ClassType>> classTypeDefinitions;
private Map<String, EnumTypeDefinition> enumTypeDefinitionMap;
private Map<String, StructTypeDefinition> structTypeDefinitionMap;
private DataTypes.MapType mapStrToStrMap;
private DataTypes.ArrayType strArrayType;
private Map<String, IDataType> typeMap;
private List<IDataType> enumTypes;
private static Multiplicity ZeroOrMore = new Multiplicity(0, Integer.MAX_VALUE, true);
private HiveTypeSystem() {
classTypeDefinitions = new HashMap<>();
enumTypeDefinitionMap = new HashMap<>();
structTypeDefinitionMap = new HashMap<>();
typeMap = new HashMap<>();
enumTypes = new ArrayList<>();
}
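// Registers the map/array helper types, then every enum, struct and class
// definition with the TypeSystem; marks the instance usable on success.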
private void initialize() throws MetadataException {
LOG.info("Initializing the Hive Typesystem");
typeSystem = TypeSystem.getInstance();
mapStrToStrMap =
typeSystem.defineMapType(DataTypes.STRING_TYPE, DataTypes.STRING_TYPE);
strArrayType = typeSystem.defineArrayType(DataTypes.STRING_TYPE);
createHiveObjectTypeEnum();
createHivePrincipalTypeEnum();
createFunctionTypeEnum();
createResourceTypeEnum();
createSerDeStruct();
//createSkewedInfoStruct();
createOrderStruct();
createResourceUriStruct();
createStorageDescStruct();
createDBClass();
createTypeClass();
createColumnClass();
createPartitionClass();
createTableClass();
createIndexClass();
createFunctionClass();
createRoleClass();
for (EnumTypeDefinition def : getEnumTypeDefinitions()) {
enumTypes.add(typeSystem.defineEnumType(def));
}
typeMap.putAll(
typeSystem.defineTypes(getStructTypeDefinitions(), getTraitTypeDefinitions(), getClassTypeDefinitions()));
valid = true;
}
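// Synchronized so concurrent callers cannot observe a partially
// initialized type system.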
public synchronized static HiveTypeSystem getInstance() throws MetadataException {
HiveTypeSystem hs = Holder.instance;
if (hs.valid) {
LOG.info("Returning pre-initialized HiveTypeSystem singleton");
return hs;
}
hs.initialize();
return hs;
}
public IDataType getDataType(String typeName) {
return typeMap.get(typeName);
}
public ImmutableList<HierarchicalType> getHierarchicalTypeDefinitions() {
if (valid) {
return ImmutableList.of(
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_DB.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_TABLE.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_COLUMN.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_PARTITION.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_INDEX.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_FUNCTION.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_ROLE.name())
);
} else {
return ImmutableList.of();
}
}
public ImmutableList<EnumTypeDefinition> getEnumTypeDefinitions() {
return ImmutableList.copyOf(enumTypeDefinitionMap.values());
}
public ImmutableList<StructTypeDefinition> getStructTypeDefinitions() {
return ImmutableList.copyOf(structTypeDefinitionMap.values());
}
public ImmutableList<HierarchicalTypeDefinition<ClassType>> getClassTypeDefinitions() {
return ImmutableList.copyOf(classTypeDefinitions.values());
}
public ImmutableList<HierarchicalTypeDefinition<TraitType>> getTraitTypeDefinitions() {
return ImmutableList.of();
}
private void createHiveObjectTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("GLOBAL", 1),
new EnumValue("DATABASE", 2),
new EnumValue("TABLE", 3),
new EnumValue("PARTITION", 4),
new EnumValue("COLUMN", 5),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_OBJECTTYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_OBJECTTYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_OBJECTTYPE.name());
}
private void createHivePrincipalTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("USER", 1),
new EnumValue("ROLE", 2),
new EnumValue("GROUP", 3),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_PRINCIPALTYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_PRINCIPALTYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_PRINCIPALTYPE.name());
}
private void createFunctionTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("JAVA", 1),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_FUNCTIONTYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_FUNCTIONTYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_FUNCTIONTYPE.name());
}
private void createResourceTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("JAR", 1),
new EnumValue("FILE", 2),
new EnumValue("ARCHIVE", 3),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_RESOURCETYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_RESOURCETYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_RESOURCETYPE.name());
}
private void createSerDeStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("serializationLib", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_SERDE.name(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_SERDE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_SERDE.name());
}
/** Revisit later after nested array types are handled by the typesystem **/
/**
private void createSkewedInfoStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("skewedColNames", String.format("array<%s>", DataTypes.STRING_TYPE.getName()),
ZeroOrMore, false, null),
new AttributeDefinition("skewedColValues", String.format("array<%s>", strArrayType.getName()),
ZeroOrMore, false, null),
new AttributeDefinition("skewedColValueLocationMaps", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_SKEWEDINFO.name(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_SKEWEDINFO.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_SKEWEDINFO.name());
}
**/
private void createOrderStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("col", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("order", DataTypes.INT_TYPE.getName(), Multiplicity.REQUIRED, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_ORDER.name(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_ORDER.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_ORDER.name());
}
private void createStorageDescStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("cols", String.format("array<%s>", DefinedTypes.HIVE_COLUMN.name()), Multiplicity.COLLECTION, false, null),
new AttributeDefinition("location", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("inputFormat", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("outputFormat", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("compressed", DataTypes.BOOLEAN_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("numBuckets", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("serdeInfo", DefinedTypes.HIVE_SERDE.name(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("bucketCols", String.format("array<%s>",DataTypes.STRING_TYPE.getName()), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sortCols", String.format("array<%s>", DefinedTypes.HIVE_ORDER.name()), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
//new AttributeDefinition("skewedInfo", DefinedTypes.HIVE_SKEWEDINFO.name(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("storedAsSubDirectories", DataTypes.BOOLEAN_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
};
StructTypeDefinition definition =
new StructTypeDefinition(DefinedTypes.HIVE_STORAGEDESC.name(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_STORAGEDESC.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_STORAGEDESC.name());
}
private void createResourceUriStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("resourceType", DefinedTypes.HIVE_RESOURCETYPE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("uri", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_RESOURCEURI.name(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_RESOURCEURI.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_RESOURCEURI.name());
}
private void createDBClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("description", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("locationUri", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerName", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerType", DefinedTypes.HIVE_PRINCIPALTYPE.name(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_DB.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_DB.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_DB.name());
}
private void createTypeClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("type1", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("type2", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("fields", String.format("array<%s>",
DefinedTypes.HIVE_COLUMN.name()), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_TYPE.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_TYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_TYPE.name());
}
private void createColumnClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
//new AttributeDefinition("type", DefinedTypes.HIVE_TYPE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("type", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("comment", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_COLUMN.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_COLUMN.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_COLUMN.name());
}
private void createPartitionClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("values", DataTypes.STRING_TYPE.getName(), Multiplicity.COLLECTION, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("tableName", DefinedTypes.HIVE_TABLE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", DefinedTypes.HIVE_STORAGEDESC.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_PARTITION.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_PARTITION.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_PARTITION.name());
}
private void createTableClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("tableName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("owner", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("retention", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", DefinedTypes.HIVE_STORAGEDESC.name(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("partitionKeys", String.format("array<%s>", DefinedTypes.HIVE_COLUMN.name()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("viewOriginalText", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("viewExpandedText", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("tableType", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("temporary", DataTypes.BOOLEAN_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_TABLE.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_TABLE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_TABLE.name());
}
private void createIndexClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("indexName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("indexHandleClass", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("origTableName", DefinedTypes.HIVE_TABLE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("indexTableName", DefinedTypes.HIVE_TABLE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("sd", DefinedTypes.HIVE_STORAGEDESC.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("deferredRebuild", DataTypes.BOOLEAN_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_INDEX.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_INDEX.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_INDEX.name());
}
private void createFunctionClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("functionName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("className", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerName", DataTypes.INT_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerType", DefinedTypes.HIVE_PRINCIPALTYPE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("functionType", DefinedTypes.HIVE_FUNCTIONTYPE.name(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("resourceUris", DefinedTypes.HIVE_RESOURCEURI.name(), Multiplicity.COLLECTION, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_FUNCTION.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_FUNCTION.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_FUNCTION.name());
}
private void createRoleClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("roleName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("ownerName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_ROLE.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_ROLE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_ROLE.name());
}
}
<!--Mon Apr 21 07:04:34 2014-->
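<!-- Sandbox hive-site.xml used by HiveTypeSystemTest; the metastore URI and
     JDBC settings below must match a running Hive installation. -->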
<configuration>
<property>
<name>hive.enforce.sorting</name>
<value>true</value>
</property>
<property>
<name>hive.tez.container.size</name>
<value>250</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin.sortedmerge</name>
<value>false</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>0</value>
</property>
<property>
<name>hive.tez.java.opts</name>
<value>-server -Xmx200m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC</value>
</property>
<property>
<name>hive.orc.splits.include.file.footer</name>
<value>false</value>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>fs.file.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>hive.compute.query.using.stats</name>
<value>true</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.timeout</name>
<value>86400L</value>
</property>
<property>
<name>hive.exec.failure.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>ambari.hive.db.schema.name</name>
<value>hive</value>
</property>
<property>
<name>hive.server2.tez.initialize.default.sessions</name>
<value>false</value>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.server2.tez.default.queues</name>
<value>default</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost:9083</value>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.mapjoin.bucket.cache.size</name>
<value>10000</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>357564416</value>
</property>
<property>
<name>hive.vectorized.execution.enabled</name>
<value>true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
</property>
<property>
<name>hive.compactor.delta.num.threshold</name>
<value>10</value>
</property>
<property>
<name>hive.optimize.index.filter</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>4</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/apps/hive/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://sandbox.hortonworks.com/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>60</value>
</property>
<property>
<name>hive.vectorized.groupby.flush.percent</name>
<value>1.0</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>300</value>
</property>
<property>
<name>hive.semantic.analyzer.factory.impl</name>
<value>org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>true</value>
</property>
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
</property>
<property>
<name>hive.compactor.abortedtxn.threshold</name>
<value>1000</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,Database,Type,FieldSchema,Order</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>true</value>
</property>
<property>
<name>hive.limit.pushdown.memory.usage</name>
<value>0.04</value>
</property>
<property>
<name>fs.hdfs.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.check.interval</name>
<value>300L</value>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.delta.pct.threshold</name>
<value>0.1f</value>
</property>
<property>
<name>hive.vectorized.groupby.checkinterval</name>
<value>1024</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.vectorized.groupby.maxentries</name>
<value>1024</value>
</property>
<property>
<name>hive.security.metastore.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
<property>
<name>hive.optimize.mapjoin.mapreduce</name>
<value>true</value>
</property>
<property>
<name>hive.server2.tez.sessions.per.default.queue</name>
<value>1</value>
</property>
<property>
<name>hive.txn.max.open.batch</name>
<value>1000</value>
</property>
</configuration>
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
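# Per-package log levels; everything else inherits from log4j.rootLogger below.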
log4j.logger.org.apache.hadoop.metadata=DEBUG, console
log4j.logger.org.apache.hadoop=INFO, console
log4j.logger.org.apache.hive=INFO, console
log4j.logger.org.apache.hcatalog=INFO, console
metadata.root.logger=DEBUG,console,DRFA
hive.root.logger=INFO,console,DRFA
hcatalog.root.logger=INFO,console,DRFA
metadata.log.dir=${user.dir}/metadata/logs
metadata.log.file=metadata.log
log4j.rootLogger=${metadata.root.logger}
#
# DRFA
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${metadata.log.dir}/${metadata.log.file}
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFA.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
#
# console
# Add "console" to rootlogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.storage.Id;
import org.apache.hadoop.metadata.storage.memory.MemRepository;
import org.apache.hadoop.metadata.types.TypeSystem;
import org.junit.Before;
import org.junit.Test;
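/**
 * Integration test: requires a live metastore at the URI configured in
 * hive-site.xml (thrift://localhost:9083). Imports everything it finds
 * into an in-memory repository and logs each created instance.
 */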
public class HiveTypeSystemTest {
protected MemRepository mr;
protected HiveTypeSystem hts;
public static final Log LOG = LogFactory.getLog(HiveTypeSystemTest.class);
@Before
public void setup() throws MetadataException {
TypeSystem ts = TypeSystem.getInstance();
ts.reset();
mr = new MemRepository(ts);
hts = HiveTypeSystem.getInstance();
}
@Test
public void testHiveImport() throws MetaException, MetadataException {
HiveImporter himport = new HiveImporter(mr, hts, new HiveMetaStoreClient(new HiveConf()));
himport.importHiveMetadata();
LOG.info("Defined instances");
for (Id id : himport.getInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
LOG.info(instance.toString());
}
}
}
\ No newline at end of file
@@ -315,6 +315,7 @@
<module>repository</module>
<module>webapp</module>
<module>docs</module>
<module>hivetypes</module>
<module>metadata-bridge-parent</module>
</modules>
@@ -156,7 +156,14 @@ public class AttributeStores {
String attrName = attrNames.get(0);
int nullPos = instance.fieldMapping().fieldNullPos.get(attrName);
int colPos = instance.fieldMapping().fieldPos.get(attrName);
System.out.println("Storing attribute " + attrName + " at pos " + pos + " colPos = " + colPos +
"nullPos = " + nullPos);
if (pos == nullList.size()) {
nullList.add(instance.nullFlags[nullPos]);
} else {
nullList.set(pos, instance.nullFlags[nullPos]);
}
//list.set(pos, instance.bools[colPos]);
store(instance, colPos, pos);
@@ -169,6 +176,7 @@ public class AttributeStores {
public void load(int pos, IConstructableType type, StructInstance instance) throws RepositoryException {
List<String> attrNames = type.getNames(attrInfo);
String attrName = attrNames.get(0);
System.out.println("Loading attribute " + attrName);
int nullPos = instance.fieldMapping().fieldNullPos.get(attrName);
int colPos = instance.fieldMapping().fieldPos.get(attrName);