Commit aeada976 by Shwetha GS, committed by Harish Butani

hive bridge documentation

parent c87130eb
@@ -55,6 +55,17 @@
        <artifactId>metadata-typesystem</artifactId>
    </dependency>
<!-- Logging -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-metastore</artifactId>
@@ -228,6 +239,31 @@
            </execution>
        </executions>
    </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.maven.doxia</groupId>
<artifactId>doxia-module-twiki</artifactId>
<version>1.3</version>
</dependency>
</dependencies>
<executions>
<execution>
<goals>
<goal>site</goal>
</goals>
<phase>prepare-package</phase>
</execution>
</executions>
<configuration>
<generateProjectInfo>false</generateProjectInfo>
<generateReports>false</generateReports>
<skip>false</skip>
</configuration>
</plugin>
    </plugins>
</build>
</project>
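For context: binding the site goal to the prepare-package phase means an ordinary mvn package build renders the module's *.twiki sources to HTML (via the doxia-module-twiki dependency) under target/site, which the assembly change further below bundles into docs/hive.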
@@ -25,16 +25,19 @@ import org.apache.hadoop.metadata.typesystem.json.TypesSerialization;
import org.apache.hadoop.metadata.typesystem.types.AttributeDefinition;
import org.apache.hadoop.metadata.typesystem.types.ClassType;
import org.apache.hadoop.metadata.typesystem.types.DataTypes;
import org.apache.hadoop.metadata.typesystem.types.EnumType;
import org.apache.hadoop.metadata.typesystem.types.EnumTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.EnumValue;
import org.apache.hadoop.metadata.typesystem.types.HierarchicalTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.Multiplicity;
import org.apache.hadoop.metadata.typesystem.types.StructType;
import org.apache.hadoop.metadata.typesystem.types.StructTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.TraitType;
import org.apache.hadoop.metadata.typesystem.types.TypeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@@ -515,5 +518,23 @@ public class HiveDataModelGenerator {
    public static void main(String[] args) throws Exception {
        HiveDataModelGenerator hiveDataModelGenerator = new HiveDataModelGenerator();
        System.out.println("hiveDataModelAsJSON = " + hiveDataModelGenerator.getModelAsJson());
TypesDef typesDef = hiveDataModelGenerator.getTypesDef();
for (EnumTypeDefinition enumType : typesDef.enumTypesAsJavaList()) {
System.out.println(String.format("%s(%s) - %s", enumType.name, EnumType.class.getSimpleName(),
Arrays.toString(enumType.enumValues)));
}
for (StructTypeDefinition structType : typesDef.structTypesAsJavaList()) {
System.out.println(String.format("%s(%s) - %s", structType.typeName, StructType.class.getSimpleName(),
Arrays.toString(structType.attributeDefinitions)));
}
for (HierarchicalTypeDefinition<ClassType> classType : typesDef.classTypesAsJavaList()) {
System.out.println(String.format("%s(%s) - %s", classType.typeName, ClassType.class.getSimpleName(),
Arrays.toString(classType.attributeDefinitions)));
}
for (HierarchicalTypeDefinition<TraitType> traitType : typesDef.traitTypesAsJavaList()) {
System.out.println(String.format("%s(%s) - %s", traitType.typeName, TraitType.class.getSimpleName(),
Arrays.toString(traitType.attributeDefinitions)));
}
    }
}
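With the AttributeDefinition.toString() override added later in this commit, each line of this dump prints the type name, its kind, and its attribute names; for example, the hive_order struct from the model renders as:

    hive_order(StructType) - [col, order]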
---+ Hive DGI Bridge
Hive metadata can be modelled in DGI using its Type System. The default model is defined in org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator. It defines the following types (an illustrative construction sketch follows the list):
* hive_object_type(EnumType) - [GLOBAL, DATABASE, TABLE, PARTITION, COLUMN]
* hive_resource_type(EnumType) - [JAR, FILE, ARCHIVE]
* hive_principal_type(EnumType) - [USER, ROLE, GROUP]
* hive_function_type(EnumType) - [JAVA]
* hive_order(StructType) - [col, order]
* hive_resourceuri(StructType) - [resourceType, uri]
* hive_serde(StructType) - [name, serializationLib, parameters]
* hive_process(ClassType) - [processName, startTime, endTime, userName, sourceTableNames, targetTableNames, queryText, queryPlan, queryId, queryGraph]
* hive_function(ClassType) - [functionName, dbName, className, ownerName, ownerType, createTime, functionType, resourceUris]
* hive_type(ClassType) - [name, type1, type2, fields]
* hive_partition(ClassType) - [values, dbName, tableName, createTime, lastAccessTime, sd, parameters]
* hive_storagedesc(ClassType) - [cols, location, inputFormat, outputFormat, compressed, numBuckets, serdeInfo, bucketCols, sortCols, parameters, storedAsSubDirectories]
* hive_index(ClassType) - [indexName, indexHandlerClass, dbName, createTime, lastAccessTime, origTableName, indexTableName, sd, parameters, deferredRebuild]
* hive_role(ClassType) - [roleName, createTime, ownerName]
* hive_column(ClassType) - [name, type, comment]
* hive_db(ClassType) - [name, description, locationUri, parameters, ownerName, ownerType]
* hive_table(ClassType) - [tableName, dbName, owner, createTime, lastAccessTime, retention, sd, partitionKeys, parameters, viewOriginalText, viewExpandedText, tableType, temporary]
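For illustration, the sketch below shows how such definitions are typically constructed with the typesystem API. The constructor signatures shown are assumptions inferred from the typesystem classes, not code copied from HiveDataModelGenerator:
<verbatim>
// Illustrative sketch only -- signatures are assumptions, not the generator's code.
EnumTypeDefinition resourceType = new EnumTypeDefinition("hive_resource_type",
        new EnumValue("JAR", 1), new EnumValue("FILE", 2), new EnumValue("ARCHIVE", 3));

StructTypeDefinition order = new StructTypeDefinition("hive_order",
        new AttributeDefinition[]{
                new AttributeDefinition("col", DataTypes.STRING_TYPE.getName(),
                        Multiplicity.REQUIRED, false, null),
                new AttributeDefinition("order", DataTypes.INT_TYPE.getName(),
                        Multiplicity.REQUIRED, false, null)});
</verbatim>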
---++ Importing Hive Metadata
org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge imports Hive metadata into DGI using the type system defined in org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator. The import-hive.sh command can be used to facilitate this. Set up the following configurations in <dgi package>/conf/hive-site.xml:
* Hive metastore configuration - Refer to the [[https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin][Hive Metastore Configuration documentation]]
* DGI endpoint - Add the following property with the DGI endpoint for your setup
<verbatim>
<property>
<name>hive.dgi.url</name>
<value>http://localhost:21000/</value>
</property>
</verbatim>
Usage: <dgi package>/bin/import-hive.sh
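Under the hood, the import reads <dgi package>/conf/hive-site.xml the same way Hive does. A minimal illustrative sketch of the configuration lookup (the property is the one configured above; the rest is an assumption about the bridge's internals):
<verbatim>
// Illustrative only -- HiveMetaStoreBridge's actual internals may differ.
org.apache.hadoop.hive.conf.HiveConf hiveConf =
        new org.apache.hadoop.hive.conf.HiveConf(); // loads hive-site.xml from the classpath
String dgiUrl = hiveConf.get("hive.dgi.url");       // DGI endpoint configured above
</verbatim>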
---++ Hive Hook
Hive supports listeners on command execution through hive hooks. These can be used to add, update, or remove entities in DGI. Follow these steps in your Hive setup (see the sketch after the steps):
* Add org.apache.hadoop.metadata.hive.hook.HiveHook as a post-execution hook in hive-site.xml
<verbatim>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.metadata.hive.hook.HiveHook</value>
</property>
</verbatim>
* Add 'export HIVE_AUX_JARS_PATH=<dgi package>/hook/hive' in hive-env.sh
* Restart hive-server2
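For reference, a post-execution hook is a class implementing Hive's hook interface. A minimal sketch of the shape such a hook takes (this is not the actual HiveHook source):
<verbatim>
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

// Minimal sketch only -- the real org.apache.hadoop.metadata.hive.hook.HiveHook
// does far more: it registers/updates entities in DGI based on the query plan.
public class LoggingHook implements ExecuteWithHookContext {
    @Override
    public void run(HookContext hookContext) throws Exception {
        // Invoked after each successful Hive command.
        System.out.println("Executed: " + hookContext.getQueryPlan().getQueryStr());
    }
}
</verbatim>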
@@ -81,6 +81,7 @@
            <outputDirectory>examples</outputDirectory>
        </fileSet>
<!-- addons/hive -->
        <fileSet>
            <directory>addons/hive-bridge/target/dependency/bridge</directory>
            <outputDirectory>bridge</outputDirectory>
@@ -90,6 +91,12 @@
            <directory>addons/hive-bridge/target/dependency/hook</directory>
            <outputDirectory>hook</outputDirectory>
        </fileSet>
<fileSet>
<directory>addons/hive-bridge/target/site</directory>
<outputDirectory>docs/hive</outputDirectory>
</fileSet>
    </fileSets>
    <files>
...
@@ -88,4 +88,9 @@ public final class AttributeDefinition {
        result = 31 * result + (reverseAttributeName != null ? reverseAttributeName.hashCode() : 0);
        return result;
    }
@Override
public String toString() {
return name;
}
}
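This override is what makes the generator's new main-method dump readable: Arrays.toString over an array of AttributeDefinitions now prints just the attribute names. A tiny usage sketch (structType here is a hypothetical StructTypeDefinition):

    // Prints e.g. [col, order] -- the same listing format as the docs above.
    System.out.println(Arrays.toString(structType.attributeDefinitions));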
@@ -53,10 +53,10 @@ public class ClassType extends HierarchicalType<ClassType, IReferenceableInstanc
        infoToNameMap = null;
    }

-   ClassType(TypeSystem typeSystem, String name, ImmutableList<String> superTraits,
+   ClassType(TypeSystem typeSystem, String name, ImmutableList<String> superTypes,
             AttributeInfo... fields)
            throws MetadataException {
-       super(typeSystem, ClassType.class, name, superTraits, fields);
+       super(typeSystem, ClassType.class, name, superTypes, fields);
        infoToNameMap = TypeUtils.buildAttrInfoToNameMap(fieldMapping);
    }
...