Commit 54ccb87f by Dan Markwat

Changes for HiveLineage bridge; also added an integration test suite

parent 1089e036
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-bridge-parent</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</parent>
<artifactId>metadata-bridge-core</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-typesystem</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</dependency>
</dependencies>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the metadata-bridge-core module; groupId and version are inherited from metadata-bridge-parent. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-bridge-parent</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</parent>
<artifactId>metadata-bridge-core</artifactId>
<dependencies>
<!-- Hive metastore API. "provided" scope: available at compile time but expected to be supplied by the runtime environment rather than packaged with this module. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>0.14.0</version>
<scope>provided</scope>
</dependency>
<!-- Gson, used for JSON (de)serialization. -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<!-- JUnit, test scope only. NOTE(review): TestNG is also declared below; confirm both test frameworks are really needed. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<!-- No version declared here; presumably managed by a dependencyManagement section in a parent POM (TODO confirm). -->
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-repository</artifactId>
</dependency>
<!-- Sibling bridge module, pinned to the same snapshot version as this module's parent. -->
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-bridge-hive</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</dependency>
<!-- Jersey core; no version declared here, presumably managed by a parent POM (TODO confirm). -->
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</dependency>
<!-- Jetty; no version or scope declared here, presumably managed by a parent POM (TODO confirm). -->
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
</dependency>
<!-- TestNG, test scope only. -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.1.1</version>
<scope>test</scope>
</dependency>
<!-- Titan graph database core, declared with the default (compile) scope. -->
<dependency>
<groupId>com.thinkaurelius.titan</groupId>
<artifactId>titan-core</artifactId>
<version>0.5.2</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package org.apache.hadoop.metadata.bridge.hivelineage;
import java.util.Collections;
import java.util.List;
import javax.inject.Inject;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.bridge.Bridge;
import org.apache.hadoop.metadata.bridge.hivelineage.hook.HiveLineageBean;
import org.apache.hadoop.metadata.repository.MetadataRepository;
import org.apache.hadoop.metadata.storage.RepositoryException;
import org.apache.hadoop.metadata.types.AttributeDefinition;
import org.apache.hadoop.metadata.types.ClassType;
import org.apache.hadoop.metadata.types.HierarchicalTypeDefinition;
import org.apache.hadoop.metadata.types.Multiplicity;
import org.apache.hadoop.metadata.types.TypeSystem;
import com.google.common.collect.ImmutableList;
public class HiveLineageBridge implements Bridge {
static final String LINEAGE_CLASS_TYPE = "HiveLineage";
private final MetadataRepository repo;
/**
 * Creates a bridge that reads and writes lineage entities through the given repository.
 *
 * @param repo the metadata repository every bridge operation is delegated to
 */
@Inject
HiveLineageBridge(final MetadataRepository repo) {
    this.repo = repo;
}
@Override
public boolean defineBridgeTypes(TypeSystem ts) {
// new HierarchicalTypeDefinition(ClassType.class, name, superTypes, attrDefs);
try {
HierarchicalTypeDefinition<ClassType> lineageClassTypeDef =
new HierarchicalTypeDefinition<ClassType>(
"ClassType",
new HierarchicalTypeDefinition(
ClassType.class,
LINEAGE_CLASS_TYPE,
null,
ImmutableList.<String>of(),
new AttributeDefinition[] {
new AttributeDefinition("QUERY_ID", "STRING_TYPE", Multiplicity.REQUIRED, false, null),
new AttributeDefinition("HIVE_ID", "STRING_TYPE", Multiplicity.REQUIRED, false, null),
......@@ -48,12 +68,36 @@ public class HiveLineageBridge implements Bridge {
ts.defineClassType(lineageClassTypeDef);
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (MetadataException e) {
e.printStackTrace();
}
return false;
}
/**
 * Looks up the entity stored under {@code id}.
 *
 * <p>NOTE: the conversion from the repository instance to a {@link HiveLineageBean}
 * is not implemented yet, so this method currently always returns {@code null}.
 *
 * @param id identifier passed to the repository lookup
 * @return currently always {@code null}
 * @throws RepositoryException if the repository lookup fails
 */
public HiveLineageBean get(String id) throws RepositoryException {
    // The lookup is still issued so that repository errors reach the caller;
    // its result is discarded until the bean conversion exists.
    repo.getEntityDefinition(id);
    // TODO: turn the fetched instance into a HiveLineageBean
    return null;
}
/**
 * Creates a new {@code HiveLineage} entity in the repository.
 *
 * <p>NOTE: {@code bean} is not yet converted into a repository instance; the
 * repository is currently called with a {@code null} instance.
 *
 * @param bean the lineage record to store (currently unused)
 * @return the value returned by the repository for the created entity
 * @throws RepositoryException if the repository rejects the creation
 */
public String create(HiveLineageBean bean) throws RepositoryException {
    // TODO: turn the bean into something usable by the repo and pass it instead of null
    return repo.createEntity(null, LINEAGE_CLASS_TYPE);
}
/**
 * Lists the repository's entries for the {@code HiveLineage} type.
 *
 * <p>Possible follow-ups noted by the original author: wrap the result so each
 * entry is replaced by the outcome of {@link #get(String)}, or implement the
 * listing differently inside the repository.
 *
 * @return the list handed back by the repository, unmodified
 * @throws RepositoryException if the repository query fails
 */
public Iterable<String> list() throws RepositoryException {
    return repo.getEntityList(LINEAGE_CLASS_TYPE);
}
}
package org.apache.hadoop.metadata.web.resources;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import javax.inject.Inject;
import javax.inject.Singleton;
import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import org.apache.hadoop.metadata.bridge.hivelineage.HiveLineageBridge;
import org.apache.hadoop.metadata.bridge.hivelineage.hook.HiveLineageBean;
import org.apache.hadoop.metadata.storage.RepositoryException;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
/**
 * REST resource exposing Hive lineage records through a {@link HiveLineageBridge}.
 *
 * <p>Mounted at {@code bridge/hive}. Every endpoint produces JSON built with Gson.
 */
@Path("bridge/hive")
@Singleton
public class HiveLineageResource {

    /** Charset used to decode a request body when the client does not declare one. */
    private static final String DEFAULT_REQUEST_CHARSET = "UTF-8";

    /** Shared serializer; Gson instances are documented as thread-safe. */
    private static final Gson GSON = new Gson();

    private final HiveLineageBridge bridge;

    /**
     * @param bridge the bridge all lineage operations are delegated to
     */
    @Inject
    public HiveLineageResource(HiveLineageBridge bridge) {
        this.bridge = bridge;
    }

    /*
     * Parameter declarations kept from the original author, not wired into any endpoint:
     *
     * @PathParam("entityType") String entityType,
     *
     * @DefaultValue("0") @QueryParam("offset") Integer offset,
     *
     * @QueryParam("numResults") Integer resultsPerPage
     */

    /**
     * Returns the lineage record stored under {@code id} as a JSON tree.
     *
     * @param id identifier taken from the request path
     * @return the JSON form of whatever the bridge returns for {@code id}
     * @throws RepositoryException if the bridge lookup fails
     */
    @GET
    @Path("{id}")
    @Produces(MediaType.APPLICATION_JSON)
    public JsonElement getById(@PathParam("id") String id) throws RepositoryException {
        HiveLineageBean hlb = bridge.get(id);
        return GSON.toJsonTree(hlb);
    }

    /**
     * Returns every identifier reported by the bridge as a JSON array of strings.
     *
     * @return a JSON array with one string element per identifier
     * @throws RepositoryException if the bridge listing fails
     */
    @GET
    @Produces(MediaType.APPLICATION_JSON)
    public JsonElement list() throws RepositoryException {
        JsonArray ja = new JsonArray();
        for (String s : bridge.list()) {
            // entries are GUID strings, so each becomes a JSON primitive
            ja.add(new JsonPrimitive(s));
        }
        return ja;
    }

    /**
     * Deserializes the JSON request body into a {@link HiveLineageBean}, stores it
     * through the bridge, and returns the new identifier as {@code {"id": ...}}.
     *
     * @param request the raw servlet request whose body carries the JSON document
     * @return a JSON object with a single {@code id} property
     * @throws IOException if the body cannot be read or its declared charset is unsupported
     * @throws RepositoryException if the bridge fails to store the record
     */
    @POST
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    public JsonElement addLineage(@Context HttpServletRequest request) throws IOException, RepositoryException {
        // Decode with the charset the client declared, defaulting to UTF-8, instead of
        // the JVM's platform default, which varies between hosts.
        String charset = request.getCharacterEncoding();
        if (charset == null) {
            charset = DEFAULT_REQUEST_CHARSET;
        }
        Reader reader = new InputStreamReader(request.getInputStream(), charset);
        try {
            HiveLineageBean bean = GSON.fromJson(reader, HiveLineageBean.class);
            String id = bridge.create(bean);
            JsonObject jo = new JsonObject();
            jo.addProperty("id", id);
            return jo;
        } finally {
            reader.close();
        }
    }
}
package org.apache.hadoop.metadata.bridge.hivelineage;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.inject.Inject;
import org.apache.commons.collections.IteratorUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.metadata.RepositoryMetadataModule;
import org.apache.hadoop.metadata.bridge.hivelineage.hook.HiveLineageBean;
import org.apache.hadoop.metadata.repository.MetadataRepository;
import org.apache.hadoop.metadata.storage.RepositoryException;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import com.google.gson.Gson;
/**
 * Integration tests for {@link HiveLineageBridge} against a repository injected
 * from {@link RepositoryMetadataModule}.
 *
 * <p>The whole suite is currently disabled ({@code enabled = false}). The tests
 * are ordered by priority: create, then get, then list.
 */
@Test(enabled = false)
@Guice(modules = RepositoryMetadataModule.class)
public class TestHiveLineageBridgeIT {

    @Inject
    MetadataRepository repo;

    HiveLineageBridge bridge;
    HiveLineageBean hlb;

    // the id of one.json in the repo (test #1)
    String oneId;

    /**
     * Builds the bridge under test and loads the fixture bean from {@code one.json}.
     *
     * @throws IOException if the fixture cannot be opened or read
     */
    @BeforeClass
    public void bootstrap() throws IOException {
        // this used in lieu of DI for now
        bridge = new HiveLineageBridge(repo);
        // create a hive lineage bean
        FileInputStream fis = new FileInputStream("one.json");
        try {
            // read with an explicit charset rather than the platform default
            List<String> lines = IOUtils.readLines(fis, "UTF-8");
            String json = StringUtils.join(lines, "");
            hlb = new Gson().fromJson(json, HiveLineageBean.class);
        } finally {
            // release the file handle even when reading or parsing fails
            fis.close();
        }
    }

    @Test(priority = 1, enabled = false)
    public void testCreate() throws RepositoryException {
        // add the lineage bean to the repo
        oneId = bridge.create(hlb);
        // make sure this actually worked
        Assert.assertNotNull(oneId);
    }

    @Test(priority = 2, enabled = false)
    public void testGet() throws RepositoryException, IOException {
        HiveLineageBean bean = bridge.get(oneId);
        // TestNG argument order is (actual, expected)
        Assert.assertEquals(bean, hlb);
    }

    @Test(priority = 3, enabled = false)
    public void testList() throws RepositoryException {
        // IteratorUtils.toList returns an untyped list; the bridge only yields strings
        @SuppressWarnings("unchecked")
        List<String> list = IteratorUtils.toList(bridge.list().iterator());
        Assert.assertEquals(list.size(), 1);
        Assert.assertEquals(list.get(0), oneId);
    }
}
{"queryId":"a760104_20150106120303_036186d5-a991-4dfc-9ff2-05b072c7e711","hiveId":"90797386-3933-4ab0-ae68-a7baa7e155d4","user":"","queryStartTime":"1420563838114","queryEndTime":"1420563853806","query":"create table nyse_gss_count_dump as select count(nyse.stock_symbol) stock_symbol_count, stock_symbol from nyse_stocks nyse where (nyse.stock_symbol \u003d \u0027AET\u0027 or nyse.stock_symbol \u003d \u0027UNH\u0027 ) and nyse.stock_symbol \u003d \u0027T\u0027 GROUP by stock_symbol","tableName":"nyse_gss_count_dump","success":true,"failed":false,"executionEngine":"tez","sourceTables":[{"tableName":"nyse_stocks","tableAlias":"nyse"}],"queryColumns":[{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnAlias":"stock_symbol_count","columnFunction":"count"},{"columnName":"stock_symbol"}],"whereClause":[{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnOperator":"\u003d","columnValue":"\u0027AET\u0027"},{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnOperator":"\u003d","columnValue":"\u0027UNH\u0027"},{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnOperator":"\u003d","columnValue":"\u0027T\u0027"}],"groupBy":[{"columnName":"stock_symbol"}]}
\ No newline at end of file
{"queryId":"a760104_20150108124747_53cb7716-8756-4dfe-b746-4055f53e2895","hiveId":"1aebd95c-c7d5-4893-8c8c-c9ae098bdd5c","user":"","queryStartTime":"1420739257453","queryEndTime":"1420739277589","query":"create table nyse_gss_count_dump as select count(nyse.stock_symbol) stock_symbol_count, stock_symbol from nyse_stocks nyse where (nyse.stock_symbol \u003d \u0027AET\u0027 or nyse.stock_symbol \u003d \u0027UNH\u0027 ) and nyse.stock_symbol \u003d \u0027T\u0027 GROUP by stock_symbol","tableName":"nyse_gss_count_dump","success":true,"failed":false,"executionEngine":"tez","sourceTables":[{"tableName":"nyse_stocks","tableAlias":"nyse"}],"queryColumns":[{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnAlias":"stock_symbol_count","columnFunction":"count"},{"columnName":"stock_symbol"}],"whereClause":[{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnOperator":"\u003d","columnValue":"\u0027AET\u0027"},{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnOperator":"\u003d","columnValue":"\u0027UNH\u0027"},{"tbAliasOrName":"nyse","columnName":"stock_symbol","columnOperator":"\u003d","columnValue":"\u0027T\u0027"}],"groupBy":[{"columnName":"stock_symbol"}]}
\ No newline at end of file
......@@ -16,16 +16,19 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.13.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.4.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.13.1</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment