Commit fcbce418 by Venkatesh Seetharam

Added more DSL tests, Docs and resolved RAT issues

parent c1c6510b
...@@ -19,11 +19,11 @@ ...@@ -19,11 +19,11 @@
######### Graph Database Configs ######### ######### Graph Database Configs #########
# Graph Storage # Graph Storage
metadata.graph.storage.backend=berkeleyje metadata.graph.storage.backend=berkeleyje
metadata.graph.storage.directory=./data/berkeley metadata.graph.storage.directory=./target/data/berkeley
# Graph Search Index # Graph Search Index
metadata.graph.index.search.backend=elasticsearch metadata.graph.index.search.backend=elasticsearch
metadata.graph.index.search.directory=./data/es metadata.graph.index.search.directory=./target/data/es
metadata.graph.index.search.elasticsearch.client-only=false metadata.graph.index.search.elasticsearch.client-only=false
metadata.graph.index.search.elasticsearch.local-mode=true metadata.graph.index.search.elasticsearch.local-mode=true
......
---+ Quick Start Guide
---++ Introduction
This quick start user guide is a simple client that adds a few sample type definitions modeled
after the example as shown below. It also adds example entities along with traits as shown in the
instance graph below.
---+++ Example Type Definitions
<img src="guide-class-diagram.png"/>
---+++ Example Instance Graph
<img src="guide-instance-graph.png"/>
---++ Running the example
This will add sample types and instances along with traits as shown in the instance graph above.
* bin/quick-start.sh
---++ Dashboard
A simple dashboard with search is available.
* http://localhost:21000/dashboard
---+ Data Governance and Metadata platform for Hadoop ---+ Data Governance and Metadata framework for Hadoop
---++ Overview ---++ Overview
DGI is a scalable and extensible set of core foundational governance services – enabling
enterprises to effectively and efficiently meet their compliance requirements within Hadoop and
allows integration with the whole enterprise data ecosystem.
---++ Use Cases ---++ Features
* Enables modeling ---+++ Data Classification
* Import or define taxonomy business-oriented annotations for data
* Define, annotate, and automate capture of relationships between data sets and underlying
elements including source, target, and derivation processes
* Export metadata to third-party systems
* Captures Lineage information for data sets and processes ---+++ Centralized Auditing
* Capture security access information for every application, process, and interaction with data
* Capture the operational information for execution, steps, and activities
---+++ Search & Lineage (Browse)
* Pre-defined navigation paths to explore the data classification and audit information
* Text-based search feature locates relevant data and audit events across the Data Lake quickly
and accurately
* Browse visualization of data set lineage allowing users to drill-down into operational,
security, and provenance related information
---+++ Security & Policy Engine
* Rationalize compliance policy at runtime based on data classification schemes, attributes
and roles.
* Advanced definition of policies for preventing data derivation based on classification
(i.e. re-identification) – Prohibitions
* Column and Row level masking based on cell values and attributes.
---++ Getting Started ---++ Getting Started
* [[QuickStart][Quick Start Guide]]
---++ Documentation ---++ Documentation
* [[Architecture][High Level Architecture]] * [[Architecture][High Level Architecture]]
* [[TypeSystem][Type System]] * [[TypeSystem][Type System]]
* [[Repository][Metadata Repository]] * [[Repository][Metadata Repository]]
* [[Discovery][Metadata Discovery]] * [[Discovery][Search]]
---++ API Documentation ---++ API Documentation
......
...@@ -82,7 +82,7 @@ public class GraphBackedDiscoveryService implements DiscoveryService { ...@@ -82,7 +82,7 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
return queryResult.toJson(); return queryResult.toJson();
} }
} catch (Exception e) { // unable to catch ExpressionException } catch (Exception e) { // unable to catch ExpressionException
throw new DiscoveryException("Invalid expression : " + dslQuery); throw new DiscoveryException("Invalid expression : " + dslQuery, e);
} }
throw new DiscoveryException("Invalid expression : " + dslQuery); throw new DiscoveryException("Invalid expression : " + dslQuery);
......
...@@ -36,7 +36,6 @@ import org.apache.hadoop.metadata.typesystem.types.AttributeInfo; ...@@ -36,7 +36,6 @@ import org.apache.hadoop.metadata.typesystem.types.AttributeInfo;
import org.apache.hadoop.metadata.typesystem.types.ClassType; import org.apache.hadoop.metadata.typesystem.types.ClassType;
import org.apache.hadoop.metadata.typesystem.types.DataTypes; import org.apache.hadoop.metadata.typesystem.types.DataTypes;
import org.apache.hadoop.metadata.typesystem.types.IDataType; import org.apache.hadoop.metadata.typesystem.types.IDataType;
import org.apache.hadoop.metadata.typesystem.types.Multiplicity;
import org.apache.hadoop.metadata.typesystem.types.StructType; import org.apache.hadoop.metadata.typesystem.types.StructType;
import org.apache.hadoop.metadata.typesystem.types.TraitType; import org.apache.hadoop.metadata.typesystem.types.TraitType;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -162,20 +161,17 @@ public class GraphBackedSearchIndexer implements SearchIndexer { ...@@ -162,20 +161,17 @@ public class GraphBackedSearchIndexer implements SearchIndexer {
final String propertyName = typeName + "." + field.name; final String propertyName = typeName + "." + field.name;
switch (field.dataType().getTypeCategory()) { switch (field.dataType().getTypeCategory()) {
case PRIMITIVE: case PRIMITIVE:
createVertexMixedIndex(propertyName, createVertexMixedIndex(propertyName, getPrimitiveClass(field.dataType()));
getPrimitiveClass(field.dataType()), getCardinality(field.multiplicity));
break; break;
case ENUM: case ENUM:
createVertexMixedIndex( createVertexMixedIndex(propertyName, Integer.class);
propertyName, Integer.class, getCardinality(field.multiplicity));
break; break;
case ARRAY: case ARRAY:
case MAP: case MAP:
// index the property holder for element names // todo - how do we overcome this limitation?
createVertexMixedIndex( // IGNORE: Can only index single-valued property keys on vertices in Mixed Index
propertyName, String.class, getCardinality(field.multiplicity));
break; break;
case STRUCT: case STRUCT:
...@@ -223,6 +219,7 @@ public class GraphBackedSearchIndexer implements SearchIndexer { ...@@ -223,6 +219,7 @@ public class GraphBackedSearchIndexer implements SearchIndexer {
throw new IllegalArgumentException("unknown data type " + dataType); throw new IllegalArgumentException("unknown data type " + dataType);
} }
/*
private Cardinality getCardinality(Multiplicity multiplicity) { private Cardinality getCardinality(Multiplicity multiplicity) {
if (multiplicity == Multiplicity.OPTIONAL || multiplicity == Multiplicity.REQUIRED) { if (multiplicity == Multiplicity.OPTIONAL || multiplicity == Multiplicity.REQUIRED) {
return Cardinality.SINGLE; return Cardinality.SINGLE;
...@@ -235,12 +232,13 @@ public class GraphBackedSearchIndexer implements SearchIndexer { ...@@ -235,12 +232,13 @@ public class GraphBackedSearchIndexer implements SearchIndexer {
// todo - default to LIST as this is the most forgiving // todo - default to LIST as this is the most forgiving
return Cardinality.LIST; return Cardinality.LIST;
} }
*/
private void createCompositeAndMixedIndex(String indexName, private void createCompositeAndMixedIndex(String indexName,
String propertyName, Class propertyClass, String propertyName, Class propertyClass,
boolean isUnique, Cardinality cardinality) { boolean isUnique, Cardinality cardinality) {
createCompositeIndex(indexName, propertyName, propertyClass, isUnique, cardinality); createCompositeIndex(indexName, propertyName, propertyClass, isUnique, cardinality);
createVertexMixedIndex(propertyName, propertyClass, cardinality); createVertexMixedIndex(propertyName, propertyClass);
} }
private PropertyKey createCompositeIndex(String indexName, private PropertyKey createCompositeIndex(String indexName,
...@@ -272,15 +270,14 @@ public class GraphBackedSearchIndexer implements SearchIndexer { ...@@ -272,15 +270,14 @@ public class GraphBackedSearchIndexer implements SearchIndexer {
return propertyKey; return propertyKey;
} }
private PropertyKey createVertexMixedIndex(String propertyName, Class propertyClass, private PropertyKey createVertexMixedIndex(String propertyName, Class propertyClass) {
Cardinality cardinality) {
TitanManagement management = titanGraph.getManagementSystem(); TitanManagement management = titanGraph.getManagementSystem();
PropertyKey propertyKey = management.getPropertyKey(propertyName); PropertyKey propertyKey = management.getPropertyKey(propertyName);
if (propertyKey == null) { if (propertyKey == null) {
// ignored cardinality as Can only index single-valued property keys on vertices
propertyKey = management propertyKey = management
.makePropertyKey(propertyName) .makePropertyKey(propertyName)
.dataType(propertyClass) .dataType(propertyClass)
.cardinality(cardinality)
.make(); .make();
TitanGraphIndex vertexIndex = management.getGraphIndex(Constants.VERTEX_INDEX); TitanGraphIndex vertexIndex = management.getGraphIndex(Constants.VERTEX_INDEX);
......
...@@ -216,6 +216,7 @@ public class GraphBackedDiscoveryServiceTest { ...@@ -216,6 +216,7 @@ public class GraphBackedDiscoveryServiceTest {
{"Table loop (LoadProcess outputTable)"}, {"Table loop (LoadProcess outputTable)"},
{"Table as _loop0 loop (LoadProcess outputTable) withPath"}, {"Table as _loop0 loop (LoadProcess outputTable) withPath"},
{"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable withPath"}, {"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable withPath"},
{"Table as t, sd, Column as c where t.name=\"sales_fact\" select c.name as colName, c.dataType as colType"},
}; };
} }
......
...@@ -22,7 +22,7 @@ metadata.graph.storage.backend=inmemory ...@@ -22,7 +22,7 @@ metadata.graph.storage.backend=inmemory
# Graph Search Index # Graph Search Index
metadata.graph.index.search.backend=elasticsearch metadata.graph.index.search.backend=elasticsearch
metadata.graph.index.search.directory=./data/es metadata.graph.index.search.directory=./target/data/es
metadata.graph.index.search.elasticsearch.client-only=false metadata.graph.index.search.elasticsearch.client-only=false
metadata.graph.index.search.elasticsearch.local-mode=true metadata.graph.index.search.elasticsearch.local-mode=true
......
...@@ -21,6 +21,7 @@ package org.apache.hadoop.metadata.examples; ...@@ -21,6 +21,7 @@ package org.apache.hadoop.metadata.examples;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import org.apache.hadoop.metadata.MetadataServiceClient; import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.typesystem.IStruct;
import org.apache.hadoop.metadata.typesystem.Referenceable; import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.apache.hadoop.metadata.typesystem.TypesDef; import org.apache.hadoop.metadata.typesystem.TypesDef;
import org.apache.hadoop.metadata.typesystem.json.InstanceSerialization; import org.apache.hadoop.metadata.typesystem.json.InstanceSerialization;
...@@ -40,7 +41,9 @@ import org.codehaus.jettison.json.JSONArray; ...@@ -40,7 +41,9 @@ import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject; import org.codehaus.jettison.json.JSONObject;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
/** /**
* A driver that sets up sample types and data for testing purposes. * A driver that sets up sample types and data for testing purposes.
...@@ -116,18 +119,17 @@ public class QuickStart { ...@@ -116,18 +119,17 @@ public class QuickStart {
attrDef("inputFormat", DataTypes.STRING_TYPE), attrDef("inputFormat", DataTypes.STRING_TYPE),
attrDef("outputFormat", DataTypes.STRING_TYPE), attrDef("outputFormat", DataTypes.STRING_TYPE),
attrDef("compressed", DataTypes.STRING_TYPE, attrDef("compressed", DataTypes.STRING_TYPE,
Multiplicity.REQUIRED, false, null) Multiplicity.REQUIRED, false, null),
new AttributeDefinition("columns",
DataTypes.arrayTypeName(COLUMN_TYPE),
Multiplicity.COLLECTION, true, null)
); );
HierarchicalTypeDefinition<ClassType> columnClsDef = HierarchicalTypeDefinition<ClassType> columnClsDef =
TypesUtil.createClassTypeDef(COLUMN_TYPE, null, TypesUtil.createClassTypeDef(COLUMN_TYPE, null,
attrDef("name", DataTypes.STRING_TYPE), attrDef("name", DataTypes.STRING_TYPE),
attrDef("dataType", DataTypes.STRING_TYPE), attrDef("dataType", DataTypes.STRING_TYPE),
attrDef("comment", DataTypes.STRING_TYPE), attrDef("comment", DataTypes.STRING_TYPE)
new AttributeDefinition("sd", STORAGE_DESC_TYPE,
Multiplicity.REQUIRED, false, null)
// new AttributeDefinition("table", DataTypes.STRING_TYPE.getName(),
// Multiplicity.REQUIRED, false, null)
); );
HierarchicalTypeDefinition<ClassType> tblClsDef = HierarchicalTypeDefinition<ClassType> tblClsDef =
...@@ -145,13 +147,7 @@ public class QuickStart { ...@@ -145,13 +147,7 @@ public class QuickStart {
attrDef("viewOriginalText", DataTypes.STRING_TYPE), attrDef("viewOriginalText", DataTypes.STRING_TYPE),
attrDef("viewExpandedText", DataTypes.STRING_TYPE), attrDef("viewExpandedText", DataTypes.STRING_TYPE),
attrDef("tableType", DataTypes.STRING_TYPE), attrDef("tableType", DataTypes.STRING_TYPE),
attrDef("temporary", DataTypes.BOOLEAN_TYPE), attrDef("temporary", DataTypes.BOOLEAN_TYPE)
// todo - fix this post serialization support for collections
new AttributeDefinition("columns",
DataTypes.arrayTypeName(DataTypes.STRING_TYPE.getName()),
Multiplicity.COLLECTION, false, null)
// new AttributeDefinition("columns", DataTypes.arrayTypeName(COLUMN_TYPE),
// Multiplicity.COLLECTION, true, null)
); );
HierarchicalTypeDefinition<ClassType> loadProcessClsDef = HierarchicalTypeDefinition<ClassType> loadProcessClsDef =
...@@ -160,15 +156,11 @@ public class QuickStart { ...@@ -160,15 +156,11 @@ public class QuickStart {
attrDef("userName", DataTypes.STRING_TYPE), attrDef("userName", DataTypes.STRING_TYPE),
attrDef("startTime", DataTypes.INT_TYPE), attrDef("startTime", DataTypes.INT_TYPE),
attrDef("endTime", DataTypes.INT_TYPE), attrDef("endTime", DataTypes.INT_TYPE),
// todo - fix this post serialization support for collections new AttributeDefinition("inputTables",
// new AttributeDefinition("inputTables", DataTypes.arrayTypeName(TABLE_TYPE), DataTypes.arrayTypeName(TABLE_TYPE),
// Multiplicity.COLLECTION, false, null),
// new AttributeDefinition("outputTable", TABLE_TYPE,
// Multiplicity.REQUIRED, false, null),
new AttributeDefinition("inputTables", DataTypes.STRING_TYPE.getName(),
Multiplicity.COLLECTION, false, null), Multiplicity.COLLECTION, false, null),
new AttributeDefinition("outputTable", DataTypes.STRING_TYPE.getName(), new AttributeDefinition("outputTable", TABLE_TYPE,
Multiplicity.REQUIRED, false, null), Multiplicity.OPTIONAL, false, null),
attrDef("queryText", DataTypes.STRING_TYPE, Multiplicity.REQUIRED), attrDef("queryText", DataTypes.STRING_TYPE, Multiplicity.REQUIRED),
attrDef("queryPlan", DataTypes.STRING_TYPE, Multiplicity.REQUIRED), attrDef("queryPlan", DataTypes.STRING_TYPE, Multiplicity.REQUIRED),
attrDef("queryId", DataTypes.STRING_TYPE, Multiplicity.REQUIRED), attrDef("queryId", DataTypes.STRING_TYPE, Multiplicity.REQUIRED),
...@@ -180,10 +172,8 @@ public class QuickStart { ...@@ -180,10 +172,8 @@ public class QuickStart {
attrDef("name", DataTypes.STRING_TYPE), attrDef("name", DataTypes.STRING_TYPE),
new AttributeDefinition("db", DATABASE_TYPE, new AttributeDefinition("db", DATABASE_TYPE,
Multiplicity.REQUIRED, false, null), Multiplicity.REQUIRED, false, null),
// todo - fix this post serialization support for collections new AttributeDefinition("inputTables",
// new AttributeDefinition("inputTables", TABLE_TYPE, Multiplicity.COLLECTION, DataTypes.arrayTypeName(TABLE_TYPE),
// false, null)
new AttributeDefinition("inputTables", DataTypes.STRING_TYPE.getName(),
Multiplicity.COLLECTION, false, null) Multiplicity.COLLECTION, false, null)
); );
...@@ -234,62 +224,49 @@ public class QuickStart { ...@@ -234,62 +224,49 @@ public class QuickStart {
Referenceable salesDB = database( Referenceable salesDB = database(
"Sales", "Sales Database", "John ETL", "hdfs://host:8000/apps/warehouse/sales"); "Sales", "Sales Database", "John ETL", "hdfs://host:8000/apps/warehouse/sales");
Referenceable sd = storageDescriptor("hdfs://host:8000/apps/warehouse/sales",
"TextInputFormat", "TextOutputFormat", true);
Referenceable sd = rawStorageDescriptor("hdfs://host:8000/apps/warehouse/sales",
"TextInputFormat", "TextOutputFormat", true);
ArrayList<Referenceable> salesFactColumns = new ArrayList<>(); ArrayList<Referenceable> salesFactColumns = new ArrayList<>();
Referenceable column = column("time_id", "int", "time id", sd); salesFactColumns.add(rawColumn("time_id", "int", "time id"));
salesFactColumns.add(column); salesFactColumns.add(rawColumn("product_id", "int", "product id"));
column = column("product_id", "int", "product id", sd); salesFactColumns.add(rawColumn("customer_id", "int", "customer id", "PII"));
salesFactColumns.add(column); salesFactColumns.add(rawColumn("sales", "double", "product id", "Metric"));
column = column("customer_id", "int", "customer id", sd, "PII");
salesFactColumns.add(column); Referenceable salesFact = tableDefinition("sales_fact", "sales fact table",
column = column("sales", "double", "product id", sd, "Metric");
salesFactColumns.add(column);
Referenceable salesFact = table("sales_fact", "sales fact table",
salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact"); salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact");
ArrayList<Referenceable> productDimColumns = new ArrayList<>(); ArrayList<Referenceable> productDimColumns = new ArrayList<>();
column = column("product_id", "int", "product id", sd); productDimColumns.add(rawColumn("product_id", "int", "product id"));
productDimColumns.add(column); productDimColumns.add(rawColumn("product_name", "string", "product name"));
column = column("product_name", "string", "product name", sd); productDimColumns.add(rawColumn("brand_name", "int", "brand name"));
productDimColumns.add(column);
column = column("brand_name", "int", "brand name", sd); Referenceable productDim = tableDefinition("product_dim", "product dimension table",
productDimColumns.add(column);
Referenceable productDim = table("product_dim", "product dimension table",
salesDB, sd, "John Doe", "Managed", productDimColumns, "Dimension"); salesDB, sd, "John Doe", "Managed", productDimColumns, "Dimension");
ArrayList<Referenceable> timeDimColumns = new ArrayList<>(); ArrayList<Referenceable> timeDimColumns = new ArrayList<>();
column = column("time_id", "int", "time id", sd); timeDimColumns.add(rawColumn("time_id", "int", "time id"));
timeDimColumns.add(column); timeDimColumns.add(rawColumn("dayOfYear", "int", "day Of Year"));
column = column("dayOfYear", "int", "day Of Year", sd); timeDimColumns.add(rawColumn("weekDay", "int", "week Day"));
timeDimColumns.add(column);
column = column("weekDay", "int", "week Day", sd); Referenceable timeDim = tableDefinition("time_dim", "time dimension table",
timeDimColumns.add(column);
Referenceable timeDim = table("time_dim", "time dimension table",
salesDB, sd, "John Doe", "External", timeDimColumns, "Dimension"); salesDB, sd, "John Doe", "External", timeDimColumns, "Dimension");
ArrayList<Referenceable> customerDimColumns = new ArrayList<>(); ArrayList<Referenceable> customerDimColumns = new ArrayList<>();
column = column("customer_id", "int", "customer id", sd, "PII"); customerDimColumns.add(rawColumn("customer_id", "int", "customer id", "PII"));
customerDimColumns.add(column); customerDimColumns.add(rawColumn("name", "string", "customer name", "PII"));
column = column("name", "string", "customer name", sd, "PII"); customerDimColumns.add(rawColumn("address", "string", "customer address", "PII"));
customerDimColumns.add(column);
column = column("address", "string", "customer address", sd, "PII"); Referenceable customerDim = tableDefinition("customer_dim", "customer dimension table",
customerDimColumns.add(column);
Referenceable customerDim = table("customer_dim", "customer dimension table",
salesDB, sd, "fetl", "External", customerDimColumns, "Dimension"); salesDB, sd, "fetl", "External", customerDimColumns, "Dimension");
Referenceable reportingDB = database("Reporting", "reporting database", "Jane BI", Referenceable reportingDB = database("Reporting", "reporting database", "Jane BI",
"hdfs://host:8000/apps/warehouse/reporting"); "hdfs://host:8000/apps/warehouse/reporting");
Referenceable salesFactDaily = table("sales_fact_daily_mv", Referenceable salesFactDaily = tableDefinition("sales_fact_daily_mv",
"sales fact daily materialized view", reportingDB, sd, "sales fact daily materialized view", reportingDB, sd,
"Joe BI", "Managed", salesFactColumns, "Metric"); "Joe BI", "Managed", salesFactColumns, "Metric");
...@@ -307,7 +284,7 @@ public class QuickStart { ...@@ -307,7 +284,7 @@ public class QuickStart {
ImmutableList.of(customerDim), "Dimension", "JdbcAccess"); ImmutableList.of(customerDim), "Dimension", "JdbcAccess");
System.out.println("added customerDimView = " + customerDimView); System.out.println("added customerDimView = " + customerDimView);
Referenceable salesFactMonthly = table("sales_fact_monthly_mv", Referenceable salesFactMonthly = tableDefinition("sales_fact_monthly_mv",
"sales fact monthly materialized view", "sales fact monthly materialized view",
reportingDB, sd, "Jane BI", "Managed", salesFactColumns, "Metric"); reportingDB, sd, "Jane BI", "Managed", salesFactColumns, "Metric");
...@@ -328,7 +305,19 @@ public class QuickStart { ...@@ -328,7 +305,19 @@ public class QuickStart {
System.out.println("created instance for type " + typeName + ", guid: " + guid); System.out.println("created instance for type " + typeName + ", guid: " + guid);
// return the reference to created instance with guid // return the reference to created instance with guid
return new Referenceable(guid, referenceable.getTypeName(), referenceable.getValuesMap()); final ImmutableList<String> traitNames = referenceable.getTraits();
if (traitNames.isEmpty()) {
return new Referenceable(guid, referenceable.getTypeName(),
referenceable.getValuesMap());
} else {
Map<String, IStruct> traits = new HashMap<>();
for (String traitName : traitNames) {
traits.put(traitName, referenceable.getTrait(traitName));
}
return new Referenceable(guid, referenceable.getTypeName(),
referenceable.getValuesMap(), traitNames, traits);
}
} }
Referenceable database(String name, String description, Referenceable database(String name, String description,
...@@ -344,34 +333,49 @@ public class QuickStart { ...@@ -344,34 +333,49 @@ public class QuickStart {
return createInstance(referenceable); return createInstance(referenceable);
} }
Referenceable storageDescriptor(String location, String inputFormat, Referenceable rawStorageDescriptor(String location, String inputFormat,
String outputFormat, String outputFormat,
boolean compressed) throws Exception { boolean compressed) throws Exception {
Referenceable referenceable = new Referenceable(STORAGE_DESC_TYPE); Referenceable referenceable = new Referenceable(STORAGE_DESC_TYPE);
referenceable.set("location", location); referenceable.set("location", location);
referenceable.set("inputFormat", inputFormat); referenceable.set("inputFormat", inputFormat);
referenceable.set("outputFormat", outputFormat); referenceable.set("outputFormat", outputFormat);
referenceable.set("compressed", compressed); referenceable.set("compressed", compressed);
return createInstance(referenceable); return referenceable;
} }
Referenceable column(String name, String dataType, Referenceable rawColumn(String name, String dataType, String comment,
String comment, Referenceable sd, String... traitNames) throws Exception {
String... traitNames) throws Exception {
Referenceable referenceable = new Referenceable(COLUMN_TYPE, traitNames); Referenceable referenceable = new Referenceable(COLUMN_TYPE, traitNames);
referenceable.set("name", name); referenceable.set("name", name);
referenceable.set("dataType", dataType); referenceable.set("dataType", dataType);
referenceable.set("comment", comment); referenceable.set("comment", comment);
referenceable.set("sd", sd);
return createInstance(referenceable); return referenceable;
}
Referenceable tableDefinition(String name, String description,
Referenceable db, Referenceable sdReferenceable,
String owner, String tableType,
List<Referenceable> columnsList,
String... traitNames) throws Exception {
List<Referenceable> columns = new ArrayList<>();
for (Referenceable columnReferenceable : columnsList) {
columns.add(createInstance(columnReferenceable));
}
sdReferenceable.set("columns", columns);
Referenceable sd = createInstance(sdReferenceable);
return table(name, description, db, sd, owner, tableType, traitNames);
} }
Referenceable table(String name, String description, Referenceable table(String name, String description,
Referenceable db, Referenceable sd, Referenceable db, Referenceable sd,
String owner, String tableType, String owner, String tableType,
List<Referenceable> columns, // List<Referenceable> columns,
String... traitNames) throws Exception { String... traitNames) throws Exception {
Referenceable referenceable = new Referenceable(TABLE_TYPE, traitNames); Referenceable referenceable = new Referenceable(TABLE_TYPE, traitNames);
referenceable.set("name", name); referenceable.set("name", name);
...@@ -384,14 +388,6 @@ public class QuickStart { ...@@ -384,14 +388,6 @@ public class QuickStart {
referenceable.set("db", db); referenceable.set("db", db);
referenceable.set("sd", sd); referenceable.set("sd", sd);
// todo - fix this post serialization support for collections
// referenceable.set("columns", columns);
ArrayList<String> columnNames = new ArrayList<>(columns.size());
for (Referenceable column : columns) {
columnNames.add(String.valueOf(column.get("name")));
}
referenceable.set("columns", columnNames);
return createInstance(referenceable); return createInstance(referenceable);
} }
...@@ -407,17 +403,8 @@ public class QuickStart { ...@@ -407,17 +403,8 @@ public class QuickStart {
referenceable.set("startTime", System.currentTimeMillis()); referenceable.set("startTime", System.currentTimeMillis());
referenceable.set("endTime", System.currentTimeMillis() + 10000); referenceable.set("endTime", System.currentTimeMillis() + 10000);
// todo - fix this post serialization support for collections
/*
referenceable.set("inputTables", inputTables); referenceable.set("inputTables", inputTables);
referenceable.set("outputTable", outputTable); referenceable.set("outputTable", outputTable);
*/
ArrayList<String> inputTableNames = new ArrayList<>(inputTables.size());
for (Referenceable inputTable : inputTables) {
inputTableNames.add(String.valueOf(inputTable.get("name")));
}
referenceable.set("inputTables", inputTableNames);
referenceable.set("outputTable", outputTable.get("name"));
referenceable.set("queryText", queryText); referenceable.set("queryText", queryText);
referenceable.set("queryPlan", queryPlan); referenceable.set("queryPlan", queryPlan);
...@@ -434,13 +421,7 @@ public class QuickStart { ...@@ -434,13 +421,7 @@ public class QuickStart {
referenceable.set("name", name); referenceable.set("name", name);
referenceable.set("db", db); referenceable.set("db", db);
// todo - fix this post serialization support for collections referenceable.set("inputTables", inputTables);
// referenceable.set("inputTables", inputTables);
ArrayList<String> inputTableNames = new ArrayList<>(inputTables.size());
for (Referenceable inputTable : inputTables) {
inputTableNames.add(String.valueOf(inputTable.get("name")));
}
referenceable.set("inputTables", inputTableNames);
return createInstance(referenceable); return createInstance(referenceable);
} }
...@@ -511,6 +492,7 @@ public class QuickStart { ...@@ -511,6 +492,7 @@ public class QuickStart {
"Table as _loop0 loop (LoadProcess outputTable) withPath", "Table as _loop0 loop (LoadProcess outputTable) withPath",
"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable withPath", "Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as destTable withPath",
*/ */
"Table as t, sd, columns where t.name=\"sales_fact\"",
}; };
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment