Commit 22624786 by Shwetha GS

removed un-used modules

parent fdd841dd
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-bridge-parent</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</parent>
<artifactId>atlas-bridge-hive</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.13.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.4.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.13.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
</plugin>
<!--
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy</id>
<phase>prepare-package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<artifactItem>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-common</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
<outputDirectory>${project.build.directory}</outputDirectory>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
-->
</plugins>
</build>
</project>
\ No newline at end of file
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.bridge.hivelineage.hook;
import java.io.Serializable;
import java.util.ArrayList;
public class HiveLineage implements Serializable {
/**
*
*/
private static final long serialVersionUID = 1L;
public String queryId;
public String hiveId;
public String user;
public String queryStartTime;
public String queryEndTime;
public String query;
public String tableName;
public String databaseName;
public String action;
public String tableLocation;
public boolean success;
public boolean failed;
public String executionEngine;
ArrayList<SourceTables> sourceTables;
ArrayList<QueryColumns> queryColumns;
ArrayList<WhereClause> whereClause;
ArrayList<CreateColumns> createColumns;
ArrayList<GroupBy> groupBy;
ArrayList<GroupBy> orderBy;
public String getQueryId() {
return this.queryId;
}
public void setQueryId(String queryId) {
this.queryId = queryId;
}
public String getExecutionEngine() {
return this.executionEngine;
}
public void setExecutionEngine(String executionEngine) {
this.executionEngine = executionEngine;
}
public String getHiveId() {
return this.hiveId;
}
public void setHiveId(String hiveId) {
this.hiveId = hiveId;
}
public boolean getSuccess() {
return this.success;
}
public void setSuccess(boolean success) {
this.success = success;
}
public boolean getFailed() {
return this.failed;
}
public void setFailed(boolean failed) {
this.failed = failed;
}
public String getTableName() {
return this.tableName;
}
public void setTableName(String tableName) {
this.tableName = tableName;
}
public String getAction() {
return this.action;
}
public void setAction(String action) {
this.action = action;
}
public String getDatabaseName() {
return this.databaseName;
}
public void setDatabaseName(String databaseName) {
this.databaseName = databaseName;
}
public String getTableLocation() {
return this.tableLocation;
}
public void setTableLocation(String tableLocation) {
this.tableLocation = tableLocation;
}
public String getUser() {
return this.user;
}
public void setUser(String user) {
this.user = user;
}
public String getQueryStartTime() {
return this.queryStartTime;
}
public void setQueryStartTime(String queryStartTime) {
this.queryStartTime = queryStartTime;
}
public String getQueryEndTime() {
return this.queryEndTime;
}
public void setQueryEndTime(String queryEndTime) {
this.queryEndTime = queryEndTime;
}
public String getQuery() {
return this.query;
}
public void setQuery(String query) {
this.query = query;
}
public ArrayList<SourceTables> getSourceTables() {
return this.sourceTables;
}
public void setSourceTables(ArrayList<SourceTables> sourceTables) {
this.sourceTables = sourceTables;
}
public ArrayList<QueryColumns> getQueryColumns() {
return this.queryColumns;
}
public void setQueryColumns(ArrayList<QueryColumns> queryColumns) {
this.queryColumns = queryColumns;
}
public ArrayList<WhereClause> getWhereClause() {
return this.whereClause;
}
public void setWhereClause(ArrayList<WhereClause> whereClause) {
this.whereClause = whereClause;
}
public ArrayList<GroupBy> getGroupBy() {
return this.groupBy;
}
public void setGroupBy(ArrayList<GroupBy> groupBy) {
this.groupBy = groupBy;
}
public ArrayList<CreateColumns> getCreateColumns() {
return this.createColumns;
}
public void setCreateColumns(ArrayList<CreateColumns> createColumns) {
this.createColumns = createColumns;
}
public class SourceTables {
public String tableName;
public String tableAlias;
public String databaseName;
public String getTableName() {
return this.tableName;
}
public void setTableName(String tableName) {
this.tableName = tableName;
}
public String getTableAlias() {
return this.tableAlias;
}
public void setTableAlias(String tableAlias) {
this.tableAlias = tableAlias;
}
public String getDatabaseName() {
return this.databaseName;
}
public void setDatabaseName(String databaseName) {
this.databaseName = databaseName;
}
}
public class QueryColumns {
public String tbAliasOrName;
public String columnName;
public String columnAlias;
public String columnFunction;
public String columnDistinctFunction;
public String getTbAliasOrName() {
return this.tbAliasOrName;
}
public void setTbAliasOrName(String tbAliasOrName) {
this.tbAliasOrName = tbAliasOrName;
}
public String getColumnName() {
return this.columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
public String getColumnAlias() {
return this.columnAlias;
}
public void setColumnAlias(String columnAlias) {
this.columnAlias = columnAlias;
}
public String getColumnFunction() {
return this.columnFunction;
}
public void setColumnFunction(String columnFunction) {
this.columnFunction = columnFunction;
}
public String getColumnDistinctFunction() {
return this.columnDistinctFunction;
}
public void setColumnDistinctFunction(String columnDistinctFunction) {
this.columnDistinctFunction = columnDistinctFunction;
}
}
public class GroupBy {
public String tbAliasOrName;
public String columnName;
public String getTbAliasOrName() {
return this.tbAliasOrName;
}
public void setTbAliasOrName(String tbAliasOrName) {
this.tbAliasOrName = tbAliasOrName;
}
public String getColumnName() {
return this.columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
}
public class WhereClause {
public String tbAliasOrName;
public String columnCondition;
public String columnName;
public String columnOperator;
public String columnValue;
public ArrayList<ColumnValueIn> columnValueIn;
public String getColumnCondition() {
return this.columnCondition;
}
public void setColumnCondition(String columnCondition) {
this.columnCondition = columnCondition;
}
public String getTbAliasOrName() {
return this.tbAliasOrName;
}
public void setTbAliasOrName(String tbAliasOrName) {
this.tbAliasOrName = tbAliasOrName;
}
public String getColumnName() {
return this.columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
public String getColumnOperator() {
return this.columnOperator;
}
public void setColumnOperator(String columnOperator) {
this.columnOperator = columnOperator;
}
public String getColumnValue() {
return this.columnValue;
}
public void setColumnValue(String columnValue) {
this.columnValue = columnValue;
}
public ArrayList<ColumnValueIn> getColumnValueIn() {
return this.columnValueIn;
}
public void setColumnValueIn(ArrayList<ColumnValueIn> columnValueIn) {
this.columnValueIn = columnValueIn;
}
}
public class CreateColumns {
public String columnName;
public String columnType;
public String getColumnName() {
return this.columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
public String getColumnType() {
return this.columnType;
}
public void setColumnType(String columnType) {
this.columnType = columnType;
}
}
public class ColumnValueIn {
public String columnValueIn;
public String getColumnValueIn() {
return this.columnValueIn;
}
public void setColumnValueIn(String columnValueIn) {
this.columnValueIn = columnValueIn;
}
}
}
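The bean above is what the hook serializes with GSON and posts to the metadata service. A minimal sketch of that serialization, assuming only the gson dependency declared in the bridge pom; the class name and field values here are illustrative, not part of this commit:

import com.google.gson.Gson;
import org.apache.atlas.bridge.hivelineage.hook.HiveLineage;
import java.util.ArrayList;

public class HiveLineageGsonSketch {
    public static void main(String[] args) {
        HiveLineage hl = new HiveLineage();
        hl.setAction("select");                                       // illustrative values
        hl.setQuery("select id, name from sample_db.sample_table");
        HiveLineage.SourceTables st = hl.new SourceTables();          // inner class, created off the outer instance
        st.setDatabaseName("sample_db");
        st.setTableName("sample_table");
        ArrayList<HiveLineage.SourceTables> tables = new ArrayList<HiveLineage.SourceTables>();
        tables.add(st);
        hl.setSourceTables(tables);
        // Same serialization the hook performs before posting to the collector service.
        System.out.println(new Gson().toJson(hl));
    }
}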
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.bridge.hivelineage.hook;
import com.google.gson.Gson;
import org.apache.atlas.bridge.hivelineage.hook.HiveLineage.CreateColumns;
import org.apache.atlas.bridge.hivelineage.hook.HiveLineage.GroupBy;
import org.apache.atlas.bridge.hivelineage.hook.HiveLineage.QueryColumns;
import org.apache.atlas.bridge.hivelineage.hook.HiveLineage.SourceTables;
import org.apache.atlas.bridge.hivelineage.hook.HiveLineage.WhereClause;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Stack;
/**
*
* This class prints out the lineage info. It takes SQL as input and prints the
* lineage info. Currently it prints only the input and output tables for a given
* SQL statement; later this can be expanded to add join tables etc.
*
*/
public class HiveLineageInfo implements NodeProcessor {
private final Log LOG = LogFactory.getLog(HiveLineageInfo.class.getName());
public Map<Integer, String> queryMap;
public Integer counter = 0;
public HiveLineage hlb = new HiveLineage();
public ArrayList<SourceTables> sourceTables;
public ArrayList<QueryColumns> queryColumns;
public ArrayList<GroupBy> groupBy;
public ArrayList<WhereClause> whereClause;
public ArrayList<CreateColumns> createColumns;
// Main method for ad-hoc testing: parses a query argument and prints the JSON/GSON feed.
public static void main(String[] args) throws IOException, ParseException,
SemanticException {
String query = args[0];
ConsoleAppender console = new ConsoleAppender(); //create appender
//configure the appender
String PATTERN = "%d [%p|%c|%C{1}] %m%n";
console.setLayout(new PatternLayout(PATTERN));
console.setThreshold(Level.DEBUG);
console.activateOptions();
// add the appender to a Logger (here, the root logger)
Logger.getRootLogger().addAppender(console);
LogManager.getRootLogger().setLevel(Level.DEBUG);
HiveLineageInfo lep = new HiveLineageInfo();
lep.getLineageInfo(query);
Gson gson = new Gson();
String jsonOut = gson.toJson(lep.getHLBean());
System.out.println("GSON/JSON Generate :: " + jsonOut);
}
/**
* @return the custom HiveLineage bean data to be passed to the GSON parser
*/
public HiveLineage getHLBean() {
return hlb;
}
/**
* Implements the process method for the NodeProcessor interface.
*/
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
ASTNode pt = (ASTNode) nd;
/*
* Dispatch on the token type of the current AST node: FROM clauses,
* SELECT/SELECT DISTINCT column lists and CREATE TABLE/VIEW statements
* are each walked by their own dump routine below.
*/
switch (pt.getToken().getType()) {
case HiveParser.TOK_FROM:
LOG.debug("From Table Dump: " + pt.dump());
fromTableDump(pt);
break;
case HiveParser.TOK_SELECT:
LOG.debug("Column Dump: " + pt.dump());
columnTableDump(pt);
break;
case HiveParser.TOK_SELECTDI:
LOG.debug("Column Dump: " + pt.dump());
columnTableDump(pt);
break;
case HiveParser.TOK_CREATETABLE:
createColumns = new ArrayList<CreateColumns>();
LOG.debug("CREATABLE DUMP: " + pt.dump());
createTableDump(pt);
break;
case HiveParser.TOK_CREATEVIEW:
createColumns = new ArrayList<CreateColumns>();
LOG.debug("CREATEVIEW DUMP: " + pt.dump());
createTableDump(pt);
break;
/*
* Processing of WHERE and GROUP BY clauses is currently disabled; it adds no value right now.
*
case HiveParser.TOK_WHERE:
whereClause = new ArrayList<WhereClause>();
LOG.debug("WHERE CLAUSE DUMP: "+pt.dump());
whereDump(pt);
this.hlb.setWhereClause(whereClause);
break;
case HiveParser.TOK_GROUPBY:
groupBy = new ArrayList<GroupBy>();
LOG.debug("GROUPBY CLAUSE DUMP: "+pt.dump());
groupByDump(pt);
this.hlb.setGroupBy(groupBy);
break;
*/
}
return null;
}
/**
* Walks the WHERE tree; called by the process walker.
*/
public void whereDump(ASTNode nodeIn) {
counter = 0;
wdump(nodeIn);
}
/**
* Walks the WHERE tree; called by whereDump.
*/
private void wdump(ASTNode nodeIn) {
boolean parseChild = true;
if (nodeIn.getType() == HiveParser.TOK_TABLE_OR_COL) {
WhereClause whreClse = hlb.new WhereClause();
if (nodeIn.getParent().getText().equalsIgnoreCase(".")) {
ASTNode checkOrAnd = (ASTNode) nodeIn.getParent().getParent().getChild(1)
.getParent().getParent();
if (checkOrAnd.getType() == HiveParser.KW_AND ||
checkOrAnd.getType() == HiveParser.KW_OR) {
LOG.debug("WHERE:: " + checkOrAnd.getText());
whreClse.setColumnOperator(checkOrAnd.getText());
}
LOG.debug("Table Alias:: " + nodeIn.getChild(0).getText());
whreClse.setTbAliasOrName(nodeIn.getChild(0).getText());
LOG.debug("Delimiter:: " + nodeIn.getParent().getText());
LOG.debug("Column:: " + nodeIn.getParent().getChild(1).getText());
whreClse.setColumnName(nodeIn.getParent().getChild(1).getText());
LOG.debug("Column Qualifer:: " +
nodeIn.getParent().getParent().getChild(1).getParent().getText());
whreClse.setColumnOperator(
nodeIn.getParent().getParent().getChild(1).getParent().getText());
LOG.debug("Column Value:: " + nodeIn.getParent().getParent().getChild(1).getText());
whreClse.setColumnValue(nodeIn.getParent().getParent().getChild(1).getText());
} else {
ASTNode checkOrAnd = (ASTNode) nodeIn.getParent().getParent().getChild(1)
.getParent();
if (checkOrAnd.getType() == HiveParser.KW_AND ||
checkOrAnd.getType() == HiveParser.KW_OR) {
LOG.debug("WHERE:: " + checkOrAnd.getText());
whreClse.setColumnOperator(checkOrAnd.getText());
}
LOG.debug("Column:: = " + nodeIn.getChild(0).getText());
whreClse.setColumnName(nodeIn.getChild(0).getText());
//LOG.info("Delimiter "+nodeIn.getParent().getText());
LOG.debug("Column Qualifer:: " +
nodeIn.getParent().getChild(1).getParent().getText());
whreClse.setColumnOperator(nodeIn.getParent().getChild(1).getParent().getText());
LOG.debug("Column Value:: " + nodeIn.getParent().getChild(1).getText());
whreClse.setColumnValue(nodeIn.getParent().getChild(1).getText());
}
whereClause.add(whreClse);
}
if (parseChild) {
int childCount = nodeIn.getChildCount();
if (childCount != 0) {
for (int numr = 0; numr < childCount; numr++) {
wdump((ASTNode) nodeIn.getChild(numr));
}
}
}
}
/**
* Walks the GROUP BY tree; called by the process walker.
*/
public void groupByDump(ASTNode nodeIn) {
counter = 0;
gdump(nodeIn);
}
/**
* Walks the GROUP BY tree; called by groupByDump.
*/
private void gdump(ASTNode nodeIn) {
boolean parseChild = true;
if (nodeIn.getType() == HiveParser.TOK_TABLE_OR_COL) {
GroupBy grpBy = hlb.new GroupBy();
ASTNode parentNode = (ASTNode) nodeIn.getParent();
if (parentNode.getText().equalsIgnoreCase(".")) {
LOG.debug("GroupBy TableAlias: " + nodeIn.getChild(0).getText());
grpBy.setTbAliasOrName(nodeIn.getChild(0).getText());
LOG.debug("GroupBy Column:: " + parentNode.getChild(1).getText());
grpBy.setColumnName(parentNode.getChild(1).getText());
} else {
LOG.debug("GroupBy Column: " + nodeIn.getChild(0).getText());
grpBy.setColumnName(nodeIn.getChild(0).getText());
}
groupBy.add(grpBy);
}
if (parseChild) {
int childCount = nodeIn.getChildCount();
if (childCount != 0) {
for (int numr = 0; numr < childCount; numr++) {
gdump((ASTNode) nodeIn.getChild(numr));
}
}
}
}
/**
* Walks the CREATE TABLE tree; called by the process walker.
*/
public void createTableDump(ASTNode nodeIn) {
counter = 0;
if (nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME) != null &&
nodeIn.getAncestor(HiveParser.TOK_WHERE) == null) {
LOG.info("Create TableName:: " +
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getText());
if (nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChildCount() == 2) {
LOG.debug("To DataBaseName:: " +
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChild(0).getText());
hlb.setDatabaseName(
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChild(0).getText());
LOG.debug("To TableName:: " +
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChild(1).getText());
hlb.setTableName(
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChild(1).getText());
} else {
LOG.debug("To TableName:: " +
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChild(0).getText());
hlb.setTableName(
nodeIn.getFirstChildWithType(HiveParser.TOK_TABNAME).getChild(0).getText());
}
}
if (nodeIn.getFirstChildWithType(HiveParser.TOK_TABLELOCATION) != null &&
nodeIn.getAncestor(HiveParser.TOK_WHERE) == null) {
LOG.debug("Create Table Location:: " +
nodeIn.getFirstChildWithType(HiveParser.TOK_TABLELOCATION).getText());
hlb.setTableLocation(
nodeIn.getFirstChildWithType(HiveParser.TOK_TABLELOCATION).getChild(0)
.getText());
}
if (nodeIn.getFirstChildWithType(HiveParser.TOK_TABCOLLIST) != null &&
nodeIn.getAncestor(HiveParser.TOK_WHERE) == null) {
ctdump((ASTNode) nodeIn.getFirstChildWithType(HiveParser.TOK_TABCOLLIST).getParent());
hlb.setCreateColumns(createColumns);
}
}
/**
* Walks the CREATE TABLE tree; called by createTableDump.
*/
private void ctdump(ASTNode nodeIn) {
boolean parseChild = true;
if (nodeIn.getType() == HiveParser.TOK_TABCOL) {
CreateColumns crtClmns = hlb.new CreateColumns();
LOG.debug("Create Column Name:: " + nodeIn.getChild(0).getText());
crtClmns.setColumnName(nodeIn.getChild(0).getText());
LOG.debug("Create Column Type:: " + nodeIn.getChild(1).getText());
crtClmns.setColumnType(nodeIn.getChild(1).getText());
createColumns.add(crtClmns);
}
if (parseChild) {
int childCount = nodeIn.getChildCount();
if (childCount != 0) {
for (int numr = 0; numr < childCount; numr++) {
ctdump((ASTNode) nodeIn.getChild(numr));
}
}
}
}
/**
* Walks the FROM table tree; called by the process walker.
*/
public void fromTableDump(ASTNode nodeIn) {
counter = 0;
ftdump(nodeIn);
}
/**
* Walks the FROM table tree; called by fromTableDump.
*/
private void ftdump(ASTNode nodeIn) {
boolean parseChild = true;
if (nodeIn.getType() == HiveParser.TOK_TABNAME &&
nodeIn.getParent().getType() == HiveParser.TOK_TABREF &&
nodeIn.getAncestor(HiveParser.TOK_WHERE) == null) {
SourceTables hlbSbls = hlb.new SourceTables();
if (nodeIn.getChildCount() == 2) {
LOG.debug("From DBName:: " + nodeIn.getChild(0).getText());
hlbSbls.setDatabaseName(nodeIn.getChild(0).getText());
LOG.debug("From TableName:: " + nodeIn.getChild(1).getText());
hlbSbls.setTableName(nodeIn.getChild(1).getText());
} else {
LOG.debug("From TableName:: " + nodeIn.getChild(0).getText());
hlbSbls.setTableName(nodeIn.getChild(0).getText());
}
if (nodeIn.getType() == HiveParser.TOK_TABNAME &&
nodeIn.getParent().getChild(1) != null) {
LOG.debug("From DB/Table Alias:: " + nodeIn.getParent().getChild(1).getText());
hlbSbls.setTableAlias(nodeIn.getParent().getChild(1).getText());
}
sourceTables.add(hlbSbls);
}
if (parseChild) {
int childCount = nodeIn.getChildCount();
if (childCount != 0) {
for (int numr = 0; numr < childCount; numr++) {
ftdump((ASTNode) nodeIn.getChild(numr));
}
}
}
}
/**
* Walks the column tree; called by the process walker.
*/
public void columnTableDump(ASTNode nodeIn) {
counter = 0;
clmnTdump(nodeIn);
}
/**
* Walks the column tree; called by columnTableDump.
*/
private void clmnTdump(ASTNode nodeIn) {
boolean parseChild = true;
if (nodeIn.getType() == HiveParser.TOK_TABLE_OR_COL &&
nodeIn.getAncestor(HiveParser.TOK_SELEXPR) != null &&
!(nodeIn.hasAncestor(HiveParser.TOK_WHERE))) {
QueryColumns qclmns = hlb.new QueryColumns();
if (nodeIn.getAncestor(HiveParser.TOK_FUNCTION) != null &&
nodeIn.getAncestor(HiveParser.TOK_SELEXPR) != null) {
LOG.debug("Function Query:: " +
nodeIn.getAncestor(HiveParser.TOK_FUNCTION).getChild(0).getText());
qclmns.setColumnFunction(
nodeIn.getAncestor(HiveParser.TOK_FUNCTION).getChild(0).getText());
}
if (nodeIn.getAncestor(HiveParser.TOK_FUNCTIONDI) != null &&
nodeIn.getAncestor(HiveParser.TOK_SELEXPR) != null) {
LOG.debug("Function Distinct Query:: " +
nodeIn.getAncestor(HiveParser.TOK_FUNCTIONDI).getChild(0).getText());
qclmns.setColumnDistinctFunction(
nodeIn.getAncestor(HiveParser.TOK_FUNCTIONDI).getChild(0).getText());
}
if (nodeIn.getParent().getText().equalsIgnoreCase(".")) {
LOG.debug("Table Name/Alias:: " + nodeIn.getChild(0).getText());
qclmns.setTbAliasOrName(nodeIn.getChild(0).getText());
LOG.debug("Column:: " + nodeIn.getParent().getChild(1).getText());
qclmns.setColumnName(nodeIn.getParent().getChild(1).getText());
if (nodeIn.getAncestor(HiveParser.TOK_SELEXPR).getChild(1) != null) {
LOG.debug("Column Alias:: " +
nodeIn.getAncestor(HiveParser.TOK_SELEXPR).getChild(1).getText());
qclmns.setColumnAlias(
nodeIn.getAncestor(HiveParser.TOK_SELEXPR).getChild(1).getText());
}
} else {
LOG.debug("Column:: " + nodeIn.getChild(0).getText());
qclmns.setColumnName(nodeIn.getChild(0).getText());
if ((nodeIn.getParent().getChild(1) != null &&
nodeIn.getParent().getChild(1).getType() != HiveParser.TOK_TABLE_OR_COL)) {
LOG.debug("Column Alias:: " + nodeIn.getParent().getChild(1).getText());
qclmns.setColumnAlias(nodeIn.getParent().getChild(1).getText());
}
}
if (qclmns.getColumnName() != null) {
queryColumns.add(qclmns);
}
}
if (parseChild) {
int childCount = nodeIn.getChildCount();
if (childCount != 0) {
for (int numr = 0; numr < childCount; numr++) {
clmnTdump((ASTNode) nodeIn.getChild(numr));
}
}
}
}
/**
* Parses the given query and extracts the lineage info.
*
* @param query the HiveQL query to parse
* @throws ParseException if the query cannot be parsed
*/
public void getLineageInfo(String query) throws ParseException,
SemanticException {
/*
* Get the AST tree
*/
ParseDriver pd = new ParseDriver();
ASTNode tree = pd.parse(query);
LOG.info("DUMP TREE: " + tree.dump());
if (tree.getChild(0).getType() == HiveParser.TOK_DROPDATABASE) {
hlb.setAction("drop_database");
}
if (tree.getChild(0).getType() == HiveParser.TOK_CREATEDATABASE) {
hlb.setAction("create_database");
}
if (tree.getChild(0).getType() == HiveParser.TOK_CREATETABLE) {
hlb.setAction("create_table");
}
if (tree.getChild(0).getType() == HiveParser.TOK_CREATEVIEW) {
hlb.setAction("create_view");
}
if (tree.getChild(0).getType() == HiveParser.TOK_DROPTABLE) {
hlb.setAction("drop_table");
}
if (tree.getChild(0).getType() == HiveParser.TOK_INSERT) {
hlb.setAction("insert");
}
if (tree.getChild(0).getType() == HiveParser.TOK_INSERT_INTO) {
hlb.setAction("insert_into");
}
if (tree.getChild(0).getType() == HiveParser.TOK_DROPVIEW) {
hlb.setAction("drop_view");
}
if (tree.getChild(0).getType() == HiveParser.TOK_SHOWDATABASES) {
hlb.setAction("show_databases");
}
if (tree.getChild(0).getType() == HiveParser.TOK_SHOWTABLES) {
hlb.setAction("show_tables");
}
if (tree.getChild(0).getType() == HiveParser.TOK_ALTERVIEW_RENAME) {
hlb.setAction("alter_view_rename");
}
if (tree.getChild(0).getType() == HiveParser.TOK_ALTERTABLE_RENAME) {
hlb.setAction("alter_table_rename");
}
if (tree.getChild(0).getType() == HiveParser.TOK_ANALYZE) {
hlb.setAction("analyze");
}
if (tree.getChild(0).getType() == HiveParser.TOK_QUERY) {
hlb.setAction("select");
}
while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
tree = (ASTNode) tree.getChild(0);
}
sourceTables = new ArrayList<SourceTables>();
queryColumns = new ArrayList<QueryColumns>();
/*
* initialize Event Processor and dispatcher.
*/
// Create a walker that walks the AST in a DFS manner while maintaining
// the node stack; the dispatcher routes each node back to this
// class's process() method.
Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(this, rules, null);
GraphWalker ogw = new DefaultGraphWalker(disp);
// Create a list of top-level nodes to start walking from
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.add(tree);
ogw.startWalking(topNodes, null);
if (!(sourceTables.isEmpty())) {
this.hlb.setSourceTables(sourceTables);
}
if (!(queryColumns.isEmpty())) {
this.hlb.setQueryColumns(queryColumns);
}
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.bridge.hivelineage.hook;
import com.google.gson.Gson;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.util.StringUtils;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.DataOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Set;
/**
* DGC hook that sends query and plan info to the DGCCollector service. To enable it (Hadoop 2.4 and up),
* set hive.exec.pre.hooks, hive.exec.post.hooks and hive.exec.failure.hooks to include this class.
*/
public class Hook implements ExecuteWithHookContext {
private static final Log LOG = LogFactory.getLog(Hook.class.getName());
private static final String METADATA_HOST = "localhost";
private static final int METADATA_PORT = 20810;
private static final String METADATA_PATH = "/entities/submit/HiveLineage";
private HiveLineage hlb;
@Override
public void run(HookContext hookContext) throws Exception {
long currentTime = System.currentTimeMillis();
String executionEngine = null;
try {
QueryPlan plan = hookContext.getQueryPlan();
if (plan == null) {
return;
}
ExplainTask explain = new ExplainTask();
explain.initialize(hookContext.getConf(), plan, null);
String queryId = plan.getQueryId();
String queryStartTime = plan.getQueryStartTime().toString();
String user = hookContext.getUgi().getUserName();
String query = plan.getQueryStr();
int numMrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
int numTezJobs = Utilities.getTezTasks(plan.getRootTasks()).size();
String hiveId = explain.getId();
SessionState sess = SessionState.get();
if (numTezJobs > 0) {
executionEngine = "tez";
}
if (numMrJobs > 0) {
executionEngine = "mr";
}
hiveId = sess.getSessionId();
String defaultdb = null;
switch (hookContext.getHookType()) {
case PRE_EXEC_HOOK:
Set<ReadEntity> db_pre = hookContext.getInputs();
for (Object o : db_pre) {
LOG.debug("DB:Table=" + o.toString());
defaultdb = o.toString().split("@")[0];
}
currentTime = System.currentTimeMillis();
HiveLineageInfo lep_pre = new HiveLineageInfo();
lep_pre.getLineageInfo(query);
hlb = lep_pre.getHLBean();
hlb.setDatabaseName(defaultdb);
hlb.setQueryEndTime(Long.toString(currentTime));
hlb.setQueryId(queryId);
hlb.setQuery(query);
hlb.setUser(user);
hlb.setHiveId(hiveId);
hlb.setSuccess(false);
if (executionEngine != null) {
if (executionEngine.equalsIgnoreCase("mr")) {
hlb.setExecutionEngine("mapreduce");
}
if (executionEngine.equalsIgnoreCase("tez")) {
hlb.setExecutionEngine("tez");
}
if (executionEngine.equalsIgnoreCase("spark")) {
hlb.setExecutionEngine("spark");
}
} else {
hlb.setExecutionEngine("local");
}
hlb.setQueryStartTime(queryStartTime);
fireAndForget(hookContext.getConf(), hlb, queryId);
break;
case POST_EXEC_HOOK:
Set<ReadEntity> db_post = hookContext.getInputs();
for (Object o : db_post) {
LOG.debug("DB:Table=" + o.toString());
defaultdb = o.toString().split("@")[0];
}
currentTime = System.currentTimeMillis();
HiveLineageInfo lep_post = new HiveLineageInfo();
lep_post.getLineageInfo(query);
hlb = lep_post.getHLBean();
hlb.setDatabaseName(defaultdb);
hlb.setQueryEndTime(Long.toString(currentTime));
hlb.setQueryId(queryId);
hlb.setQuery(query);
hlb.setUser(user);
hlb.setQueryStartTime(queryStartTime);
hlb.setSuccess(true);
hlb.setHiveId(hiveId);
if (executionEngine != null) {
if (executionEngine.equalsIgnoreCase("mr")) {
hlb.setExecutionEngine("mapreduce");
}
if (executionEngine.equalsIgnoreCase("tez")) {
hlb.setExecutionEngine("tez");
}
if (executionEngine.equalsIgnoreCase("spark")) {
hlb.setExecutionEngine("spark");
}
} else {
hlb.setExecutionEngine("local");
}
fireAndForget(hookContext.getConf(), hlb, queryId);
break;
case ON_FAILURE_HOOK:
Set<ReadEntity> db_fail = hookContext.getInputs();
for (Object o : db_fail) {
LOG.debug("DB:Table=" + o.toString());
defaultdb = o.toString().split("@")[0];
}
HiveLineageInfo lep_failed = new HiveLineageInfo();
lep_failed.getLineageInfo(query);
hlb = lep_failed.getHLBean();
hlb.setDatabaseName(defaultdb);
hlb.setQueryEndTime(Long.toString(currentTime));
hlb.setQueryId(queryId);
hlb.setQuery(query);
hlb.setUser(user);
hlb.setQueryStartTime(queryStartTime);
hlb.setSuccess(false);
hlb.setFailed(true);
hlb.setHiveId(hiveId);
if (executionEngine != null) {
if (executionEngine.equalsIgnoreCase("mr")) {
hlb.setExecutionEngine("mapreduce");
}
if (executionEngine.equalsIgnoreCase("tez")) {
hlb.setExecutionEngine("tez");
}
if (executionEngine.equalsIgnoreCase("spark")) {
hlb.setExecutionEngine("spark");
}
} else {
hlb.setExecutionEngine("local");
}
fireAndForget(hookContext.getConf(), hlb, queryId);
break;
default:
//ignore
break;
}
} catch (Exception e) {
LOG.info("Failed to submit plan to DGC: " + StringUtils.stringifyException(e));
}
}
public void fireAndForget(Configuration conf, HiveLineage hookData, String queryId)
throws Exception {
String postUri = String
.format("http://%s:%s%s", METADATA_HOST, METADATA_PORT, METADATA_PATH);
if (conf.getTrimmed("atlas.hive.hook.uri") != null) {
postUri = conf.getTrimmed("atlas.hive.hook.uri");
}
Gson gson = new Gson();
String gsonString = gson.toJson(hookData);
LOG.debug("GSON String: " + gsonString);
String encodedGsonQuery = URLEncoder.encode(gsonString, "UTF-8");
String encodedQueryId = URLEncoder.encode(queryId, "UTF-8");
String postData = "hookdata=" + encodedGsonQuery + "&queryid=" + encodedQueryId;
// Create a trust manager that does not validate certificate chains
if (postUri.contains("https:")) {
TrustManager[] trustAllCerts = new TrustManager[]{
new X509TrustManager() {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public void checkClientTrusted(
java.security.cert.X509Certificate[] certs, String authType) {
}
public void checkServerTrusted(
java.security.cert.X509Certificate[] certs, String authType) {
}
}
};
// Install the all-trusting trust manager
try {
SSLContext sc = SSLContext.getInstance("SSL");
sc.init(null, trustAllCerts, new java.security.SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
} catch (Exception e) {
e.printStackTrace();
}
}
URL url = new URL(postUri);
LOG.debug("Post URI: " + postUri);
DataOutputStream wr = null;
//HttpURLConnection urlcon = null;
if (postUri.contains("https:")) {
HttpsURLConnection urlcon = null;
urlcon = (HttpsURLConnection) url.openConnection();
urlcon.setRequestMethod("POST");
urlcon.setRequestProperty("X-Requested-By", "HiveHook");
urlcon.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
urlcon.setUseCaches(false);
urlcon.setDoInput(true);
urlcon.setDoOutput(true);
wr = new DataOutputStream(urlcon.getOutputStream());
LOG.debug("PostString: " + postData);
//wr.writeBytes(postString.);
wr.write(postData.getBytes());
wr.flush();
wr.close();
InputStream is = urlcon.getInputStream();
InputStreamReader isr = new InputStreamReader(is);
int numCharsRead;
char[] charArray = new char[1024];
StringBuffer sb = new StringBuffer();
while ((numCharsRead = isr.read(charArray)) > 0) {
sb.append(charArray, 0, numCharsRead);
}
String result = sb.toString();
LOG.debug("Post Response: " + result);
isr.close();
is.close();
urlcon.disconnect();
} else {
HttpURLConnection urlcon = null;
urlcon = (HttpURLConnection) url.openConnection();
urlcon.setRequestMethod("POST");
urlcon.setRequestProperty("X-Requested-By", "HiveHook");
urlcon.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
urlcon.setUseCaches(false);
urlcon.setDoInput(true);
urlcon.setDoOutput(true);
wr = new DataOutputStream(urlcon.getOutputStream());
LOG.debug("PostString: " + postData);
//wr.writeBytes(postString.);
wr.write(postData.getBytes());
wr.flush();
wr.close();
InputStream is = urlcon.getInputStream();
InputStreamReader isr = new InputStreamReader(is);
int numCharsRead;
char[] charArray = new char[1024];
StringBuffer sb = new StringBuffer();
while ((numCharsRead = isr.read(charArray)) > 0) {
sb.append(charArray, 0, numCharsRead);
}
String result = sb.toString();
LOG.debug("Post Response: " + result);
isr.close();
is.close();
urlcon.disconnect();
}
}
}
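A sketch of the configuration the hook above depends on, expressed with org.apache.hadoop.conf.Configuration; the hook-property names come from the class javadoc and from fireAndForget(), while the class name and endpoint value below are illustrative assumptions:

import org.apache.hadoop.conf.Configuration;

public class HookConfigSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Register the hook for pre-, post- and failure execution, per the Hook javadoc.
        conf.set("hive.exec.pre.hooks", "org.apache.atlas.bridge.hivelineage.hook.Hook");
        conf.set("hive.exec.post.hooks", "org.apache.atlas.bridge.hivelineage.hook.Hook");
        conf.set("hive.exec.failure.hooks", "org.apache.atlas.bridge.hivelineage.hook.Hook");
        // Optional override of the default http://localhost:20810/entities/submit/HiveLineage
        // endpoint read by fireAndForget(); the host here is an illustrative placeholder.
        conf.set("atlas.hive.hook.uri", "http://metadata-host:20810/entities/submit/HiveLineage");
        System.out.println(conf.getTrimmed("atlas.hive.hook.uri"));
    }
}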
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.bridge.hivelineage.hook;
import com.google.gson.Gson;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
public class HiveLineageInfoTest {
public static String parseQuery(String query) throws SemanticException,
ParseException {
HiveLineageInfo lep = new HiveLineageInfo();
lep.getLineageInfo(query);
Gson gson = new Gson();
String jsonOut = gson.toJson(lep.getHLBean());
return jsonOut;
}
}
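HiveLineageInfoTest above only exposes the parseQuery helper; a hedged sketch of a TestNG case that could exercise it (class name, query string and assertions are illustrative, not part of this commit):

import org.apache.atlas.bridge.hivelineage.hook.HiveLineageInfoTest;
import org.testng.Assert;
import org.testng.annotations.Test;

public class HiveLineageInfoParseSketch {
    @Test
    public void parseSimpleSelect() throws Exception {
        // Illustrative HiveQL statement; any parsable query would do.
        String json = HiveLineageInfoTest.parseQuery("select a.id, a.name from sample_db.sample_table a");
        Assert.assertNotNull(json);
        // Gson renders the HiveLineage bean as a JSON object.
        Assert.assertTrue(json.startsWith("{"));
    }
}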
@@ -134,7 +134,6 @@
<module>repository</module>
<module>webapp</module>
<module>docs</module>
<module>tools</module>
<module>addons/hive-bridge</module>
</modules>
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.atlas</groupId>
<artifactId>apache-atlas</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</parent>
<artifactId>atlas-tools</artifactId>
<description>Apache Atlas Type System Tools Module</description>
<name>Apache Atlas Type System Tools</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-typesystem</artifactId>
</dependency>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-repository</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>jline</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-actors</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scalap</artifactId>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalamacros</groupId>
<artifactId>quasiquotes_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-native_2.10</artifactId>
</dependency>
<dependency>
<groupId>com.github.nscala-time</groupId>
<artifactId>nscala-time_2.10</artifactId>
</dependency>
<dependency>
<groupId>com.typesafe</groupId>
<artifactId>config</artifactId>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_2.10</artifactId>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_2.10</artifactId>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-slf4j_2.10</artifactId>
</dependency>
<dependency>
<groupId>io.spray</groupId>
<artifactId>spray-routing</artifactId>
</dependency>
<dependency>
<groupId>io.spray</groupId>
<artifactId>spray-can</artifactId>
</dependency>
<dependency>
<groupId>io.spray</groupId>
<artifactId>spray-httpx</artifactId>
</dependency>
<dependency>
<groupId>io.spray</groupId>
<artifactId>spray-testkit</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
</dependency>
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
</dependency>
<dependency>
<groupId>com.tinkerpop.blueprints</groupId>
<artifactId>blueprints-core</artifactId>
</dependency>
<dependency>
<groupId>com.thinkaurelius.titan</groupId>
<artifactId>titan-core</artifactId>
</dependency>
<dependency>
<groupId>com.thinkaurelius.titan</groupId>
<artifactId>titan-es</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
akka {
loglevel = DEBUG
stdout-loglevel = DEBUG
event-handlers = ["akka.event.Logging$DefaultLogger"]
default-dispatcher {
fork-join-executor {
parallelism-min = 8
}
}
test {
timefactor = 1
}
}
spray {
can {
server {
server-header = "Metadata Service"
}
}
}
http {
host = "0.0.0.0"
host = ${?HOST}
port = 9140
port = ${?PORT}
}
graphRepo {
storage.backend = inmemory
# Graph Search Index
#index.search.backend=elasticsearch
#index.search.directory=target/data/es
#index.search.elasticsearch.client-only=false
#index.search.elasticsearch.local-mode=true
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.cli
import org.apache.atlas.repository.memory.MemRepository
import org.apache.atlas.typesystem.types.TypeSystem
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.{ILoop, IMain}
object Console extends App {
val settings = new Settings
settings.usejavacp.value = true
settings.deprecation.value = true
settings.bootclasspath.value += """/Users/hbutani/.m2/repository/org/apache/atlas/1.0-SNAPSHOT/atlas-1.0-SNAPSHOT.jar:/Users/hbutani/.m2/repository/org/scala-lang/scala-compiler/2.10.4/scala-compiler-2.10.4.jar:/Users/hbutani/.m2/repository/org/scala-lang/scala-reflect/2.10.4/scala-reflect-2.10.4.jar:/Users/hbutani/.m2/repository/org/scala-lang/jline/2.10.4/jline-2.10.4.jar:/Users/hbutani/.m2/repository/org/fusesource/jansi/jansi/1.4/jansi-1.4.jar:/Users/hbutani/.m2/repository/org/scala-lang/scala-library/2.10.4/scala-library-2.10.4.jar:/Users/hbutani/.m2/repository/org/scala-lang/scala-actors/2.10.4/scala-actors-2.10.4.jar:/Users/hbutani/.m2/repository/org/scala-lang/scalap/2.10.4/scalap-2.10.4.jar:/Users/hbutani/.m2/repository/org/scalatest/scalatest_2.10/2.2.0/scalatest_2.10-2.2.0.jar:/Users/hbutani/.m2/repository/org/scalamacros/quasiquotes_2.10/2.0.1/quasiquotes_2.10-2.0.1.jar:/Users/hbutani/.m2/repository/org/json4s/json4s-native_2.10/3.2.11/json4s-native_2.10-3.2.11.jar:/Users/hbutani/.m2/repository/org/json4s/json4s-core_2.10/3.2.11/json4s-core_2.10-3.2.11.jar:/Users/hbutani/.m2/repository/org/json4s/json4s-ast_2.10/3.2.11/json4s-ast_2.10-3.2.11.jar:/Users/hbutani/.m2/repository/com/thoughtworks/paranamer/paranamer/2.6/paranamer-2.6.jar:/Users/hbutani/.m2/repository/com/github/nscala-time/nscala-time_2.10/1.6.0/nscala-time_2.10-1.6.0.jar:/Users/hbutani/.m2/repository/joda-time/joda-time/2.5/joda-time-2.5.jar:/Users/hbutani/.m2/repository/org/joda/joda-convert/1.2/joda-convert-1.2.jar:/Users/hbutani/.m2/repository/com/typesafe/config/1.2.1/config-1.2.1.jar:/Users/hbutani/.m2/repository/com/typesafe/akka/akka-actor_2.10/2.3.7/akka-actor_2.10-2.3.7.jar:/Users/hbutani/.m2/repository/com/typesafe/akka/akka-testkit_2.10/2.3.7/akka-testkit_2.10-2.3.7.jar:/Users/hbutani/.m2/repository/com/typesafe/akka/akka-slf4j_2.10/2.3.7/akka-slf4j_2.10-2.3.7.jar:/Users/hbutani/.m2/repository/org/slf4j/slf4j-api/1.7.5/slf4j-api-1.7.5.jar:/Users/hbutani/.m2/repository/io/spray/spray-routing/1.3.1/spray-routing-1.3.1.jar:/Users/hbutani/.m2/repository/io/spray/spray-http/1.3.1/spray-http-1.3.1.jar:/Users/hbutani/.m2/repository/org/parboiled/parboiled-scala_2.10/1.1.6/parboiled-scala_2.10-1.1.6.jar:/Users/hbutani/.m2/repository/org/parboiled/parboiled-core/1.1.6/parboiled-core-1.1.6.jar:/Users/hbutani/.m2/repository/io/spray/spray-util/1.3.1/spray-util-1.3.1.jar:/Users/hbutani/.m2/repository/com/chuusai/shapeless_2.10/1.2.4/shapeless_2.10-1.2.4.jar:/Users/hbutani/.m2/repository/io/spray/spray-can/1.3.1/spray-can-1.3.1.jar:/Users/hbutani/.m2/repository/io/spray/spray-io/1.3.1/spray-io-1.3.1.jar:/Users/hbutani/.m2/repository/io/spray/spray-httpx/1.3.1/spray-httpx-1.3.1.jar:/Users/hbutani/.m2/repository/org/jvnet/mimepull/mimepull/1.9.4/mimepull-1.9.4.jar:/Users/hbutani/.m2/repository/io/spray/spray-testkit/1.3.1/spray-testkit-1.3.1.jar:/Users/hbutani/.m2/repository/com/google/guava/guava/11.0.2/guava-11.0.2.jar:/Users/hbutani/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:/Users/hbutani/.m2/repository/junit/junit/4.10/junit-4.10.jar:/Users/hbutani/.m2/repository/org/hamcrest/hamcrest-core/1.1/hamcrest-core-1.1.jar"""
val in = new IMain(settings) {
override protected def parentClassLoader = settings.getClass.getClassLoader()
}
new SampleILoop().process(settings)
}
class SampleILoop extends ILoop {
val ts: TypeSystem = TypeSystem.getInstance()
//intp = Console.in
val mr: MemRepository = new MemRepository(ts)
override def prompt = "==> "
addThunk {
intp.beQuietDuring {
intp.addImports("java.lang.Math._")
intp.addImports("org.json4s.native.Serialization.{read, write => swrite}")
intp.addImports("org.json4s._")
intp.addImports("org.json4s.native.JsonMethods._")
intp.addImports("org.apache.atlas.tools.dsl._")
//intp.bindValue("service", ms)
//intp.bindValue("cp", intp.compilerClasspath)
}
}
override def printWelcome() {
echo("\n" +
" \\,,,/\n" +
" (o o)\n" +
"-----oOOo-(_)-oOOo-----")
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.dsl
import org.apache.atlas.typesystem.ITypedStruct
import org.apache.atlas.typesystem.types.{StructType, TypeSystem}
import scala.language.dynamics
class DynamicTypedStruct(val ts: ITypedStruct) extends Dynamic {
def selectDynamic(name: String) = ts.get(name)
def updateDynamic(name: String)(value: Any) {
var value1 = value
if (value != null && value.isInstanceOf[DynamicTypedStruct]) {
value1 = value.asInstanceOf[DynamicTypedStruct].ts
}
ts.set(name, value1)
}
def dataType = TypeSystem.getInstance().getDataType(classOf[StructType], ts.getTypeName)
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools
import org.apache.atlas.repository.memory.MemRepository
import org.apache.atlas.typesystem.json.{BigDecimalSerializer, BigIntegerSerializer, Serialization, TypedStructSerializer}
import org.apache.atlas.typesystem.persistence.StructInstance
import org.apache.atlas.typesystem.types._
import org.apache.atlas.typesystem.{IStruct, ITypedStruct}
import org.json4s._
import org.json4s.native.JsonMethods._
import org.json4s.native.Serialization.{write => swrite}
import scala.collection.JavaConversions._
import scala.language.implicitConversions
package object dsl {
val defFormat = new DefaultFormats {
override protected def dateFormatter = TypeSystem.getInstance().getDateFormat;
override val typeHints = NoTypeHints
}
implicit val formats = defFormat + new TypedStructSerializer +
new BigDecimalSerializer + new BigIntegerSerializer
val BOOLEAN_TYPE = DataTypes.BOOLEAN_TYPE
val BYTE_TYPE = DataTypes.BYTE_TYPE
val SHORT_TYPE = DataTypes.SHORT_TYPE
val INT_TYPE = DataTypes.INT_TYPE
val LONG_TYPE = DataTypes.LONG_TYPE
val FLOAT_TYPE = DataTypes.FLOAT_TYPE
val DOUBLE_TYPE = DataTypes.DOUBLE_TYPE
val BIGINT_TYPE = DataTypes.BIGINTEGER_TYPE
val BIGDECIMAL_TYPE = DataTypes.BIGDECIMAL_TYPE
val DATE_TYPE = DataTypes.DATE_TYPE
val STRING_TYPE = DataTypes.STRING_TYPE
val ATTR_OPTIONAL = Multiplicity.OPTIONAL
val ATTR_REQUIRED = Multiplicity.REQUIRED
def repo = new MemRepository(ts)
def arrayType(dT: IDataType[_]) = ts.defineArrayType(dT)
def mapType(kT: IDataType[_], vT: IDataType[_]) = ts.defineMapType(kT, vT)
def attrDef(name: String, dT: IDataType[_],
m: Multiplicity = Multiplicity.OPTIONAL,
isComposite: Boolean = false,
reverseAttributeName: String = null) = {
require(name != null)
require(dT != null)
new AttributeDefinition(name, dT.getName, m, isComposite, reverseAttributeName)
}
def listTypes = ts.getTypeNames.sorted.toList.mkString("[", ",", "]")
def ts = TypeSystem.getInstance
def defineStructType(name: String, attrDef: AttributeDefinition*) = {
require(name != null)
ts.defineStructType(name, false, attrDef: _*)
}
def createInstance(typeName: String, jsonStr: String)(implicit formats: Formats) = {
val j = parse(jsonStr)
assert(j.isInstanceOf[JObject])
var j1 = j.asInstanceOf[JObject]
j1 = JObject(JField(Serialization.STRUCT_TYPE_FIELD_NAME, JString(typeName)) :: j1.obj)
new DynamicTypedStruct(Extraction.extract[StructInstance](j1))
}
def createInstance(typeName: String) = {
new DynamicTypedStruct(
ts.getDataType(classOf[StructType], typeName).asInstanceOf[IConstructableType[IStruct, ITypedStruct]].createInstance())
}
implicit def dynTypedStructToTypedStruct(s: DynamicTypedStruct) = s.ts
implicit def dynTypedStructToJson(s: DynamicTypedStruct)(implicit formats: Formats) = {
Extraction.decompose(s.ts)(formats)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.simpleserver
import akka.actor.{ActorSystem, Props}
import akka.io.IO
import com.typesafe.config.ConfigFactory
import org.apache.atlas.repository.memory.MemRepository
import org.apache.atlas.typesystem.types.TypeSystem
import spray.can.Http
/**
* A simple Spray-based server to test the TypeSystem and MemRepository.
*
* @example {{{
* -- Using the [[ https://github.com/jakubroztocil/httpie Httpie tool]]
*
* http GET localhost:9140/listTypeNames
* pbpaste | http PUT localhost:9140/defineTypes
* http GET localhost:9140/typeDetails typeNames:='["Department", "Person", "Manager"]'
*
* pbpaste | http PUT localhost:9140/createInstance
* pbpaste | http GET localhost:9140/getInstance
* }}}
*
* - On the Mac, pbpaste makes the clipboard contents available; copy the contents of resources/sampleTypes.json.
* - For createInstance, resources/sampleInstance.json is an example.
* - For getInstance, send an Id back; you can copy it from the output of createInstance.
*
*/
object Main extends App {
val config = ConfigFactory.load()
val host = config.getString("http.host")
val port = config.getInt("http.port")
implicit val system = ActorSystem("atlasservice")
val typSys = TypeSystem.getInstance()
val memRepo = new MemRepository(typSys)
val api = system.actorOf(Props(new RestInterface(typSys, memRepo)), "httpInterface")
IO(Http) ! Http.Bind(listener = api, interface = host, port = port)
}
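/*
 * Hedged sketch (editor's addition): Main above reads "http.host" and "http.port" from the Typesafe Config.
 * One way to supply them without an application.conf is to parse them inline and fall back to the default
 * loader; the host value below is illustrative, the port matches the httpie examples in the scaladoc.
 */
object ConfigSketch {
  import com.typesafe.config.ConfigFactory

  val config = ConfigFactory.parseString(
    """http.host = "0.0.0.0"
      |http.port = 9140
    """.stripMargin).withFallback(ConfigFactory.load())
  // config.getString("http.host") and config.getInt("http.port") now resolve exactly as in Main.
}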
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.simpleserver
import akka.actor._
import akka.util.Timeout
import com.google.common.collect.ImmutableList
import org.apache.atlas.repository.memory.MemRepository
import org.apache.atlas.typesystem.json._
import org.apache.atlas.typesystem.persistence.Id
import org.apache.atlas.typesystem.types._
import org.apache.atlas.typesystem.{ITypedReferenceableInstance, TypesDef}
import org.json4s.{Formats, NoTypeHints}
import spray.httpx.Json4sSupport
import scala.concurrent.duration._
class MetadataActor(val typeSystem: TypeSystem, val memRepository: MemRepository) extends Actor with ActorLogging {
import org.apache.atlas.tools.simpleserver.MetadataProtocol._
import scala.collection.JavaConversions._
import scala.language.postfixOps
implicit val timeout = Timeout(5 seconds)
def receive = {
case ListTypeNames() =>
sender ! TypeNames(typeSystem.getTypeNames.toList)
case GetTypeDetails(typeNames) =>
val typesDef = TypesSerialization.convertToTypesDef(typeSystem, (d: IDataType[_]) => typeNames.contains(d.getName))
sender ! TypeDetails(typesDef)
case DefineTypes(typesDef: TypesDef) =>
typesDef.enumTypes.foreach(typeSystem.defineEnumType(_))
typeSystem.defineTypes(ImmutableList.copyOf(typesDef.structTypes.toArray),
ImmutableList.copyOf(typesDef.traitTypes.toArray),
ImmutableList.copyOf(typesDef.classTypes.toArray))
var newTypes: List[HierarchicalType[_ <: HierarchicalType[_ <: AnyRef, _], _]] = Nil
typesDef.traitTypes.foreach { tDef =>
val nm = tDef.typeName
newTypes = newTypes :+
typeSystem.getDataType(classOf[HierarchicalType[_ <: HierarchicalType[_ <: AnyRef, _], _]], nm)
}
typesDef.classTypes.foreach { tDef =>
val nm = tDef.typeName
newTypes = newTypes :+
typeSystem.getDataType(classOf[HierarchicalType[_ <: HierarchicalType[_ <: AnyRef, _], _]], nm)
}
memRepository.defineTypes(newTypes)
sender ! TypesCreated
case CreateInstance(i) =>
val r = memRepository.create(i)
sender ! InstanceCreated(r.getId)
case GetInstance(id) =>
val r = memRepository.get(id)
sender ! InstanceDetails(r)
}
}
object MetadataProtocol {
case class ListTypeNames()
case class TypeNames(typeNames: List[String])
case class GetTypeDetails(typeNames: List[String])
case class TypeDetails(types: TypesDef)
case class DefineTypes(types: TypesDef)
case class TypesCreated()
case class CreateInstance(i: ITypedReferenceableInstance)
case class InstanceCreated(id: Id)
case class GetInstance(id: Id)
case class InstanceDetails(i: ITypedReferenceableInstance)
}
trait Json4sProtocol extends Json4sSupport {
val typeSystem: TypeSystem
val memRepository: MemRepository
implicit def json4sFormats: Formats =
org.json4s.native.Serialization.formats(NoTypeHints) + new MultiplicitySerializer +
new TypedStructSerializer +
new TypedReferenceableInstanceSerializer +
new BigDecimalSerializer + new BigIntegerSerializer + new IdSerializer
}
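/*
 * Hedged sketch (editor's addition): querying the MetadataActor above directly with the Akka ask pattern,
 * bypassing the HTTP layer. Assumes an initialized TypeSystem and MemRepository; the actor system name is
 * illustrative, and shutdown() reflects the Akka 2.3-era API used by this spray-based module.
 */
object AskSketch extends App {
  import akka.actor.{ActorSystem, Props}
  import akka.pattern.ask
  import akka.util.Timeout
  import org.apache.atlas.repository.memory.MemRepository
  import org.apache.atlas.tools.simpleserver.MetadataActor
  import org.apache.atlas.tools.simpleserver.MetadataProtocol.{ListTypeNames, TypeNames}
  import org.apache.atlas.typesystem.types.TypeSystem
  import scala.concurrent.Await
  import scala.concurrent.duration._

  implicit val system = ActorSystem("sketch")
  implicit val timeout = Timeout(5.seconds)

  val ts = TypeSystem.getInstance()
  val mdActor = system.actorOf(Props(new MetadataActor(ts, new MemRepository(ts))))

  // Ask for the registered type names and block briefly for the reply.
  val names = Await.result((mdActor ? ListTypeNames()).mapTo[TypeNames], 5.seconds)
  println(names.typeNames)

  system.shutdown()
}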
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.simpleserver
import akka.actor._
import akka.util.Timeout
import org.apache.atlas.repository.memory.MemRepository
import org.apache.atlas.typesystem.persistence.Id
import org.apache.atlas.typesystem.types.TypeSystem
import org.apache.atlas.typesystem.{ITypedReferenceableInstance, TypesDef}
import spray.http.StatusCodes
import spray.routing._
import scala.concurrent.duration._
class Responder(val typeSystem: TypeSystem, val memRepository : MemRepository,
requestContext:RequestContext, mdSvc:ActorRef) extends Actor with Json4sProtocol with ActorLogging {
import org.apache.atlas.tools.simpleserver.MetadataProtocol._
def receive = {
case typNames:TypeNames =>
requestContext.complete(StatusCodes.OK, typNames)
self ! PoisonPill
case tD:TypeDetails =>
requestContext.complete(StatusCodes.OK, tD)
self ! PoisonPill
case TypesCreated =>
requestContext.complete(StatusCodes.OK)
self ! PoisonPill
case InstanceCreated(id) =>
requestContext.complete(StatusCodes.OK, id)
self ! PoisonPill // stop this per-request responder, as in the cases above
case InstanceDetails(i) =>
requestContext.complete(StatusCodes.OK, i)
self ! PoisonPill
}
}
class RestInterface(val typeSystem: TypeSystem, val memRepository : MemRepository) extends HttpServiceActor
with RestApi {
def receive = runRoute(routes)
}
trait RestApi extends HttpService with Json4sProtocol with ActorLogging { actor: Actor =>
import MetadataProtocol._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.language.postfixOps
val typeSystem : TypeSystem
val memRepository : MemRepository
implicit val timeout = Timeout(10 seconds)
import akka.pattern.{ask, pipe}
val mdSvc = context.actorOf(Props(new MetadataActor(typeSystem, memRepository)))
def routes: Route =
path("listTypeNames") {
get { requestContext =>
val responder: ActorRef = createResponder(requestContext)
mdSvc.ask(ListTypeNames()).pipeTo(responder)
}
} ~
path("typeDetails") {
get {
entity(as[GetTypeDetails]) { typeDetails => requestContext =>
val responder = createResponder(requestContext)
mdSvc.ask(typeDetails).pipeTo(responder)
}
}
} ~
path("defineTypes") {
put {
entity(as[TypesDef]) { typesDef => requestContext =>
val responder = createResponder(requestContext)
mdSvc.ask(DefineTypes(typesDef)).pipeTo(responder)
}
}
} ~
path("createInstance") {
put {
entity(as[ITypedReferenceableInstance]) { i => requestContext =>
val responder = createResponder(requestContext)
mdSvc.ask(CreateInstance(i)).pipeTo(responder)
}
}
} ~
path("getInstance") {
get {
entity(as[Id]) { id => requestContext =>
val responder = createResponder(requestContext)
mdSvc.ask(GetInstance(id)).pipeTo(responder)
}
}
}
def createResponder(requestContext:RequestContext) = {
context.actorOf(Props(new Responder(typeSystem, memRepository, requestContext, mdSvc)))
}
}
\ No newline at end of file
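/*
 * Hedged sketch (editor's addition): the typeDetails route above unmarshals a GetTypeDetails message via
 * json4s, so the request body is just the serialized case class. This shows the shape of that JSON for the
 * type names used in the httpie examples; plain NoTypeHints formats suffice for this simple case class,
 * whereas the real routes use the richer Json4sProtocol formats.
 */
object PayloadSketch extends App {
  import org.apache.atlas.tools.simpleserver.MetadataProtocol.GetTypeDetails
  import org.json4s.NoTypeHints
  import org.json4s.native.Serialization

  implicit val formats = Serialization.formats(NoTypeHints)
  val body = Serialization.write(GetTypeDetails(List("Department", "Person", "Manager")))
  println(body) // {"typeNames":["Department","Person","Manager"]}
}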
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.thrift
import org.apache.atlas.AtlasException
import org.apache.atlas.typesystem.types.DataTypes
import scala.util.parsing.combinator.lexical.StdLexical
import scala.util.parsing.combinator.syntactical.StandardTokenParsers
import scala.util.parsing.combinator.{ImplicitConversions, PackratParsers}
import scala.util.parsing.input.CharArrayReader._
object BASE_TYPES extends Enumeration {
val STRING = Value("string")
val BINARY = Value("binary")
val SLIST = Value("slist")
val BOOLEAN = Value("bool")
val BYTE = Value("byte")
val I16 = Value("i16")
val I32 = Value("i32")
val I64 = Value("i64")
val DOUBLE = Value("double")
@throws[AtlasException]
def toPrimitiveTypeName(t : BASE_TYPES.Value) : String = t match {
case STRING => DataTypes.STRING_TYPE.getName
case SLIST => DataTypes.STRING_TYPE.getName
case BOOLEAN => DataTypes.BOOLEAN_TYPE.getName
case BYTE => DataTypes.BYTE_TYPE.getName
case I16 => DataTypes.SHORT_TYPE.getName
case I32 => DataTypes.INT_TYPE.getName
case I64 => DataTypes.LONG_TYPE.getName
case DOUBLE => DataTypes.DOUBLE_TYPE.getName
case _ => throw new AtlasException(s"Thrift BaseType ($t) not supported")
}
}
object THRIFT_LANG extends Enumeration {
val CPP = Value("cpp")
val PHP = Value("php")
val PY = Value("py")
val PERL = Value("perl")
val RUBY = Value("ruby")
val SMLTK_CAT = Value("smalltalk.category")
val SMLTK_PRE = Value("smalltalk.prefix")
val JAVA = Value("java")
val COCOA = Value("cocoa")
val XSD = Value("xsd")
val CSHARP = Value("csharp")
val STAR = Value("*")
val OTHER = Value("")
}
case class TypeAnnotation(name : String, value : String)
case class CPPType(name : String)
sealed trait FunctionType
case class VoidType() extends FunctionType
sealed trait FieldType extends FunctionType
case class IdentifierType(name : String) extends FieldType
case class BaseType(typ : BASE_TYPES.Value, typAnnotations :Option[List[TypeAnnotation]]) extends FieldType
sealed trait ContainerType extends FieldType {
def typAnnotations :Option[List[TypeAnnotation]]
}
case class MapType(keyType : FieldType, valueType : FieldType,
cppType : Option[CPPType],
typAnnotations :Option[List[TypeAnnotation]]) extends ContainerType
case class SetType(elemType : FieldType,
cppType : Option[CPPType],
typAnnotations :Option[List[TypeAnnotation]]) extends ContainerType
case class ListType(elemType : FieldType,
cppType : Option[CPPType],
typAnnotations :Option[List[TypeAnnotation]]) extends ContainerType
sealed trait ConstValue
case class IntConstant(value : Int) extends ConstValue
case class DoubleConstant(value : Double) extends ConstValue
case class StringConstant(value : String) extends ConstValue
case class IdConstant(value : String) extends ConstValue
case class ConstantList(value : List[ConstValue]) extends ConstValue
case class ConstantValuePair(first : ConstValue, second : ConstValue)
case class ConstantMap(value : List[ConstantValuePair]) extends ConstValue
case class ConstDef(fieldType : FieldType, id : String, value : ConstValue)
case class TypeDef(name : String, fieldType : FieldType,
typAnnotations :Option[List[TypeAnnotation]])
case class EnumValueDef(value : String, id : Option[IntConstant], typAnnotations :Option[List[TypeAnnotation]])
case class EnumDef(name : String, enumValues : List[EnumValueDef], typAnnotations :Option[List[TypeAnnotation]])
case class SEnumDef(name : String, enumValues : List[String], typAnnotations :Option[List[TypeAnnotation]])
case class FieldDef(id : Option[IntConstant], requiredNess : Boolean, fieldType : FieldType, name : String,
fieldValue : Option[ConstValue], xsdOptional : Boolean, xsdNillable : Boolean,
xsdAttributes: Option[XsdAttributes],
typAnnotations :Option[List[TypeAnnotation]])
case class XsdAttributes(fields : List[FieldDef])
case class StructDef(name : String, xsdAll : Boolean, fields : List[FieldDef],
typAnnotations :Option[List[TypeAnnotation]])
case class UnionDef(val name : String, val xsdAll : Boolean,
val fields : List[FieldDef],
val typAnnotations :Option[List[TypeAnnotation]])
case class ExceptionDef(val name : String,
val fields : List[FieldDef],
val typAnnotations :Option[List[TypeAnnotation]])
case class FunctionDef(oneway : Boolean, returnType : FunctionType, name : String, parameters : List[FieldDef],
throwFields : Option[List[FieldDef]], typAnnotations :Option[List[TypeAnnotation]])
case class ServiceDef(name : String, superName : Option[String], functions : List[FunctionDef],
typAnnotations :Option[List[TypeAnnotation]])
case class IncludeDef(value : String)
case class CppIncludeDef(val value : String)
case class NamespaceDef(lang : THRIFT_LANG.Value, name : String, otherLang : Option[String] = None)
case class ThriftDef(val includes : List[IncludeDef],
val cppIncludes : List[CppIncludeDef],
val namespaces : List[NamespaceDef],
val constants : List[ConstDef],
val typedefs : List[TypeDef],
val enums : List[EnumDef],
val senums : List[SEnumDef],
val structs : List[StructDef],
val unions : List[UnionDef],
val xceptions : List[ExceptionDef],
val services : List[ServiceDef]) {
def this() = this(List(), List(), List(), List(), List(), List(), List(),
List(), List(), List(), List())
def this(a : IncludeDef) = this(a :: Nil, List(), List(), List(), List(), List(), List(),
List(), List(), List(), List())
def this(a : CppIncludeDef) = this(List(), a :: Nil, List(), List(), List(), List(), List(), List(),
List(), List(), List())
def this(a : NamespaceDef) = this(List(), List(), a :: Nil, List(), List(), List(), List(), List(), List(),
List(), List())
def this(a : ConstDef) = this(List(), List(), List(), a :: Nil, List(), List(), List(), List(), List(), List(),
List())
def this(a : TypeDef) = this(List(), List(), List(), List(), a :: Nil, List(), List(), List(), List(), List(), List())
def this(a : EnumDef) = this(List(), List(), List(), List(), List(), a :: Nil, List(), List(),
List(), List(), List())
def this(a : SEnumDef) = this(List(), List(), List(), List(), List(), List(), a :: Nil, List(),
List(), List(), List())
def this(a : StructDef) = this(List(), List(), List(), List(), List(), List(), List(), a :: Nil,
List(), List(), List())
def this(a : UnionDef) = this(List(), List(), List(), List(), List(), List(), List(),
List(), a :: Nil, List(), List())
def this(a : ExceptionDef) = this(List(), List(), List(), List(), List(), List(), List(),
List(), List(), a :: Nil, List())
def this(a : ServiceDef) = this(List(), List(), List(), List(), List(), List(), List(),
List(), List(), List(), a :: Nil)
def plus(a : IncludeDef) = ThriftDef(includes.+:(a), cppIncludes, namespaces, constants, typedefs, enums, senums,
structs, unions, xceptions, services)
def plus(a : CppIncludeDef) = ThriftDef(includes, cppIncludes.+:(a), namespaces, constants, typedefs, enums, senums,
structs, unions, xceptions, services)
def plus(a : NamespaceDef) = ThriftDef(includes, cppIncludes, namespaces.+:(a), constants, typedefs, enums, senums,
structs, unions, xceptions, services)
def plus(a : ConstDef) = ThriftDef(includes, cppIncludes, namespaces, constants.+:(a), typedefs, enums, senums,
structs, unions, xceptions, services)
def plus(a : TypeDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs.+:(a), enums, senums,
structs, unions, xceptions, services)
def plus(a : EnumDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs, enums.+:(a), senums,
structs, unions, xceptions, services)
def plus(a : SEnumDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs, enums, senums.+:(a),
structs, unions, xceptions, services)
def plus(a : StructDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs, enums, senums,
structs.+:(a), unions, xceptions, services)
def plus(a : UnionDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs, enums, senums,
structs, unions.+:(a), xceptions, services)
def plus(a : ExceptionDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs, enums, senums,
structs, unions, xceptions.+:(a), services)
def plus(a : ServiceDef) = ThriftDef(includes, cppIncludes, namespaces, constants, typedefs, enums, senums,
structs, unions, xceptions, services.+:(a))
def plus(a : ThriftDef) = ThriftDef(includes ::: a.includes,
cppIncludes ::: a.cppIncludes,
namespaces ::: a.namespaces,
constants ::: a.constants,
typedefs ::: a.typedefs,
enums ::: a.enums,
senums ::: a.senums,
structs ::: a.structs,
unions ::: a.unions,
xceptions ::: a.xceptions,
services ::: a.services)
}
trait ThriftKeywords {
this : StandardTokenParsers =>
import scala.language.implicitConversions
protected case class Keyword(str: String)
protected implicit def asParser(k: Keyword): Parser[String] = k.str
protected val LPAREN = Keyword("(")
protected val RPAREN = Keyword(")")
protected val EQ = Keyword("=")
protected val CPP_TYPE = Keyword("cpp_type")
protected val LIST = Keyword("list")
protected val LT = Keyword("<")
protected val GT = Keyword(">")
protected val SET = Keyword("set")
protected val MAP = Keyword("map")
protected val STRING = Keyword("string")
protected val BINARY = Keyword("binary")
protected val SLIST = Keyword("slist")
protected val BOOL = Keyword("bool")
protected val BYTE = Keyword("byte")
protected val I16 = Keyword("i16")
protected val I32 = Keyword("i32")
protected val I64 = Keyword("i64")
protected val DOUBLE = Keyword("double")
protected val VOID = Keyword("void")
protected val REQUIRED = Keyword("required")
protected val OPTIONAL = Keyword("optional")
protected val COLON = Keyword(":")
protected val THROWS = Keyword("throws")
protected val ONEWAY = Keyword("oneway")
protected val EXTENDS = Keyword("extends")
protected val SERVICE = Keyword("service")
protected val EXCEPTION = Keyword("exception")
protected val LBRACKET = Keyword("{")
protected val RBRACKET = Keyword("}")
protected val XSD_ATTRS = Keyword("xsd_attributes")
protected val XSD_NILBLE = Keyword("xsd_nillable")
protected val XSD_OPT = Keyword("xsd_optional")
protected val XSD_ALL = Keyword("xsd_all")
protected val UNION = Keyword("union")
protected val LSQBRACKET = Keyword("[")
protected val RSQBRACKET = Keyword("]")
protected val CONST = Keyword("const")
protected val STRUCT = Keyword("struct")
protected val SENUM = Keyword("senum")
protected val ENUM = Keyword("enum")
protected val COMMA = Keyword(",")
protected val SEMICOLON = Keyword(";")
protected val TYPEDEF = Keyword("typedef")
protected val INCLUDE = Keyword("include")
protected val CPP_INCL = Keyword("cpp_include")
protected val NAMESPACE = Keyword("namespace")
protected val STAR = Keyword("*")
protected val CPP_NS = Keyword("cpp_namespace")
protected val PHP_NS = Keyword("php_namespace")
protected val PY_NS = Keyword("py_module")
protected val PERL_NS = Keyword("perl_package")
protected val RUBY_NS = Keyword("ruby_namespace")
protected val SMLTK_CAT = Keyword("smalltalk_category")
protected val SMLTK_PRE = Keyword("smalltalk_prefix")
protected val JAVA_NS = Keyword("java_package")
protected val COCOA_NS = Keyword("cocoa_package")
protected val XSD_NS = Keyword("xsd_namespace")
protected val CSHARP_NS = Keyword("csharp_namespace")
def isRequired(r : Option[String]) = r match {
case Some(REQUIRED) => true
case _ => false
}
def isXsdOptional(r : Option[String]) = r match {
case Some(XSD_OPT) => true
case _ => false
}
def isXsdNillable(r : Option[String]) = r match {
case Some(XSD_NILBLE) => true
case _ => false
}
def isXsdAll(r : Option[String]) = r match {
case Some(XSD_ALL) => true
case _ => false
}
def isOneWay(r : Option[String]) = r match {
case Some(ONEWAY) => true
case _ => false
}
}
trait ThriftTypeRules extends ThriftKeywords {
this : StandardTokenParsers =>
def containerType : Parser[ContainerType] = mapType | setType | listType
def setType = SET ~ cppType.? ~ LT ~ fieldType ~ GT ~ typeAnnotations.? ^^ {
case s ~ ct ~ lt ~ t ~ gt ~ tA => SetType(t, ct, tA)
}
def listType = LIST ~ LT ~ fieldType ~ GT ~ cppType.? ~ typeAnnotations.? ^^ {
case l ~ lt ~ t ~ gt ~ ct ~ tA => ListType(t, ct, tA)
}
def mapType = MAP ~ cppType.? ~ LT ~ fieldType ~ COMMA ~ fieldType ~ GT ~ typeAnnotations.? ^^ {
case s ~ ct ~ lt ~ kt ~ c ~ vt ~ gt ~ tA => MapType(kt, vt, ct, tA)
}
def cppType : Parser[CPPType] = CPP_TYPE ~ stringLit ^^ { case c ~ s => CPPType(s)}
def fieldType: Parser[FieldType] = ident ^^ {case i => IdentifierType(i)} |
baseType |
containerType
def baseType : Parser[BaseType] = simpleBaseType ~ typeAnnotations.? ^^ { case s ~ t => BaseType(s, t)}
def simpleBaseType : Parser[BASE_TYPES.Value] = STRING ^^^ BASE_TYPES.STRING |
BINARY ^^^ BASE_TYPES.BINARY |
SLIST ^^^ BASE_TYPES.SLIST |
BOOL ^^^ BASE_TYPES.BOOLEAN |
BYTE ^^^ BASE_TYPES.BYTE |
I16 ^^^ BASE_TYPES.I16 |
I32 ^^^ BASE_TYPES.I32 |
I64 ^^^ BASE_TYPES.I64 |
DOUBLE ^^^ BASE_TYPES.DOUBLE
def typeAnnotations : Parser[List[TypeAnnotation]] =
LPAREN ~ typeAnnotation.* ~ RPAREN ^^ { case l ~ t ~ r => t.toList}
def typeAnnotation : Parser[TypeAnnotation] =
(ident ~ EQ ~ stringLit ~ commaOrSemicolon.?) ^^ { case i ~ e ~ s ~ c => TypeAnnotation(i,s)}
def commaOrSemicolon : Parser[String] = COMMA | SEMICOLON
}
/**
 * @todo extract the constant rules into this trait. This requires moving `hexConstant` here, but it is unclear how
 *       to specify the type of `HexConstant`: it is a path-dependent type tied to the lexical member of ThriftParser.
*/
trait ThriftConstantRules extends ThriftKeywords {
this: StandardTokenParsers =>
// def parseDouble(s: String) = try { Some(s.toDouble) } catch { case _ : Throwable => None }
//
// def constValue : Parser[ConstValue] = numericLit ^^ {
// case n => parseDouble(n) match {
// case Some(d) => DoubleConstant(d)
// case _ => IntConstant(n.toInt)
// }
// } |
// hexConstant ^^ { case h => IntConstant(Integer.parseInt(h, 16))} |
// stringLit ^^ { case s => StringConstant(s)} |
// ident ^^ { case i => IdConstant(i)} |
// constList |
// constMap
//
// def constValuePair = constValue ~ COLON ~ constValue ~ commaOrSemicolon.? ^^ {
// case k ~ c ~ v ~ cs => ConstantValuePair(k,v)
// }
//
// def constList = LSQBRACKET ~ (constValue <~ commaOrSemicolon).* ~ RSQBRACKET ^^ {
// case l ~ vs ~ r => ConstantList(vs)
// }
//
// def constMap = LBRACKET ~ constValuePair.* ~ RBRACKET ^^ {
// case l ~ ps ~ r => ConstantMap(ps)
// }
}
/**
* A Parser for Thrift definition scripts.
* Based on [[https://github.com/twitter/commons/blob/master/src/antlr/twitter/thrift/descriptors/AntlrThrift.g]].
* Definition is parsed into a [[org.apache.atlas.tools.thrift.ThriftDef ThriftDef]] structure.
*
* @example {{{
* var p = new ThriftParser
* var td : Option[ThriftDef] = p("""include "share/fb303/if/fb303.thrift"
* namespace java org.apache.hadoop.hive.metastore.api
* namespace php metastore
* namespace cpp Apache.Hadoop.Hive
* \""")
* }}}
*
 * @todo doesn't traverse include directives. Includes are parsed into
* [[org.apache.atlas.tools.thrift.IncludeDef IncludeDef]] structures
* but are not traversed.
* @todo mixing in [[scala.util.parsing.combinator.PackratParsers PackratParsers]] is a placeholder. Need to
* change specific grammar rules to `lazy val` and `Parser[Elem]` to `PackratParser[Elem]`. Will do based on
* performance analysis.
* @todo Error reporting
*/
class ThriftParser extends StandardTokenParsers with ThriftKeywords with ThriftTypeRules with PackratParsers {
import scala.language.higherKinds
private val reservedWordsDelims : Seq[String] =
this
.getClass
.getMethods
.filter(_.getReturnType == classOf[Keyword])
.map(_.invoke(this).asInstanceOf[Keyword].str)
private val (thriftreservedWords : Seq[String], thriftdelims : Seq[String]) =
reservedWordsDelims.partition(s => s.charAt(0).isLetter)
override val lexical = new ThriftLexer(thriftreservedWords, thriftdelims)
import lexical.HexConstant
/** A parser which matches a hex constant */
def hexConstant: Parser[String] =
elem("string literal", _.isInstanceOf[HexConstant]) ^^ (_.chars)
def apply(input: String): Option[ThriftDef] = {
phrase(program)(new lexical.Scanner(input)) match {
case Success(r, x) => Some(r)
case Failure(m, x) => {
None
}
case Error(m, x) => {
None
}
}
}
def program = headers ~ definitions ^^ { case h ~ d => h plus d}
def headers = header.* ^^ { case l => l.foldRight(new ThriftDef)((a,t) => t plus a)}
def header = INCLUDE ~> stringLit ^^ { case s => new ThriftDef(IncludeDef(s))} |
CPP_INCL ~> stringLit ^^ { case s => new ThriftDef(CppIncludeDef(s))} |
NAMESPACE ~ ident ~ ident ^^ { case ns ~ t ~ n => new ThriftDef(NamespaceDef(THRIFT_LANG.OTHER, n, Some(t)))} |
NAMESPACE ~ STAR ~ ident ^^ { case ns ~ s ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.STAR, i))} |
CPP_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.CPP, i))} |
PHP_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.PHP, i))} |
PY_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.PY, i))} |
PERL_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.PERL, i))} |
RUBY_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.RUBY, i))} |
SMLTK_CAT ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.SMLTK_CAT, i))} |
SMLTK_PRE ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.SMLTK_PRE, i))} |
JAVA_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.JAVA, i))} |
COCOA_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.COCOA, i))} |
XSD_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.XSD, i))} |
CSHARP_NS ~ ident ^^ { case ns ~ i => new ThriftDef(NamespaceDef(THRIFT_LANG.CSHARP, i))}
def definitions : Parser[ThriftDef] = definition.* ^^ {
case l => l.foldRight(new ThriftDef)((a,t) => t plus a)
}
def definition : Parser[ThriftDef] = const ^^ { case c => new ThriftDef(c)} |
typeDefinition |
service ^^ { case s => new ThriftDef(s)}
def typeDefinition : Parser[ThriftDef] = (typedef ^^ {case t => new ThriftDef(t)} |
enum ^^ {case e => new ThriftDef(e)} |
senum ^^ {case e => new ThriftDef(e)} |
struct ^^ {case e => new ThriftDef(e)} |
union ^^ {case e => new ThriftDef(e)} |
xception ^^ {case e => new ThriftDef(e)}
)
def typedef : Parser[TypeDef] = TYPEDEF ~ fieldType ~ ident ~ typeAnnotations.? ^^ {
case t ~ f ~ i ~ tA => TypeDef(i, f, tA)
}
def enum : Parser[EnumDef] = ENUM ~ ident ~ LBRACKET ~ enumDef.* ~ RBRACKET ~ typeAnnotations.? ^^ {
case e ~ i ~ l ~ ed ~ r ~ t => EnumDef(i, ed.toList, t)
}
def enumDef : Parser[EnumValueDef] = ident ~ EQ ~ numericLit ~ typeAnnotations.? ~ commaOrSemicolon.? ^^ {
case i ~ e ~ n ~ t ~ c => EnumValueDef(i, Some(IntConstant(n.toInt)), t)
}
def senum : Parser[SEnumDef] = SENUM ~ ident ~ LBRACKET ~ senumDef.* ~ RBRACKET ~ typeAnnotations.? ^^ {
case se ~ i ~ l ~ sed ~ r ~ t => SEnumDef(i, sed.toList, t)
}
def senumDef : Parser[String] = stringLit <~ commaOrSemicolon.?
def service : Parser[ServiceDef] = SERVICE ~ ident ~ extnds.? ~ LBRACKET ~ function.* ~
RBRACKET ~ typeAnnotations.? ^^ {
case s ~ i ~ e ~ lb ~ fs ~ rb ~ tA => ServiceDef(i, e, fs, tA)
}
def extnds : Parser[String] = EXTENDS ~> ident
def function : Parser[FunctionDef] = ONEWAY.? ~ functionType ~ ident ~ LPAREN ~ field.* ~ RPAREN ~ throwz.? ~
typeAnnotations.? ~ commaOrSemicolon.? ^^ {
case o ~ fT ~ i ~ lp ~ fs ~ rp ~ th ~ tA ~ cS => FunctionDef(isOneWay(o), fT, i, fs, th, tA)
}
def throwz : Parser[List[FieldDef]] = THROWS ~ LPAREN ~ field.* ~ RPAREN ^^ {
case t ~ l ~ fs ~ r => fs.toList
}
def functionType : Parser[FunctionType] = VOID ^^^ VoidType() | fieldType
def xception : Parser[ExceptionDef] = EXCEPTION ~ ident ~ LBRACKET ~ field.* ~ RBRACKET ~ typeAnnotations.? ^^ {
case s ~ i ~ lb ~ fs ~ rb ~ tA => ExceptionDef(i, fs.toList, tA)
}
def union : Parser[UnionDef] = UNION ~ ident ~ XSD_ALL.? ~ LBRACKET ~ field.* ~ RBRACKET ~ typeAnnotations.? ^^ {
case s ~ i ~ xA ~ lb ~ fs ~ rb ~ tA => UnionDef(i, isXsdAll(xA), fs.toList, tA)
}
def struct : Parser[StructDef] = STRUCT ~ ident ~ XSD_ALL.? ~ LBRACKET ~ field.* ~ RBRACKET ~ typeAnnotations.? ^^ {
case s ~ i ~ xA ~ lb ~ fs ~ rb ~ tA => StructDef(i, isXsdAll(xA), fs.toList, tA)
}
def field : Parser[FieldDef] = fieldIdentifier.? ~ fieldRequiredness.? ~ fieldType ~ ident ~ fieldValue.? ~
XSD_OPT.? ~ XSD_NILBLE.? ~ xsdAttributes.? ~ typeAnnotations.? ~ commaOrSemicolon.? ^^ {
case fi ~ fr ~ ft ~id ~ fv ~ xo ~ xn ~ xa ~ tA ~ cS => FieldDef(
fi,
isRequired(fr),
ft,
id,
fv,
isXsdOptional(xo),
isXsdNillable(xn),
xa,
tA
)
}
def xsdAttributes : Parser[XsdAttributes] = XSD_ATTRS ~ LBRACKET ~ field.* ~ RBRACKET ^^ {
case x ~ l ~ f ~ r => XsdAttributes(f)
}
def fieldValue = EQ ~> constValue
def fieldRequiredness : Parser[String] = REQUIRED | OPTIONAL
def fieldIdentifier : Parser[IntConstant] = numericLit <~ COLON ^^ {
case n => IntConstant(n.toInt)
}
def const : Parser[ConstDef] = CONST ~ fieldType ~ ident ~ EQ ~ constValue ~ commaOrSemicolon.? ^^ {
case c ~ fT ~ i ~ e ~ cV ~ cS => ConstDef(fT, i, cV)
}
def parseDouble(s: String) = try { Some(s.toDouble) } catch { case _ : Throwable => None }
def constValue : Parser[ConstValue] = numericLit ^^ {
case n => parseDouble(n) match {
case Some(d) => DoubleConstant(d)
case _ => IntConstant(n.toInt)
}
} |
hexConstant ^^ { case h => IntConstant(Integer.parseInt(h, 16))} |
stringLit ^^ { case s => StringConstant(s)} |
ident ^^ { case i => IdConstant(i)} |
constList |
constMap
def constValuePair = constValue ~ COLON ~ constValue ~ commaOrSemicolon.? ^^ {
case k ~ c ~ v ~ cs => ConstantValuePair(k,v)
}
def constList = LSQBRACKET ~ (constValue <~ commaOrSemicolon).* ~ RSQBRACKET ^^ {
case l ~ vs ~ r => ConstantList(vs)
}
def constMap = LBRACKET ~ constValuePair.* ~ RBRACKET ^^ {
case l ~ ps ~ r => ConstantMap(ps)
}
}
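/*
 * Hedged sketch (editor's addition): feeding a small inline definition to the ThriftParser above and
 * inspecting the resulting ThriftDef. The enum and struct are taken from the test thrift file later in
 * this commit; see the class scaladoc for an example with include/namespace headers.
 */
object ThriftParserSketch extends App {
  import org.apache.atlas.tools.thrift.{ThriftDef, ThriftParser}

  val parser = new ThriftParser
  val td: Option[ThriftDef] = parser(
    """enum LockLevel { DB = 1, TABLE = 2 }
      |struct Version { 1: string version, 2: string comments }
    """.stripMargin)

  // None indicates a parse failure (see ThriftParser.apply); on success expect one enum and one struct.
  td.foreach(d => println(s"enums=${d.enums.map(_.name)} structs=${d.structs.map(_.name)}"))
}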
class ThriftLexer(val keywords: Seq[String], val delims : Seq[String]) extends StdLexical with ImplicitConversions {
case class HexConstant(chars: String) extends Token {
override def toString = chars
}
case class StIdentifier(chars: String) extends Token {
override def toString = chars
}
reserved ++= keywords
delimiters ++= delims
override lazy val token: Parser[Token] =
( intConstant ^^ NumericLit
| hexConstant ^^ HexConstant
| dubConstant ^^ NumericLit
| identifier ^^ processIdent
| st_identifier ^^ StIdentifier
| string ^^ StringLit
| EofCh ^^^ EOF
| '\'' ~> failure("unclosed string literal")
| '"' ~> failure("unclosed string literal")
| delim
| failure("illegal character")
)
override def identChar = letter | elem('_')
def identifier = identChar ~ (identChar | digit | '.' ).* ^^
{ case first ~ rest => (first :: rest).mkString }
def st_identChar = letter | elem('-')
def st_identifier = st_identChar ~ (st_identChar | digit | '.' | '_').* ^^
{ case first ~ rest => (first :: rest).mkString("")}
override def whitespace: Parser[Any] =
( whitespaceChar
| '/' ~ '*' ~ comment
| '/' ~ '/' ~ chrExcept(EofCh, '\n').*
| '#' ~ chrExcept(EofCh, '\n').*
| '/' ~ '*' ~ failure("unclosed comment")
).*
protected override def comment: Parser[Any] = (
commentChar.* ~ '*' ~ '/'
)
protected def commentChar = chrExcept(EofCh, '*') | '*' ~ not('/')
def string = '\"' ~> chrExcept('\"', '\n', EofCh).* <~ '\"' ^^ { _ mkString "" } |
'\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^ { _ mkString "" }
def zero: Parser[String] = '0' ^^^ "0"
def nonzero = elem("nonzero digit", d => d.isDigit && d != '0')
def sign = elem("sign character", d => d == '-' || d == '+')
def exponent = elem("exponent character", d => d == 'e' || d == 'E')
def intConstant = opt(sign) ~> zero | intList
def intList = opt(sign) ~ nonzero ~ rep(digit) ^^ {case s ~ x ~ y => (optString("", s) :: x :: y) mkString ""}
def fracPart = '.' ~> rep(digit) ^^ { "." + _ mkString "" }
def expPart = exponent ~ opt(sign) ~ rep1(digit) ^^ { case e ~ s ~ d =>
e.toString + optString("", s) + d.mkString("")
}
def dubConstant = opt(sign) ~ digit.* ~ fracPart ~ opt(expPart) ^^ { case s ~ i ~ f ~ e =>
optString("", s) + i + f + optString("", e)
}
val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray
def hexDigit = elem("hex digit", hexDigits.contains(_))
def hexConstant = '0' ~> 'x' ~> hexDigit.+ ^^ {case h => h.mkString("")}
private def optString[A](pre: String, a: Option[A]) = a match {
case Some(x) => pre + x.toString
case None => ""
}
}
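/*
 * Hedged sketch (editor's addition): running the ThriftLexer above standalone over a one-line struct.
 * The keyword and delimiter lists are a small illustrative subset of what ThriftParser derives reflectively
 * from its Keyword fields.
 */
object ThriftLexerSketch extends App {
  import org.apache.atlas.tools.thrift.ThriftLexer
  import scala.util.parsing.input.Reader

  val lexer = new ThriftLexer(Seq("struct", "string"), Seq("{", "}", ":", ","))
  var in: Reader[lexer.Token] = new lexer.Scanner("struct Version { 1: string version }")
  while (!in.atEnd) {
    println(in.first) // prints each token: keywords, identifiers, numeric literals, delimiters
    in = in.rest
  }
}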
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.thrift
import com.google.common.collect.ImmutableList
import org.apache.atlas.AtlasException
import org.apache.atlas.typesystem.TypesDef
import org.apache.atlas.typesystem.types.{DataTypes, HierarchicalTypeDefinition, Multiplicity, TraitType, _}
import org.slf4j.{Logger, LoggerFactory}
import scala.io.Source
import scala.util.{Failure, Success, Try}
case class CompositeRelation(typeName: String, fieldName: String, reverseFieldName: Option[String])
/**
* Convert a [[ThriftDef ThriftDef]] to
* [[TypesDef TypesDef]]. Currently there are several restrictions:
*
* - CppIncludes, SEnums are not allowed
* - The only include allowed is that of "share/fb303/if/fb303.thrift". This include is ignored.
* Any other include will trigger an exception
 * - Namespaces, TypeDefs, Constants, Unions, Exceptions, and Service definitions are ignored.
 *   Consequently, typedefs are not applied to fields.
 * - Field constant values are ignored.
 * - Type annotations and XSD information are ignored.
*
 * Thrift structs can be mapped to Structs, Traits or Classes. The caller specifies the preference by
 * providing the structNames, classNames and traitNames parameters. A struct that is not in one of these
 * three lists is not mapped.
*
 * The ThriftDef doesn't specify if a relationship is composite. For example, in the thrift definition
* {{{
* struct Person {
1: string name,
2: Address addr,
}
struct Address {
1: string street,
2: string city,
}
* }}}
*
 * If Person and Address are mapped to classes, you may want to make Person -> Address a composite relation.
 * The caller can specify these in the 'compositeRelations' parameter; a usage sketch follows the class below.
*
*/
class ThriftTypesGen(val structNames: List[String], val classNames: List[String], val traitNames: List[String],
val compositeRelations: List[CompositeRelation]) {
private val LOG: Logger = LoggerFactory.getLogger(classOf[ThriftTypesGen])
private val FB_INCLUDE = "share/fb303/if/fb303.thrift"
/**
 * For a (typeName, fieldName) key, specifies (isComposite, reverseName).
 * If no entry exists, the field is not composite.
*/
private var compositeRelsMap: Map[(String, String), (Boolean, Option[String])] = Map()
def apply(thriftResource: String): TypesDef = {
val tDef = parseThrift(thriftResource)
tDef.flatMap(buildCompositeRelations).flatMap(typesDef) match {
case Success(t) => t
case Failure(v) => throw v
}
}
def buildCompositeRelations(thriftDef: ThriftDef): Try[ThriftDef] = Try {
compositeRelations.foreach { cr =>
val sDef = thriftDef.structs.find(_.name == cr.typeName)
if (!sDef.isDefined) {
throw new AtlasException(s"Unknown Struct (${cr.typeName}) specified in CompositeRelation")
}
val fDef = sDef.get.fields.find(_.name == cr.fieldName)
if (!fDef.isDefined) {
throw new AtlasException(s"Unknown Field (${cr.fieldName}) specified in CompositeRelation")
}
compositeRelsMap = compositeRelsMap + ((cr.typeName, cr.fieldName) ->(true, cr.reverseFieldName))
if (cr.reverseFieldName.isDefined) {
val reverseStructName = dataTypeName(fDef.get.fieldType)
val reverseStructDef = thriftDef.structs.find(_.name == reverseStructName)
if (!reverseStructDef.isDefined) {
throw new AtlasException(s"Cannot find Struct $reverseStructName in CompositeRelation $cr")
}
val rfDef = reverseStructDef.get.fields.find(_.name == cr.reverseFieldName)
if (!rfDef.isDefined) {
throw new AtlasException(s"Unknown Reverse Field (${cr.reverseFieldName}) specified in CompositeRelation")
}
List(cr, CompositeRelation(reverseStructName, cr.reverseFieldName.get, Some(cr.fieldName)))
compositeRelsMap = compositeRelsMap +
((reverseStructName, cr.reverseFieldName.get) ->(false, Some(cr.fieldName)))
}
}
thriftDef
}
def typesDef(thriftDef: ThriftDef): Try[TypesDef] = {
var tDef: Try[TypesDef] = Try {
TypesDef(Seq(), Seq(), Seq(), Seq())
}
tDef = tDef.flatMap((t: TypesDef) => includes(t, thriftDef.includes)).
flatMap((t: TypesDef) => cppIncludes(t, thriftDef.cppIncludes)).
flatMap((t: TypesDef) => namespaces(t, thriftDef.namespaces)).
flatMap((t: TypesDef) => constants(t, thriftDef.constants)).
flatMap((t: TypesDef) => senums(t, thriftDef.senums)).
flatMap((t: TypesDef) => enums(t, thriftDef.enums)).
flatMap((t: TypesDef) => structs(t, thriftDef.structs)).
flatMap((t: TypesDef) => unions(t, thriftDef.unions)).
flatMap((t: TypesDef) => exceptions(t, thriftDef.xceptions)).
flatMap((t: TypesDef) => services(t, thriftDef.services))
tDef
}
private def parseThrift(thriftResource: String): Try[ThriftDef] = {
Try {
LOG.debug("Parsing Thrift resource {}", thriftResource)
val is = getClass().getResourceAsStream(thriftResource)
val src: Source = Source.fromInputStream(is)
val thriftStr: String = src.getLines().mkString("\n")
val p = new ThriftParser
var thriftDef: Option[ThriftDef] = p(thriftStr)
thriftDef match {
case Some(s) => s
case None => {
LOG.debug("Parse for thrift resource {} failed", thriftResource)
throw new AtlasException(s"Failed to parse thrift resource: $thriftResource")
}
}
}
}
@throws[AtlasException]
private def dataTypeName(fT: FieldType): String = fT match {
case IdentifierType(n) => n
case BaseType(typ, _) => BASE_TYPES.toPrimitiveTypeName(typ)
case ListType(elemType, _, _) => DataTypes.arrayTypeName(dataTypeName(elemType))
case SetType(elemType, _, _) => DataTypes.arrayTypeName(dataTypeName(elemType))
case MapType(keyType, valueType, _, _) => DataTypes.mapTypeName(dataTypeName(keyType), dataTypeName(valueType))
}
private def enumValue(e: EnumValueDef, defId: Int): EnumValue = e match {
case EnumValueDef(value, Some(id), _) => new EnumValue(value, id.value)
case EnumValueDef(value, None, _) => new EnumValue(value, defId)
}
private def enumDef(td: TypesDef, e: EnumDef): Try[TypesDef] = {
Success(
td.copy(enumTypes = td.enumTypes :+
new EnumTypeDefinition(e.name, e.enumValues.zipWithIndex.map(t => enumValue(t._1, -t._2)): _*))
)
}
private def includeDef(td: TypesDef, i: IncludeDef): Try[TypesDef] = {
Try {
if (i.value != FB_INCLUDE) {
throw new AtlasException(s"Unsupported Include ${i.value}, only fb303.thrift is currently allowed.")
}
td
}
}
private def cppIncludeDef(td: TypesDef, i: CppIncludeDef): Try[TypesDef] = {
Try {
throw new AtlasException(s"Unsupported CppInclude ${i.value}.")
}
}
private def namespaceDef(td: TypesDef, i: NamespaceDef): Try[TypesDef] = {
Try {
LOG.debug(s"Ignoring Namespace definition $i")
td
}
}
private def constantDef(td: TypesDef, i: ConstDef): Try[TypesDef] = {
Try {
LOG.debug(s"Ignoring ConstantDef definition $i")
td
}
}
private def senumDef(td: TypesDef, i: SEnumDef): Try[TypesDef] = {
Try {
throw new AtlasException(s"Unsupported SEnums ${i}.")
}
}
private def fieldDef(typName: String, fd: FieldDef): AttributeDefinition = {
val name: String = fd.name
val dTName: String = dataTypeName(fd.fieldType)
var m: Multiplicity = Multiplicity.OPTIONAL
if (fd.requiredNess) {
m = Multiplicity.REQUIRED
}
fd.fieldType match {
case _: ListType => m = Multiplicity.COLLECTION
case _: SetType => m = Multiplicity.SET
case _ => ()
}
var isComposite = false
var reverseAttrName: String = null
val r = compositeRelsMap.get((typName, name))
if (r.isDefined) {
isComposite = r.get._1
if (r.get._2.isDefined) {
reverseAttrName = r.get._2.get
}
}
new AttributeDefinition(name, dTName, m, isComposite, reverseAttrName)
}
private def structDef(td: TypesDef, structDef: StructDef): Try[TypesDef] = Try {
val typeName: String = structDef.name
typeName match {
case t if structNames contains t => td.copy(structTypes = td.structTypes :+
new StructTypeDefinition(typeName, structDef.fields.map(fieldDef(typeName, _)).toArray))
case t: String if traitNames contains t => {
val ts = td.traitTypes :+
new HierarchicalTypeDefinition[TraitType](classOf[TraitType],
typeName, ImmutableList.of[String](), structDef.fields.map(fieldDef(typeName, _)).toArray)
td.copy(traitTypes = ts)
}
case t: String if classNames contains t => {
val cs = td.classTypes :+
new HierarchicalTypeDefinition[ClassType](classOf[ClassType],
typeName, ImmutableList.of[String](), structDef.fields.map(fieldDef(typeName, _)).toArray)
td.copy(classTypes = cs)
}
case _ => td
}
}
private def unionDef(td: TypesDef, i: UnionDef): Try[TypesDef] = {
Try {
LOG.debug(s"Ignoring Union definition $i")
td
}
}
private def exceptionDef(td: TypesDef, i: ExceptionDef): Try[TypesDef] = {
Try {
LOG.debug(s"Ignoring Exception definition $i")
td
}
}
private def serviceDef(td: TypesDef, i: ServiceDef): Try[TypesDef] = {
Try {
LOG.debug(s"Ignoring Service definition $i")
td
}
}
private def applyList[T](fn: (TypesDef, T) => Try[TypesDef])(td: TypesDef, l: List[T]): Try[TypesDef] = {
l.foldLeft[Try[TypesDef]](Success(td))((b, a) => b.flatMap(t => fn(t, a)))
}
private def includes = applyList(includeDef) _
private def cppIncludes = applyList(cppIncludeDef) _
private def namespaces = applyList(namespaceDef) _
private def constants = applyList(constantDef) _
private def enums = applyList(enumDef) _
private def senums = applyList(senumDef) _
private def structs = applyList(structDef) _
private def unions = applyList(unionDef) _
private def exceptions = applyList(exceptionDef) _
private def services = applyList(serviceDef) _
}
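/*
 * Hedged usage sketch (editor's addition, referenced from the class scaladoc above): wiring ThriftTypesGen
 * for a few of the hive metastore structs defined in the test thrift file that follows. The classpath
 * resource path and the choice of struct/class/trait buckets are illustrative, not the project's test setup.
 */
object ThriftTypesGenSketch extends App {
  import org.apache.atlas.tools.thrift.{CompositeRelation, ThriftTypesGen}

  val gen = new ThriftTypesGen(
    structNames = List("Version", "SerDeInfo", "Order"),
    classNames = List("Table", "StorageDescriptor", "FieldSchema"),
    traitNames = List(),
    compositeRelations = List(CompositeRelation("Table", "sd", None)))

  // apply() loads the thrift resource from the classpath, parses it, and converts it to a TypesDef.
  val typesDef = gen("/test/resources/hive-metastore.thrift") // hypothetical resource path
  println(typesDef)
}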
#!/usr/local/bin/thrift -java
# copied from hive-metastore.thrift, for testing the ThriftParser
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#
# Thrift Service that the MetaStore is built on
#
include "share/fb303/if/fb303.thrift"
namespace java org.apache.hadoop.hive.metastore.api
namespace php metastore
namespace cpp Apache.Hadoop.Hive
const string DDL_TIME = "transient_lastDdlTime"
struct Version {
1: string version,
2: string comments
}
struct FieldSchema {
1: string name, // name of the field
2: string type, // type of the field. primitive types defined above, specify list<TYPE_NAME>, map<TYPE_NAME, TYPE_NAME> for lists & maps
3: string comment
}
struct Type {
1: string name, // one of the types in PrimitiveTypes or CollectionTypes or User defined types
2: optional string type1, // object type if the name is 'list' (LIST_TYPE), key type if the name is 'map' (MAP_TYPE)
3: optional string type2, // val type if the name is 'map' (MAP_TYPE)
4: optional list<FieldSchema> fields // if the name is one of the user defined types
}
enum HiveObjectType {
GLOBAL = 1,
DATABASE = 2,
TABLE = 3,
PARTITION = 4,
COLUMN = 5,
}
enum PrincipalType {
USER = 1,
ROLE = 2,
GROUP = 3,
}
const string HIVE_FILTER_FIELD_OWNER = "hive_filter_field_owner__"
const string HIVE_FILTER_FIELD_PARAMS = "hive_filter_field_params__"
const string HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__"
enum PartitionEventType {
LOAD_DONE = 1,
}
// Enums for transaction and lock management
enum TxnState {
COMMITTED = 1,
ABORTED = 2,
OPEN = 3,
}
enum LockLevel {
DB = 1,
TABLE = 2,
PARTITION = 3,
}
enum LockState {
ACQUIRED = 1, // requester has the lock
WAITING = 2, // requester is waiting for the lock and should call checklock at a later point to see if the lock has been obtained.
ABORT = 3, // the lock has been aborted, most likely due to timeout
NOT_ACQUIRED = 4, // returned only with lockNoWait, indicates the lock was not available and was not acquired
}
enum LockType {
SHARED_READ = 1,
SHARED_WRITE = 2,
EXCLUSIVE = 3,
}
enum CompactionType {
MINOR = 1,
MAJOR = 2,
}
enum GrantRevokeType {
GRANT = 1,
REVOKE = 2,
}
struct HiveObjectRef{
1: HiveObjectType objectType,
2: string dbName,
3: string objectName,
4: list<string> partValues,
5: string columnName,
}
struct PrivilegeGrantInfo {
1: string privilege,
2: i32 createTime,
3: string grantor,
4: PrincipalType grantorType,
5: bool grantOption,
}
struct HiveObjectPrivilege {
1: HiveObjectRef hiveObject,
2: string principalName,
3: PrincipalType principalType,
4: PrivilegeGrantInfo grantInfo,
}
struct PrivilegeBag {
1: list<HiveObjectPrivilege> privileges,
}
struct PrincipalPrivilegeSet {
1: map<string, list<PrivilegeGrantInfo>> userPrivileges, // user name -> privilege grant info
2: map<string, list<PrivilegeGrantInfo>> groupPrivileges, // group name -> privilege grant info
3: map<string, list<PrivilegeGrantInfo>> rolePrivileges, //role name -> privilege grant info
}
struct GrantRevokePrivilegeRequest {
1: GrantRevokeType requestType;
2: PrivilegeBag privileges;
3: optional bool revokeGrantOption; // Only for revoke request
}
struct GrantRevokePrivilegeResponse {
1: optional bool success;
}
struct Role {
1: string roleName,
2: i32 createTime,
3: string ownerName,
}
// Representation of a grant for a principal to a role
struct RolePrincipalGrant {
1: string roleName,
2: string principalName,
3: PrincipalType principalType,
4: bool grantOption,
5: i32 grantTime,
6: string grantorName,
7: PrincipalType grantorPrincipalType
}
struct GetRoleGrantsForPrincipalRequest {
1: required string principal_name,
2: required PrincipalType principal_type
}
struct GetRoleGrantsForPrincipalResponse {
1: required list<RolePrincipalGrant> principalGrants;
}
struct GetPrincipalsInRoleRequest {
1: required string roleName;
}
struct GetPrincipalsInRoleResponse {
1: required list<RolePrincipalGrant> principalGrants;
}
struct GrantRevokeRoleRequest {
1: GrantRevokeType requestType;
2: string roleName;
3: string principalName;
4: PrincipalType principalType;
5: optional string grantor; // Needed for grant
6: optional PrincipalType grantorType; // Needed for grant
7: optional bool grantOption;
}
struct GrantRevokeRoleResponse {
1: optional bool success;
}
// namespace for tables
struct Database {
1: string name,
2: string description,
3: string locationUri,
4: map<string, string> parameters, // properties associated with the database
5: optional PrincipalPrivilegeSet privileges,
6: optional string ownerName,
7: optional PrincipalType ownerType
}
// This object holds the information needed by SerDes
struct SerDeInfo {
1: string name, // name of the serde, table name by default
2: string serializationLib, // usually the class that implements the extractor & loader
3: map<string, string> parameters // initialization parameters
}
// sort order of a column (column name along with asc(1)/desc(0))
struct Order {
1: string col, // sort column name
2: i32 order // asc(1) or desc(0)
}
// this object holds all the information about skewed table
struct SkewedInfo {
1: list<string> skewedColNames, // skewed column names
2: list<list<string>> skewedColValues, //skewed values
3: map<list<string>, string> skewedColValueLocationMaps, //skewed value to location mappings
}
// this object holds all the information about physical storage of the data belonging to a table
struct StorageDescriptor {
1: list<FieldSchema> cols, // required (refer to types defined above)
2: string location, // defaults to <warehouse loc>/<db loc>/tablename
3: string inputFormat, // SequenceFileInputFormat (binary) or TextInputFormat` or custom format
4: string outputFormat, // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format
5: bool compressed, // compressed or not
6: i32 numBuckets, // this must be specified if there are any dimension columns
7: SerDeInfo serdeInfo, // serialization and deserialization information
8: list<string> bucketCols, // reducer grouping columns and clustering columns and bucketing columns`
9: list<Order> sortCols, // sort order of the data in each bucket
10: map<string, string> parameters, // any user supplied key value hash
11: optional SkewedInfo skewedInfo, // skewed information
12: optional bool storedAsSubDirectories // stored as subdirectories or not
}
// table information
struct Table {
1: string tableName, // name of the table
2: string dbName, // database name ('default')
3: string owner, // owner of this table
4: i32 createTime, // creation time of the table
5: i32 lastAccessTime, // last access time (usually this will be filled from HDFS and shouldn't be relied on)
6: i32 retention, // retention time
7: StorageDescriptor sd, // storage descriptor of the table
8: list<FieldSchema> partitionKeys, // partition keys of the table. only primitive types are supported
9: map<string, string> parameters, // to store comments or any other user level parameters
10: string viewOriginalText, // original view text, null for non-view
11: string viewExpandedText, // expanded view text, null for non-view
12: string tableType, // table type enum, e.g. EXTERNAL_TABLE
13: optional PrincipalPrivilegeSet privileges,
14: optional bool temporary=false
}
struct Partition {
1: list<string> values // string value is converted to appropriate partition key type
2: string dbName,
3: string tableName,
4: i32 createTime,
5: i32 lastAccessTime,
6: StorageDescriptor sd,
7: map<string, string> parameters,
8: optional PrincipalPrivilegeSet privileges
}
struct PartitionWithoutSD {
1: list<string> values // string value is converted to appropriate partition key type
2: i32 createTime,
3: i32 lastAccessTime,
4: string relativePath,
5: map<string, string> parameters,
6: optional PrincipalPrivilegeSet privileges
}
struct PartitionSpecWithSharedSD {
1: list<PartitionWithoutSD> partitions,
2: StorageDescriptor sd,
}
struct PartitionListComposingSpec {
1: list<Partition> partitions
}
struct PartitionSpec {
1: string dbName,
2: string tableName,
3: string rootPath,
4: optional PartitionSpecWithSharedSD sharedSDPartitionSpec,
5: optional PartitionListComposingSpec partitionList
}
struct Index {
1: string indexName, // unique with in the whole database namespace
2: string indexHandlerClass, // reserved
3: string dbName,
4: string origTableName,
5: i32 createTime,
6: i32 lastAccessTime,
7: string indexTableName,
8: StorageDescriptor sd,
9: map<string, string> parameters,
10: bool deferredRebuild
}
// column statistics
struct BooleanColumnStatsData {
1: required i64 numTrues,
2: required i64 numFalses,
3: required i64 numNulls
}
struct DoubleColumnStatsData {
1: optional double lowValue,
2: optional double highValue,
3: required i64 numNulls,
4: required i64 numDVs
}
struct LongColumnStatsData {
1: optional i64 lowValue,
2: optional i64 highValue,
3: required i64 numNulls,
4: required i64 numDVs
}
struct StringColumnStatsData {
1: required i64 maxColLen,
2: required double avgColLen,
3: required i64 numNulls,
4: required i64 numDVs
}
struct BinaryColumnStatsData {
1: required i64 maxColLen,
2: required double avgColLen,
3: required i64 numNulls
}
struct Decimal {
1: required binary unscaled,
3: required i16 scale
}
struct DecimalColumnStatsData {
1: optional Decimal lowValue,
2: optional Decimal highValue,
3: required i64 numNulls,
4: required i64 numDVs
}
union ColumnStatisticsData {
1: BooleanColumnStatsData booleanStats,
2: LongColumnStatsData longStats,
3: DoubleColumnStatsData doubleStats,
4: StringColumnStatsData stringStats,
5: BinaryColumnStatsData binaryStats,
6: DecimalColumnStatsData decimalStats
}
struct ColumnStatisticsObj {
1: required string colName,
2: required string colType,
3: required ColumnStatisticsData statsData
}
struct ColumnStatisticsDesc {
1: required bool isTblLevel,
2: required string dbName,
3: required string tableName,
4: optional string partName,
5: optional i64 lastAnalyzed
}
struct ColumnStatistics {
1: required ColumnStatisticsDesc statsDesc,
2: required list<ColumnStatisticsObj> statsObj;
}
struct AggrStats {
1: required list<ColumnStatisticsObj> colStats,
2: required i64 partsFound // number of partitions for which stats were found
}
struct SetPartitionsStatsRequest {
1: required list<ColumnStatistics> colStats
}
// schema of the table/query results etc.
struct Schema {
// column names, types, comments
1: list<FieldSchema> fieldSchemas, // delimiters etc
2: map<string, string> properties
}
// Key-value store to be used with selected
// Metastore APIs (create, alter methods).
// The client can pass environment properties / configs that can be
// accessed in hooks.
struct EnvironmentContext {
1: map<string, string> properties
}
// Return type for get_partitions_by_expr
struct PartitionsByExprResult {
1: required list<Partition> partitions,
// Whether the results has any (currently, all) partitions which may or may not match
2: required bool hasUnknownPartitions
}
struct PartitionsByExprRequest {
1: required string dbName,
2: required string tblName,
3: required binary expr,
4: optional string defaultPartitionName,
5: optional i16 maxParts=-1
}
struct TableStatsResult {
1: required list<ColumnStatisticsObj> tableStats
}
struct PartitionsStatsResult {
1: required map<string, list<ColumnStatisticsObj>> partStats
}
struct TableStatsRequest {
1: required string dbName,
2: required string tblName,
3: required list<string> colNames
}
struct PartitionsStatsRequest {
1: required string dbName,
2: required string tblName,
3: required list<string> colNames,
4: required list<string> partNames
}
// Return type for add_partitions_req
struct AddPartitionsResult {
1: optional list<Partition> partitions,
}
// Request type for add_partitions_req
struct AddPartitionsRequest {
1: required string dbName,
2: required string tblName,
3: required list<Partition> parts,
4: required bool ifNotExists,
5: optional bool needResult=true
}
// Return type for drop_partitions_req
struct DropPartitionsResult {
1: optional list<Partition> partitions,
}
struct DropPartitionsExpr {
1: required binary expr;
2: optional i32 partArchiveLevel;
}
union RequestPartsSpec {
1: list<string> names;
2: list<DropPartitionsExpr> exprs;
}
// Request type for drop_partitions_req
// TODO: we might want to add "bestEffort" flag; where a subset can fail
struct DropPartitionsRequest {
1: required string dbName,
2: required string tblName,
3: required RequestPartsSpec parts,
4: optional bool deleteData,
5: optional bool ifExists=true, // currently verified on client
6: optional bool ignoreProtection,
7: optional EnvironmentContext environmentContext,
8: optional bool needResult=true
}
enum FunctionType {
JAVA = 1,
}
enum ResourceType {
JAR = 1,
FILE = 2,
ARCHIVE = 3,
}
struct ResourceUri {
1: ResourceType resourceType,
2: string uri,
}
// User-defined function
struct Function {
1: string functionName,
2: string dbName,
3: string className,
4: string ownerName,
5: PrincipalType ownerType,
6: i32 createTime,
7: FunctionType functionType,
8: list<ResourceUri> resourceUris,
}
// Structs for transaction and locks
struct TxnInfo {
1: required i64 id,
2: required TxnState state,
3: required string user, // used in 'show transactions' to help admins find who has open transactions
4: required string hostname, // used in 'show transactions' to help admins find who has open transactions
}
struct GetOpenTxnsInfoResponse {
1: required i64 txn_high_water_mark,
2: required list<TxnInfo> open_txns,
}
struct GetOpenTxnsResponse {
1: required i64 txn_high_water_mark,
2: required set<i64> open_txns,
}
struct OpenTxnRequest {
1: required i32 num_txns,
2: required string user,
3: required string hostname,
}
struct OpenTxnsResponse {
1: required list<i64> txn_ids,
}
struct AbortTxnRequest {
1: required i64 txnid,
}
struct CommitTxnRequest {
1: required i64 txnid,
}
struct LockComponent {
1: required LockType type,
2: required LockLevel level,
3: required string dbname,
4: optional string tablename,
5: optional string partitionname,
}
struct LockRequest {
1: required list<LockComponent> component,
2: optional i64 txnid,
3: required string user, // used in 'show locks' to help admins find who has open locks
4: required string hostname, // used in 'show locks' to help admins find who has open locks
}
struct LockResponse {
1: required i64 lockid,
2: required LockState state,
}
struct CheckLockRequest {
1: required i64 lockid,
}
struct UnlockRequest {
1: required i64 lockid,
}
struct ShowLocksRequest {
}
struct ShowLocksResponseElement {
1: required i64 lockid,
2: required string dbname,
3: optional string tablename,
4: optional string partname,
5: required LockState state,
6: required LockType type,
7: optional i64 txnid,
8: required i64 lastheartbeat,
9: optional i64 acquiredat,
10: required string user,
11: required string hostname,
}
struct ShowLocksResponse {
1: list<ShowLocksResponseElement> locks,
}
struct HeartbeatRequest {
1: optional i64 lockid,
2: optional i64 txnid
}
struct HeartbeatTxnRangeRequest {
1: required i64 min,
2: required i64 max
}
struct HeartbeatTxnRangeResponse {
1: required set<i64> aborted,
2: required set<i64> nosuch
}
struct CompactionRequest {
1: required string dbname,
2: required string tablename,
3: optional string partitionname,
4: required CompactionType type,
5: optional string runas,
}
struct ShowCompactRequest {
}
struct ShowCompactResponseElement {
1: required string dbname,
2: required string tablename,
3: optional string partitionname,
4: required CompactionType type,
5: required string state,
6: optional string workerid,
7: optional i64 start,
8: optional string runAs,
}
struct ShowCompactResponse {
1: required list<ShowCompactResponseElement> compacts,
}
exception MetaException {
1: string message
}
exception UnknownTableException {
1: string message
}
exception UnknownDBException {
1: string message
}
exception AlreadyExistsException {
1: string message
}
exception InvalidPartitionException {
1: string message
}
exception UnknownPartitionException {
1: string message
}
exception InvalidObjectException {
1: string message
}
exception NoSuchObjectException {
1: string message
}
exception IndexAlreadyExistsException {
1: string message
}
exception InvalidOperationException {
1: string message
}
exception ConfigValSecurityException {
1: string message
}
exception InvalidInputException {
1: string message
}
// Transaction and lock exceptions
exception NoSuchTxnException {
1: string message
}
exception TxnAbortedException {
1: string message
}
exception TxnOpenException {
1: string message
}
exception NoSuchLockException {
1: string message
}
/**
* This interface is live.
*/
service ThriftHiveMetastore extends fb303.FacebookService
{
string getMetaConf(1:string key) throws(1:MetaException o1)
void setMetaConf(1:string key, 2:string value) throws(1:MetaException o1)
void create_database(1:Database database) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
Database get_database(1:string name) throws(1:NoSuchObjectException o1, 2:MetaException o2)
void drop_database(1:string name, 2:bool deleteData, 3:bool cascade) throws(1:NoSuchObjectException o1, 2:InvalidOperationException o2, 3:MetaException o3)
list<string> get_databases(1:string pattern) throws(1:MetaException o1)
list<string> get_all_databases() throws(1:MetaException o1)
void alter_database(1:string dbname, 2:Database db) throws(1:MetaException o1, 2:NoSuchObjectException o2)
// returns the type with the given name (make separate calls for the dependent types if needed)
Type get_type(1:string name) throws(1:MetaException o1, 2:NoSuchObjectException o2)
bool create_type(1:Type type) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
bool drop_type(1:string type) throws(1:MetaException o1, 2:NoSuchObjectException o2)
map<string, Type> get_type_all(1:string name)
throws(1:MetaException o2)
// Gets a list of FieldSchemas describing the columns of a particular table
list<FieldSchema> get_fields(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3),
// Gets a list of FieldSchemas describing both the columns and the partition keys of a particular table
list<FieldSchema> get_schema(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3)
// create a Hive table. The following fields must be set:
// tableName
// database (only 'default' for now until Hive QL supports databases)
// owner (not needed, but good to have for tracking purposes)
// sd.cols (list of field schemas)
// sd.inputFormat (SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat)
// sd.outputFormat (SequenceFileInputFormat (binary) or TextInputFormat)
// sd.serdeInfo.serializationLib (SerDe class name, e.g. org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe)
// * See notes on DDL_TIME
void create_table(1:Table tbl) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:NoSuchObjectException o4)
void create_table_with_environment_context(1:Table tbl,
2:EnvironmentContext environment_context)
throws (1:AlreadyExistsException o1,
2:InvalidObjectException o2, 3:MetaException o3,
4:NoSuchObjectException o4)
// drops the table and all the partitions associated with it if the table has partitions;
// deletes the data (including partitions) if deleteData is set to true
void drop_table(1:string dbname, 2:string name, 3:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o3)
void drop_table_with_environment_context(1:string dbname, 2:string name, 3:bool deleteData,
4:EnvironmentContext environment_context)
throws(1:NoSuchObjectException o1, 2:MetaException o3)
list<string> get_tables(1: string db_name, 2: string pattern) throws (1: MetaException o1)
list<string> get_all_tables(1: string db_name) throws (1: MetaException o1)
Table get_table(1:string dbname, 2:string tbl_name)
throws (1:MetaException o1, 2:NoSuchObjectException o2)
list<Table> get_table_objects_by_name(1:string dbname, 2:list<string> tbl_names)
throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3)
// Get a list of table names that match a filter.
// The filter operators are LIKE, <, <=, >, >=, =, <>
//
// In the filter statement, values interpreted as strings must be enclosed in quotes,
// while values interpreted as integers should not be. Strings and integers are the only
// supported value types.
//
// The currently supported key names in the filter are:
// Constants.HIVE_FILTER_FIELD_OWNER, which filters on the tables' owner's name
// and supports all filter operators
// Constants.HIVE_FILTER_FIELD_LAST_ACCESS, which filters on the last access times
// and supports all filter operators except LIKE
// Constants.HIVE_FILTER_FIELD_PARAMS, which filters on the tables' parameter keys and values
// and only supports the filter operators = and <>.
// Append the parameter key name to HIVE_FILTER_FIELD_PARAMS in the filter statement.
// For example, to filter on parameter keys called "retention", the key name in the filter
// statement should be Constants.HIVE_FILTER_FIELD_PARAMS + "retention"
// Also, = and <> only work for keys that exist
// in the tables. E.g., if you are looking for tables where key1 <> value, it will only
// look at tables that have a value for the parameter key1.
// Some example filter statements include:
// filter = Constants.HIVE_FILTER_FIELD_OWNER + " like \".*test.*\" and " +
// Constants.HIVE_FILTER_FIELD_LAST_ACCESS + " = 0";
// filter = Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"30\" or " +
// Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"90\""
// @param dbName
// The name of the database from which you will retrieve the table names
// @param filterType
// The type of filter
// @param filter
// The filter string
// @param max_tables
// The maximum number of tables returned
// @return A list of table names that match the desired filter
list<string> get_table_names_by_filter(1:string dbname, 2:string filter, 3:i16 max_tables=-1)
throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3)
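// Illustrative sketch (not part of this IDL): building the filter strings described in the
// comment above in Scala. The constant names mirror the comment; their concrete values live
// in the Hive metastore constants class and the string values used here are assumptions.
/*
val HIVE_FILTER_FIELD_OWNER = "hive_filter_field_owner__"              // assumed value
val HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__"  // assumed value
val HIVE_FILTER_FIELD_PARAMS = "hive_filter_field_params__"            // assumed value

val byOwner = HIVE_FILTER_FIELD_OWNER + " like \".*test.*\" and " +
  HIVE_FILTER_FIELD_LAST_ACCESS + " = 0"
val byRetention = HIVE_FILTER_FIELD_PARAMS + "retention = \"30\" or " +
  HIVE_FILTER_FIELD_PARAMS + "retention = \"90\""
// either string would be passed as the 'filter' argument of get_table_names_by_filter
*/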
// alter table applies only to future partitions, not to existing partitions
// * See notes on DDL_TIME
void alter_table(1:string dbname, 2:string tbl_name, 3:Table new_tbl)
throws (1:InvalidOperationException o1, 2:MetaException o2)
void alter_table_with_environment_context(1:string dbname, 2:string tbl_name,
3:Table new_tbl, 4:EnvironmentContext environment_context)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// the following applies to only tables that have partitions
// * See notes on DDL_TIME
Partition add_partition(1:Partition new_part)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition add_partition_with_environment_context(1:Partition new_part,
2:EnvironmentContext environment_context)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2,
3:MetaException o3)
i32 add_partitions(1:list<Partition> new_parts)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
i32 add_partitions_pspec(1:list<PartitionSpec> new_parts)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
AddPartitionsResult add_partitions_req(1:AddPartitionsRequest request)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition_with_environment_context(1:string db_name, 2:string tbl_name,
3:list<string> part_vals, 4:EnvironmentContext environment_context)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition_by_name_with_environment_context(1:string db_name, 2:string tbl_name,
3:string part_name, 4:EnvironmentContext environment_context)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
bool drop_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
bool drop_partition_with_environment_context(1:string db_name, 2:string tbl_name,
3:list<string> part_vals, 4:bool deleteData, 5:EnvironmentContext environment_context)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
bool drop_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name, 4:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
bool drop_partition_by_name_with_environment_context(1:string db_name, 2:string tbl_name,
3:string part_name, 4:bool deleteData, 5:EnvironmentContext environment_context)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
DropPartitionsResult drop_partitions_req(1: DropPartitionsRequest req)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
Partition get_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
Partition exchange_partition(1:map<string, string> partitionSpecs, 2:string source_db,
3:string source_table_name, 4:string dest_db, 5:string dest_table_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2, 3:InvalidObjectException o3,
4:InvalidInputException o4)
Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 3:list<string> part_vals,
4: string user_name, 5: list<string> group_names) throws(1:MetaException o1, 2:NoSuchObjectException o2)
Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string part_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// returns all the partitions for this table in reverse chronological order.
// If max parts is given then it will return only that many.
list<Partition> get_partitions(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<Partition> get_partitions_with_auth(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1,
4: string user_name, 5: list<string> group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<PartitionSpec> get_partitions_pspec(1:string db_name, 2:string tbl_name, 3:i32 max_parts=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_partition_names(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1)
throws(1:MetaException o2)
// get_partition*_ps methods allow filtering by a partial partition specification,
// as needed for dynamic partitions. The values that are not restricted should
// be empty strings. Nulls were considered (instead of "") but caused errors in
// generated Python code. The size of part_vals may be smaller than the
// number of partition columns - the unspecified values are considered the same
// as "".
list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name
3:list<string> part_vals, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1,
5: string user_name, 6: list<string> group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_partition_names_ps(1:string db_name,
2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
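// Illustrative sketch (not part of this IDL): a partial partition specification as described
// above. For a table partitioned by (year, month, day), unrestricted trailing values are
// passed as empty strings; the column layout here is made up.
/*
// matches every partition with year = "2014", any month, any day
val partVals = java.util.Arrays.asList("2014", "", "")
// partVals would be passed as part_vals to get_partitions_ps / get_partition_names_ps
*/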
// get the partitions matching the given partition filter
list<Partition> get_partitions_by_filter(1:string db_name 2:string tbl_name
3:string filter, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// List partitions as PartitionSpec instances.
list<PartitionSpec> get_part_specs_by_filter(1:string db_name 2:string tbl_name
3:string filter, 4:i32 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// get the partitions matching the given partition filter
// unlike get_partitions_by_filter, takes a serialized Hive expression, and with that can work
// with any filter (get_partitions_by_filter only works if the filter can be pushed down to JDOQL)
PartitionsByExprResult get_partitions_by_expr(1:PartitionsByExprRequest req)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// get partitions given a list of partition names
list<Partition> get_partitions_by_names(1:string db_name 2:string tbl_name 3:list<string> names)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// changes the partition to the new partition object. The partition is identified from the part values
// in new_part
// * See notes on DDL_TIME
void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition new_part)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// change a list of partitions. All partitions are altered atomically and all
// prehooks are fired together followed by all post hooks
void alter_partitions(1:string db_name, 2:string tbl_name, 3:list<Partition> new_parts)
throws (1:InvalidOperationException o1, 2:MetaException o2)
void alter_partition_with_environment_context(1:string db_name,
2:string tbl_name, 3:Partition new_part,
4:EnvironmentContext environment_context)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// rename the old partition to the new partition object by changing old part values to the part values
// in the new_part. old partition is identified from part_vals.
// partition keys in new_part should be the same as those in old partition.
void rename_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:Partition new_part)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// returns whether or not the partition name is valid based on the value of the config
// hive.metastore.partition.name.whitelist.pattern
bool partition_name_has_valid_characters(1:list<string> part_vals, 2:bool throw_exception)
throws(1: MetaException o1)
// gets the value of the configuration key in the metastore server. returns
// defaultValue if the key does not exist. if the configuration key does not
// begin with "hive", "mapred", or "hdfs", a ConfigValSecurityException is
// thrown.
string get_config_value(1:string name, 2:string defaultValue)
throws(1:ConfigValSecurityException o1)
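// Illustrative sketch (not part of this IDL): the prefix restriction described above,
// expressed as a small Scala predicate; the helper name is hypothetical.
/*
def isReadableConfigKey(name: String): Boolean =
  Seq("hive", "mapred", "hdfs").exists(p => name.startsWith(p))
// e.g. get_config_value("hive.metastore.warehouse.dir", "") is permitted,
// while keys outside these prefixes raise ConfigValSecurityException
*/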
// converts a partition name into a partition values array
list<string> partition_name_to_vals(1: string part_name)
throws(1: MetaException o1)
// converts a partition name into a partition specification (a mapping from
// the partition cols to the values)
map<string, string> partition_name_to_spec(1: string part_name)
throws(1: MetaException o1)
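// Illustrative sketch (not part of this IDL): the name-to-values and name-to-spec
// conversions above for a Hive-style partition name of the form "k1=v1/k2=v2",
// implemented locally in Scala for clarity (the server additionally unescapes values).
/*
val partName = "ds=2014-12-03/hr=12"
val vals: List[String] = partName.split("/").toList.map(_.split("=")(1))
//   -> List("2014-12-03", "12"), as partition_name_to_vals would return
val spec: Map[String, String] =
  partName.split("/").toList.map { kv => val Array(k, v) = kv.split("=", 2); k -> v }.toMap
//   -> Map("ds" -> "2014-12-03", "hr" -> "12"), as partition_name_to_spec would return
*/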
void markPartitionForEvent(1:string db_name, 2:string tbl_name, 3:map<string,string> part_vals,
4:PartitionEventType eventType) throws (1: MetaException o1, 2: NoSuchObjectException o2,
3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5,
6: InvalidPartitionException o6)
bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 3:map<string,string> part_vals,
4: PartitionEventType eventType) throws (1: MetaException o1, 2:NoSuchObjectException o2,
3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5,
6: InvalidPartitionException o6)
//index
Index add_index(1:Index new_index, 2: Table index_table)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name, 4:Index new_idx)
throws (1:InvalidOperationException o1, 2:MetaException o2)
bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string index_name, 4:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string index_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
list<Index> get_indexes(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
throws(1:MetaException o2)
// column statistics interfaces
// update APIs persist the column statistics object(s) that are passed in. If statistics already
// exist for one or more columns, the existing statistics will be overwritten. The update APIs
// validate that the dbName, tableName, partName, colName[] passed in as part of the ColumnStatistics
// struct are valid, and throw InvalidInputException/NoSuchObjectException if found to be invalid
bool update_table_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
bool update_partition_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
// get APIs return the column statistics corresponding to db_name, tbl_name, [part_name], col_name if
// such statistics exist. If the required statistics don't exist, the get APIs throw NoSuchObjectException
// For instance, if get_table_column_statistics is called on a partitioned table for which only
// partition level column stats exist, get_table_column_statistics will throw NoSuchObjectException
ColumnStatistics get_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
(1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidInputException o3, 4:InvalidObjectException o4)
ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name,
4:string col_name) throws (1:NoSuchObjectException o1, 2:MetaException o2,
3:InvalidInputException o3, 4:InvalidObjectException o4)
TableStatsResult get_table_statistics_req(1:TableStatsRequest request) throws
(1:NoSuchObjectException o1, 2:MetaException o2)
PartitionsStatsResult get_partitions_statistics_req(1:PartitionsStatsRequest request) throws
(1:NoSuchObjectException o1, 2:MetaException o2)
AggrStats get_aggr_stats_for(1:PartitionsStatsRequest request) throws
(1:NoSuchObjectException o1, 2:MetaException o2)
bool set_aggr_stats_for(1:SetPartitionsStatsRequest request) throws
(1:NoSuchObjectException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
// delete APIs attempt to delete column statistics, if found, associated with a given db_name, tbl_name, [part_name]
// and col_name. If the delete API doesn't find the statistics record in the metastore, it throws NoSuchObjectException.
// The delete APIs validate the input and, if the input is invalid, throw InvalidInputException/InvalidObjectException.
bool delete_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name, 4:string col_name) throws
(1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
4:InvalidInputException o4)
bool delete_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
(1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
4:InvalidInputException o4)
//
// user-defined functions
//
void create_function(1:Function func)
throws (1:AlreadyExistsException o1,
2:InvalidObjectException o2,
3:MetaException o3,
4:NoSuchObjectException o4)
void drop_function(1:string dbName, 2:string funcName)
throws (1:NoSuchObjectException o1, 2:MetaException o3)
void alter_function(1:string dbName, 2:string funcName, 3:Function newFunc)
throws (1:InvalidOperationException o1, 2:MetaException o2)
list<string> get_functions(1:string dbName, 2:string pattern)
throws (1:MetaException o1)
Function get_function(1:string dbName, 2:string funcName)
throws (1:MetaException o1, 2:NoSuchObjectException o2)
//authorization privileges
bool create_role(1:Role role) throws(1:MetaException o1)
bool drop_role(1:string role_name) throws(1:MetaException o1)
list<string> get_role_names() throws(1:MetaException o1)
// Deprecated, use grant_revoke_role()
bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type,
4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option) throws(1:MetaException o1)
// Deprecated, use grant_revoke_role()
bool revoke_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type)
throws(1:MetaException o1)
list<Role> list_roles(1:string principal_name, 2:PrincipalType principal_type) throws(1:MetaException o1)
GrantRevokeRoleResponse grant_revoke_role(1:GrantRevokeRoleRequest request) throws(1:MetaException o1)
// get all role-grants for users/roles that have been granted the given role
// Note that in the returned list of RolePrincipalGrants, the roleName is
// redundant as it would match the role_name argument of this function
GetPrincipalsInRoleResponse get_principals_in_role(1: GetPrincipalsInRoleRequest request) throws(1:MetaException o1)
// get grant information of all roles granted to the given principal
// Note that in the returned list of RolePrincipalGrants, the principal name and type are
// redundant as they would match the principal name and type arguments of this function
GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(1: GetRoleGrantsForPrincipalRequest request) throws(1:MetaException o1)
PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string user_name,
3: list<string> group_names) throws(1:MetaException o1)
list<HiveObjectPrivilege> list_privileges(1:string principal_name, 2:PrincipalType principal_type,
3: HiveObjectRef hiveObject) throws(1:MetaException o1)
// Deprecated, use grant_revoke_privileges()
bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
// Deprecated, use grant_revoke_privileges()
bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
GrantRevokePrivilegeResponse grant_revoke_privileges(1:GrantRevokePrivilegeRequest request) throws(1:MetaException o1);
// this is used by metastore client to send UGI information to metastore server immediately
// after setting up a connection.
list<string> set_ugi(1:string user_name, 2:list<string> group_names) throws (1:MetaException o1)
//Authentication (delegation token) interfaces
// get metastore server delegation token for use from the map/reduce tasks to authenticate
// to metastore server
string get_delegation_token(1:string token_owner, 2:string renewer_kerberos_principal_name)
throws (1:MetaException o1)
// method to renew delegation token obtained from metastore server
i64 renew_delegation_token(1:string token_str_form) throws (1:MetaException o1)
// method to cancel delegation token obtained from metastore server
void cancel_delegation_token(1:string token_str_form) throws (1:MetaException o1)
// Transaction and lock management calls
// Get just list of open transactions
GetOpenTxnsResponse get_open_txns()
// Get list of open transactions with state (open, aborted)
GetOpenTxnsInfoResponse get_open_txns_info()
OpenTxnsResponse open_txns(1:OpenTxnRequest rqst)
void abort_txn(1:AbortTxnRequest rqst) throws (1:NoSuchTxnException o1)
void commit_txn(1:CommitTxnRequest rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
LockResponse lock(1:LockRequest rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
LockResponse check_lock(1:CheckLockRequest rqst)
throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2, 3:NoSuchLockException o3)
void unlock(1:UnlockRequest rqst) throws (1:NoSuchLockException o1, 2:TxnOpenException o2)
ShowLocksResponse show_locks(1:ShowLocksRequest rqst)
void heartbeat(1:HeartbeatRequest ids) throws (1:NoSuchLockException o1, 2:NoSuchTxnException o2, 3:TxnAbortedException o3)
HeartbeatTxnRangeResponse heartbeat_txn_range(1:HeartbeatTxnRangeRequest txns)
void compact(1:CompactionRequest rqst)
ShowCompactResponse show_compact(1:ShowCompactRequest rqst)
}
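// Illustrative sketch (not part of this IDL): the transaction calls above chained together,
// assuming the Thrift-generated Java request/response classes with their required-field
// constructors; 'client' is assumed to be a connected ThriftHiveMetastore.Iface, and the
// user/host values are made up.
/*
val user = "hive"                                    // made-up caller identity
val host = "gateway-01.example.com"                  // made-up host name
val opened: OpenTxnsResponse = client.open_txns(new OpenTxnRequest(1, user, host))
val txnId: Long = opened.getTxn_ids.get(0)
try {
  // ... write data under txnId, heartbeating periodically ...
  val hb = new HeartbeatRequest()
  hb.setTxnid(txnId)
  client.heartbeat(hb)
  client.commit_txn(new CommitTxnRequest(txnId))
} catch {
  case e: Exception => client.abort_txn(new AbortTxnRequest(txnId))
}
*/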
// * Note about the DDL_TIME: When creating or altering a table or a partition,
// if the DDL_TIME is not set, the current time will be used.
// For storing info about archived partitions in parameters
// Whether the partition is archived
const string IS_ARCHIVED = "is_archived",
// The original location of the partition, before archiving. After archiving,
// this directory will contain the archive. When the partition
// is dropped, this directory will be deleted
const string ORIGINAL_LOCATION = "original_location",
// Whether or not the table is considered immutable - immutable tables can only be
// overwritten or created if unpartitioned, or if partitioned, partitions inside them
// can only be overwritten or created. Immutability supports write-once and replace
// semantics, but not append.
const string IS_IMMUTABLE = "immutable",
// these should be needed only for backward compatibility with filestore
const string META_TABLE_COLUMNS = "columns",
const string META_TABLE_COLUMN_TYPES = "columns.types",
const string BUCKET_FIELD_NAME = "bucket_field_name",
const string BUCKET_COUNT = "bucket_count",
const string FIELD_TO_DIMENSION = "field_to_dimension",
const string META_TABLE_NAME = "name",
const string META_TABLE_DB = "db",
const string META_TABLE_LOCATION = "location",
const string META_TABLE_SERDE = "serde",
const string META_TABLE_PARTITION_COLUMNS = "partition_columns",
const string META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types",
const string FILE_INPUT_FORMAT = "file.inputformat",
const string FILE_OUTPUT_FORMAT = "file.outputformat",
const string META_TABLE_STORAGE = "storage_handler",
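// Illustrative sketch (not part of this IDL): how the parameter-key constants above are
// typically consulted against a table's parameters map; the map contents are made up.
/*
val tableParams: Map[String, String] =
  Map("is_archived" -> "true", "immutable" -> "false")              // made-up example values
val isArchived  = tableParams.get("is_archived").exists(_ == "true") // IS_ARCHIVED
val isImmutable = tableParams.get("immutable").exists(_ == "true")   // IS_IMMUTABLE
*/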
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.dsl
import org.apache.atlas.tools.hive.HiveMockMetadataService
import org.apache.atlas.typesystem.types.utils.TypesUtil
import org.apache.atlas.typesystem.types.{DataTypes, StructType, TypeSystem}
import org.json4s.native.JsonMethods._
import org.junit.{Assert, Before, Test}
/**
* DSL Test.
*/
class DSLTest {
val STRUCT_TYPE_1: String = "t1"
val STRUCT_TYPE_2: String = "t2"
@Before
def setup {
val ts: TypeSystem = TypeSystem.getInstance
ts.reset()
val structType: StructType = ts.defineStructType(
STRUCT_TYPE_1, true,
TypesUtil.createRequiredAttrDef("a", DataTypes.INT_TYPE),
TypesUtil.createOptionalAttrDef("b", DataTypes.BOOLEAN_TYPE),
TypesUtil.createOptionalAttrDef("c", DataTypes.BYTE_TYPE),
TypesUtil.createOptionalAttrDef("d", DataTypes.SHORT_TYPE),
TypesUtil.createOptionalAttrDef("e", DataTypes.INT_TYPE),
TypesUtil.createOptionalAttrDef("f", DataTypes.INT_TYPE),
TypesUtil.createOptionalAttrDef("g", DataTypes.LONG_TYPE),
TypesUtil.createOptionalAttrDef("h", DataTypes.FLOAT_TYPE),
TypesUtil.createOptionalAttrDef("i", DataTypes.DOUBLE_TYPE),
TypesUtil.createOptionalAttrDef("j", DataTypes.BIGINTEGER_TYPE),
TypesUtil.createOptionalAttrDef("k", DataTypes.BIGDECIMAL_TYPE),
TypesUtil.createOptionalAttrDef("l", DataTypes.DATE_TYPE),
TypesUtil.createOptionalAttrDef("m", ts.defineArrayType(DataTypes.INT_TYPE)),
TypesUtil.createOptionalAttrDef("n", ts.defineArrayType(DataTypes.BIGDECIMAL_TYPE)),
TypesUtil.createOptionalAttrDef("o",
ts.defineMapType(DataTypes.STRING_TYPE, DataTypes.DOUBLE_TYPE)))
val recursiveStructType: StructType = ts.defineStructType(
STRUCT_TYPE_2, true,
TypesUtil.createRequiredAttrDef("a", DataTypes.INT_TYPE),
TypesUtil.createOptionalAttrDef("s", STRUCT_TYPE_2))
}
@Test def test1 {
// 1. Existing Types in System
//Assert.assertEquals(s"${listTypes}", "[t2, t1, int, array<bigdecimal>, long, double, date, float, short, biginteger, byte, string, boolean, bigdecimal, map<string,double>, array<int>]")
defineStructType("mytype",
attrDef("a", INT_TYPE, ATTR_REQUIRED),
attrDef("b", BOOLEAN_TYPE),
attrDef("c", BYTE_TYPE),
attrDef("d", SHORT_TYPE),
attrDef("e", INT_TYPE),
attrDef("f", INT_TYPE),
attrDef("g", LONG_TYPE),
attrDef("h", FLOAT_TYPE),
attrDef("i", DOUBLE_TYPE),
attrDef("j", BIGINT_TYPE),
attrDef("k", BIGDECIMAL_TYPE),
attrDef("l", DATE_TYPE),
attrDef("m", arrayType(INT_TYPE)),
attrDef("n", arrayType(BIGDECIMAL_TYPE)),
attrDef("o", mapType(STRING_TYPE, DOUBLE_TYPE)))
// 2. 'mytype' available as a Type
Assert.assertEquals(s"${listTypes}", "[mytype,t1,t2]")
// 3. Create a 'mytype' instance from Json
val i = createInstance("mytype", """
{
"$typeName$":"mytype",
"e":1,
"n":[1,1.1],
"h":1.0,
"b":true,
"k":1,
"j":1,
"d":2,
"m":[1,1],
"g":1,
"a":1,
"i":1.0,
"c":1,
"l":"2014-12-03T08:00:00.000Z",
"f":1,
"o":{
"b":2.0,
"a":1.0
}
}
""")
// 4. Navigate the mytype instance in code
// Examples of navigating the mytype instance:
Assert.assertEquals(s"${i.a}", "1")
Assert.assertEquals(s"${i.o}", "{b=2.0, a=1.0}")
Assert.assertEquals(s"${i.o.asInstanceOf[java.util.Map[_, _]].keySet}", "[b, a]")
// 5. Serialize mytype instance to Json
Assert.assertEquals(s"${pretty(render(i))}", "{\n \"$typeName$\":\"mytype\",\n \"e\":1," + "\n \"n\":[1,1.100000000000000088817841970012523233890533447265625],\n \"h\":1.0,\n \"b\":true,\n \"k\":1,\n \"j\":1,\n \"d\":2,\n \"m\":[1,1],\n \"g\":1,\n \"a\":1,\n \"i\":1.0,\n \"c\":1,\n \"l\":\"2014-12-03T08:00:00.000Z\",\n \"f\":1,\n \"o\":{\n \"b\":2.0,\n \"a\":1.0\n }\n}")
}
@Test def test2 {
// 1. Existing Types in System
Assert.assertEquals(s"${listTypes}", "[t1,t2]")
val addrType = defineStructType("addressType",
attrDef("houseNum", INT_TYPE, ATTR_REQUIRED),
attrDef("street", STRING_TYPE, ATTR_REQUIRED),
attrDef("city", STRING_TYPE, ATTR_REQUIRED),
attrDef("state", STRING_TYPE, ATTR_REQUIRED),
attrDef("zip", INT_TYPE, ATTR_REQUIRED),
attrDef("country", STRING_TYPE, ATTR_REQUIRED)
)
val personType = defineStructType("personType",
attrDef("first_name", STRING_TYPE, ATTR_REQUIRED),
attrDef("last_name", STRING_TYPE, ATTR_REQUIRED),
attrDef("address", addrType)
)
// 2. Updated Types in System
Assert.assertEquals(s"${listTypes}", "[addressType,personType,t1,t2]")
// 3. Construct a Person in Code
val person = createInstance("personType")
val address = createInstance("addressType")
person.first_name = "Meta"
person.last_name = "Hadoop"
address.houseNum = 3460
address.street = "W Bayshore Rd"
address.city = "Palo Alto"
address.state = "CA"
address.zip = 94303
address.country = "USA"
person.address = address
// 4. Convert to Json
Assert.assertEquals(s"${pretty(render(person))}", "{\n \"$typeName$\":\"personType\",\n \"first_name\":\"Meta\",\n \"address\":{\n \"$typeName$\":\"addressType\",\n \"houseNum\":3460,\n \"city\":\"Palo Alto\",\n \"country\":\"USA\",\n \"state\":\"CA\",\n \"zip\":94303,\n \"street\":\"W Bayshore Rd\"\n },\n \"last_name\":\"Hadoop\"\n}");
val p2 = createInstance("personType", """{
"first_name":"Meta",
"address":{
"houseNum":3460,
"city":"Palo Alto",
"country":"USA",
"state":"CA",
"zip":94303,
"street":"W Bayshore Rd"
},
"last_name":"Hadoop"
}""")
}
@Test def testHive(): Unit = {
val hiveTable = HiveMockMetadataService.getTable("tpcds", "date_dim")
//println(hiveTable)
// FieldSchema(name: String, typeName: String, comment: String)
val fieldType = defineStructType("FieldSchema",
attrDef("name", STRING_TYPE, ATTR_REQUIRED),
attrDef("typeName", STRING_TYPE, ATTR_REQUIRED),
attrDef("comment", STRING_TYPE)
)
/*
SerDe(name : String, serializationLib : String, parameters : Map[String, String])
*/
defineStructType("SerDe",
attrDef("name", STRING_TYPE, ATTR_REQUIRED),
attrDef("serializationLib", STRING_TYPE, ATTR_REQUIRED),
attrDef("parameters", mapType(STRING_TYPE, STRING_TYPE))
)
/*
StorageDescriptor(fields : List[FieldSchema],
location : String, inputFormat : String,
outputFormat : String, compressed : Boolean,
numBuckets : Int, bucketColumns : List[String],
sortColumns : List[String],
parameters : Map[String, String],
storedAsSubDirs : Boolean
)
*/
val sdType = defineStructType("StorageDescriptor",
attrDef("location", STRING_TYPE, ATTR_REQUIRED),
attrDef("inputFormat", STRING_TYPE, ATTR_REQUIRED),
attrDef("outputFormat", STRING_TYPE, ATTR_REQUIRED),
attrDef("compressed", BOOLEAN_TYPE),
attrDef("numBuckets", INT_TYPE),
attrDef("bucketColumns", arrayType(STRING_TYPE)),
attrDef("sortColumns", arrayType(STRING_TYPE)),
attrDef("parameters", mapType(STRING_TYPE, STRING_TYPE)),
attrDef("storedAsSubDirs", BOOLEAN_TYPE)
)
/*
case class Table(dbName : String, tableName : String, storageDesc : StorageDescriptor,
parameters : Map[String, String],
tableType : String)
*/
defineStructType("Table",
attrDef("dbName", STRING_TYPE, ATTR_REQUIRED),
attrDef("tableName", STRING_TYPE, ATTR_REQUIRED),
attrDef("storageDesc", sdType, ATTR_REQUIRED),
attrDef("compressed", BOOLEAN_TYPE),
attrDef("numBuckets", INT_TYPE),
attrDef("bucketColumns", arrayType(STRING_TYPE)),
attrDef("sortColumns", arrayType(STRING_TYPE)),
attrDef("parameters", mapType(STRING_TYPE, STRING_TYPE)),
attrDef("storedAsSubDirs", BOOLEAN_TYPE)
)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.hive
object HiveMockMetadataService {
def getTable(dbName: String, table: String): Table = {
return Table(dbName, table,
StorageDescriptor(
List[FieldSchema](
FieldSchema("d_date_sk", "int", null),
FieldSchema("d_date_id", "string", null),
FieldSchema("d_date", "string", null),
FieldSchema("d_month_seq", "int", null),
FieldSchema("d_week_seq", "int", null),
FieldSchema("d_quarter_seq", "int", null),
FieldSchema("d_year", "int", null),
FieldSchema("d_dow", "int", null),
FieldSchema("d_moy", "int", null),
FieldSchema("d_dom", "int", null),
FieldSchema("d_qoy", "int", null),
FieldSchema("d_fy_year", "int", null),
FieldSchema("d_fy_quarter_seq", "int", null),
FieldSchema("d_fy_week_seq", "int", null),
FieldSchema("d_day_name", "string", null),
FieldSchema("d_quarter_name", "string", null),
FieldSchema("d_holiday", "string", null),
FieldSchema("d_weekend", "string", null),
FieldSchema("d_following_holiday", "string", null),
FieldSchema("d_first_dom", "int", null),
FieldSchema("d_last_dom", "int", null),
FieldSchema("d_same_day_ly", "int", null),
FieldSchema("d_same_day_lq", "int", null),
FieldSchema("d_current_day", "string", null),
FieldSchema("d_current_week", "string", null),
FieldSchema("d_current_month", "string", null),
FieldSchema("d_current_quarter", "string", null),
FieldSchema("d_current_year", "string", null)
),
"file:/tmp/warehouse/tpcds.db/date_dim",
"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
false,
0, List[String](), List[String](),
Map[String, String](),
false
),
Map[String, String](),
"Table")
}
case class FieldSchema(name: String, typeName: String, comment: String)
case class SerDe(name: String, serializationLib: String, parameters: Map[String, String])
case class StorageDescriptor(fields: List[FieldSchema],
location: String, inputFormat: String,
outputFormat: String, compressed: Boolean,
numBuckets: Int, bucketColumns: List[String],
sortColumns: List[String],
parameters: Map[String, String],
storedAsSubDirs: Boolean
)
case class Table(dbName: String, tableName: String, storageDesc: StorageDescriptor,
parameters: Map[String, String],
tableType: String)
}
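// A minimal usage sketch for the mock service above; the example object name is hypothetical
// and not part of the module (any db/table arguments return the same canned metadata).
object HiveMockMetadataServiceExample {
  def main(args: Array[String]): Unit = {
    val table = HiveMockMetadataService.getTable("tpcds", "date_dim")
    // storageDesc carries the column list and location of the canned table
    println(s"${table.dbName}.${table.tableName} -> ${table.storageDesc.location}")
    println(s"columns: ${table.storageDesc.fields.map(_.name).mkString(", ")}")
  }
}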
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.thrift
import org.junit.{Assert, Test}
import scala.util.parsing.input.CharArrayReader
class ThriftLexerTest {
@Test def testSimple {
val p = new ThriftParser
val r = scan(p, """efg abc""")
Assert.assertTrue(r.successful)
}
@Test def testStruct {
val p = new ThriftParser
val r = scan(p, """struct PartitionSpecWithSharedSD {
1: list<PartitionWithoutSD> partitions,
2: StorageDescriptor sd,
}""")
Assert.assertTrue(r.successful)
}
@Test def testTableStruct {
val p = new ThriftParser
val r = scan(p, """// table information
struct Table {
1: string tableName, // name of the table
2: string dbName, // database name ('default')
3: string owner, // owner of this table
4: i32 createTime, // creation time of the table
5: i32 lastAccessTime, // last access time (usually this will be filled from HDFS and shouldn't be relied on)
6: i32 retention, // retention time
7: StorageDescriptor sd, // storage descriptor of the table
8: list<FieldSchema> partitionKeys, // partition keys of the table. only primitive types are supported
9: map<string, string> parameters, // to store comments or any other user level parameters
10: string viewOriginalText, // original view text, null for non-view
11: string viewExpandedText, // expanded view text, null for non-view
12: string tableType, // table type enum, e.g. EXTERNAL_TABLE
13: optional PrincipalPrivilegeSet privileges,
14: optional bool temporary=false
}""")
Assert.assertTrue(r.successful)
}
@Test def testIncorrectStruct {
val p = new ThriftParser
val r = scan(p, """// table information
struct Table {
| 1: string tableName, // name of the table
| 2: string dbName
}""")
Assert.assertFalse(r.successful)
}
@Test def testNegativeInt {
val p = new ThriftParser
val r = scan(p, """-1""")
Assert.assertTrue(r.successful)
}
@Test def testComment {
val p = new ThriftParser
val r = scan(p, """/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/""")
}
// Drives the lexer directly: repeatedly consumes optional whitespace followed by a single
// token until the input is exhausted or a token fails to parse, and returns the last
// ParseResult so callers can assert on r.successful.
def scan(p: ThriftParser, str: String): p.lexical.ParseResult[_] = {
val l = p.lexical
var s: l.Input = new CharArrayReader(str.toCharArray)
var r = (l.whitespace.? ~ l.token)(s)
s = r.next
while (r.successful && !s.atEnd) {
s = r.next
if (!s.atEnd) {
r = (l.whitespace.? ~ l.token)(s)
}
}
r.asInstanceOf[p.lexical.ParseResult[_]]
}
@Test def testService {
val p = new ThriftParser
val r = scan(p, """/**
* This interface is live.
*/
service ThriftHiveMetastore extends fb303.FacebookService
{
string getMetaConf(1:string key) throws(1:MetaException o1)
void setMetaConf(1:string key, 2:string value) throws(1:MetaException o1)
void create_database(1:Database database) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
Database get_database(1:string name) throws(1:NoSuchObjectException o1, 2:MetaException o2)
void drop_database(1:string name, 2:bool deleteData, 3:bool cascade) throws(1:NoSuchObjectException o1, 2:InvalidOperationException o2, 3:MetaException o3)
list<string> get_databases(1:string pattern) throws(1:MetaException o1)
list<string> get_all_databases() throws(1:MetaException o1)
void alter_database(1:string dbname, 2:Database db) throws(1:MetaException o1, 2:NoSuchObjectException o2)
// returns the type with the given name (make separate calls for the dependent types if needed)
Type get_type(1:string name) throws(1:MetaException o1, 2:NoSuchObjectException o2)
bool create_type(1:Type type) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
bool drop_type(1:string type) throws(1:MetaException o1, 2:NoSuchObjectException o2)
map<string, Type> get_type_all(1:string name)
throws(1:MetaException o2)
// Gets a list of FieldSchemas describing the columns of a particular table
list<FieldSchema> get_fields(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3),
// Gets a list of FieldSchemas describing both the columns and the partition keys of a particular table
list<FieldSchema> get_schema(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3)
// create a Hive table. Following fields must be set
// tableName
// database (only 'default' for now until Hive QL supports databases)
// owner (not needed, but good to have for tracking purposes)
// sd.cols (list of field schemas)
// sd.inputFormat (SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat)
// sd.outputFormat (SequenceFileInputFormat (binary) or TextInputFormat)
// sd.serdeInfo.serializationLib (SerDe class name, e.g. org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe)
// * See notes on DDL_TIME
void create_table(1:Table tbl) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:NoSuchObjectException o4)
void create_table_with_environment_context(1:Table tbl,
2:EnvironmentContext environment_context)
throws (1:AlreadyExistsException o1,
2:InvalidObjectException o2, 3:MetaException o3,
4:NoSuchObjectException o4)
// drops the table and all the partitions associated with it if the table has partitions
// delete data (including partitions) if deleteData is set to true
void drop_table(1:string dbname, 2:string name, 3:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o3)
void drop_table_with_environment_context(1:string dbname, 2:string name, 3:bool deleteData,
4:EnvironmentContext environment_context)
throws(1:NoSuchObjectException o1, 2:MetaException o3)
list<string> get_tables(1: string db_name, 2: string pattern) throws (1: MetaException o1)
list<string> get_all_tables(1: string db_name) throws (1: MetaException o1)
Table get_table(1:string dbname, 2:string tbl_name)
throws (1:MetaException o1, 2:NoSuchObjectException o2)
list<Table> get_table_objects_by_name(1:string dbname, 2:list<string> tbl_names)
throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3)
// Get a list of table names that match a filter.
// The filter operators are LIKE, <, <=, >, >=, =, <>
//
// In the filter statement, values interpreted as strings must be enclosed in quotes,
// while values interpreted as integers should not be. Strings and integers are the only
// supported value types.
//
// The currently supported key names in the filter are:
// Constants.HIVE_FILTER_FIELD_OWNER, which filters on the tables' owner's name
// and supports all filter operators
// Constants.HIVE_FILTER_FIELD_LAST_ACCESS, which filters on the last access times
// and supports all filter operators except LIKE
// Constants.HIVE_FILTER_FIELD_PARAMS, which filters on the tables' parameter keys and values
// and only supports the filter operators = and <>.
// Append the parameter key name to HIVE_FILTER_FIELD_PARAMS in the filter statement.
// For example, to filter on parameter keys called "retention", the key name in the filter
// statement should be Constants.HIVE_FILTER_FIELD_PARAMS + "retention"
// Also, = and <> only work for keys that exist
// in the tables. E.g., if you are looking for tables where key1 <> value, it will only
// look at tables that have a value for the parameter key1.
// Some example filter statements include:
// filter = Constants.HIVE_FILTER_FIELD_OWNER + " like \".*test.*\" and " +
// Constants.HIVE_FILTER_FIELD_LAST_ACCESS + " = 0";
// filter = Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"30\" or " +
// Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"90\""
// @param dbName
// The name of the database from which you will retrieve the table names
// @param filterType
// The type of filter
// @param filter
// The filter string
// @param max_tables
// The maximum number of tables returned
// @return A list of table names that match the desired filter
list<string> get_table_names_by_filter(1:string dbname, 2:string filter, 3:i16 max_tables=-1)
throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3)
// alter table applies only to future partitions, not to existing partitions
// * See notes on DDL_TIME
void alter_table(1:string dbname, 2:string tbl_name, 3:Table new_tbl)
throws (1:InvalidOperationException o1, 2:MetaException o2)
void alter_table_with_environment_context(1:string dbname, 2:string tbl_name,
3:Table new_tbl, 4:EnvironmentContext environment_context)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// the following applies to only tables that have partitions
// * See notes on DDL_TIME
Partition add_partition(1:Partition new_part)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition add_partition_with_environment_context(1:Partition new_part,
2:EnvironmentContext environment_context)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2,
3:MetaException o3)
i32 add_partitions(1:list<Partition> new_parts)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
i32 add_partitions_pspec(1:list<PartitionSpec> new_parts)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
AddPartitionsResult add_partitions_req(1:AddPartitionsRequest request)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition_with_environment_context(1:string db_name, 2:string tbl_name,
3:list<string> part_vals, 4:EnvironmentContext environment_context)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
Partition append_partition_by_name_with_environment_context(1:string db_name, 2:string tbl_name,
3:string part_name, 4:EnvironmentContext environment_context)
throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
bool drop_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
bool drop_partition_with_environment_context(1:string db_name, 2:string tbl_name,
3:list<string> part_vals, 4:bool deleteData, 5:EnvironmentContext environment_context)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
bool drop_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name, 4:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
bool drop_partition_by_name_with_environment_context(1:string db_name, 2:string tbl_name,
3:string part_name, 4:bool deleteData, 5:EnvironmentContext environment_context)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
DropPartitionsResult drop_partitions_req(1: DropPartitionsRequest req)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
Partition get_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
Partition exchange_partition(1:map<string, string> partitionSpecs, 2:string source_db,
3:string source_table_name, 4:string dest_db, 5:string dest_table_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2, 3:InvalidObjectException o3,
4:InvalidInputException o4)
Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 3:list<string> part_vals,
4: string user_name, 5: list<string> group_names) throws(1:MetaException o1, 2:NoSuchObjectException o2)
Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string part_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// returns all the partitions for this table in reverse chronological order.
// If max parts is given then it will return only that many.
list<Partition> get_partitions(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<Partition> get_partitions_with_auth(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1,
4: string user_name, 5: list<string> group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<PartitionSpec> get_partitions_pspec(1:string db_name, 2:string tbl_name, 3:i32 max_parts=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_partition_names(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1)
throws(1:MetaException o2)
// get_partition*_ps methods allow filtering by a partial partition specification,
// as needed for dynamic partitions. The values that are not restricted should
// be empty strings. Nulls were considered (instead of "") but caused errors in
// generated Python code. The size of part_vals may be smaller than the
// number of partition columns - the unspecified values are considered the same
// as "".
list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name
3:list<string> part_vals, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1,
5: string user_name, 6: list<string> group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_partition_names_ps(1:string db_name,
2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// get the partitions matching the given partition filter
list<Partition> get_partitions_by_filter(1:string db_name 2:string tbl_name
3:string filter, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// List partitions as PartitionSpec instances.
list<PartitionSpec> get_part_specs_by_filter(1:string db_name 2:string tbl_name
3:string filter, 4:i32 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// get the partitions matching the given partition filter
// unlike get_partitions_by_filter, takes a serialized Hive expression, and with that can work
// with any filter (get_partitions_by_filter only works if the filter can be pushed down to JDOQL)
PartitionsByExprResult get_partitions_by_expr(1:PartitionsByExprRequest req)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// get partitions given a list of partition names
list<Partition> get_partitions_by_names(1:string db_name 2:string tbl_name 3:list<string> names)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
// changes the partition to the new partition object. partition is identified from the part values
// in the new_part
// * See notes on DDL_TIME
void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition new_part)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// change a list of partitions. All partitions are altered atomically and all
// prehooks are fired together followed by all post hooks
void alter_partitions(1:string db_name, 2:string tbl_name, 3:list<Partition> new_parts)
throws (1:InvalidOperationException o1, 2:MetaException o2)
void alter_partition_with_environment_context(1:string db_name,
2:string tbl_name, 3:Partition new_part,
4:EnvironmentContext environment_context)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// rename the old partition to the new partition object by changing old part values to the part values
// in the new_part. old partition is identified from part_vals.
// partition keys in new_part should be the same as those in old partition.
void rename_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:Partition new_part)
throws (1:InvalidOperationException o1, 2:MetaException o2)
// returns whether or not the partition name is valid based on the value of the config
// hive.metastore.partition.name.whitelist.pattern
bool partition_name_has_valid_characters(1:list<string> part_vals, 2:bool throw_exception)
throws(1: MetaException o1)
// gets the value of the configuration key in the metastore server. returns
// defaultValue if the key does not exist. if the configuration key does not
// begin with "hive", "mapred", or "hdfs", a ConfigValSecurityException is
// thrown.
string get_config_value(1:string name, 2:string defaultValue)
throws(1:ConfigValSecurityException o1)
// converts a partition name into a partition values array
list<string> partition_name_to_vals(1: string part_name)
throws(1: MetaException o1)
// converts a partition name into a partition specification (a mapping from
// the partition cols to the values)
map<string, string> partition_name_to_spec(1: string part_name)
throws(1: MetaException o1)
void markPartitionForEvent(1:string db_name, 2:string tbl_name, 3:map<string,string> part_vals,
4:PartitionEventType eventType) throws (1: MetaException o1, 2: NoSuchObjectException o2,
3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5,
6: InvalidPartitionException o6)
bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 3:map<string,string> part_vals,
4: PartitionEventType eventType) throws (1: MetaException o1, 2:NoSuchObjectException o2,
3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5,
6: InvalidPartitionException o6)
//index
Index add_index(1:Index new_index, 2: Table index_table)
throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name, 4:Index new_idx)
throws (1:InvalidOperationException o1, 2:MetaException o2)
bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string index_name, 4:bool deleteData)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string index_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
list<Index> get_indexes(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
throws(1:MetaException o2)
// column statistics interfaces
// update APIs persist the column statistics object(s) that are passed in. If statistics already
// exist for one or more columns, the existing statistics will be overwritten. The update APIs
// validate that the dbName, tableName, partName, colName[] passed in as part of the ColumnStatistics
// struct are valid, and throw InvalidInputException/NoSuchObjectException if found to be invalid
bool update_table_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
bool update_partition_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
// get APIs return the column statistics corresponding to db_name, tbl_name, [part_name], col_name if
// such statistics exist. If the requested statistics don't exist, get APIs throw NoSuchObjectException.
// For instance, if get_table_column_statistics is called on a partitioned table for which only
// partition-level column stats exist, get_table_column_statistics will throw NoSuchObjectException
ColumnStatistics get_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
(1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidInputException o3, 4:InvalidObjectException o4)
ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name,
4:string col_name) throws (1:NoSuchObjectException o1, 2:MetaException o2,
3:InvalidInputException o3, 4:InvalidObjectException o4)
TableStatsResult get_table_statistics_req(1:TableStatsRequest request) throws
(1:NoSuchObjectException o1, 2:MetaException o2)
PartitionsStatsResult get_partitions_statistics_req(1:PartitionsStatsRequest request) throws
(1:NoSuchObjectException o1, 2:MetaException o2)
AggrStats get_aggr_stats_for(1:PartitionsStatsRequest request) throws
(1:NoSuchObjectException o1, 2:MetaException o2)
bool set_aggr_stats_for(1:SetPartitionsStatsRequest request) throws
(1:NoSuchObjectException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
// delete APIs attempt to delete the column statistics, if found, associated with a given db_name, tbl_name, [part_name]
// and col_name. If the delete API doesn't find the statistics record in the metastore, it throws NoSuchObjectException.
// The delete API validates the input and, if the input is invalid, throws InvalidInputException/InvalidObjectException.
bool delete_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name, 4:string col_name) throws
(1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
4:InvalidInputException o4)
bool delete_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
(1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
4:InvalidInputException o4)
//
// user-defined functions
//
void create_function(1:Function func)
throws (1:AlreadyExistsException o1,
2:InvalidObjectException o2,
3:MetaException o3,
4:NoSuchObjectException o4)
void drop_function(1:string dbName, 2:string funcName)
throws (1:NoSuchObjectException o1, 2:MetaException o3)
void alter_function(1:string dbName, 2:string funcName, 3:Function newFunc)
throws (1:InvalidOperationException o1, 2:MetaException o2)
list<string> get_functions(1:string dbName, 2:string pattern)
throws (1:MetaException o1)
Function get_function(1:string dbName, 2:string funcName)
throws (1:MetaException o1, 2:NoSuchObjectException o2)
//authorization privileges
bool create_role(1:Role role) throws(1:MetaException o1)
bool drop_role(1:string role_name) throws(1:MetaException o1)
list<string> get_role_names() throws(1:MetaException o1)
// Deprecated, use grant_revoke_role()
bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type,
4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option) throws(1:MetaException o1)
// Deprecated, use grant_revoke_role()
bool revoke_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type)
throws(1:MetaException o1)
list<Role> list_roles(1:string principal_name, 2:PrincipalType principal_type) throws(1:MetaException o1)
GrantRevokeRoleResponse grant_revoke_role(1:GrantRevokeRoleRequest request) throws(1:MetaException o1)
// get all role-grants for users/roles that have been granted the given role
// Note that in the returned list of RolePrincipalGrants, the roleName is
// redundant as it would match the role_name argument of this function
GetPrincipalsInRoleResponse get_principals_in_role(1: GetPrincipalsInRoleRequest request) throws(1:MetaException o1)
// get grant information of all roles granted to the given principal
// Note that in the returned list of RolePrincipalGrants, the principal name/type is
// redundant as it would match the principal name/type arguments of this function
GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(1: GetRoleGrantsForPrincipalRequest request) throws(1:MetaException o1)
PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string user_name,
3: list<string> group_names) throws(1:MetaException o1)
list<HiveObjectPrivilege> list_privileges(1:string principal_name, 2:PrincipalType principal_type,
3: HiveObjectRef hiveObject) throws(1:MetaException o1)
// Deprecated, use grant_revoke_privileges()
bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
// Deprecated, use grant_revoke_privileges()
bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
GrantRevokePrivilegeResponse grant_revoke_privileges(1:GrantRevokePrivilegeRequest request) throws(1:MetaException o1);
// this is used by the metastore client to send UGI information to the metastore server immediately
// after setting up a connection.
list<string> set_ugi(1:string user_name, 2:list<string> group_names) throws (1:MetaException o1)
//Authentication (delegation token) interfaces
// get a metastore server delegation token that map/reduce tasks can use to authenticate
// to the metastore server
string get_delegation_token(1:string token_owner, 2:string renewer_kerberos_principal_name)
throws (1:MetaException o1)
// method to renew delegation token obtained from metastore server
i64 renew_delegation_token(1:string token_str_form) throws (1:MetaException o1)
// method to cancel delegation token obtained from metastore server
void cancel_delegation_token(1:string token_str_form) throws (1:MetaException o1)
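// illustrative lifecycle for the three calls above (the principal names are assumptions): a client
// obtains a token with get_delegation_token("hive", "yarn"), ships its string form to the map/reduce
// tasks, periodically calls renew_delegation_token(token) while the job runs, and finally calls
// cancel_delegation_token(token) once the token is no longer needed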
// Transaction and lock management calls
// Get just list of open transactions
GetOpenTxnsResponse get_open_txns()
// Get list of open transactions with state (open, aborted)
GetOpenTxnsInfoResponse get_open_txns_info()
OpenTxnsResponse open_txns(1:OpenTxnRequest rqst)
void abort_txn(1:AbortTxnRequest rqst) throws (1:NoSuchTxnException o1)
void commit_txn(1:CommitTxnRequest rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
LockResponse lock(1:LockRequest rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
LockResponse check_lock(1:CheckLockRequest rqst)
throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2, 3:NoSuchLockException o3)
void unlock(1:UnlockRequest rqst) throws (1:NoSuchLockException o1, 2:TxnOpenException o2)
ShowLocksResponse show_locks(1:ShowLocksRequest rqst)
void heartbeat(1:HeartbeatRequest ids) throws (1:NoSuchLockException o1, 2:NoSuchTxnException o2, 3:TxnAbortedException o3)
HeartbeatTxnRangeResponse heartbeat_txn_range(1:HeartbeatTxnRangeRequest txns)
void compact(1:CompactionRequest rqst)
ShowCompactResponse show_compact(1:ShowCompactRequest rqst)
}""")
Assert.assertTrue(r.successful)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.tools.thrift
import com.google.gson.JsonParser
import org.json4s.native.JsonMethods._
import org.json4s.native.Serialization.{write => swrite}
import org.json4s.{NoTypeHints, _}
import org.junit.{Assert, Test}
import scala.io.Source
import scala.reflect.ClassTag
/**
* Copied from
* [[https://github.com/json4s/json4s/blob/master/ext/src/main/scala/org/json4s/ext/EnumSerializer.scala json4s github]]
* to avoid dependency on json4s-ext.
*/
class EnumNameSerializer[E <: Enumeration: ClassTag](enum: E) extends Serializer[E#Value] {
import JsonDSL._
val EnumerationClass = classOf[E#Value]
def deserialize(implicit format: Formats): PartialFunction[(TypeInfo, JValue), E#Value] = {
case (t@TypeInfo(EnumerationClass, _), json) if (isValid(json)) => {
json match {
case JString(value) => enum.withName(value)
case value => throw new MappingException("Can't convert " +
value + " to " + EnumerationClass)
}
}
}
private[this] def isValid(json: JValue) = json match {
case JString(value) if (enum.values.exists(_.toString == value)) => true
case _ => false
}
def serialize(implicit format: Formats): PartialFunction[Any, JValue] = {
case i: E#Value => i.toString
}
}
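// Minimal usage sketch for EnumNameSerializer (the Weekday enumeration is a made-up example,
// not part of this code base): adding the serializer to the implicit Formats makes json4s
// write an Enumeration value by name rather than by id, e.g.
//
//   object Weekday extends Enumeration { val Mon, Tue = Value }
//   implicit val formats = org.json4s.native.Serialization.formats(NoTypeHints) +
//     new EnumNameSerializer(Weekday)
//   org.json4s.native.Serialization.write(Weekday.Mon)   // yields the JSON string "Mon"
//
// The toJson helper in ThriftParserTest below registers it the same way for BASE_TYPES and THRIFT_LANG.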
class ThriftParserTest {
@Test def testSimple {
val p = new ThriftParser
val parser = new JsonParser
val td: Option[ThriftDef] = p( """include "share/fb303/if/fb303.thrift"
namespace java org.apache.hadoop.hive.metastore.api
namespace php metastore
namespace cpp Apache.Hadoop.Hive
""")
val parsed = parser.parse(toJson(td.get))
val sample = parser.parse( """{
"includes":[{
"value":"share/fb303/if/fb303.thrift"
}],
"cppIncludes":[],
"namespaces":[{
"lang":"",
"name":"Apache.Hadoop.Hive",
"otherLang":"cpp"
},{
"lang":"",
"name":"metastore",
"otherLang":"php"
},{
"lang":"",
"name":"org.apache.hadoop.hive.metastore.api",
"otherLang":"java"
}],
"constants":[],
"typedefs":[],
"enums":[],
"senums":[],
"structs":[],
"unions":[],
"xceptions":[],
"services":[]
}""")
Assert.assertEquals(parsed.toString, sample.toString)
}
@Test def testStruct {
val p = new ThriftParser
val parser = new JsonParser
val td: Option[ThriftDef] = p( """struct PartitionSpecWithSharedSD {
1: list<PartitionWithoutSD> partitions,
2: StorageDescriptor sd
}""")
val parsed = parser.parse(toJson(td.get))
val sample = parser.parse( """{
"includes":[],
"cppIncludes":[],
"namespaces":[],
"constants":[],
"typedefs":[],
"enums":[],
"senums":[],
"structs":[{
"name":"PartitionSpecWithSharedSD",
"xsdAll":false,
"fields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"elemType":{
"name":"PartitionWithoutSD"
}
},
"name":"partitions",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"name":"StorageDescriptor"
},
"name":"sd",
"xsdOptional":false,
"xsdNillable":false
}]
}],
"unions":[],
"xceptions":[],
"services":[]
}""")
Assert.assertEquals(parsed.toString, sample.toString)
}
def toJson(td: ThriftDef) = {
implicit val formats = org.json4s.native.Serialization.formats(NoTypeHints) + new EnumNameSerializer(BASE_TYPES) +
new EnumNameSerializer(THRIFT_LANG)
val ser = swrite(td)
pretty(render(parse(ser)))
}
@Test def testTableStruct {
val p = new ThriftParser
val parser = new JsonParser
val td: Option[ThriftDef] = p( """// table information
struct Table {
1: string tableName, // name of the table
2: string dbName, // database name ('default')
3: string owner, // owner of this table
4: i32 createTime, // creation time of the table
5: i32 lastAccessTime, // last access time (usually this will be filled from HDFS and shouldn't be relied on)
6: i32 retention, // retention time
7: StorageDescriptor sd, // storage descriptor of the table
8: list<FieldSchema> partitionKeys, // partition keys of the table. only primitive types are supported
9: map<string, string> parameters, // to store comments or any other user level parameters
10: string viewOriginalText, // original view text, null for non-view
11: string viewExpandedText, // expanded view text, null for non-view
12: string tableType, // table type enum, e.g. EXTERNAL_TABLE
13: optional PrincipalPrivilegeSet privileges,
14: optional bool temporary=false
}""")
val parsed = parser.parse(toJson(td.get))
val sample = parser.parse( """{
"includes":[],
"cppIncludes":[],
"namespaces":[],
"constants":[],
"typedefs":[],
"enums":[],
"senums":[],
"structs":[{
"name":"Table",
"xsdAll":false,
"fields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"tableName",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"dbName",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":3
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"owner",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":4
},
"requiredNess":false,
"fieldType":{
"typ":"i32"
},
"name":"createTime",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":5
},
"requiredNess":false,
"fieldType":{
"typ":"i32"
},
"name":"lastAccessTime",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":6
},
"requiredNess":false,
"fieldType":{
"typ":"i32"
},
"name":"retention",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":7
},
"requiredNess":false,
"fieldType":{
"name":"StorageDescriptor"
},
"name":"sd",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":8
},
"requiredNess":false,
"fieldType":{
"elemType":{
"name":"FieldSchema"
}
},
"name":"partitionKeys",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":9
},
"requiredNess":false,
"fieldType":{
"keyType":{
"typ":"string"
},
"valueType":{
"typ":"string"
}
},
"name":"parameters",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":10
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"viewOriginalText",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":11
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"viewExpandedText",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":12
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"tableType",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":13
},
"requiredNess":false,
"fieldType":{
"name":"PrincipalPrivilegeSet"
},
"name":"privileges",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":14
},
"requiredNess":false,
"fieldType":{
"typ":"bool"
},
"name":"temporary",
"fieldValue":{
"value":"false"
},
"xsdOptional":false,
"xsdNillable":false
}]
}],
"unions":[],
"xceptions":[],
"services":[]
}""")
Assert.assertEquals(parsed.toString, sample.toString)
}
@Test def testHiveThrift {
val p = new ThriftParser
val is = getClass().getResourceAsStream("/test.thrift")
val src: Source = Source.fromInputStream(is)
val t: String = src.getLines().mkString("\n")
val td: Option[ThriftDef] = p(t)
Assert.assertTrue(td.isDefined)
//println(toJson(td.get))
}
@Test def testService {
val p = new ThriftParser
val parser = new JsonParser
val td: Option[ThriftDef] = p( """/**
* This interface is live.
*/
service ThriftHiveMetastore extends fb303.FacebookService
{
string getMetaConf(1:string key) throws(1:MetaException o1)
void setMetaConf(1:string key, 2:string value) throws(1:MetaException o1)
void create_database(1:Database database) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
Database get_database(1:string name) throws(1:NoSuchObjectException o1, 2:MetaException o2)
void drop_database(1:string name, 2:bool deleteData, 3:bool cascade) throws(1:NoSuchObjectException o1, 2:InvalidOperationException o2, 3:MetaException o3)
list<string> get_databases(1:string pattern) throws(1:MetaException o1)
list<string> get_all_databases() throws(1:MetaException o1)
void alter_database(1:string dbname, 2:Database db) throws(1:MetaException o1, 2:NoSuchObjectException o2)
}""")
val parsed = parser.parse(toJson(td.get))
val sample = parser.parse( """{
"includes":[],
"cppIncludes":[],
"namespaces":[],
"constants":[],
"typedefs":[],
"enums":[],
"senums":[],
"structs":[],
"unions":[],
"xceptions":[],
"services":[{
"name":"ThriftHiveMetastore",
"superName":"fb303.FacebookService",
"functions":[{
"oneway":false,
"returnType":{
"typ":"string"
},
"name":"getMetaConf",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"key",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
},
"name":"setMetaConf",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"key",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"value",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
},
"name":"create_database",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"Database"
},
"name":"database",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"AlreadyExistsException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"name":"InvalidObjectException"
},
"name":"o2",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":3
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o3",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
"name":"Database"
},
"name":"get_database",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"name",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"NoSuchObjectException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o2",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
},
"name":"drop_database",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"name",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"typ":"bool"
},
"name":"deleteData",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":3
},
"requiredNess":false,
"fieldType":{
"typ":"bool"
},
"name":"cascade",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"NoSuchObjectException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"name":"InvalidOperationException"
},
"name":"o2",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":3
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o3",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
"elemType":{
"typ":"string"
}
},
"name":"get_databases",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"pattern",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
"elemType":{
"typ":"string"
}
},
"name":"get_all_databases",
"parameters":[],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
}]
},{
"oneway":false,
"returnType":{
},
"name":"alter_database",
"parameters":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"typ":"string"
},
"name":"dbname",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"name":"Database"
},
"name":"db",
"xsdOptional":false,
"xsdNillable":false
}],
"throwFields":[{
"id":{
"value":1
},
"requiredNess":false,
"fieldType":{
"name":"MetaException"
},
"name":"o1",
"xsdOptional":false,
"xsdNillable":false
},{
"id":{
"value":2
},
"requiredNess":false,
"fieldType":{
"name":"NoSuchObjectException"
},
"name":"o2",
"xsdOptional":false,
"xsdNillable":false
}]
}]
}]
}""")
Assert.assertEquals(parsed.toString, sample.toString)
}
}