Commit 6e02ec5b by rmani Committed by Madhan Neethiraj

ATLAS-2491: Hive hook should use v2 notifications

parent dee8a2da
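Editor's note: this change moves the Hive hook from v1 Referenceable-based messages to v2 notifications built from AtlasEntity and typed HookNotification requests. A minimal sketch of the v2 style follows (not part of this commit; entity values are hypothetical, the classes and constructors are the ones used throughout this diff):

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;

public class V2NotificationSketch {
    public static HookNotification sampleCreateRequest(String user) {
        AtlasEntity db = new AtlasEntity("hive_db");       // Hive model type name
        db.setAttribute("name", "sales");                  // hypothetical values
        db.setAttribute("qualifiedName", "sales@cl1");

        AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo();
        entities.addEntity(db);

        return new EntityCreateRequestV2(user, entities);  // v2 create request, as used below
    }
}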
......@@ -68,6 +68,7 @@ public class FalconBridge {
public static final String RUNSON = "runs-on";
public static final String STOREDIN = "stored-in";
public static final String FREQUENCY = "frequency";
public static final String ATTRIBUTE_DB = "db";
/**
* Creates cluster entity
......@@ -357,7 +358,7 @@ public class FalconBridge {
tableRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName));
tableRef.set(AtlasClient.NAME, tableName.toLowerCase());
tableRef.set(HiveMetaStoreBridge.DB, dbRef);
tableRef.set(ATTRIBUTE_DB, dbRef);
entities.add(tableRef);
return entities;
......
......@@ -106,6 +106,12 @@
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-client-v2</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-notification</artifactId>
</dependency>
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
public class AtlasHiveHookContext {
private final HiveHook hook;
private final HiveOperation hiveOperation;
private final HookContext hiveContext;
private final Hive hive;
private final Map<String, AtlasEntity> qNameEntityMap = new HashMap<>();
public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HookContext hiveContext) throws Exception {
this.hook = hook;
this.hiveOperation = hiveOperation;
this.hiveContext = hiveContext;
this.hive = Hive.get(hiveContext.getConf());
}
public HookContext getHiveContext() {
return hiveContext;
}
public Hive getHive() {
return hive;
}
public HiveOperation getHiveOperation() {
return hiveOperation;
}
public void putEntity(String qualifiedName, AtlasEntity entity) {
qNameEntityMap.put(qualifiedName, entity);
}
public AtlasEntity getEntity(String qualifiedName) {
return qNameEntityMap.get(qualifiedName);
}
public Collection<AtlasEntity> getEntities() { return qNameEntityMap.values(); }
public String getClusterName() {
return hook.getClusterName();
}
public boolean isKnownDatabase(String dbQualifiedName) {
return hook.isKnownDatabase(dbQualifiedName);
}
public boolean isKnownTable(String tblQualifiedName) {
return hook.isKnownTable(tblQualifiedName);
}
public void addToKnownEntities(Collection<AtlasEntity> entities) {
hook.addToKnownEntities(entities);
}
public void removeFromKnownDatabase(String dbQualifiedName) {
hook.removeFromKnownDatabase(dbQualifiedName);
}
public void removeFromKnownTable(String tblQualifiedName) {
hook.removeFromKnownTable(tblQualifiedName);
}
}
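A hedged usage sketch of the qualifiedName cache above (not part of this commit; the type name and helper are illustrative): event classes can check the cache before building an entity, so each entity is materialized at most once per hook invocation.

import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;

class EntityCacheSketch {
    static AtlasEntity getOrCreate(AtlasHiveHookContext context, String qualifiedName) {
        AtlasEntity entity = context.getEntity(qualifiedName);
        if (entity == null) {
            entity = new AtlasEntity("hive_table");               // illustrative type name
            entity.setAttribute("qualifiedName", qualifiedName);
            context.putEntity(qualifiedName, entity);             // cached for later lookups
        }
        return entity;
    }
}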
......@@ -6,19 +6,36 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
public class RewriteException extends Exception {
public RewriteException(final String message, final Exception exception) {
super(message, exception);
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import java.util.Collections;
import java.util.List;
public class AlterDatabase extends CreateDatabase {
public AlterDatabase(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new HookNotification.EntityUpdateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
}
......@@ -6,21 +6,37 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
package org.apache.atlas.hive.hook.events;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
public interface ASTRewriter {
import java.util.Collections;
import java.util.List;
void rewrite(RewriteContext ctx, ASTNode node) throws RewriteException;
public class AlterTable extends CreateTable {
public AlterTable(AtlasHiveHookContext context) {
super(context, true);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new EntityUpdateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2;
import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
public class AlterTableRename extends BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(AlterTableRename.class);
public AlterTableRename(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
List<HookNotification> ret = new ArrayList<>();
if (CollectionUtils.isEmpty(getHiveContext().getInputs())) {
LOG.error("AlterTableRename: old-table not found in inputs list");
return ret;
}
Table oldTable = getHiveContext().getInputs().iterator().next().getTable();
Table newTable = null;
if (CollectionUtils.isNotEmpty(getHiveContext().getOutputs())) {
for (WriteEntity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.TABLE) {
newTable = entity.getTable();
// Hive sends both the old and new table names in the outputs; skip the entry that matches the old table
if (StringUtils.equalsIgnoreCase(newTable.getDbName(), oldTable.getDbName()) && StringUtils.equalsIgnoreCase(newTable.getTableName(), oldTable.getTableName())) {
newTable = null;
continue;
}
break;
}
}
}
if (newTable == null) {
LOG.error("AlterTableRename: renamed table not found in outputs list");
return ret;
}
AtlasEntityWithExtInfo oldTableEntity = toTableEntity(oldTable);
// first update with oldTable info, so that the table will be created if it is not present in Atlas
ret.add(new EntityUpdateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(oldTableEntity)));
// update qualifiedName for all columns, partitionKeys, storageDesc
String newTableQualifiedName = getQualifiedName(newTable);
renameColumns((List<AtlasObjectId>) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_COLUMNS), oldTableEntity, newTableQualifiedName, ret);
renameColumns((List<AtlasObjectId>) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_PARTITION_KEYS), oldTableEntity, newTableQualifiedName, ret);
renameStorageDesc((AtlasObjectId) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_STORAGEDESC), oldTableEntity, newTableQualifiedName, ret);
// update qualifiedName and other attributes (like params - which include lastModifiedTime, lastModifiedBy) of the table
AtlasEntityWithExtInfo newTableEntity = toTableEntity(newTable);
// set previous name as the alias
newTableEntity.getEntity().setAttribute(ATTRIBUTE_ALIASES, Collections.singletonList(oldTable.getTableName()));
// remove columns, partitionKeys and storageDesc - as they have already been updated above
removeAttribute(newTableEntity, ATTRIBUTE_COLUMNS);
removeAttribute(newTableEntity, ATTRIBUTE_PARTITION_KEYS);
removeAttribute(newTableEntity, ATTRIBUTE_STORAGEDESC);
AtlasObjectId oldTableId = new AtlasObjectId(oldTableEntity.getEntity().getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
ret.add(new EntityPartialUpdateRequestV2(getUserName(), oldTableId, newTableEntity));
context.removeFromKnownTable((String) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
return ret;
}
private void renameColumns(List<AtlasObjectId> columns, AtlasEntityExtInfo oldEntityExtInfo, String newTableQualifiedName, List<HookNotification> notifications) {
if (CollectionUtils.isNotEmpty(columns)) {
for (AtlasObjectId columnId : columns) {
AtlasEntity oldColumn = oldEntityExtInfo.getEntity(columnId.getGuid());
AtlasObjectId oldColumnId = new AtlasObjectId(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldColumn.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
AtlasEntity newColumn = new AtlasEntity(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName(newTableQualifiedName, (String) oldColumn.getAttribute(ATTRIBUTE_NAME)));
notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn)));
}
}
}
private void renameStorageDesc(AtlasObjectId sdId, AtlasEntityExtInfo oldEntityExtInfo, String newTableQualifiedName, List<HookNotification> notifications) {
if (sdId != null) {
AtlasEntity oldSd = oldEntityExtInfo.getEntity(sdId.getGuid());
AtlasObjectId oldSdId = new AtlasObjectId(oldSd.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldSd.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
AtlasEntity newSd = new AtlasEntity(oldSd.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, getStorageDescQualifiedName(newTableQualifiedName));
notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldSdId, new AtlasEntityWithExtInfo(newSd)));
}
}
private String getColumnQualifiedName(String tblQualifiedName, String columnName) {
int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_CLUSTER_NAME);
if (sepPos == -1) {
return tblQualifiedName + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase();
} else {
return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase() + tblQualifiedName.substring(sepPos);
}
}
private void removeAttribute(AtlasEntityWithExtInfo entity, String attributeName) {
Object attributeValue = entity.getEntity().getAttribute(attributeName);
entity.getEntity().getAttributes().remove(attributeName);
if (attributeValue instanceof AtlasObjectId) {
AtlasObjectId objectId = (AtlasObjectId) attributeValue;
entity.removeReferredEntity(objectId.getGuid());
} else if (attributeValue instanceof Collection) {
for (Object item : (Collection) attributeValue)
if (item instanceof AtlasObjectId) {
AtlasObjectId objectId = (AtlasObjectId) item;
entity.removeReferredEntity(objectId.getGuid());
}
}
}
private String getStorageDescQualifiedName(String tblQualifiedName) {
return tblQualifiedName + "_storage";
}
}
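A worked example of getColumnQualifiedName above, assuming QNAME_SEP_CLUSTER_NAME is '@' and QNAME_SEP_ENTITY_NAME is '.' (their values live in BaseHiveEvent and are not shown in this diff): the column name is spliced in before the cluster suffix of the new table's qualifiedName.

public class ColumnQNameSketch {
    // standalone copy of the logic above, with the separators inlined as assumed values
    static String columnQualifiedName(String tblQualifiedName, String columnName) {
        int sepPos = tblQualifiedName.lastIndexOf('@');
        return sepPos == -1
            ? tblQualifiedName + "." + columnName.toLowerCase()
            : tblQualifiedName.substring(0, sepPos) + "." + columnName.toLowerCase() + tblQualifiedName.substring(sepPos);
    }

    public static void main(String[] args) {
        // renamed table default.t2 on cluster cl1; old column "ID"
        System.out.println(columnQualifiedName("default.t2@cl1", "ID")); // prints: default.t2.id@cl1
    }
}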
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
public class AlterTableRenameCol extends AlterTable {
private static final Logger LOG = LoggerFactory.getLogger(AlterTableRenameCol.class);
public AlterTableRenameCol(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
if (CollectionUtils.isEmpty(getHiveContext().getInputs())) {
LOG.error("AlterTableRenameCol: old-table not found in inputs list");
return null;
}
if (CollectionUtils.isEmpty(getHiveContext().getOutputs())) {
LOG.error("AlterTableRenameCol: new-table not found in outputs list");
return null;
}
List<HookNotification> ret = new ArrayList<>(super.getNotificationMessages());
Table oldTable = getHiveContext().getInputs().iterator().next().getTable();
Table newTable = getHiveContext().getOutputs().iterator().next().getTable();
newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName());
List<FieldSchema> oldColumns = oldTable.getCols();
List<FieldSchema> newColumns = newTable.getCols();
FieldSchema changedColumnOld = null;
FieldSchema changedColumnNew = null;
for (FieldSchema oldColumn : oldColumns) {
if (!newColumns.contains(oldColumn)) {
changedColumnOld = oldColumn;
break;
}
}
for (FieldSchema newColumn : newColumns) {
if (!oldColumns.contains(newColumn)) {
changedColumnNew = newColumn;
break;
}
}
if (changedColumnOld != null && changedColumnNew != null) {
AtlasObjectId oldColumnId = new AtlasObjectId(HIVE_TYPE_COLUMN, ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(oldTable, changedColumnOld));
AtlasEntity newColumn = new AtlasEntity(HIVE_TYPE_COLUMN);
newColumn.setAttribute(ATTRIBUTE_NAME, changedColumnNew.getName());
newColumn.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(newTable, changedColumnNew));
ret.add(0, new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn)));
} else {
LOG.error("AlterTableRenameCol: no renamed column detected");
}
return ret;
}
}
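The renamed column is detected by a set difference over FieldSchema equality; a minimal standalone illustration of the same containment checks (column names hypothetical):

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import java.util.Arrays;
import java.util.List;

class RenamedColumnSketch {
    public static void main(String[] args) {
        List<FieldSchema> oldCols = Arrays.asList(new FieldSchema("id", "int", null),
                                                  new FieldSchema("amt", "double", null));
        List<FieldSchema> newCols = Arrays.asList(new FieldSchema("id", "int", null),
                                                  new FieldSchema("amount", "double", null));

        // same containment checks as getNotificationMessages() above
        FieldSchema changedOld = oldCols.stream().filter(c -> !newCols.contains(c)).findFirst().orElse(null);
        FieldSchema changedNew = newCols.stream().filter(c -> !oldCols.contains(c)).findFirst().orElse(null);

        System.out.println(changedOld.getName() + " -> " + changedNew.getName()); // amt -> amount
    }
}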
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collections;
import java.util.List;
public class CreateDatabase extends BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(CreateDatabase.class);
public CreateDatabase(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new EntityCreateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public AtlasEntitiesWithExtInfo getEntities() throws Exception {
AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.DATABASE) {
Database db = entity.getDatabase();
if (db != null) {
db = getHive().getDatabase(db.getName());
}
if (db != null) {
AtlasEntity dbEntity = toDbEntity(db);
ret.addEntity(dbEntity);
} else {
LOG.error("CreateDatabase.getEntities(): failed to retrieve db");
}
}
}
addProcessedEntities(ret);
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class CreateHiveProcess extends BaseHiveEvent {
private static final Logger LOG = LoggerFactory.getLogger(CreateHiveProcess.class);
public CreateHiveProcess(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
List<HookNotification> ret = entities != null ? Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities)) : null;
return ret;
}
public AtlasEntitiesWithExtInfo getEntities() throws Exception {
AtlasEntitiesWithExtInfo ret = null;
if (!skipProcess()) {
List<AtlasEntity> inputs = new ArrayList<>();
List<AtlasEntity> outputs = new ArrayList<>();
HookContext hiveContext = getHiveContext();
Set<String> processedNames = new HashSet<>();
ret = new AtlasEntitiesWithExtInfo();
if (hiveContext.getInputs() != null) {
for (ReadEntity input : hiveContext.getInputs()) {
String qualifiedName = getQualifiedName(input);
if (qualifiedName == null || !processedNames.add(qualifiedName)) {
continue;
}
AtlasEntity entity = getInputOutputEntity(input, ret);
if (entity != null) {
inputs.add(entity);
}
}
}
if (hiveContext.getOutputs() != null) {
for (WriteEntity output : hiveContext.getOutputs()) {
String qualifiedName = getQualifiedName(output);
if (qualifiedName == null || !processedNames.add(qualifiedName)) {
continue;
}
AtlasEntity entity = getInputOutputEntity(output, ret);
if (entity != null) {
outputs.add(entity);
}
}
}
if (!inputs.isEmpty() || !outputs.isEmpty()) {
AtlasEntity process = getHiveProcessEntity(inputs, outputs);
ret.addEntity(process);
processColumnLineage(process, ret);
addProcessedEntities(ret);
} else {
ret = null;
}
}
return ret;
}
private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) {
LineageInfo lineageInfo = getHiveContext().getLinfo();
if (lineageInfo == null || CollectionUtils.isEmpty(lineageInfo.entrySet())) {
return;
}
for (Map.Entry<DependencyKey, Dependency> entry : lineageInfo.entrySet()) {
String outputColName = getQualifiedName(entry.getKey());
AtlasEntity outputColumn = context.getEntity(outputColName);
if (outputColumn == null) {
LOG.warn("column-lineage: non-existing output-column {}", outputColName);
continue;
}
List<AtlasEntity> inputColumns = new ArrayList<>();
for (BaseColumnInfo baseColumn : entry.getValue().getBaseCols()) {
String inputColName = getQualifiedName(baseColumn);
AtlasEntity inputColumn = context.getEntity(inputColName);
if (inputColumn == null) {
LOG.warn("column-lineage: non-existing input-column {} for output-column={}", inputColName, outputColName);
continue;
}
inputColumns.add(inputColumn);
}
if (inputColumns.isEmpty()) {
continue;
}
AtlasEntity columnLineageProcess = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE);
columnLineageProcess.setAttribute(ATTRIBUTE_NAME, hiveProcess.getAttribute(ATTRIBUTE_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
columnLineageProcess.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
columnLineageProcess.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(inputColumns));
columnLineageProcess.setAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(getObjectId(outputColumn)));
columnLineageProcess.setAttribute(ATTRIBUTE_QUERY, getObjectId(hiveProcess));
columnLineageProcess.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, entry.getValue().getType());
columnLineageProcess.setAttribute(ATTRIBUTE_EXPRESSION, entry.getValue().getExpr());
entities.addEntity(columnLineageProcess);
}
}
private boolean skipProcess() {
Set<ReadEntity> inputs = getHiveContext().getInputs();
Set<WriteEntity> outputs = getHiveContext().getOutputs();
boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs);
if (!ret) {
if (getContext().getHiveOperation() == HiveOperation.QUERY) {
// Select query has only one output
if (outputs.size() == 1) {
WriteEntity output = outputs.iterator().next();
if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) {
if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) {
ret = true;
}
}
}
}
}
return ret;
}
}
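For reference, a sketch of the hive_column_lineage entity shape built in processColumnLineage() above. Attribute names are written as plain strings here; the real names come from the BaseHiveEvent constants (not shown in this diff), so treat them as assumptions.

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.Collections;

class ColumnLineageSketch {
    static AtlasEntity sample(AtlasObjectId processId, AtlasObjectId inputColId, AtlasObjectId outputColId) {
        AtlasEntity lineage = new AtlasEntity("hive_column_lineage");
        lineage.setAttribute("name", "default.t1@cl1:total");           // process-name:output-column (hypothetical)
        lineage.setAttribute("qualifiedName", "default.t1@cl1:total");
        lineage.setAttribute("inputs", Collections.singletonList(inputColId));
        lineage.setAttribute("outputs", Collections.singletonList(outputColId));
        lineage.setAttribute("query", processId);                       // back-reference to the hive_process
        return lineage;
    }
}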
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.metadata.Table;
import java.util.Collections;
import java.util.List;
public class CreateTable extends BaseHiveEvent {
private final boolean skipTempTables;
public CreateTable(AtlasHiveHookContext context, boolean skipTempTables) {
super(context);
this.skipTempTables = skipTempTables;
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
AtlasEntitiesWithExtInfo entities = getEntities();
HookNotification notification = new EntityCreateRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public AtlasEntitiesWithExtInfo getEntities() throws Exception {
AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
Database db = null;
Table table = null;
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.TABLE) {
table = entity.getTable();
if (table != null) {
db = getHive().getDatabase(table.getDbName());
table = getHive().getTable(table.getDbName(), table.getTableName());
if (table != null) {
// For external tables, create the table entity even when the temp-table skip flag is on, since the HDFS path is needed for lineage.
if (skipTempTables && table.isTemporary() && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
table = null;
} else {
break;
}
}
}
}
}
if (table != null) {
AtlasEntity tblEntity = toTableEntity(table, ret);
if (TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
AtlasEntity hdfsPathEntity = getHDFSPathEntity(table.getDataLocation());
AtlasEntity processEntity = getHiveProcessEntity(Collections.singletonList(hdfsPathEntity), Collections.singletonList(tblEntity));
ret.addEntity(processEntity);
ret.addReferredEntity(hdfsPathEntity);
}
}
addProcessedEntities(ret);
return ret;
}
}
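A sketch of the external-table lineage wiring above: an hdfs_path input entity feeding a hive_process whose output is the new table. Type and attribute names here are illustrative; getHDFSPathEntity/getHiveProcessEntity in BaseHiveEvent do the real work.

import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.Collections;

class ExternalTableLineageSketch {
    static AtlasEntity sampleProcess(AtlasObjectId hdfsPathId, AtlasObjectId tableId) {
        AtlasEntity process = new AtlasEntity("hive_process");
        process.setAttribute("qualifiedName", "default.t1@cl1:1522000000000"); // hypothetical name:timestamp
        process.setAttribute("inputs",  Collections.singletonList(hdfsPathId));
        process.setAttribute("outputs", Collections.singletonList(tableId));
        return process;
    }
}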
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2;
import org.apache.hadoop.hive.ql.hooks.Entity;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class DropDatabase extends BaseHiveEvent {
public DropDatabase(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
List<AtlasObjectId> entities = getEntities();
HookNotification notification = new EntityDeleteRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public List<AtlasObjectId> getEntities() throws Exception {
List<AtlasObjectId> ret = new ArrayList<>();
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.DATABASE) {
String dbQName = getQualifiedName(entity.getDatabase());
AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_DB, ATTRIBUTE_QUALIFIED_NAME, dbQName);
context.removeFromKnownDatabase(dbQName);
ret.add(dbId);
} else if (entity.getType() == Entity.Type.TABLE) {
String tblQName = getQualifiedName(entity.getTable());
AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName);
context.removeFromKnownTable(tblQName);
ret.add(dbId);
}
}
return ret;
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2;
import org.apache.hadoop.hive.ql.hooks.Entity;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class DropTable extends BaseHiveEvent {
public DropTable(AtlasHiveHookContext context) {
super(context);
}
@Override
public List<HookNotification> getNotificationMessages() throws Exception {
List<AtlasObjectId> entities = getEntities();
HookNotification notification = new EntityDeleteRequestV2(getUserName(), entities);
List<HookNotification> ret = Collections.singletonList(notification);
return ret;
}
public List<AtlasObjectId> getEntities() throws Exception {
List<AtlasObjectId> ret = new ArrayList<>();
for (Entity entity : getHiveContext().getOutputs()) {
if (entity.getType() == Entity.Type.TABLE) {
String tblQName = getQualifiedName(entity.getTable());
AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName);
context.removeFromKnownTable(tblQName);
ret.add(dbId);
}
}
return ret;
}
}
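Both drop events above express deletes as AtlasObjectId references (type plus unique attribute) rather than full entities; a minimal sketch with hypothetical values:

import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.notification.HookNotification;
import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2;
import java.util.Collections;

class DeleteRequestSketch {
    static HookNotification sample(String user) {
        AtlasObjectId tableId = new AtlasObjectId("hive_table", "qualifiedName", "default.t1@cl1");
        return new EntityDeleteRequestV2(user, Collections.singletonList(tableId));
    }
}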
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class HiveASTRewriter {
private Context queryContext;
private RewriteContext rwCtx;
private List<ASTRewriter> rewriters = new ArrayList<>();
private static final Logger LOG = LoggerFactory.getLogger(HiveASTRewriter.class);
public HiveASTRewriter(HiveConf conf) throws RewriteException {
try {
queryContext = new Context(conf);
setUpRewriters();
} catch (IOException e) {
throw new RewriteException("Exception while rewriting query : " , e);
}
}
private void setUpRewriters() throws RewriteException {
ASTRewriter rewriter = new LiteralRewriter();
rewriters.add(rewriter);
}
public String rewrite(String sourceQry) throws RewriteException {
String result = sourceQry;
ASTNode tree = null;
try {
ParseDriver pd = new ParseDriver();
tree = pd.parse(sourceQry, queryContext, true);
tree = ParseUtils.findRootNonNullToken(tree);
this.rwCtx = new RewriteContext(sourceQry, tree, queryContext.getTokenRewriteStream());
rewrite(tree);
result = toSQL();
} catch (ParseException e) {
LOG.error("Could not parse the query {} ", sourceQry, e);
throw new RewriteException("Could not parse query : " , e);
}
return result;
}
private void rewrite(ASTNode origin) throws RewriteException {
ASTNode node = origin;
if (node != null) {
for(ASTRewriter rewriter : rewriters) {
rewriter.rewrite(rwCtx, node);
}
if (node.getChildren() != null) {
for (int i = 0; i < node.getChildren().size(); i++) {
rewrite((ASTNode) node.getChild(i));
}
}
}
}
public String toSQL() {
return rwCtx.getTokenRewriteStream().toString();
}
public String printAST() {
return rwCtx.getOriginNode().dump();
}
}
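A hypothetical driver for the rewriter above (requires Hive on the classpath and a usable HiveConf); the expected output follows from the LiteralRewriter shown next:

import org.apache.atlas.hive.rewrite.HiveASTRewriter;
import org.apache.hadoop.hive.conf.HiveConf;

class RewriterUsageSketch {
    public static void main(String[] args) throws Exception {
        HiveASTRewriter rewriter = new HiveASTRewriter(new HiveConf());
        String normalized = rewriter.rewrite("select * from t where id = 10 and name = 'a'");
        System.out.println(normalized);
        // expected: select * from t where id = NUMBER_LITERAL and name = 'STRING_LITERAL'
    }
}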
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import java.util.HashMap;
import java.util.Map;
public class LiteralRewriter implements ASTRewriter {
public static Map<Integer, String> LITERAL_TOKENS = new HashMap<Integer, String>() {{
put(HiveParser.Number, "NUMBER_LITERAL");
put(HiveParser.Digit, "DIGIT_LITERAL");
put(HiveParser.HexDigit, "HEX_LITERAL");
put(HiveParser.Exponent, "EXPONENT_LITERAL");
put(HiveParser.StringLiteral, "'STRING_LITERAL'");
put(HiveParser.BigintLiteral, "BIGINT_LITERAL");
put(HiveParser.SmallintLiteral, "SMALLINT_LITERAL");
put(HiveParser.TinyintLiteral, "TINYINT_LITERAL");
put(HiveParser.DecimalLiteral, "DECIMAL_LITERAL");
put(HiveParser.ByteLengthLiteral, "BYTE_LENGTH_LITERAL");
put(HiveParser.TOK_STRINGLITERALSEQUENCE, "'STRING_LITERAL_SEQ'");
put(HiveParser.TOK_CHARSETLITERAL, "'CHARSET_LITERAL'");
put(HiveParser.KW_TRUE, "BOOLEAN_LITERAL");
put(HiveParser.KW_FALSE, "BOOLEAN_LITERAL");
}};
@Override
public void rewrite(RewriteContext ctx, final ASTNode node) throws RewriteException {
try {
processLiterals(ctx, node);
} catch(Exception e) {
throw new RewriteException("Could not normalize query", e);
}
}
private void processLiterals(final RewriteContext ctx, final ASTNode node) {
// replace the node if it is a literal token
if (isLiteral(node)) {
replaceLiteral(ctx, node);
}
}
private boolean isLiteral(ASTNode node) {
    return LITERAL_TOKENS.containsKey(node.getType());
}
void replaceLiteral(RewriteContext ctx, ASTNode valueNode) {
// replace the literal token(s) in the stream with a normalized placeholder
String literalVal = LITERAL_TOKENS.get(valueNode.getType());
ctx.getTokenRewriteStream().replace(valueNode.getTokenStartIndex(),
valueNode.getTokenStopIndex(), literalVal);
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.rewrite;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.hive.ql.parse.ASTNode;
public class RewriteContext {
private String origQuery;
private TokenRewriteStream rewriteStream;
private ASTNode origin;
RewriteContext(String origQuery, ASTNode origin, TokenRewriteStream rewriteStream) {
    this.origQuery = origQuery;
    this.origin = origin;
    this.rewriteStream = rewriteStream;
}
public TokenRewriteStream getTokenRewriteStream() {
return rewriteStream;
}
public ASTNode getOriginNode() {
return origin;
}
public String getOriginalQuery() {
return origQuery;
}
}
......@@ -18,7 +18,6 @@
package org.apache.atlas.hive.bridge;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
......@@ -33,6 +32,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_COLUMNS;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
public class ColumnLineageUtils {
public static final Logger LOG = LoggerFactory.getLogger(ColumnLineageUtils.class);
public static class HiveColumnLineageInfo {
......@@ -127,10 +130,10 @@ public class ColumnLineageUtils {
static void populateColumnReferenceableMap(Map<String, Referenceable> m,
Referenceable r) {
if (r.getTypeName().equals(HiveDataTypes.HIVE_TABLE.getName())) {
String qName = (String) r.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
String qName = (String) r.get(ATTRIBUTE_QUALIFIED_NAME);
String[] qNameComps = extractComponents(qName);
for (Referenceable col : (List<Referenceable>) r.get(HiveMetaStoreBridge.COLUMNS)) {
String cName = (String) col.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME);
for (Referenceable col : (List<Referenceable>) r.get(ATTRIBUTE_COLUMNS)) {
String cName = (String) col.get(ATTRIBUTE_QUALIFIED_NAME);
String[] colQNameComps = extractComponents(cName);
String colQName = colQNameComps[0] + "." + colQNameComps[1] + "." + colQNameComps[2];
m.put(colQName, col);
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.hive.bridge;
import org.apache.atlas.hive.hook.HiveHook;
import org.apache.atlas.hive.rewrite.HiveASTRewriter;
import org.apache.atlas.hive.rewrite.RewriteException;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@Test(enabled = false)
public class HiveLiteralRewriterTest {
private HiveConf conf;
@BeforeClass(enabled = false)
public void setup() {
conf = new HiveConf();
conf.addResource("/hive-site.xml");
SessionState ss = new SessionState(conf, "testuser");
SessionState.start(ss);
conf.set("hive.lock.manager", "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
}
@Test(enabled=false)
public void testLiteralRewrite() throws RewriteException {
HiveHook.HiveEventContext ctx = new HiveHook.HiveEventContext();
ctx.setQueryStr("insert into table testTable partition(dt='2014-01-01') select * from test1 where dt = '2014-01-01'" +
" and intColumn = 10" +
" and decimalColumn = 1.10" +
" and charColumn = 'a'" +
" and hexColumn = unhex('\\0xFF')" +
" and expColumn = cast('-1.5e2' as int)" +
" and boolCol = true");
HiveASTRewriter queryRewriter = new HiveASTRewriter(conf);
String result = queryRewriter.rewrite(ctx.getQueryStr());
System.out.println("normlized sql : " + result);
final String normalizedSQL = "insert into table testTable partition(dt='STRING_LITERAL') " +
"select * from test1 where dt = 'STRING_LITERAL' " +
"and intColumn = NUMBER_LITERAL " +
"and decimalColumn = NUMBER_LITERAL and " +
"charColumn = 'STRING_LITERAL' and " +
"hexColumn = unhex('STRING_LITERAL') and " +
"expColumn = cast('STRING_LITERAL' as int) and " +
"boolCol = BOOLEAN_LITERAL";
Assert.assertEquals(result, normalizedSQL);
}
}
......@@ -21,8 +21,8 @@ package org.apache.atlas.hive.bridge;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.hive.HiveITBase;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.v1.model.instance.Referenceable;
import org.apache.atlas.v1.model.instance.Id;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.testng.annotations.Test;
import java.util.List;
......@@ -34,34 +34,36 @@ public class HiveMetastoreBridgeIT extends HiveITBase {
@Test
public void testCreateTableAndImport() throws Exception {
String tableName = tableName();
String pFile = createTestDFSPath("parentPath");
final String query = String.format("create EXTERNAL table %s(id string, cnt int) location '%s'", tableName, pFile);
String query = String.format("create EXTERNAL table %s(id string, cnt int) location '%s'", tableName, pFile);
runCommand(query);
String dbId = assertDatabaseIsRegistered(DEFAULT_DB);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
//verify lineage is created
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
Referenceable processReference = atlasClient.getEntity(processId);
validateHDFSPaths(processReference, INPUTS, pFile);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
validateHDFSPaths(processEntity, INPUTS, pFile);
List<AtlasObjectId> outputs = toAtlasObjectIdList(processEntity.getAttribute(OUTPUTS));
List<Id> outputs = (List<Id>) processReference.get(OUTPUTS);
assertEquals(outputs.size(), 1);
assertEquals(outputs.get(0).getId(), tableId);
assertEquals(outputs.get(0).getGuid(), tableId);
int tableCount = atlasClient.listEntities(HiveDataTypes.HIVE_TABLE.getName()).size();
//Now import using the import tool - should be a no-op. This also tests update, since the table already exists
hiveMetaStoreBridge.importTable(atlasClient.getEntity(dbId), DEFAULT_DB, tableName, true);
AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity();
hiveMetaStoreBridge.importTable(dbEntity, DEFAULT_DB, tableName, true);
String tableId2 = assertTableIsRegistered(DEFAULT_DB, tableName);
assertEquals(tableId2, tableId);
String processId2 = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
String processId2 = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
assertEquals(processId2, processId);
//assert that table is de-duped and no new entity is created
......@@ -73,17 +75,22 @@ public class HiveMetastoreBridgeIT extends HiveITBase {
public void testImportCreatedTable() throws Exception {
String tableName = tableName();
String pFile = createTestDFSPath("parentPath");
runCommand(driverWithoutContext, String.format("create EXTERNAL table %s(id string) location '%s'", tableName, pFile));
String dbId = assertDatabaseIsRegistered(DEFAULT_DB);
hiveMetaStoreBridge.importTable(atlasClient.getEntity(dbId), DEFAULT_DB, tableName, true);
AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity();
hiveMetaStoreBridge.importTable(dbEntity, DEFAULT_DB, tableName, true);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(),
AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
List<Id> outputs = (List<Id>) atlasClient.getEntity(processId).get(OUTPUTS);
String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
List<AtlasObjectId> outputs = toAtlasObjectIdList(processEntity.getAttribute(OUTPUTS));
assertEquals(outputs.size(), 1);
assertEquals(outputs.get(0).getId(), tableId);
assertEquals(outputs.get(0).getGuid(), tableId);
}
}
This source diff could not be displayed because it is too large.
......@@ -68,6 +68,7 @@ public class SqoopHook extends SqoopJobDataPublisher {
public static final String CMD_LINE_OPTS = "commandlineOpts";
public static final String INPUTS = "inputs";
public static final String OUTPUTS = "outputs";
public static final String ATTRIBUTE_DB = "db";
static {
org.apache.hadoop.conf.Configuration.addDefaultResource("sqoop-site.xml");
......@@ -114,7 +115,7 @@ public class SqoopHook extends SqoopJobDataPublisher {
entHiveTable.setAttribute(AtlasClient.NAME, tableName.toLowerCase());
entHiveTable.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName);
entHiveTable.setAttribute(HiveMetaStoreBridge.DB, AtlasTypeUtil.getAtlasObjectId(entHiveDb));
entHiveTable.setAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasObjectId(entHiveDb));
return entHiveTable;
}
......
......@@ -64,6 +64,7 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
public static final String ANONYMOUS_OWNER = "anonymous";
public static final String HBASE_NAMESPACE_DEFAULT = "default";
public static final String ATTRIBUTE_DB = "db";
private final HdfsNameServiceResolver hdfsNameServiceResolver = HdfsNameServiceResolver.getInstance();
......@@ -271,7 +272,7 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
final String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(clusterName,
databaseName, hiveTableName);
dataSetReferenceable.set(AtlasClient.NAME, hiveTableName);
dataSetReferenceable.set(HiveMetaStoreBridge.DB, dbReferenceable);
dataSetReferenceable.set(ATTRIBUTE_DB, dbReferenceable);
dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
break;
......