Commit 8b50ac0c by Nikhil Bonte Committed by Sarath Subramanian

ATLAS-3836 Add Apache Ozone support in hive hook

parent ce95c629
...@@ -157,6 +157,7 @@ public class AtlasHiveHookContext { ...@@ -157,6 +157,7 @@ public class AtlasHiveHookContext {
public Collection<AtlasEntity> getEntities() { return qNameEntityMap.values(); } public Collection<AtlasEntity> getEntities() { return qNameEntityMap.values(); }
public Map<String, AtlasEntity> getQNameToEntityMap() { return qNameEntityMap; }
public String getMetadataNamespace() { public String getMetadataNamespace() {
return hook.getMetadataNamespace(); return hook.getMetadataNamespace();
...@@ -168,8 +169,8 @@ public class AtlasHiveHookContext { ...@@ -168,8 +169,8 @@ public class AtlasHiveHookContext {
return hook.isConvertHdfsPathToLowerCase(); return hook.isConvertHdfsPathToLowerCase();
} }
public boolean isAwsS3AtlasModelVersionV2() { public String getAwsS3AtlasModelVersion() {
return hook.isAwsS3AtlasModelVersionV2(); return hook.getAwsS3AtlasModelVersion();
} }
public boolean getSkipHiveColumnLineageHive20633() { public boolean getSkipHiveColumnLineageHive20633() {
......
...@@ -77,7 +77,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { ...@@ -77,7 +77,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final int nameCacheDatabaseMaxCount; private static final int nameCacheDatabaseMaxCount;
private static final int nameCacheTableMaxCount; private static final int nameCacheTableMaxCount;
private static final int nameCacheRebuildIntervalSeconds; private static final int nameCacheRebuildIntervalSeconds;
private static final boolean isAwsS3AtlasModelVersionV2; private static final String awsS3AtlasModelVersion;
private static final boolean skipHiveColumnLineageHive20633; private static final boolean skipHiveColumnLineageHive20633;
private static final int skipHiveColumnLineageHive20633InputsThreshold; private static final int skipHiveColumnLineageHive20633InputsThreshold;
...@@ -101,7 +101,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { ...@@ -101,7 +101,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
nameCacheDatabaseMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_DATABASE_COUNT, 10000); nameCacheDatabaseMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_DATABASE_COUNT, 10000);
nameCacheTableMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_TABLE_COUNT, 10000); nameCacheTableMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_TABLE_COUNT, 10000);
nameCacheRebuildIntervalSeconds = atlasProperties.getInt(HOOK_NAME_CACHE_REBUID_INTERVAL_SEC, 60 * 60); // 60 minutes default nameCacheRebuildIntervalSeconds = atlasProperties.getInt(HOOK_NAME_CACHE_REBUID_INTERVAL_SEC, 60 * 60); // 60 minutes default
isAwsS3AtlasModelVersionV2 = StringUtils.equalsIgnoreCase(atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2), HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2); awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2);
skipHiveColumnLineageHive20633 = atlasProperties.getBoolean(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633, false); skipHiveColumnLineageHive20633 = atlasProperties.getBoolean(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633, false);
skipHiveColumnLineageHive20633InputsThreshold = atlasProperties.getInt(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633_INPUTS_THRESHOLD, 15); // skip if avg # of inputs is > 15 skipHiveColumnLineageHive20633InputsThreshold = atlasProperties.getInt(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633_INPUTS_THRESHOLD, 15); // skip if avg # of inputs is > 15
hiveProcessPopulateDeprecatedAttributes = atlasProperties.getBoolean(HOOK_HIVE_PROCESS_POPULATE_DEPRECATED_ATTRIBUTES, false); hiveProcessPopulateDeprecatedAttributes = atlasProperties.getBoolean(HOOK_HIVE_PROCESS_POPULATE_DEPRECATED_ATTRIBUTES, false);
...@@ -257,7 +257,9 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { ...@@ -257,7 +257,9 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
return convertHdfsPathToLowerCase; return convertHdfsPathToLowerCase;
} }
public boolean isAwsS3AtlasModelVersionV2() { return isAwsS3AtlasModelVersionV2; } public String getAwsS3AtlasModelVersion() {
return awsS3AtlasModelVersion;
}
public boolean getSkipHiveColumnLineageHive20633() { public boolean getSkipHiveColumnLineageHive20633() {
return skipHiveColumnLineageHive20633; return skipHiveColumnLineageHive20633;
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.utils;
import org.apache.atlas.model.instance.AtlasEntity;
import java.util.HashMap;
import java.util.Map;
/**
 * Immutable carrier of the state needed while extracting Atlas path entities
 * (HDFS, AWS S3, Ozone, ...): the metadata namespace to qualify entity names with,
 * a cache of entities already created during this extraction, the lower-casing
 * policy for paths, and the S3 model version in use.
 */
public class PathExtractorContext {
    private final String                   metadataNamespace;
    private final Map<String, AtlasEntity> knownEntities;
    private final boolean                  isConvertPathToLowerCase;
    private final String                   awsS3AtlasModelVersion;

    /** Context with an empty entity cache, no lower-casing, and no S3 model version. */
    public PathExtractorContext(String metadataNamespace) {
        this(metadataNamespace, null);
    }

    /** Context with an empty entity cache and no lower-casing. */
    public PathExtractorContext(String metadataNamespace, String awsS3AtlasModelVersion) {
        this(metadataNamespace, false, awsS3AtlasModelVersion);
    }

    /** Context with an empty entity cache. */
    public PathExtractorContext(String metadataNamespace, boolean isConvertPathToLowerCase, String awsS3AtlasModelVersion) {
        this(metadataNamespace, new HashMap<>(), isConvertPathToLowerCase, awsS3AtlasModelVersion);
    }

    /**
     * Fully-specified context.
     *
     * @param metadataNamespace        namespace appended to entity qualified names
     * @param knownEntities            cache of entities keyed by qualified name; shared, not copied
     * @param isConvertPathToLowerCase whether extracted paths are lower-cased
     * @param awsS3AtlasModelVersion   S3 model version identifier; may be null
     */
    public PathExtractorContext(String metadataNamespace, Map<String, AtlasEntity> knownEntities, boolean isConvertPathToLowerCase, String awsS3AtlasModelVersion) {
        this.metadataNamespace        = metadataNamespace;
        this.knownEntities            = knownEntities;
        this.isConvertPathToLowerCase = isConvertPathToLowerCase;
        this.awsS3AtlasModelVersion   = awsS3AtlasModelVersion;
    }

    public String getMetadataNamespace() {
        return metadataNamespace;
    }

    public Map<String, AtlasEntity> getKnownEntities() {
        return knownEntities;
    }

    /** Records an already-created entity under its qualified name. */
    public void putEntity(String qualifiedName, AtlasEntity entity) {
        knownEntities.put(qualifiedName, entity);
    }

    /** Returns the cached entity for the qualified name, or null if not yet created. */
    public AtlasEntity getEntity(String qualifiedName) {
        return knownEntities.get(qualifiedName);
    }

    public boolean isConvertPathToLowerCase() {
        return isConvertPathToLowerCase;
    }

    public String getAwsS3AtlasModelVersion() {
        return awsS3AtlasModelVersion;
    }
}
\ No newline at end of file
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.utils;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.Test;
import org.apache.hadoop.fs.Path;
import java.util.HashMap;
import java.util.Map;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
/**
 * Tests for AtlasPathExtractorUtil: verifies that HDFS and Ozone (ofs/o3fs)
 * paths are converted into the expected Atlas entities with correctly
 * qualified names.
 */
public class AtlasPathExtractorUtilTest {
    private static final Logger LOG = LoggerFactory.getLogger(AtlasPathExtractorUtilTest.class);

    // Common
    private static final String METADATA_NAMESPACE       = "metaspace";
    private static final String QNAME_METADATA_NAMESPACE = '@' + METADATA_NAMESPACE;
    private static final String SCHEME_SEPARATOR         = "://";
    private static final String ATTRIBUTE_NAME           = "name";
    private static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName";

    // HDFS
    private static final String HDFS_PATH_TYPE         = "hdfs_path";
    private static final String ATTRIBUTE_PATH         = "path";
    private static final String ATTRIBUTE_CLUSTER_NAME = "clusterName";

    // Ozone
    private static final String OZONE_VOLUME   = "ozone_volume";
    private static final String OZONE_BUCKET   = "ozone_bucket";
    private static final String OZONE_KEY      = "ozone_key";
    private static final String OZONE_SCHEME   = "ofs" + SCHEME_SEPARATOR;
    private static final String OZONE_3_SCHEME = "o3fs" + SCHEME_SEPARATOR;
    private static final String OZONE_PATH     = OZONE_SCHEME + "bucket1.volume1.ozone1/files/file.txt";
    private static final String OZONE_3_PATH   = OZONE_3_SCHEME + "bucket1.volume1.ozone1/files/file.txt";

    // HDFS
    private static final String HDFS_SCHEME = "hdfs" + SCHEME_SEPARATOR;
    private static final String HDFS_PATH   = HDFS_SCHEME + "host_name:8020/warehouse/tablespace/external/hive/taBlE_306";

    @Test
    public void testGetPathEntityOzone3Path() {
        PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE);
        Path                 path             = new Path(OZONE_3_PATH);

        AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
        AtlasEntity            entity            = entityWithExtInfo.getEntity();

        assertNotNull(entity);
        assertEquals(entity.getTypeName(), OZONE_KEY);
        verifyOzoneKeyEntity(OZONE_3_PATH, entity);

        // the key entity refers to its volume and bucket
        assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);

        // context caches key + volume + bucket
        assertEquals(extractorContext.getKnownEntities().size(), 3);
        verifyOzoneEntities(OZONE_3_SCHEME, OZONE_3_PATH, extractorContext.getKnownEntities());
    }

    @Test
    public void testGetPathEntityOzonePath() {
        PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE);
        Path                 path             = new Path(OZONE_PATH);

        AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
        AtlasEntity            entity            = entityWithExtInfo.getEntity();

        assertNotNull(entity);
        assertEquals(entity.getTypeName(), OZONE_KEY);
        verifyOzoneKeyEntity(OZONE_PATH, entity);

        // the key entity refers to its volume and bucket
        assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);

        // context caches key + volume + bucket
        assertEquals(extractorContext.getKnownEntities().size(), 3);
        verifyOzoneEntities(OZONE_SCHEME, OZONE_PATH, extractorContext.getKnownEntities());
    }

    @Test
    public void testGetPathEntityHdfsPath() {
        PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE);
        Path                 path             = new Path(HDFS_PATH);

        AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
        AtlasEntity            entity            = entityWithExtInfo.getEntity();

        assertNotNull(entity);
        assertEquals(entity.getTypeName(), HDFS_PATH_TYPE);
        verifyHDFSEntity(entity, false);

        // fix: assert on the returned extInfo (original asserted on a fresh,
        // unrelated AtlasEntityWithExtInfo, which is trivially null)
        assertNull(entityWithExtInfo.getReferredEntities());

        assertEquals(extractorContext.getKnownEntities().size(), 1);
        extractorContext.getKnownEntities().values().forEach(x -> verifyHDFSEntity(x, false));
    }

    @Test
    public void testGetPathEntityHdfsPathLowerCase() {
        PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE, true, null);
        Path                 path             = new Path(HDFS_PATH);

        AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
        AtlasEntity            entity            = entityWithExtInfo.getEntity();

        assertNotNull(entity);
        assertEquals(entity.getTypeName(), HDFS_PATH_TYPE);
        verifyHDFSEntity(entity, true);

        assertNull(entityWithExtInfo.getReferredEntities());

        assertEquals(extractorContext.getKnownEntities().size(), 1);
        extractorContext.getKnownEntities().values().forEach(x -> verifyHDFSEntity(x, true));
    }

    /** Verifies every cached Ozone entity (key/volume/bucket) against the expected names. */
    private void verifyOzoneEntities(String scheme, String path, Map<String, AtlasEntity> knownEntities) {
        for (AtlasEntity knownEntity : knownEntities.values()) {
            switch (knownEntity.getTypeName()) {
                case OZONE_KEY:
                    verifyOzoneKeyEntity(path, knownEntity);
                    break;

                case OZONE_VOLUME:
                    assertEquals(knownEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + "volume1" + QNAME_METADATA_NAMESPACE);
                    assertEquals(knownEntity.getAttribute(ATTRIBUTE_NAME), "volume1");
                    break;

                case OZONE_BUCKET:
                    assertEquals(knownEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + "volume1.bucket1" + QNAME_METADATA_NAMESPACE);
                    assertEquals(knownEntity.getAttribute(ATTRIBUTE_NAME), "bucket1");
                    break;
            }
        }
    }

    /** Verifies an ozone_key entity: qualified name is the full path + namespace suffix. */
    private void verifyOzoneKeyEntity(String path, AtlasEntity entity) {
        assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), path + QNAME_METADATA_NAMESPACE);
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "/files/file.txt");
    }

    /** Verifies an hdfs_path entity, with or without lower-casing of the path applied. */
    private void verifyHDFSEntity(AtlasEntity entity, boolean toLowerCase) {
        if (toLowerCase) {
            assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), HDFS_PATH.toLowerCase() + QNAME_METADATA_NAMESPACE);
            assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "/warehouse/tablespace/external/hive/table_306");
            assertEquals(entity.getAttribute(ATTRIBUTE_PATH), HDFS_PATH.toLowerCase());
            assertEquals(entity.getAttribute(ATTRIBUTE_CLUSTER_NAME), METADATA_NAMESPACE);
        } else {
            assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), HDFS_PATH + QNAME_METADATA_NAMESPACE);
            assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "/warehouse/tablespace/external/hive/taBlE_306");
            assertEquals(entity.getAttribute(ATTRIBUTE_PATH), HDFS_PATH);
            assertEquals(entity.getAttribute(ATTRIBUTE_CLUSTER_NAME), METADATA_NAMESPACE);
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment