Commit e72aa107 by Nikhil Bonte Committed by nixonrodrigues

ATLAS-3871 Add unit tests to path extractor utility for s3, s3 v2, adls path entities

parent f932e52c
......@@ -22,6 +22,7 @@ import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.apache.hadoop.fs.Path;
......@@ -53,56 +54,68 @@ public class AtlasPathExtractorUtilTest {
// Atlas type name the Ozone tests expect from getTypeName() on the returned key entity.
private static final String OZONE_KEY = "ozone_key";
// URI scheme prefixes ("ofs" and "o3fs") for the two Ozone schemes covered by the tests.
private static final String OZONE_SCHEME = "ofs" + SCHEME_SEPARATOR;
private static final String OZONE_3_SCHEME = "o3fs" + SCHEME_SEPARATOR;
// Sample Ozone locations: the same bucket/volume/key written under each scheme.
private static final String OZONE_PATH = OZONE_SCHEME + "bucket1.volume1.ozone1/files/file.txt";
private static final String OZONE_3_PATH = OZONE_3_SCHEME + "bucket1.volume1.ozone1/files/file.txt";
// HDFS
private static final String HDFS_SCHEME = "hdfs" + SCHEME_SEPARATOR;
// Sample HDFS location; the last path segment is deliberately mixed-case ("taBlE_306").
// NOTE(review): presumably mixed-case so the lower-casing variant of the HDFS test has something to change - confirm.
private static final String HDFS_PATH = HDFS_SCHEME + "host_name:8020/warehouse/tablespace/external/hive/taBlE_306";
@Test
public void testGetPathEntityOzone3Path() {
PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE);
Path path = new Path(OZONE_3_PATH);
AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
AtlasEntity entity = entityWithExtInfo.getEntity();
assertNotNull(entity);
assertEquals(entity.getTypeName(), OZONE_KEY);
verifyOzoneKeyEntity(OZONE_3_PATH, entity);
assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);
verifyOzoneEntities(OZONE_3_SCHEME, OZONE_3_PATH, extractorContext.getKnownEntities());
assertEquals(extractorContext.getKnownEntities().size(), 3);
verifyOzoneEntities(OZONE_3_SCHEME, OZONE_3_PATH, extractorContext.getKnownEntities());
// ADLS Gen2
// Atlas type names that the ABFS verifiers match against AtlasEntity.getTypeName().
private static final String ADLS_GEN2_ACCOUNT = "adls_gen2_account";
private static final String ADLS_GEN2_CONTAINER = "adls_gen2_container";
private static final String ADLS_GEN2_DIRECTORY = "adls_gen2_directory";
// URI scheme prefixes for the "abfs" and "abfss" schemes.
private static final String ABFS_SCHEME = "abfs" + SCHEME_SEPARATOR;
private static final String ABFSS_SCHEME = "abfss" + SCHEME_SEPARATOR;
// Sample ADLS Gen2 locations: the same container@account/path written under each scheme.
private static final String ABFS_PATH = ABFS_SCHEME + "data@razrangersan.dfs.core.windows.net/tmp/cdp-demo/sample.csv";
private static final String ABFSS_PATH = ABFSS_SCHEME + "data@razrangersan.dfs.core.windows.net/tmp/cdp-demo/sample.csv";
// AWS S3
// Passed as the second PathExtractorContext constructor argument by the S3 V2 tests.
private static final String AWS_S3_ATLAS_MODEL_VERSION_V2 = "V2";
// Atlas type names that the S3 verifiers match against AtlasEntity.getTypeName().
private static final String AWS_S3_BUCKET = "aws_s3_bucket";
private static final String AWS_S3_PSEUDO_DIR = "aws_s3_pseudo_dir";
private static final String AWS_S3_V2_BUCKET = "aws_s3_v2_bucket";
// Note: the constant is named *_PSEUDO_DIR but its value is "aws_s3_v2_directory".
private static final String AWS_S3_V2_PSEUDO_DIR = "aws_s3_v2_directory";
// URI scheme prefixes for the "s3" and "s3a" schemes.
private static final String S3_SCHEME = "s3" + SCHEME_SEPARATOR;
private static final String S3A_SCHEME = "s3a" + SCHEME_SEPARATOR;
// Attribute key read with getAttribute() by the S3 pseudo-directory verifiers.
private static final String ATTRIBUTE_OBJECT_PREFIX = "objectPrefix";
// Sample S3 locations: the same bucket/object key written under each scheme.
private static final String S3_PATH = S3_SCHEME + "aws_my_bucket1/1234567890/renders/Irradiance_A.csv";
private static final String S3A_PATH = S3A_SCHEME + "aws_my_bucket1/1234567890/renders/Irradiance_A.csv";
/**
 * Supplies rows of {scheme, location, expected key name} for the Ozone path test.
 * Every location is paired with both Ozone schemes; all rows for the first scheme
 * come before the rows for the second.
 */
@DataProvider(name = "ozonePathProvider")
private Object[][] ozonePathProvider(){
    // Each entry is {location appended to the scheme, key name expected on the entity}.
    final String[][] locationsAndKeys = {
        { "bucket1.volume1.ozone1/files/file.txt", "/files/file.txt" },
        { "bucket1.volume1.ozone1/file21.txt", "/file21.txt" },
        { "bucket1.volume1.ozone1/quarter_one/sales", "/quarter_one/sales" },
        // Trailing slash on the location; the expected key name has none.
        { "bucket1.volume1.ozone1/quarter_one/sales/", "/quarter_one/sales" },
    };
    final String[] schemes = { OZONE_SCHEME, OZONE_3_SCHEME };

    final Object[][] rows = new Object[schemes.length * locationsAndKeys.length][];
    int next = 0;
    for (String scheme : schemes) {
        for (String[] locationAndKey : locationsAndKeys) {
            rows[next++] = new Object[] { scheme, locationAndKey[0], locationAndKey[1] };
        }
    }
    return rows;
}
@Test
public void testGetPathEntityOzonePath() {
@Test(dataProvider = "ozonePathProvider")
public void testGetPathEntityOzone3Path(String scheme, String location, String keyName) {
String ozonePath = scheme + location;
PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE);
Path path = new Path(OZONE_PATH);
Path path = new Path(ozonePath);
AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
AtlasEntity entity = entityWithExtInfo.getEntity();
assertNotNull(entity);
assertEquals(entity.getTypeName(), OZONE_KEY);
verifyOzoneKeyEntity(OZONE_PATH, entity);
verifyOzoneKeyEntity(ozonePath, keyName, entity);
assertEquals(entityWithExtInfo.getReferredEntities().size(), 2);
verifyOzoneEntities(OZONE_SCHEME, OZONE_PATH, extractorContext.getKnownEntities());
verifyOzoneEntities(scheme, ozonePath, keyName, entityWithExtInfo.getReferredEntities());
assertEquals(extractorContext.getKnownEntities().size(), 3);
verifyOzoneEntities(OZONE_SCHEME, OZONE_PATH, extractorContext.getKnownEntities());
verifyOzoneEntities(scheme, ozonePath, keyName, extractorContext.getKnownEntities());
}
@Test
public void testGetPathEntityHdfsPath() {
Map<String, AtlasEntity> knownEntities = new HashMap<>();
AtlasEntityWithExtInfo extInfo = new AtlasEntityWithExtInfo();
PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE);
Path path = new Path(HDFS_PATH);
......@@ -113,7 +126,7 @@ public class AtlasPathExtractorUtilTest {
assertEquals(entity.getTypeName(), HDFS_PATH_TYPE);
verifyHDFSEntity(entity, false);
assertNull(extInfo.getReferredEntities());
assertNull(entityWithExtInfo.getReferredEntities());
assertEquals(extractorContext.getKnownEntities().size(), 1);
extractorContext.getKnownEntities().values().forEach(x -> verifyHDFSEntity(x, false));
}
......@@ -135,11 +148,107 @@ public class AtlasPathExtractorUtilTest {
extractorContext.getKnownEntities().values().forEach(x -> verifyHDFSEntity(x, true));
}
private void verifyOzoneEntities(String scheme, String path, Map<String, AtlasEntity> knownEntities) {
/**
 * Extracts the path entity for an {@code abfs} location and checks the returned
 * directory entity, the number of referred entities, and the context's known entities.
 */
@Test
public void testGetPathEntityABFSPath() {
    final PathExtractorContext context = new PathExtractorContext(METADATA_NAMESPACE);
    final AtlasEntityWithExtInfo extracted = AtlasPathExtractorUtil.getPathEntity(new Path(ABFS_PATH), context);
    final AtlasEntity directory = extracted.getEntity();

    assertNotNull(directory);
    assertEquals(directory.getTypeName(), ADLS_GEN2_DIRECTORY);
    assertEquals(extracted.getReferredEntities().size(), 2);

    verifyABFSAdlsGen2Dir(ABFS_SCHEME, ABFS_PATH, directory);
    verifyABFSKnownEntities(ABFS_SCHEME, ABFS_PATH, context.getKnownEntities());
}
/**
 * Extracts the path entity for an {@code abfss} location and checks the returned
 * directory entity, the number of referred entities, and the context's known entities.
 */
@Test
public void testGetPathEntityABFSSPath() {
    final PathExtractorContext context = new PathExtractorContext(METADATA_NAMESPACE);
    final AtlasEntityWithExtInfo extracted = AtlasPathExtractorUtil.getPathEntity(new Path(ABFSS_PATH), context);
    final AtlasEntity directory = extracted.getEntity();

    assertNotNull(directory);
    assertEquals(directory.getTypeName(), ADLS_GEN2_DIRECTORY);
    assertEquals(extracted.getReferredEntities().size(), 2);

    verifyABFSAdlsGen2Dir(ABFSS_SCHEME, ABFSS_PATH, directory);
    verifyABFSKnownEntities(ABFSS_SCHEME, ABFSS_PATH, context.getKnownEntities());
}
/**
 * Extracts the path entity for an {@code s3} location using a context created with
 * the V2 S3 model version, then checks the returned entity's type, the number of
 * referred entities, and the context's known entities.
 */
@Test
public void testGetPathEntityS3V2Path() {
    PathExtractorContext extractorContext = new PathExtractorContext(METADATA_NAMESPACE, AWS_S3_ATLAS_MODEL_VERSION_V2);
    Path path = new Path(S3_PATH);
    AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, extractorContext);
    AtlasEntity entity = entityWithExtInfo.getEntity();
    assertNotNull(entity);
    assertEquals(entity.getTypeName(), AWS_S3_V2_PSEUDO_DIR);
    assertEquals(entityWithExtInfo.getReferredEntities().size(), 1);
    // The scheme must match S3_PATH: the verifier builds the expected qualified names of
    // the parent directories from it, so a different scheme would compare against the
    // wrong names whenever the leaf-path comparison does not match.
    verifyS3V2PseudoDir(S3_SCHEME, S3_PATH, entity);
    verifyS3V2KnownEntities(S3_SCHEME, S3_PATH, extractorContext.getKnownEntities());
}
/**
 * Extracts the path entity for an {@code s3a} location using a context created with
 * the V2 S3 model version, then checks the returned entity's type, the number of
 * referred entities, and the context's known entities.
 */
@Test
public void testGetPathEntityS3AV2Path() {
    final PathExtractorContext context = new PathExtractorContext(METADATA_NAMESPACE, AWS_S3_ATLAS_MODEL_VERSION_V2);
    final AtlasEntityWithExtInfo extracted = AtlasPathExtractorUtil.getPathEntity(new Path(S3A_PATH), context);
    final AtlasEntity pseudoDir = extracted.getEntity();

    assertNotNull(pseudoDir);
    assertEquals(pseudoDir.getTypeName(), AWS_S3_V2_PSEUDO_DIR);
    assertEquals(extracted.getReferredEntities().size(), 1);

    verifyS3V2PseudoDir(S3A_SCHEME, S3A_PATH, pseudoDir);
    verifyS3V2KnownEntities(S3A_SCHEME, S3A_PATH, context.getKnownEntities());
}
/**
 * Extracts the path entity for an {@code s3} location using a context created with
 * only the metadata namespace, then checks the returned pseudo-directory entity,
 * the number of referred entities, and the context's known entities.
 */
@Test
public void testGetPathEntityS3Path() {
    final PathExtractorContext context = new PathExtractorContext(METADATA_NAMESPACE);
    final AtlasEntityWithExtInfo extracted = AtlasPathExtractorUtil.getPathEntity(new Path(S3_PATH), context);
    final AtlasEntity pseudoDir = extracted.getEntity();

    assertNotNull(pseudoDir);
    assertEquals(pseudoDir.getTypeName(), AWS_S3_PSEUDO_DIR);
    assertEquals(extracted.getReferredEntities().size(), 1);

    verifyS3PseudoDir(S3_PATH, pseudoDir);
    verifyS3KnownEntities(S3_SCHEME, S3_PATH, context.getKnownEntities());
}
/**
 * Extracts the path entity for an {@code s3a} location using a context created with
 * only the metadata namespace, then checks the returned pseudo-directory entity,
 * the number of referred entities, and the context's known entities.
 */
@Test
public void testGetPathEntityS3APath() {
    final PathExtractorContext context = new PathExtractorContext(METADATA_NAMESPACE);
    final AtlasEntityWithExtInfo extracted = AtlasPathExtractorUtil.getPathEntity(new Path(S3A_PATH), context);
    final AtlasEntity pseudoDir = extracted.getEntity();

    assertNotNull(pseudoDir);
    assertEquals(pseudoDir.getTypeName(), AWS_S3_PSEUDO_DIR);
    assertEquals(extracted.getReferredEntities().size(), 1);

    verifyS3PseudoDir(S3A_PATH, pseudoDir);
    verifyS3KnownEntities(S3A_SCHEME, S3A_PATH, context.getKnownEntities());
}
private void verifyOzoneEntities(String scheme, String path, String keyName, Map<String, AtlasEntity> knownEntities) {
for (AtlasEntity knownEntity : knownEntities.values()) {
switch (knownEntity.getTypeName()){
case OZONE_KEY:
verifyOzoneKeyEntity(path, knownEntity);
verifyOzoneKeyEntity(path, keyName, knownEntity);
break;
case OZONE_VOLUME:
......@@ -155,9 +264,11 @@ public class AtlasPathExtractorUtilTest {
}
}
private void verifyOzoneKeyEntity(String path, AtlasEntity entity) {
private void verifyOzoneKeyEntity(String path, String name, AtlasEntity entity) {
//remove trailing "/" if present from path
path = (path.charAt(path.length()-1) == '/') ? path.substring(0, path.length()-1) : path;
assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), path + QNAME_METADATA_NAMESPACE);
assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "/files/file.txt");
assertEquals(entity.getAttribute(ATTRIBUTE_NAME), name);
}
private void verifyHDFSEntity(AtlasEntity entity, boolean toLowerCase) {
......@@ -173,4 +284,112 @@ public class AtlasPathExtractorUtilTest {
assertEquals(entity.getAttribute(ATTRIBUTE_CLUSTER_NAME), METADATA_NAMESPACE);
}
}
/**
 * Verifies the name of an ADLS Gen2 directory entity according to which level of the
 * sample path its qualified name denotes: the leaf "sample.csv", its parent "cdp-demo",
 * or otherwise "tmp". The first two levels are recognised by a case-insensitive
 * comparison; for the last level the qualified name itself is asserted exactly.
 *
 * @param abfsScheme scheme prefix used to build the expected qualified names
 * @param path       not read by this method
 * @param entity     directory entity to verify
 */
private void verifyABFSAdlsGen2Dir(String abfsScheme, String path, AtlasEntity entity){
    final String leafQName = abfsScheme + "data@razrangersan/tmp/cdp-demo/sample.csv/" + QNAME_METADATA_NAMESPACE;
    final String middleQName = abfsScheme + "data@razrangersan/tmp/cdp-demo/" + QNAME_METADATA_NAMESPACE;
    final String topQName = abfsScheme + "data@razrangersan/tmp/" + QNAME_METADATA_NAMESPACE;

    final String actualQName = (String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);

    if (leafQName.equalsIgnoreCase(actualQName)) {
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "sample.csv");
    } else if (middleQName.equalsIgnoreCase(actualQName)) {
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "cdp-demo");
    } else {
        // Anything else must be the top-level directory; this comparison is exact.
        assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), topQName);
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "tmp");
    }
}
/**
 * Verifies the known entities recorded for an ABFS path: five entities in total, of
 * which exactly three are ADLS Gen2 directories. Directory, container and account
 * entities are each checked; entities of any other type are not inspected.
 *
 * @param scheme        scheme prefix used to build the expected qualified names
 * @param path          forwarded to the directory verifier
 * @param knownEntities entities to verify
 */
private void verifyABFSKnownEntities(String scheme, String path, Map<String, AtlasEntity> knownEntities) {
    assertEquals(knownEntities.size(), 5);

    int directoriesSeen = 0;
    for (AtlasEntity candidate : knownEntities.values()) {
        // A null type name fails here with a NullPointerException rather than being skipped.
        final String typeName = candidate.getTypeName();

        if (typeName.equals(ADLS_GEN2_DIRECTORY)) {
            verifyABFSAdlsGen2Dir(scheme, path, candidate);
            directoriesSeen++;
        } else if (typeName.equals(ADLS_GEN2_CONTAINER)) {
            assertEquals(candidate.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + "data@razrangersan" + QNAME_METADATA_NAMESPACE);
            assertEquals(candidate.getAttribute(ATTRIBUTE_NAME), "data");
        } else if (typeName.equals(ADLS_GEN2_ACCOUNT)) {
            assertEquals(candidate.getAttribute(ATTRIBUTE_QUALIFIED_NAME), scheme + "razrangersan" + QNAME_METADATA_NAMESPACE);
            assertEquals(candidate.getAttribute(ATTRIBUTE_NAME), "razrangersan");
        }
    }

    assertEquals(directoriesSeen, 3);
}
/**
 * Verifies the name and object prefix of an S3 V2 directory entity according to which
 * level of the sample path its qualified name denotes: the full path, the first-level
 * directory "1234567890", or otherwise "renders". The first two levels are recognised
 * by a case-insensitive comparison; for the last level the qualified name itself is
 * asserted exactly.
 *
 * @param s3Scheme scheme prefix used to build the expected parent-directory qualified names
 * @param path     full sample path; with "/" and the namespace suffix appended it is the
 *                 expected qualified name of the leaf entity
 * @param entity   directory entity to verify
 */
private void verifyS3V2PseudoDir(String s3Scheme, String path, AtlasEntity entity){
    final String leafQName = path + "/" + QNAME_METADATA_NAMESPACE;
    final String firstLevelQName = s3Scheme + "aws_my_bucket1/1234567890/" + QNAME_METADATA_NAMESPACE;
    final String secondLevelQName = s3Scheme + "aws_my_bucket1/1234567890/renders/" + QNAME_METADATA_NAMESPACE;

    final String actualQName = (String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);

    if (leafQName.equalsIgnoreCase(actualQName)) {
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "Irradiance_A.csv");
        assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), "/1234567890/renders/Irradiance_A.csv/");
    } else if (firstLevelQName.equalsIgnoreCase(actualQName)) {
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "1234567890");
        assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), "/1234567890/");
    } else {
        // Anything else must be the "renders" directory; this comparison is exact.
        assertEquals(entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), secondLevelQName);
        assertEquals(entity.getAttribute(ATTRIBUTE_NAME), "renders");
        assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), "/1234567890/renders/");
    }
}
/**
 * Verifies the known entities recorded for an S3 path under the V2 model: four entities
 * in total, of which exactly three are V2 directory entities. Directory and bucket
 * entities are each checked; entities of any other type are not inspected.
 *
 * @param scheme        scheme prefix used to build the expected qualified names
 * @param path          full sample path, forwarded to the directory verifier
 * @param knownEntities entities to verify
 */
private void verifyS3V2KnownEntities(String scheme, String path, Map<String, AtlasEntity> knownEntities) {
    assertEquals(knownEntities.size(), 4);

    int directoriesSeen = 0;
    for (AtlasEntity candidate : knownEntities.values()) {
        // A null type name fails here with a NullPointerException rather than being skipped.
        final String typeName = candidate.getTypeName();

        if (typeName.equals(AWS_S3_V2_PSEUDO_DIR)) {
            verifyS3V2PseudoDir(scheme, path, candidate);
            directoriesSeen++;
        } else if (typeName.equals(AWS_S3_V2_BUCKET)) {
            verifyS3BucketEntity(scheme, candidate);
        }
    }

    assertEquals(directoriesSeen, 3);
}
/**
 * Verifies an S3 pseudo-directory entity: its qualified name must be the lower-cased
 * path followed by the namespace suffix, and both its name and its object prefix must
 * be the lower-cased object key of the sample path.
 *
 * @param path   full sample path as passed to the extractor
 * @param entity pseudo-directory entity to verify
 */
private void verifyS3PseudoDir(String path, AtlasEntity entity){
    final String expectedObjectKey = "/1234567890/renders/irradiance_a.csv";
    final Object actualQName = entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);

    assertEquals(actualQName, path.toLowerCase() + QNAME_METADATA_NAMESPACE);
    assertEquals(entity.getAttribute(ATTRIBUTE_NAME), expectedObjectKey);
    assertEquals(entity.getAttribute(ATTRIBUTE_OBJECT_PREFIX), expectedObjectKey);
}
/**
 * Verifies the known entities recorded for an S3 path: two entities in total, of which
 * exactly one is a pseudo-directory. Pseudo-directory and bucket entities are each
 * checked; entities of any other type are not inspected.
 *
 * @param scheme        scheme prefix used to build the expected bucket qualified name
 * @param path          full sample path, forwarded to the pseudo-directory verifier
 * @param knownEntities entities to verify
 */
private void verifyS3KnownEntities(String scheme, String path, Map<String, AtlasEntity> knownEntities) {
    assertEquals(knownEntities.size(), 2);

    int pseudoDirsSeen = 0;
    for (AtlasEntity candidate : knownEntities.values()) {
        // A null type name fails here with a NullPointerException rather than being skipped.
        final String typeName = candidate.getTypeName();

        if (typeName.equals(AWS_S3_PSEUDO_DIR)) {
            verifyS3PseudoDir(path, candidate);
            pseudoDirsSeen++;
        } else if (typeName.equals(AWS_S3_BUCKET)) {
            verifyS3BucketEntity(scheme, candidate);
        }
    }

    assertEquals(pseudoDirsSeen, 1);
}
/**
 * Verifies an S3 bucket entity: its name must be the sample bucket name, and its
 * qualified name must be the scheme, the bucket name and the namespace suffix.
 *
 * @param scheme scheme prefix expected at the start of the qualified name
 * @param entity bucket entity to verify
 */
private void verifyS3BucketEntity(String scheme, AtlasEntity entity) {
    final String bucketName = "aws_my_bucket1";
    final Object actualQName = entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME);

    assertEquals(actualQName, scheme + bucketName + QNAME_METADATA_NAMESPACE);
    assertEquals(entity.getAttribute(ATTRIBUTE_NAME), bucketName);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment