Commit 160b2874 by ashutoshm Committed by Madhan Neethiraj

ATLAS-1665: export optimization to reduce file-size and export-time

parent 537f6e31
......@@ -196,6 +196,7 @@ public class AtlasEntity extends AtlasStruct implements Serializable {
}
sb.append("AtlasEntity{");
super.toString(sb);
sb.append("guid='").append(guid).append('\'');
sb.append(", status=").append(status);
sb.append(", createdBy='").append(createdBy).append('\'');
......@@ -207,7 +208,6 @@ public class AtlasEntity extends AtlasStruct implements Serializable {
AtlasBaseTypeDef.dumpObjects(classifications, sb);
sb.append(']');
sb.append(", ");
super.toString(sb);
sb.append('}');
return sb;
......
......@@ -17,14 +17,6 @@
*/
package org.apache.atlas.repository.store.graph.v1;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.TypeCategory;
......@@ -34,12 +26,26 @@ import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.repository.store.graph.EntityGraphDiscovery;
import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext;
import org.apache.atlas.repository.store.graph.EntityResolver;
import org.apache.atlas.type.*;
import org.apache.atlas.type.AtlasArrayType;
import org.apache.atlas.type.AtlasBuiltInTypes.AtlasObjectIdType;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasMapType;
import org.apache.atlas.type.AtlasStructType;
import org.apache.atlas.type.AtlasStructType.AtlasAttribute;
import org.apache.atlas.type.AtlasType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.atlas.type.AtlasTypeUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class AtlasEntityGraphDiscoveryV1 implements EntityGraphDiscovery {
private static final Logger LOG = LoggerFactory.getLogger(AtlasEntityGraphDiscoveryV1.class);
......
......@@ -159,13 +159,14 @@ public class AtlasEntityStoreV1 implements AtlasEntityStore {
int progressReportedAtCount = 0;
while (entityStream.hasNext()) {
AtlasEntity entity = entityStream.next();
AtlasEntityWithExtInfo entityWithExtInfo = entityStream.getNextEntityWithExtInfo();
AtlasEntity entity = entityWithExtInfo != null ? entityWithExtInfo.getEntity() : null;
if(entity == null || processedGuids.contains(entity.getGuid())) {
continue;
}
AtlasEntityStreamForImport oneEntityStream = new AtlasEntityStreamForImport(entity, entityStream);
AtlasEntityStreamForImport oneEntityStream = new AtlasEntityStreamForImport(entityWithExtInfo, entityStream);
EntityMutationResponse resp = createOrUpdate(oneEntityStream, false, true);
......@@ -177,7 +178,7 @@ public class AtlasEntityStoreV1 implements AtlasEntityStore {
updateImportMetrics("entity:%s:updated", resp.getUpdatedEntities(), processedGuids, importResult);
updateImportMetrics("entity:%s:deleted", resp.getDeletedEntities(), processedGuids, importResult);
if ((processedGuids.size() - progressReportedAtCount) > 10) {
if ((processedGuids.size() - progressReportedAtCount) > 1000) {
progressReportedAtCount = processedGuids.size();
LOG.info("bulkImport(): in progress.. number of entities imported: {}", progressReportedAtCount);
......
......@@ -24,9 +24,9 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import java.util.Iterator;
public class AtlasEntityStream implements EntityStream {
private final AtlasEntitiesWithExtInfo entitiesWithExtInfo;
private final EntityStream entityStream;
private Iterator<AtlasEntity> iterator;
protected final AtlasEntitiesWithExtInfo entitiesWithExtInfo;
protected final EntityStream entityStream;
private Iterator<AtlasEntity> iterator;
public AtlasEntityStream(AtlasEntity entity) {
......@@ -49,6 +49,12 @@ public class AtlasEntityStream implements EntityStream {
this.entityStream = entityStream;
}
public AtlasEntityStream(AtlasEntityWithExtInfo entityWithExtInfo, EntityStream entityStream) {
this.entitiesWithExtInfo = new AtlasEntitiesWithExtInfo(entityWithExtInfo);
this.iterator = this.entitiesWithExtInfo.getEntities().iterator();
this.entityStream = entityStream;
}
@Override
public boolean hasNext() {
return iterator.hasNext();
......@@ -66,7 +72,7 @@ public class AtlasEntityStream implements EntityStream {
@Override
public AtlasEntity getByGuid(String guid) {
return entityStream != null ? entityStream.getByGuid(guid) : entitiesWithExtInfo.getEntity(guid);
return entityStream != null ? entityStream.getByGuid(guid) : entitiesWithExtInfo.getEntity(guid);
}
@Override
......
......@@ -18,17 +18,29 @@
package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import java.util.List;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
public class AtlasEntityStreamForImport extends AtlasEntityStream implements EntityImportStream {
public AtlasEntityStreamForImport(AtlasEntity entity) {
super(entity);
/**
 * Creates an import stream for one entity with its extended info.
 * Both arguments are passed unchanged to the matching {@code AtlasEntityStream} constructor.
 *
 * @param entityWithExtInfo the entity (plus ext info) to stream
 * @param entityStream      the stream this instance falls back to in {@code getByGuid}
 */
public AtlasEntityStreamForImport(AtlasEntityWithExtInfo entityWithExtInfo, EntityStream entityStream) {
super(entityWithExtInfo, entityStream);
}
/**
 * Advances the stream via {@code next()} and pairs the returned entity with the
 * extended info held by this stream.
 *
 * @return the next entity wrapped with ext info, or {@code null} when {@code next()}
 *         yields {@code null}
 */
@Override
public AtlasEntityWithExtInfo getNextEntityWithExtInfo() {
    final AtlasEntity nextEntity = next();

    if (nextEntity == null) {
        return null;
    }

    return new AtlasEntityWithExtInfo(nextEntity, super.entitiesWithExtInfo);
}
public AtlasEntityStreamForImport(AtlasEntity entity, EntityStream entityStream) {
super(entity, entityStream);
/**
 * Looks up an entity by guid, preferring the entities held by this stream and
 * falling back to the wrapped {@code entityStream} only when nothing is found locally.
 *
 * @param guid guid of the entity to find
 * @return the matching entity, or {@code null} if neither source has it
 */
@Override
public AtlasEntity getByGuid(String guid) {
    final AtlasEntity local = super.entitiesWithExtInfo.getEntity(guid);

    // local hit, or no fallback stream available: the local result is final
    if (local != null || entityStream == null) {
        return local;
    }

    return entityStream.getByGuid(guid);
}
@Override
......
......@@ -18,7 +18,11 @@
package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
/**
 * An {@link EntityStream} used during import: besides plain entity iteration it can
 * hand out each entity together with its extended info, and it is told when an
 * entity has been imported.
 */
public interface EntityImportStream extends EntityStream {
/**
 * Returns the next entity of the stream together with its extended info.
 *
 * @return the next entity with ext info; implementations in this change return
 *         {@code null} when no entity could be produced
 */
AtlasEntityWithExtInfo getNextEntityWithExtInfo();
/**
 * Notifies the stream that the import of the entity with the given guid is complete.
 *
 * @param guid guid of the imported entity
 */
void onImportComplete(String guid);
}
......@@ -18,7 +18,6 @@
package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
public interface EntityStream {
......
......@@ -19,9 +19,7 @@ package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
......
......@@ -38,11 +38,11 @@ public class AtlasGremlin2QueryProvider extends AtlasGremlinQueryProvider {
case ENTITIES_FOR_TAG_METRIC:
return "g.V().has('__typeName', T.in, g.V().has('__type', 'typeSystem').filter{it.getProperty('__type.category').name() == 'TRAIT'}.'__type.name'.toSet()).groupCount{it.getProperty('__typeName')}.cap.toList()";
case EXPORT_BY_GUID_FULL:
return "g.V('__guid', startGuid).bothE().bothV().has('__guid').__guid.dedup().toList()";
return "g.V('__guid', startGuid).bothE().bothV().has('__guid').transform{[__guid:it.__guid,isProcess:(it.__superTypeNames != null) ? it.__superTypeNames.contains('Process') : false ]}.dedup().toList()";
case EXPORT_BY_GUID_CONNECTED_IN_EDGE:
return "g.V('__guid', startGuid).inE().outV().has('__guid').__guid.dedup().toList()";
return "g.V('__guid', startGuid).inE().outV().has('__guid').transform{[__guid:it.__guid,isProcess:(it.__superTypeNames != null) ? it.__superTypeNames.contains('Process') : false ]}.dedup().toList()";
case EXPORT_BY_GUID_CONNECTED_OUT_EDGE:
return "g.V('__guid', startGuid).outE().inV().has('__guid').__guid.dedup().toList()";
return "g.V('__guid', startGuid).outE().inV().has('__guid').transform{[__guid:it.__guid,isProcess:(it.__superTypeNames != null) ? it.__superTypeNames.contains('Process') : false ]}.dedup().toList()";
case EXPORT_TYPE_STARTS_WITH:
return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).startsWith(attrValue)}).has('__guid').__guid.toList()";
case EXPORT_TYPE_ENDS_WITH:
......
......@@ -45,6 +45,11 @@ public class ZipSink {
saveToZip(entity.getGuid(), jsonData);
}
/**
 * Serializes the given entity (with its extended info) to JSON and stores it in the
 * zip under the guid of the contained entity.
 *
 * @param entityWithExtInfo entity plus ext info to write; its entity supplies the entry name
 * @throws AtlasBaseException propagated from JSON conversion or from writing the zip entry
 */
public void add(AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) throws AtlasBaseException {
    // convert first, then resolve the entry name - same evaluation order as before
    final String serialized = convertToJSON(entityWithExtInfo);
    final String entryName  = entityWithExtInfo.getEntity().getGuid();

    saveToZip(entryName, serialized);
}
public void setResult(AtlasExportResult result) throws AtlasBaseException {
String jsonData = convertToJSON(result);
saveToZip(ZipExportFileNames.ATLAS_EXPORT_INFO_NAME, jsonData);
......
......@@ -17,17 +17,19 @@
*/
package org.apache.atlas.web.resources;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.codehaus.jackson.type.TypeReference;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.repository.store.graph.v1.EntityImportStream;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
......@@ -57,7 +59,7 @@ public class ZipSource implements EntityImportStream {
public AtlasTypesDef getTypesDef() throws AtlasBaseException {
final String fileName = ZipExportFileNames.ATLAS_TYPESDEF_NAME.toString();
String s = getFromCache(fileName);
String s = (String) getFromCache(fileName);
return convertFromJson(AtlasTypesDef.class, s);
}
......@@ -104,9 +106,10 @@ public class ZipSource implements EntityImportStream {
return this.creationOrder;
}
public AtlasEntity getEntity(String guid) throws AtlasBaseException {
String s = getFromCache(guid);
return convertFromJson(AtlasEntity.class, s);
/**
 * Reads the cached JSON stored under the given guid and deserializes it into an
 * entity with extended info.
 *
 * @param guid guid whose cached JSON should be converted
 * @return the deserialized entity with ext info, as produced by {@code convertFromJson}
 * @throws AtlasBaseException propagated from {@code convertFromJson}
 */
public AtlasEntity.AtlasEntityWithExtInfo getEntityWithExtInfo(String guid) throws AtlasBaseException {
    final String json = getFromCache(guid);

    return convertFromJson(AtlasEntity.AtlasEntityWithExtInfo.class, json);
}
private <T> T convertFromJson(TypeReference clazz, String jsonData) throws AtlasBaseException {
......@@ -136,9 +139,7 @@ public class ZipSource implements EntityImportStream {
}
private String getFromCache(String entryName) {
if(!guidEntityJsonMap.containsKey(entryName)) return "";
return guidEntityJsonMap.get(entryName).toString();
return guidEntityJsonMap.get(entryName);
}
public void close() {
......@@ -158,8 +159,15 @@ public class ZipSource implements EntityImportStream {
/**
 * Returns the next entity of the stream, discarding its extended info.
 *
 * @return the entity obtained from {@code getNextEntityWithExtInfo()}, or {@code null}
 *         when that call returns {@code null}
 */
@Override
public AtlasEntity next() {
    final AtlasEntityWithExtInfo wrapper = getNextEntityWithExtInfo();

    if (wrapper == null) {
        return null;
    }

    return wrapper.getEntity();
}
@Override
public AtlasEntityWithExtInfo getNextEntityWithExtInfo() {
try {
return getEntity(this.iterator.next());
return getEntityWithExtInfo(this.iterator.next());
} catch (AtlasBaseException e) {
e.printStackTrace();
return null;
......@@ -186,10 +194,16 @@ public class ZipSource implements EntityImportStream {
}
}
/**
 * Returns the entity stored under the given guid, without its extended info.
 *
 * @param guid guid to look up in {@code guidEntityJsonMap}
 * @return the entity, or {@code null} when the map has no entry for the guid
 * @throws AtlasBaseException propagated from {@code getEntityWithExtInfo}
 */
private AtlasEntity getEntity(String guid) throws AtlasBaseException {
    // unknown guid: nothing to deserialize
    if (!guidEntityJsonMap.containsKey(guid)) {
        return null;
    }

    return getEntityWithExtInfo(guid).getEntity();
}
@Override
public void onImportComplete(String guid) {
if(guid != null) {
guidEntityJsonMap.remove(guid);
}
guidEntityJsonMap.remove(guid);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment