Commit 160b2874, authored by ashutoshm; committed by Madhan Neethiraj

ATLAS-1665: export optimization to reduce file-size and export-time

parent 537f6e31
...@@ -196,6 +196,7 @@ public class AtlasEntity extends AtlasStruct implements Serializable { ...@@ -196,6 +196,7 @@ public class AtlasEntity extends AtlasStruct implements Serializable {
} }
sb.append("AtlasEntity{"); sb.append("AtlasEntity{");
super.toString(sb);
sb.append("guid='").append(guid).append('\''); sb.append("guid='").append(guid).append('\'');
sb.append(", status=").append(status); sb.append(", status=").append(status);
sb.append(", createdBy='").append(createdBy).append('\''); sb.append(", createdBy='").append(createdBy).append('\'');
...@@ -207,7 +208,6 @@ public class AtlasEntity extends AtlasStruct implements Serializable { ...@@ -207,7 +208,6 @@ public class AtlasEntity extends AtlasStruct implements Serializable {
AtlasBaseTypeDef.dumpObjects(classifications, sb); AtlasBaseTypeDef.dumpObjects(classifications, sb);
sb.append(']'); sb.append(']');
sb.append(", "); sb.append(", ");
super.toString(sb);
sb.append('}'); sb.append('}');
return sb; return sb;
......
...@@ -17,14 +17,6 @@ ...@@ -17,14 +17,6 @@
*/ */
package org.apache.atlas.repository.store.graph.v1; package org.apache.atlas.repository.store.graph.v1;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.atlas.AtlasErrorCode; import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.exception.AtlasBaseException; import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.TypeCategory; import org.apache.atlas.model.TypeCategory;
...@@ -34,12 +26,26 @@ import org.apache.atlas.model.instance.AtlasStruct; ...@@ -34,12 +26,26 @@ import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.repository.store.graph.EntityGraphDiscovery; import org.apache.atlas.repository.store.graph.EntityGraphDiscovery;
import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext; import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext;
import org.apache.atlas.repository.store.graph.EntityResolver; import org.apache.atlas.repository.store.graph.EntityResolver;
import org.apache.atlas.type.*; import org.apache.atlas.type.AtlasArrayType;
import org.apache.atlas.type.AtlasBuiltInTypes.AtlasObjectIdType; import org.apache.atlas.type.AtlasBuiltInTypes.AtlasObjectIdType;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasMapType;
import org.apache.atlas.type.AtlasStructType;
import org.apache.atlas.type.AtlasStructType.AtlasAttribute; import org.apache.atlas.type.AtlasStructType.AtlasAttribute;
import org.apache.atlas.type.AtlasType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.atlas.type.AtlasTypeUtil;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class AtlasEntityGraphDiscoveryV1 implements EntityGraphDiscovery { public class AtlasEntityGraphDiscoveryV1 implements EntityGraphDiscovery {
private static final Logger LOG = LoggerFactory.getLogger(AtlasEntityGraphDiscoveryV1.class); private static final Logger LOG = LoggerFactory.getLogger(AtlasEntityGraphDiscoveryV1.class);
......
...@@ -159,13 +159,14 @@ public class AtlasEntityStoreV1 implements AtlasEntityStore { ...@@ -159,13 +159,14 @@ public class AtlasEntityStoreV1 implements AtlasEntityStore {
int progressReportedAtCount = 0; int progressReportedAtCount = 0;
while (entityStream.hasNext()) { while (entityStream.hasNext()) {
AtlasEntity entity = entityStream.next(); AtlasEntityWithExtInfo entityWithExtInfo = entityStream.getNextEntityWithExtInfo();
AtlasEntity entity = entityWithExtInfo != null ? entityWithExtInfo.getEntity() : null;
if(entity == null || processedGuids.contains(entity.getGuid())) { if(entity == null || processedGuids.contains(entity.getGuid())) {
continue; continue;
} }
AtlasEntityStreamForImport oneEntityStream = new AtlasEntityStreamForImport(entity, entityStream); AtlasEntityStreamForImport oneEntityStream = new AtlasEntityStreamForImport(entityWithExtInfo, entityStream);
EntityMutationResponse resp = createOrUpdate(oneEntityStream, false, true); EntityMutationResponse resp = createOrUpdate(oneEntityStream, false, true);
...@@ -177,7 +178,7 @@ public class AtlasEntityStoreV1 implements AtlasEntityStore { ...@@ -177,7 +178,7 @@ public class AtlasEntityStoreV1 implements AtlasEntityStore {
updateImportMetrics("entity:%s:updated", resp.getUpdatedEntities(), processedGuids, importResult); updateImportMetrics("entity:%s:updated", resp.getUpdatedEntities(), processedGuids, importResult);
updateImportMetrics("entity:%s:deleted", resp.getDeletedEntities(), processedGuids, importResult); updateImportMetrics("entity:%s:deleted", resp.getDeletedEntities(), processedGuids, importResult);
if ((processedGuids.size() - progressReportedAtCount) > 10) { if ((processedGuids.size() - progressReportedAtCount) > 1000) {
progressReportedAtCount = processedGuids.size(); progressReportedAtCount = processedGuids.size();
LOG.info("bulkImport(): in progress.. number of entities imported: {}", progressReportedAtCount); LOG.info("bulkImport(): in progress.. number of entities imported: {}", progressReportedAtCount);
......
...@@ -24,8 +24,8 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; ...@@ -24,8 +24,8 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import java.util.Iterator; import java.util.Iterator;
public class AtlasEntityStream implements EntityStream { public class AtlasEntityStream implements EntityStream {
private final AtlasEntitiesWithExtInfo entitiesWithExtInfo; protected final AtlasEntitiesWithExtInfo entitiesWithExtInfo;
private final EntityStream entityStream; protected final EntityStream entityStream;
private Iterator<AtlasEntity> iterator; private Iterator<AtlasEntity> iterator;
...@@ -49,6 +49,12 @@ public class AtlasEntityStream implements EntityStream { ...@@ -49,6 +49,12 @@ public class AtlasEntityStream implements EntityStream {
this.entityStream = entityStream; this.entityStream = entityStream;
} }
public AtlasEntityStream(AtlasEntityWithExtInfo entityWithExtInfo, EntityStream entityStream) {
this.entitiesWithExtInfo = new AtlasEntitiesWithExtInfo(entityWithExtInfo);
this.iterator = this.entitiesWithExtInfo.getEntities().iterator();
this.entityStream = entityStream;
}
@Override @Override
public boolean hasNext() { public boolean hasNext() {
return iterator.hasNext(); return iterator.hasNext();
......
...@@ -18,17 +18,29 @@ ...@@ -18,17 +18,29 @@
package org.apache.atlas.repository.store.graph.v1; package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader; import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import java.util.List;
public class AtlasEntityStreamForImport extends AtlasEntityStream implements EntityImportStream { public class AtlasEntityStreamForImport extends AtlasEntityStream implements EntityImportStream {
public AtlasEntityStreamForImport(AtlasEntity entity) { public AtlasEntityStreamForImport(AtlasEntityWithExtInfo entityWithExtInfo, EntityStream entityStream) {
super(entity); super(entityWithExtInfo, entityStream);
}
@Override
public AtlasEntityWithExtInfo getNextEntityWithExtInfo() {
AtlasEntity entity = next();
return entity != null ? new AtlasEntityWithExtInfo(entity, super.entitiesWithExtInfo) : null;
}
@Override
public AtlasEntity getByGuid(String guid) {
AtlasEntity ent = super.entitiesWithExtInfo.getEntity(guid);
if(ent == null && entityStream != null) {
return entityStream.getByGuid(guid);
} }
public AtlasEntityStreamForImport(AtlasEntity entity, EntityStream entityStream) { return ent;
super(entity, entityStream);
} }
@Override @Override
......
...@@ -18,7 +18,11 @@ ...@@ -18,7 +18,11 @@
package org.apache.atlas.repository.store.graph.v1; package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
public interface EntityImportStream extends EntityStream { public interface EntityImportStream extends EntityStream {
AtlasEntityWithExtInfo getNextEntityWithExtInfo();
void onImportComplete(String guid); void onImportComplete(String guid);
} }
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
package org.apache.atlas.repository.store.graph.v1; package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
public interface EntityStream { public interface EntityStream {
......
...@@ -19,9 +19,7 @@ package org.apache.atlas.repository.store.graph.v1; ...@@ -19,9 +19,7 @@ package org.apache.atlas.repository.store.graph.v1;
import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
......
...@@ -38,11 +38,11 @@ public class AtlasGremlin2QueryProvider extends AtlasGremlinQueryProvider { ...@@ -38,11 +38,11 @@ public class AtlasGremlin2QueryProvider extends AtlasGremlinQueryProvider {
case ENTITIES_FOR_TAG_METRIC: case ENTITIES_FOR_TAG_METRIC:
return "g.V().has('__typeName', T.in, g.V().has('__type', 'typeSystem').filter{it.getProperty('__type.category').name() == 'TRAIT'}.'__type.name'.toSet()).groupCount{it.getProperty('__typeName')}.cap.toList()"; return "g.V().has('__typeName', T.in, g.V().has('__type', 'typeSystem').filter{it.getProperty('__type.category').name() == 'TRAIT'}.'__type.name'.toSet()).groupCount{it.getProperty('__typeName')}.cap.toList()";
case EXPORT_BY_GUID_FULL: case EXPORT_BY_GUID_FULL:
return "g.V('__guid', startGuid).bothE().bothV().has('__guid').__guid.dedup().toList()"; return "g.V('__guid', startGuid).bothE().bothV().has('__guid').transform{[__guid:it.__guid,isProcess:(it.__superTypeNames != null) ? it.__superTypeNames.contains('Process') : false ]}.dedup().toList()";
case EXPORT_BY_GUID_CONNECTED_IN_EDGE: case EXPORT_BY_GUID_CONNECTED_IN_EDGE:
return "g.V('__guid', startGuid).inE().outV().has('__guid').__guid.dedup().toList()"; return "g.V('__guid', startGuid).inE().outV().has('__guid').transform{[__guid:it.__guid,isProcess:(it.__superTypeNames != null) ? it.__superTypeNames.contains('Process') : false ]}.dedup().toList()";
case EXPORT_BY_GUID_CONNECTED_OUT_EDGE: case EXPORT_BY_GUID_CONNECTED_OUT_EDGE:
return "g.V('__guid', startGuid).outE().inV().has('__guid').__guid.dedup().toList()"; return "g.V('__guid', startGuid).outE().inV().has('__guid').transform{[__guid:it.__guid,isProcess:(it.__superTypeNames != null) ? it.__superTypeNames.contains('Process') : false ]}.dedup().toList()";
case EXPORT_TYPE_STARTS_WITH: case EXPORT_TYPE_STARTS_WITH:
return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).startsWith(attrValue)}).has('__guid').__guid.toList()"; return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).startsWith(attrValue)}).has('__guid').__guid.toList()";
case EXPORT_TYPE_ENDS_WITH: case EXPORT_TYPE_ENDS_WITH:
......
...@@ -45,6 +45,11 @@ public class ZipSink { ...@@ -45,6 +45,11 @@ public class ZipSink {
saveToZip(entity.getGuid(), jsonData); saveToZip(entity.getGuid(), jsonData);
} }
public void add(AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) throws AtlasBaseException {
String jsonData = convertToJSON(entityWithExtInfo);
saveToZip(entityWithExtInfo.getEntity().getGuid(), jsonData);
}
public void setResult(AtlasExportResult result) throws AtlasBaseException { public void setResult(AtlasExportResult result) throws AtlasBaseException {
String jsonData = convertToJSON(result); String jsonData = convertToJSON(result);
saveToZip(ZipExportFileNames.ATLAS_EXPORT_INFO_NAME, jsonData); saveToZip(ZipExportFileNames.ATLAS_EXPORT_INFO_NAME, jsonData);
......
...@@ -17,17 +17,19 @@ ...@@ -17,17 +17,19 @@
*/ */
package org.apache.atlas.web.resources; package org.apache.atlas.web.resources;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.codehaus.jackson.type.TypeReference;
import org.apache.atlas.exception.AtlasBaseException; import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.typedef.AtlasTypesDef; import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.repository.store.graph.v1.EntityImportStream; import org.apache.atlas.repository.store.graph.v1.EntityImportStream;
import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.*; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
...@@ -57,7 +59,7 @@ public class ZipSource implements EntityImportStream { ...@@ -57,7 +59,7 @@ public class ZipSource implements EntityImportStream {
public AtlasTypesDef getTypesDef() throws AtlasBaseException { public AtlasTypesDef getTypesDef() throws AtlasBaseException {
final String fileName = ZipExportFileNames.ATLAS_TYPESDEF_NAME.toString(); final String fileName = ZipExportFileNames.ATLAS_TYPESDEF_NAME.toString();
String s = getFromCache(fileName); String s = (String) getFromCache(fileName);
return convertFromJson(AtlasTypesDef.class, s); return convertFromJson(AtlasTypesDef.class, s);
} }
...@@ -104,9 +106,10 @@ public class ZipSource implements EntityImportStream { ...@@ -104,9 +106,10 @@ public class ZipSource implements EntityImportStream {
return this.creationOrder; return this.creationOrder;
} }
public AtlasEntity getEntity(String guid) throws AtlasBaseException { public AtlasEntity.AtlasEntityWithExtInfo getEntityWithExtInfo(String guid) throws AtlasBaseException {
String s = getFromCache(guid); String s = (String) getFromCache(guid);
return convertFromJson(AtlasEntity.class, s); AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo = convertFromJson(AtlasEntity.AtlasEntityWithExtInfo.class, s);
return entityWithExtInfo;
} }
private <T> T convertFromJson(TypeReference clazz, String jsonData) throws AtlasBaseException { private <T> T convertFromJson(TypeReference clazz, String jsonData) throws AtlasBaseException {
...@@ -136,9 +139,7 @@ public class ZipSource implements EntityImportStream { ...@@ -136,9 +139,7 @@ public class ZipSource implements EntityImportStream {
} }
private String getFromCache(String entryName) { private String getFromCache(String entryName) {
if(!guidEntityJsonMap.containsKey(entryName)) return ""; return guidEntityJsonMap.get(entryName);
return guidEntityJsonMap.get(entryName).toString();
} }
public void close() { public void close() {
...@@ -158,8 +159,15 @@ public class ZipSource implements EntityImportStream { ...@@ -158,8 +159,15 @@ public class ZipSource implements EntityImportStream {
@Override @Override
public AtlasEntity next() { public AtlasEntity next() {
AtlasEntityWithExtInfo entityWithExtInfo = getNextEntityWithExtInfo();
return entityWithExtInfo != null ? entityWithExtInfo.getEntity() : null;
}
@Override
public AtlasEntityWithExtInfo getNextEntityWithExtInfo() {
try { try {
return getEntity(this.iterator.next()); return getEntityWithExtInfo(this.iterator.next());
} catch (AtlasBaseException e) { } catch (AtlasBaseException e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -186,10 +194,16 @@ public class ZipSource implements EntityImportStream { ...@@ -186,10 +194,16 @@ public class ZipSource implements EntityImportStream {
} }
} }
private AtlasEntity getEntity(String guid) throws AtlasBaseException {
if(guidEntityJsonMap.containsKey(guid)) {
return getEntityWithExtInfo(guid).getEntity();
}
return null;
}
@Override @Override
public void onImportComplete(String guid) { public void onImportComplete(String guid) {
if(guid != null) {
guidEntityJsonMap.remove(guid); guidEntityJsonMap.remove(guid);
} }
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment