Commit 64c7a3be by skoritala Committed by Ashutosh Mestry

ATLAS-3251: Implement Patch to populate classification text for legacy data.

parent 4b6380fe
......@@ -17,6 +17,7 @@
*/
package org.apache.atlas.repository.patches;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.patches.AtlasPatch.PatchStatus;
import static org.apache.atlas.model.patches.AtlasPatch.PatchStatus.UNKNOWN;
......@@ -64,5 +65,5 @@ public abstract class AtlasPatchHandler {
return patchId;
}
public abstract void apply();
public abstract void apply() throws AtlasBaseException;
}
......@@ -22,6 +22,7 @@ import org.apache.atlas.model.patches.AtlasPatch;
import org.apache.atlas.model.patches.AtlasPatch.PatchStatus;
import org.apache.atlas.repository.graph.GraphBackedSearchIndexer;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.store.graph.v2.EntityGraphMapper;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -39,8 +40,8 @@ public class AtlasPatchManager {
private final PatchContext context;
@Inject
public AtlasPatchManager(AtlasGraph atlasGraph, AtlasTypeRegistry typeRegistry, GraphBackedSearchIndexer indexer) {
this.context = new PatchContext(atlasGraph, typeRegistry, indexer);
public AtlasPatchManager(AtlasGraph atlasGraph, AtlasTypeRegistry typeRegistry, GraphBackedSearchIndexer indexer, EntityGraphMapper entityGraphMapper) {
this.context = new PatchContext(atlasGraph, typeRegistry, indexer, entityGraphMapper);
}
public AtlasPatch.AtlasPatches getAllPatches() {
......@@ -49,7 +50,8 @@ public class AtlasPatchManager {
public void applyAll() {
final AtlasPatchHandler handlers[] = {
new UniqueAttributePatch(context)
new UniqueAttributePatch(context),
new ClassificationTextPatch(context)
};
try {
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.repository.patches;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.repository.graphdb.AtlasVertex;
import org.apache.atlas.type.AtlasEntityType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.atlas.model.patches.AtlasPatch.PatchStatus.APPLIED;
/**
 * Java patch that back-fills the classification-text attribute for legacy entities,
 * derived from the classifications already applied to each entity vertex.
 */
public class ClassificationTextPatch extends AtlasPatchHandler {
    private static final Logger LOG = LoggerFactory.getLogger(ClassificationTextPatch.class);

    private static final String PATCH_ID          = "JAVA_PATCH_0000_002";
    private static final String PATCH_DESCRIPTION = "Populates Classification Text attribute for entities from classifications applied on them.";

    private final PatchContext context;

    /**
     * @param context shared patch context; supplies the patch registry used for bookkeeping
     */
    public ClassificationTextPatch(PatchContext context) {
        super(context.getPatchRegistry(), PATCH_ID, PATCH_DESCRIPTION);

        this.context = context;
    }

    /**
     * Runs the concurrent processor over all entity vertices, then records the patch as applied.
     *
     * @throws AtlasBaseException if vertex processing fails
     */
    @Override
    public void apply() throws AtlasBaseException {
        ConcurrentPatchProcessor processor = new ClassificationTextPatchProcessor(context);

        processor.apply();

        setStatus(APPLIED);

        LOG.info("ClassificationTextPatch.apply(): patchId={}, status={}", getPatchId(), getStatus());
    }

    /** Per-vertex worker: recomputes the classification text of each visited entity vertex. */
    public static class ClassificationTextPatchProcessor extends ConcurrentPatchProcessor {
        public ClassificationTextPatchProcessor(PatchContext context) {
            super(context);
        }

        /** Delegates to {@link #processItem}; one call per active entity vertex. */
        @Override
        protected void processVertexItem(Long vertexId, AtlasVertex vertex, String typeName, AtlasEntityType entityType) throws AtlasBaseException {
            processItem(vertexId, vertex, typeName, entityType);
        }

        /** No setup is required before execution for this patch. */
        @Override
        protected void prepareForExecution() {
            //do nothing
        }

        /**
         * Recomputes and stores the classification text for a single vertex.
         *
         * @throws AtlasBaseException if the underlying graph update fails
         */
        protected void processItem(Long vertexId, AtlasVertex vertex, String typeName, AtlasEntityType entityType) throws AtlasBaseException {
            boolean debugEnabled = LOG.isDebugEnabled();

            if (debugEnabled) {
                LOG.debug("processItem(typeName={}, vertexId={})", typeName, vertexId);
            }

            getEntityGraphMapper().updateClassificationText(vertex);

            if (debugEnabled) {
                LOG.debug("processItem(typeName={}, vertexId={}): Done!", typeName, vertexId);
            }
        }
    }
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.repository.patches;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasException;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.pc.WorkItemBuilder;
import org.apache.atlas.pc.WorkItemConsumer;
import org.apache.atlas.pc.WorkItemManager;
import org.apache.atlas.repository.graph.GraphBackedSearchIndexer;
import org.apache.atlas.repository.graphdb.*;
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
import org.apache.atlas.repository.store.graph.v2.EntityGraphMapper;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Base class for Java patches that walk every vertex in the graph concurrently.
 * Subclasses implement {@link #processVertexItem} for per-vertex work and
 * {@link #prepareForExecution} for one-time setup. Worker count and batch size
 * are read once from configuration at class-load time.
 */
public abstract class ConcurrentPatchProcessor {
    private static final Logger LOG = LoggerFactory.getLogger(ConcurrentPatchProcessor.class);

    private static final String NUM_WORKERS_PROPERTY = "atlas.patch.numWorkers";
    private static final String BATCH_SIZE_PROPERTY  = "atlas.patch.batchSize";
    private static final String ATLAS_SOLR_SHARDS    = "ATLAS_SOLR_SHARDS";
    private static final String WORKER_NAME_PREFIX   = "patchWorkItem";

    private static final int NUM_WORKERS;
    private static final int BATCH_SIZE;

    private final EntityGraphMapper entityGraphMapper;

    public AtlasGraph getGraph() {
        return graph;
    }

    public GraphBackedSearchIndexer getIndexer() {
        return indexer;
    }

    public AtlasTypeRegistry getTypeRegistry() {
        return typeRegistry;
    }

    private final AtlasGraph               graph;
    private final GraphBackedSearchIndexer indexer;
    private final AtlasTypeRegistry        typeRegistry;

    static {
        int numWorkers = 3;
        int batchSize  = 300;

        try {
            numWorkers = ApplicationProperties.get().getInt(NUM_WORKERS_PROPERTY, getDefaultNumWorkers());
            batchSize  = ApplicationProperties.get().getInt(BATCH_SIZE_PROPERTY, 300);

            // fix: log tag previously said "UniqueAttributePatch"; this class is the shared processor
            LOG.info("ConcurrentPatchProcessor: {}={}, {}={}", NUM_WORKERS_PROPERTY, numWorkers, BATCH_SIZE_PROPERTY, batchSize);
        } catch (Exception e) {
            LOG.error("Error retrieving configuration.", e);
        }

        NUM_WORKERS = numWorkers;
        BATCH_SIZE  = batchSize;
    }

    // default worker count scales with the number of Solr shards (3 workers per shard)
    private static int getDefaultNumWorkers() throws AtlasException {
        return ApplicationProperties.get().getInt(ATLAS_SOLR_SHARDS, 1) * 3;
    }

    public ConcurrentPatchProcessor(PatchContext context) {
        this.graph             = context.getGraph();
        this.indexer           = context.getIndexer();
        this.typeRegistry      = context.getTypeRegistry();
        this.entityGraphMapper = context.getEntityGraphMapper();
    }

    public EntityGraphMapper getEntityGraphMapper() {
        return entityGraphMapper;
    }

    /**
     * Runs subclass setup, then processes every vertex in the graph using a pool of workers.
     *
     * @throws AtlasBaseException if {@link #prepareForExecution()} fails
     */
    public void apply() throws AtlasBaseException {
        prepareForExecution();
        execute();
    }

    private void execute() {
        Iterable<Object> iterable = graph.query().vertexIds();
        WorkItemManager  manager  = new WorkItemManager(
                new ConsumerBuilder(graph, typeRegistry, this), WORKER_NAME_PREFIX,
                BATCH_SIZE, NUM_WORKERS, false);

        try {
            for (Iterator<Object> iter = iterable.iterator(); iter.hasNext(); ) {
                Object vertexId = iter.next();

                submitForProcessing((Long) vertexId, manager);
            }

            manager.drain();
        } finally {
            try {
                manager.shutdown();
            } catch (InterruptedException e) {
                LOG.error("ConcurrentPatchProcessor.execute(): interrupted during WorkItemManager shutdown.", e);

                Thread.currentThread().interrupt(); // fix: preserve the interrupt status for callers
            }
        }
    }

    private void submitForProcessing(Long vertexId, WorkItemManager manager) {
        manager.checkProduce(vertexId);
    }

    /** Builds one {@link Consumer} per worker thread for the {@link WorkItemManager}. */
    private static class ConsumerBuilder implements WorkItemBuilder<Consumer, Long> {
        private final AtlasTypeRegistry        typeRegistry;
        private final AtlasGraph               graph;
        private final ConcurrentPatchProcessor patchItemProcessor;

        public ConsumerBuilder(AtlasGraph graph, AtlasTypeRegistry typeRegistry, ConcurrentPatchProcessor patchItemProcessor) {
            this.graph              = graph;
            this.typeRegistry       = typeRegistry;
            this.patchItemProcessor = patchItemProcessor;
        }

        @Override
        public Consumer build(BlockingQueue<Long> queue) {
            return new Consumer(graph, typeRegistry, queue, patchItemProcessor);
        }
    }

    /**
     * Worker that drains vertex ids from its queue, filters out non-entity and
     * inactive vertices, and hands the rest to the owning processor.
     * Commits are batched (every BATCH_SIZE items) and retried on failure.
     */
    private static class Consumer extends WorkItemConsumer<Long> {
        // fix: was a mutable instance field; a retry bound is a constant
        private static final int MAX_COMMIT_RETRY_COUNT = 3;

        private final AtlasGraph               graph;
        private final AtlasTypeRegistry        typeRegistry;
        private final AtomicLong               counter;
        private final ConcurrentPatchProcessor individualItemProcessor;

        public Consumer(AtlasGraph graph, AtlasTypeRegistry typeRegistry, BlockingQueue<Long> queue, ConcurrentPatchProcessor individualItemProcessor) {
            super(queue);

            this.graph                   = graph;
            this.typeRegistry            = typeRegistry;
            this.counter                 = new AtomicLong(0);
            this.individualItemProcessor = individualItemProcessor;
        }

        @Override
        protected void doCommit() {
            // commit once per batch, not per item
            if (counter.get() % BATCH_SIZE == 0) {
                LOG.info("Processed: {}", counter.get());

                attemptCommit();
            }
        }

        @Override
        protected void commitDirty() {
            attemptCommit();

            LOG.info("Total: Commit: {}", counter.get());

            super.commitDirty();
        }

        private void attemptCommit() {
            for (int retryCount = 1; retryCount <= MAX_COMMIT_RETRY_COUNT; retryCount++) {
                try {
                    graph.commit();

                    break;
                } catch (Exception ex) {
                    // fix: previous message had no '{}' placeholder, so retryCount was dropped
                    LOG.error("Commit exception: attempt {} of {}", retryCount, MAX_COMMIT_RETRY_COUNT, ex);

                    try {
                        Thread.sleep(300 * retryCount); // linear back-off; fix: was instance call to static sleep
                    } catch (InterruptedException e) {
                        LOG.error("Commit exception: Pause: Interrupted!", e);

                        Thread.currentThread().interrupt(); // fix: preserve interrupt status
                    }
                }
            }
        }

        @Override
        protected void processItem(Long vertexId) {
            counter.incrementAndGet();

            AtlasVertex vertex = graph.getVertex(Long.toString(vertexId));

            if (vertex == null) {
                LOG.warn("processItem(vertexId={}): AtlasVertex not found!", vertexId);

                return;
            }

            // skip type-definition vertices and non-active entities
            if (AtlasGraphUtilsV2.isTypeVertex(vertex)) {
                return;
            }

            if (AtlasGraphUtilsV2.getState(vertex) != AtlasEntity.Status.ACTIVE) {
                return;
            }

            String          typeName   = AtlasGraphUtilsV2.getTypeName(vertex);
            AtlasEntityType entityType = typeRegistry.getEntityTypeByName(typeName);

            // entities of unregistered types are skipped, not failed
            if (entityType == null) {
                return;
            }

            try {
                individualItemProcessor.processVertexItem(vertexId, vertex, typeName, entityType);
            } catch (AtlasBaseException e) {
                LOG.error("Error processing: {}", vertexId, e);
            }
        }
    }

    /** Per-vertex work; called only for active, registered entity vertices. */
    protected abstract void processVertexItem(Long vertexId, AtlasVertex vertex, String typeName, AtlasEntityType entityType) throws AtlasBaseException;

    /** One-time setup hook invoked before any vertex is processed. */
    protected abstract void prepareForExecution() throws AtlasBaseException;
}
......@@ -20,6 +20,7 @@ package org.apache.atlas.repository.patches;
import org.apache.atlas.repository.graph.GraphBackedSearchIndexer;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.store.graph.v2.EntityGraphMapper;
import org.apache.atlas.type.AtlasTypeRegistry;
public class PatchContext {
......@@ -27,12 +28,14 @@ public class PatchContext {
private final AtlasTypeRegistry typeRegistry;
private final GraphBackedSearchIndexer indexer;
private final AtlasPatchRegistry patchRegistry;
private final EntityGraphMapper entityGraphMapper;
public PatchContext(AtlasGraph graph, AtlasTypeRegistry typeRegistry, GraphBackedSearchIndexer indexer) {
public PatchContext(AtlasGraph graph, AtlasTypeRegistry typeRegistry, GraphBackedSearchIndexer indexer, EntityGraphMapper entityGraphMapper) {
this.graph = graph;
this.typeRegistry = typeRegistry;
this.indexer = indexer;
this.patchRegistry = new AtlasPatchRegistry(this.graph);
this.entityGraphMapper = entityGraphMapper;
}
public AtlasGraph getGraph() {
......@@ -50,4 +53,5 @@ public class PatchContext {
public AtlasPatchRegistry getPatchRegistry() {
return patchRegistry;
}
public EntityGraphMapper getEntityGraphMapper() { return entityGraphMapper;}
}
......@@ -1675,14 +1675,20 @@ public class EntityGraphMapper {
updateModificationMetadata(entityVertex);
for (Map.Entry<AtlasVertex, List<AtlasClassification>> entry : removedClassifications.entrySet()) {
AtlasVertex vertex = entry.getKey();
String guid = GraphHelper.getGuid(vertex);
AtlasEntity entity = updateClassificationText(entry.getKey());
List<AtlasClassification> deletedClassificationNames = entry.getValue();
entityChangeNotifier.onClassificationDeletedFromEntity(entity, deletedClassificationNames);
}
}
public AtlasEntity updateClassificationText(AtlasVertex vertex) throws AtlasBaseException {
String guid = GraphHelper.getGuid(vertex);
AtlasEntity entity = instanceConverter.getAndCacheEntity(guid);
vertex.setProperty(CLASSIFICATION_TEXT_KEY, fullTextMapperV2.getClassificationTextForEntity(entity));
entityChangeNotifier.onClassificationDeletedFromEntity(entity, deletedClassificationNames);
}
return entity;
}
public void updateClassifications(EntityMutationContext context, String guid, List<AtlasClassification> classifications) throws AtlasBaseException {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.