Commit e30ab3d8 by Shwetha GS

ATLAS-835 Falcon Integration with Atlas (sowmyaramesh via shwethags)

parent 436a5245
......@@ -16,14 +16,12 @@
* limitations under the License.
*/
package org.apache.falcon.atlas.Util;
package org.apache.atlas.falcon.Util;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.security.CurrentUser;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
......@@ -33,7 +31,6 @@ import java.util.Map;
* Falcon event util
*/
public final class EventUtil {
private static final Logger LOG = LoggerFactory.getLogger(EventUtil.class);
private EventUtil() {}
......@@ -55,7 +52,6 @@ public final class EventUtil {
return keyValueMap;
}
public static UserGroupInformation getUgi() throws FalconException {
UserGroupInformation ugi;
try {
......
......@@ -16,11 +16,13 @@
* limitations under the License.
*/
package org.apache.falcon.atlas.event;
package org.apache.atlas.falcon.event;
import org.apache.falcon.entity.v0.Entity;
import org.apache.hadoop.security.UserGroupInformation;
import java.util.Date;
/**
* Falcon event to interface with Atlas Service.
*/
......@@ -40,7 +42,12 @@ public class FalconEvent {
}
public enum OPERATION {
ADD_PROCESS, UPDATE_PROCESS
ADD_CLUSTER,
UPDATE_CLUSTER,
ADD_FEED,
UPDATE_FEED,
ADD_PROCESS,
UPDATE_PROCESS,
}
public String getUser() {
......@@ -55,8 +62,8 @@ public class FalconEvent {
return operation;
}
public long getTimestamp() {
return timestamp;
public Date getTimestamp() {
return new Date(timestamp);
}
public Entity getEntity() {
......
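The getTimestamp() change above keeps the stored field as a primitive long and materializes a fresh java.util.Date on each call, so no mutable internal state escapes to callers. A minimal sketch of the same defensive-copy pattern (class name hypothetical):

```java
import java.util.Date;

// Hypothetical holder mirroring FalconEvent's getter change: keep an
// immutable long internally, return a new Date per call so callers
// cannot mutate shared state.
public class TimestampHolder {
    private final long timestamp;

    public TimestampHolder(long timestamp) {
        this.timestamp = timestamp;
    }

    public Date getTimestamp() {
        return new Date(timestamp); // fresh instance on every call
    }
}
```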
......@@ -22,19 +22,15 @@ package org.apache.atlas.falcon.model;
* Falcon Data Types for model and bridge.
*/
public enum FalconDataTypes {
FALCON_PROCESS_ENTITY("falcon_process"),
;
private final String name;
FalconDataTypes(java.lang.String name) {
this.name = name;
}
// Classes
FALCON_CLUSTER,
FALCON_FEED_CREATION,
FALCON_FEED,
FALCON_FEED_REPLICATION,
FALCON_PROCESS;
public String getName() {
return name;
return name().toLowerCase();
}
}
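The refactored enum derives each Atlas type name from the constant itself rather than from a per-constant string field. A short sketch of the resulting values, assuming the FalconDataTypes enum from this patch is on the classpath (the demo class itself is hypothetical):

```java
import org.apache.atlas.falcon.model.FalconDataTypes;

public class FalconDataTypesDemo {
    public static void main(String[] args) {
        for (FalconDataTypes t : FalconDataTypes.values()) {
            // Prints: falcon_cluster, falcon_feed_creation, falcon_feed,
            // falcon_feed_replication, falcon_process
            System.out.println(t.getName()); // name().toLowerCase()
        }
    }
}
```

This drops the redundant constructor and field: the constant name becomes the single source of truth for the type name.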
......@@ -16,10 +16,10 @@
* limitations under the License.
*/
package org.apache.falcon.atlas.publisher;
package org.apache.atlas.falcon.publisher;
import org.apache.falcon.atlas.event.FalconEvent;
import org.apache.atlas.falcon.event.FalconEvent;
/**
* Falcon publisher for Atlas
......
......@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -16,17 +16,16 @@
* limitations under the License.
*/
package org.apache.falcon.atlas.service;
package org.apache.atlas.falcon.service;
import org.apache.atlas.falcon.Util.EventUtil;
import org.apache.atlas.falcon.event.FalconEvent;
import org.apache.atlas.falcon.hook.FalconHook;
import org.apache.atlas.falcon.publisher.FalconEventPublisher;
import org.apache.falcon.FalconException;
import org.apache.falcon.atlas.Util.EventUtil;
import org.apache.falcon.atlas.event.FalconEvent;
import org.apache.falcon.atlas.publisher.FalconEventPublisher;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.service.ConfigurationChangeListener;
import org.apache.falcon.service.FalconService;
import org.apache.hadoop.security.UserGroupInformation;
......@@ -57,7 +56,6 @@ public class AtlasService implements FalconService, ConfigurationChangeListener
publisher = new FalconHook();
}
@Override
public void destroy() throws FalconException {
ConfigurationStore.get().unregisterListener(this);
......@@ -65,14 +63,26 @@ public class AtlasService implements FalconService, ConfigurationChangeListener
@Override
public void onAdd(Entity entity) throws FalconException {
EntityType entityType = entity.getEntityType();
switch (entityType) {
try {
EntityType entityType = entity.getEntityType();
switch (entityType) {
case CLUSTER:
addEntity(entity, FalconEvent.OPERATION.ADD_CLUSTER);
break;
case PROCESS:
addProcessEntity((Process) entity, FalconEvent.OPERATION.ADD_PROCESS);
addEntity(entity, FalconEvent.OPERATION.ADD_PROCESS);
break;
case FEED:
addEntity(entity, FalconEvent.OPERATION.ADD_FEED);
break;
default:
LOG.debug("Entity type not processed " + entityType);
LOG.debug("Entity type not processed {}", entityType);
}
} catch(Throwable t) {
LOG.warn("Error handling entity {}", entity, t);
}
}
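Note the catch of Throwable wrapping the switch: a metadata hook should log and swallow its own failures so the host operation in Falcon still succeeds. A compact sketch of that pattern (names hypothetical):

```java
// Hypothetical listener mirroring AtlasService.onAdd: any hook failure
// is logged and suppressed so Falcon's entity submission is unaffected.
public class SafeListenerSketch {
    public void onAdd(Object entity) {
        try {
            publish(entity);
        } catch (Throwable t) {
            System.err.println("Error handling entity " + entity + ": " + t);
        }
    }

    private void publish(Object entity) {
        // hand off to the Atlas publisher
    }
}
```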
......@@ -82,15 +92,29 @@ public class AtlasService implements FalconService, ConfigurationChangeListener
@Override
public void onChange(Entity oldEntity, Entity newEntity) throws FalconException {
/**
* Skipping update for now - update uses full update currently and this might result in all attributes wiped for hive entities
EntityType entityType = newEntity.getEntityType();
switch (entityType) {
case PROCESS:
addProcessEntity((Process) newEntity, FalconEvent.OPERATION.UPDATE_PROCESS);
break;
default:
LOG.debug("Entity type not processed " + entityType);
case CLUSTER:
addEntity(newEntity, FalconEvent.OPERATION.UPDATE_CLUSTER);
break;
case PROCESS:
addEntity(newEntity, FalconEvent.OPERATION.UPDATE_PROCESS);
break;
case FEED:
FalconEvent.OPERATION operation = isReplicationFeed((Feed) newEntity) ?
FalconEvent.OPERATION.UPDATE_REPLICATION_FEED :
FalconEvent.OPERATION.UPDATE_FEED;
addEntity(newEntity, operation);
break;
default:
LOG.debug("Entity type not processed {}", entityType);
}
**/
}
@Override
......@@ -99,17 +123,19 @@ public class AtlasService implements FalconService, ConfigurationChangeListener
onAdd(entity);
}
private void addProcessEntity(Process entity, FalconEvent.OPERATION operation) throws FalconException {
LOG.info("Adding process entity to Atlas: {}", entity.getName());
private void addEntity(Entity entity, FalconEvent.OPERATION operation) throws FalconException {
LOG.info("Adding {} entity to Atlas: {}", entity.getEntityType().name(), entity.getName());
try {
String user = entity.getACL() != null ? entity.getACL().getOwner() :
UserGroupInformation.getLoginUser().getShortUserName();
FalconEvent event = new FalconEvent(user, EventUtil.getUgi(), operation, System.currentTimeMillis(), entity);
FalconEvent event =
new FalconEvent(user, EventUtil.getUgi(), operation, System.currentTimeMillis(), entity);
FalconEventPublisher.Data data = new FalconEventPublisher.Data(event);
publisher.publish(data);
} catch (Exception ex) {
throw new FalconException("Unable to publish data to publisher " + ex.getMessage(), ex);
}
}
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<feed description="test input" name="testinput" xmlns="uri:falcon:feed:0.1">
<groups>online,bi</groups>
<frequency>hours(1)</frequency>
<timezone>UTC</timezone>
<late-arrival cut-off="hours(3)"/>
<clusters>
<cluster name="testcluster" type="source">
<validity start="2010-01-01T00:00Z" end="2012-04-21T00:00Z"/>
<retention limit="hours(24)" action="delete"/>
<table uri="catalog:indb:intable#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
</cluster>
<cluster name="testcluster" type="target">
<validity start="2010-01-01T00:00Z" end="2012-04-21T00:00Z"/>
<retention limit="hours(24)" action="delete"/>
<table uri="catalog:outdb:outtable#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
</cluster>
</clusters>
<table uri="catalog:indb:unused#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}" />
<ACL owner="testuser" group="group" permission="0x755"/>
<schema location="hcat" provider="hcat"/>
</feed>
......@@ -1538,9 +1538,6 @@ public class HiveHookIT {
return assertTableIsRegistered(dbName, tableName, null, false);
}
private String assertTableIsRegistered(String dbName, String tableName, boolean isTemporary) throws Exception {
return assertTableIsRegistered(dbName, tableName, null, isTemporary);
}
private String assertTableIsRegistered(String dbName, String tableName, AssertPredicate assertPredicate, boolean isTemporary) throws Exception {
LOG.debug("Searching for table {}.{}", dbName, tableName);
......
......@@ -23,23 +23,21 @@
<appender name="console" class="org.apache.log4j.ConsoleAppender">
<param name="Target" value="System.out"/>
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%c{1}:%L)%n"/>
<param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%C{1}:%L)%n"/>
</layout>
</appender>
<appender name="FILE" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="${atlas.log.dir}/${atlas.log.file}"/>
<param name="Append" value="true"/>
<param name="Threshold" value="info"/>
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%c{1}:%L)%n"/>
<param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%C{1}:%L)%n"/>
</layout>
</appender>
<appender name="AUDIT" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="${atlas.log.dir}/audit.log"/>
<param name="Append" value="true"/>
<param name="Threshold" value="info"/>
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d %x %m%n"/>
</layout>
......@@ -55,6 +53,12 @@
<appender-ref ref="FILE"/>
</logger>
<!-- to avoid logs - The configuration log.flush.interval.messages = 1 was supplied but isn't a known config -->
<logger name="org.apache.kafka.common.config.AbstractConfig" additivity="false">
<level value="error"/>
<appender-ref ref="FILE"/>
</logger>
<logger name="AUDIT" additivity="false">
<level value="info"/>
<appender-ref ref="AUDIT"/>
......
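The ConversionPattern change swaps %c (the logger's category name) for %C (the fully qualified class of the caller, which log4j computes via stack inspection, so it is more precise for shared loggers but slower). A small illustration of when the two differ (class name hypothetical):

```java
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AuditExample {
    // Logger named "AUDIT", not after the class: %c{1} prints AUDIT,
    // while %C{1} walks the stack and prints AuditExample.
    private static final Logger AUDIT = LoggerFactory.getLogger("AUDIT");

    public static void main(String[] args) {
        AUDIT.info("audit event");
    }
}
```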
......@@ -3,18 +3,26 @@
---++ Falcon Model
The default falcon modelling is available in org.apache.atlas.falcon.model.FalconDataModelGenerator. It defines the following types:
<verbatim>
falcon_process(ClassType) - super types [Process] - attributes [timestamp, owned-by, tags]
falcon_cluster(ClassType) - super types [Infrastructure] - attributes [timestamp, colo, owner, tags]
falcon_feed(ClassType) - super types [DataSet] - attributes [timestamp, stored-in, owner, groups, tags]
falcon_feed_creation(ClassType) - super types [Process] - attributes [timestamp, stored-in, owner]
falcon_feed_replication(ClassType) - super types [Process] - attributes [timestamp, owner]
falcon_process(ClassType) - super types [Process] - attributes [timestamp, runs-on, owner, tags, pipelines, workflow-properties]
</verbatim>
One falcon_process entity is created for every cluster that the falcon process is defined for.
The entities are created and de-duped using unique qualified name. They provide namespace and can be used for querying/lineage as well. The unique attributes are:
* falcon_process - attribute name - <process name>@<cluster name>
The entities are created and de-duped using unique qualifiedName attribute. They provide namespace and can be used for querying/lineage as well. The unique attributes are:
* falcon_process - <process name>@<cluster name>
* falcon_cluster - <cluster name>
* falcon_feed - <feed name>@<cluster name>
* falcon_feed_creation - <feed name>
* falcon_feed_replication - <feed name>
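For illustration, the unique attributes above can be assembled as follows; this helper is hypothetical, not part of the bridge:

<verbatim>
// Hypothetical helper showing the unique-name conventions listed above.
public class QualifiedNames {
    static String processQualifiedName(String process, String cluster) {
        return process + "@" + cluster;   // e.g. "sample-process@primary"
    }
    static String feedQualifiedName(String feed, String cluster) {
        return feed + "@" + cluster;      // e.g. "testinput@testcluster"
    }
}
</verbatim>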
---++ Falcon Hook
Falcon supports listeners on falcon entity submission. This is used to add entities in Atlas using the model defined in org.apache.atlas.falcon.model.FalconDataModelGenerator.
The hook submits the request to a thread pool executor to avoid blocking the command execution. The thread submits the entities as messages to the notification server, and the Atlas server reads these messages and registers the entities.
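As a rough sketch of that flow (all names here are illustrative, not the actual FalconHook API), the publish path hands each event to an executor and returns immediately:

<verbatim>
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Illustrative async publisher: entity events are queued to a thread
// pool so Falcon's command execution is never blocked; the worker
// thread sends the message to the notification server, from which the
// Atlas server registers the entity.
public class AsyncPublisherSketch {
    private final ExecutorService executor = Executors.newFixedThreadPool(2);

    public void publish(final Object event) {
        executor.submit(new Runnable() {
            @Override
            public void run() {
                sendToNotificationServer(event);
            }
        });
    }

    private void sendToNotificationServer(Object event) {
        System.out.println("publishing " + event);
    }
}
</verbatim>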
* Add 'org.apache.falcon.atlas.service.AtlasService' to application.services in <falcon-conf>/startup.properties
* Add 'org.apache.atlas.falcon.service.AtlasService' to application.services in <falcon-conf>/startup.properties
* Link falcon hook jars in falcon classpath - 'ln -s <atlas-home>/hook/falcon/* <falcon-home>/server/webapp/falcon/WEB-INF/lib/'
* In <falcon_conf>/falcon-env.sh, set an environment variable as follows:
<verbatim>
......@@ -33,5 +41,4 @@ Refer [[Configuration][Configuration]] for notification related configurations
---++ Limitations
* Only the process entity creation is currently handled. This model will be expanded to include all Falcon metadata
* In falcon cluster entity, cluster name used should be uniform across components like hive, falcon, sqoop etc. If used with ambari, ambari cluster name should be used for cluster entity
......@@ -115,8 +115,11 @@ public abstract class AtlasHook {
static void notifyEntitiesInternal(List<HookNotification.HookNotificationMessage> messages, int maxRetries,
NotificationInterface notificationInterface,
boolean shouldLogFailedMessages, FailedMessagesLogger logger) {
final String message = messages.toString();
if (messages == null || messages.isEmpty()) {
return;
}
final String message = messages.toString();
int numRetries = 0;
while (true) {
try {
......
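The hunk above moves messages.toString() after the null/empty guard, so a null list now returns early instead of throwing a NullPointerException on the first line. A minimal sketch of the guard-first pattern (class and method names hypothetical):

```java
import java.util.List;

public class GuardFirstSketch {
    static void notifySketch(List<String> messages) {
        // Validate before dereferencing, as in notifyEntitiesInternal.
        if (messages == null || messages.isEmpty()) {
            return; // nothing to send
        }
        final String message = messages.toString(); // safe: non-null, non-empty
        System.out.println("sending " + message);
    }
}
```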
......@@ -61,7 +61,11 @@ public class AtlasHookTest {
@Test
public void testNotifyEntitiesRetriesOnException() throws NotificationException {
List<HookNotification.HookNotificationMessage> hookNotificationMessages = new ArrayList<>();
List<HookNotification.HookNotificationMessage> hookNotificationMessages =
new ArrayList<HookNotification.HookNotificationMessage>() {{
add(new HookNotification.EntityCreateRequest("user"));
}
};
doThrow(new NotificationException(new Exception())).when(notificationInterface)
.send(NotificationInterface.NotificationType.HOOK, hookNotificationMessages);
AtlasHook.notifyEntitiesInternal(hookNotificationMessages, 2, notificationInterface, false,
......@@ -73,7 +77,11 @@ public class AtlasHookTest {
@Test
public void testFailedMessageIsLoggedIfRequired() throws NotificationException {
List<HookNotification.HookNotificationMessage> hookNotificationMessages = new ArrayList<>();
List<HookNotification.HookNotificationMessage> hookNotificationMessages =
new ArrayList<HookNotification.HookNotificationMessage>() {{
add(new HookNotification.EntityCreateRequest("user"));
}
};
doThrow(new NotificationException(new Exception(), Arrays.asList("test message")))
.when(notificationInterface)
.send(NotificationInterface.NotificationType.HOOK, hookNotificationMessages);
......@@ -97,7 +105,11 @@ public class AtlasHookTest {
@Test
public void testAllFailedMessagesAreLogged() throws NotificationException {
List<HookNotification.HookNotificationMessage> hookNotificationMessages = new ArrayList<>();
List<HookNotification.HookNotificationMessage> hookNotificationMessages =
new ArrayList<HookNotification.HookNotificationMessage>() {{
add(new HookNotification.EntityCreateRequest("user"));
}
};
doThrow(new NotificationException(new Exception(), Arrays.asList("test message1", "test message2")))
.when(notificationInterface)
.send(NotificationInterface.NotificationType.HOOK, hookNotificationMessages);
......
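These tests replace empty lists with double-brace initialization: an anonymous ArrayList subclass whose instance initializer pre-populates the list, so the mocked send(...) call actually has a message to fail on. A standalone sketch of the idiom:

```java
import java.util.ArrayList;
import java.util.List;

public class DoubleBraceDemo {
    public static void main(String[] args) {
        // Outer braces: anonymous subclass of ArrayList.
        // Inner braces: instance initializer run at construction time.
        List<String> messages = new ArrayList<String>() {{
            add("test message");
        }};
        System.out.println(messages); // [test message]
    }
}
```

The trade-off: each such list is its own anonymous class and, in a non-static context, captures the enclosing instance; Arrays.asList("test message") is a common alternative.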
......@@ -3,6 +3,7 @@ Apache Atlas Release Notes
--trunk - unreleased
INCOMPATIBLE CHANGES:
ATLAS-835 Falcon Integration with Atlas (sowmyaramesh via shwethags)
ATLAS-912 Update to use Kafka 0.10.0.0 (from 0.9.0.0) (madhan.neethiraj via yhemanth)
ATLAS-542 Make qualifiedName and name consistent across all Datasets and Process (sumasai via yhemanth)
ATLAS-716 Entity update/delete notifications (shwethags)
......
......@@ -101,3 +101,7 @@ atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM
atlas.server.ha.enabled=false
#atlas.server.ids=id1
#atlas.server.address.id1=localhost:21000
#########POLICY FILE PATH #########
atlas.auth.policy.file=${sys:user.dir}/distro/src/conf/policy-store.txt
......@@ -23,7 +23,7 @@
<appender name="console" class="org.apache.log4j.ConsoleAppender">
<param name="Target" value="System.out"/>
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%c{1}:%L)%n"/>
<param name="ConversionPattern" value="%d %-5p - [%t:%x] ~ %m (%C{1}:%L)%n"/>
</layout>
</appender>
......@@ -43,7 +43,7 @@
<logger name="com.thinkaurelius.titan" additivity="false">
<level value="info"/>
<appender-ref ref="FILE"/>
<appender-ref ref="console"/>
</logger>
<logger name="AUDIT">
......
......@@ -73,7 +73,7 @@ public class AuditFilter implements Filter {
// put the request id into the response so users can trace logs for this request
((HttpServletResponse) response).setHeader(AtlasClient.REQUEST_ID, requestId);
currentThread.setName(oldName);
RequestContext.clear();;
RequestContext.clear();
}
}
......@@ -88,7 +88,7 @@ public class AuditFilter implements Filter {
final String whatURL = Servlets.getRequestURL(httpRequest);
final String whatAddrs = httpRequest.getLocalAddr();
LOG.debug("Audit: {}/{} performed request {} {} ({}) at time {}", who, fromAddress, whatRequest, whatURL,
LOG.info("Audit: {}/{} performed request {} {} ({}) at time {}", who, fromAddress, whatRequest, whatURL,
whatAddrs, whenISO9601);
audit(who, fromAddress, whatRequest, fromHost, whatURL, whatAddrs, whenISO9601);
}
......