Commit a15b6ef1 by Shwetha GS

full text search + other minor fixes

parent 5324e905
......@@ -34,3 +34,4 @@ maven-eclipse.xml
#log files
logs
*.log
test-output
......@@ -62,7 +62,7 @@ public class HiveMetaStoreBridge {
}
}
public static final String DGI_URL_PROPERTY = "hive.dgi.url";
public static final String DGI_URL_PROPERTY = "hive.hook.dgi.url";
private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
......
......@@ -78,6 +78,8 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
......@@ -91,6 +93,14 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
private static final int WAIT_TIME = 3;
private static ExecutorService executor;
private static final String MIN_THREADS = "hive.hook.dgi.minThreads";
private static final String MAX_THREADS = "hive.hook.dgi.maxThreads";
private static final String KEEP_ALIVE_TIME = "hive.hook.dgi.keepAliveTime";
private static final int minThreadsDefault = 5;
private static final int maxThreadsDefault = 5;
private static final long keepAliveTimeDefault = 10;
static {
// anything shared should be initialized here and destroyed in the
// shutdown hook. The hook contract is weird in that it creates a
......@@ -99,7 +109,14 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
// initialize the async facility to process hook calls. We don't
// want to do this inline since it adds plenty of overhead for the
// query.
executor = Executors.newSingleThreadExecutor(
HiveConf hiveConf = new HiveConf();
int minThreads = hiveConf.getInt(MIN_THREADS, minThreadsDefault);
int maxThreads = hiveConf.getInt(MAX_THREADS, maxThreadsDefault);
long keepAliveTime = hiveConf.getLong(KEEP_ALIVE_TIME, keepAliveTimeDefault);
executor = new ThreadPoolExecutor(minThreads, maxThreads,
keepAliveTime, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(),
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("DGI Logger %d")
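The hunk above swaps the single-thread executor for a bounded, configurable ThreadPoolExecutor. A minimal standalone sketch of the same pattern (class and message names illustrative; a plain ThreadFactory stands in for Guava's ThreadFactoryBuilder to keep it self-contained):
<verbatim>
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class BoundedHookExecutorSketch {
    public static void main(String[] args) throws InterruptedException {
        // Stand-ins for the hive.hook.dgi.* properties read from HiveConf above
        int minThreads = 5;
        int maxThreads = 5;
        long keepAliveTime = 10; // msecs; only affects threads above the core size

        final AtomicInteger count = new AtomicInteger();
        ExecutorService executor = new ThreadPoolExecutor(minThreads, maxThreads,
                keepAliveTime, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>(),
                runnable -> {
                    // Daemon threads so a pending hook call never blocks JVM shutdown,
                    // mirroring setDaemon(true) in the hook's thread factory
                    Thread t = new Thread(runnable, "DGI Logger " + count.getAndIncrement());
                    t.setDaemon(true);
                    return t;
                });

        executor.submit(() -> System.out.println("hook event processed asynchronously"));
        executor.shutdown();
        executor.awaitTermination(3, TimeUnit.SECONDS); // WAIT_TIME in the hook is 3
    }
}
</verbatim>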
......@@ -199,8 +216,16 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
}
private void registerCTAS(HiveMetaStoreBridge dgiBridge, HookContext hookContext, HiveConf conf) throws Exception {
LOG.debug("Registering CTAS");
Set<ReadEntity> inputs = hookContext.getInputs();
Set<WriteEntity> outputs = hookContext.getOutputs();
//Even explain CTAS has operation name as CREATETABLE_AS_SELECT
if (inputs.isEmpty() && outputs.isEmpty()) {
LOG.info("Explain statement. Skipping...");
return;
}
String user = hookContext.getUserName();
HiveOperation operation = HiveOperation.valueOf(hookContext.getOperationName());
String queryId = null;
......@@ -214,7 +239,6 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
queryStartTime = plan.getQueryStartTime();
}
Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
processReferenceable.set("processName", operation.getOperationName());
processReferenceable.set("startTime", queryStartTime);
......@@ -311,7 +335,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
ExplainTask explain = new ExplainTask();
explain.initialize(conf, queryPlan, null);
org.json.JSONObject explainPlan = explain.getJSONLogicalPlan(null, ew);
org.json.JSONObject explainPlan = explain.getJSONPlan(null, ew);
return explainPlan.toString();
}
......
......@@ -20,12 +20,13 @@ Hive metadata can be modelled in DGI using its Type System. The default modellin
---++ Importing Hive Metadata
org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge imports the hive metadata into DGI using the typesystem defined in org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator. import-hive.sh command can be used to facilitate this. Set-up the following configs in <dgi package>/conf/hive-site.xml:
org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge imports the Hive metadata into DGI using the type system defined in org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator. The import-hive.sh command can be used to facilitate this.
Set up the following configs in <dgi package>/conf/hive-site.xml:
* Hive metastore configuration - Refer [[https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin][Hive Metastore Configuration documentation]]
* DGI endpoint - Add the following property with the DGI endpoint for your set-up
<verbatim>
<property>
<name>hive.dgi.url</name>
<name>hive.hook.dgi.url</name>
<value>http://localhost:21000/</value>
</property>
</verbatim>
......@@ -34,13 +35,26 @@ Usage: <dgi package>/bin/import-hive.sh
---++ Hive Hook
Hive supports listeners on hive command execution using hive hooks. This can be used to add/update/remove entities in DGI. Follow the following instructions in your hive set-up
* Add org.apache.hadoop.metadata.hive.hook.HiveHook as post execution hook in hive-ste.xml
Hive supports listeners on hive command execution using hive hooks. This is used to add/update/remove entities in DGI using the model defined in org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator.
The hook submits the request to a thread pool executor to avoid blocking the command execution. Follow these instructions in your hive set-up to add the hive hook for DGI:
* Add org.apache.hadoop.metadata.hive.hook.HiveHook as post execution hook in hive-site.xml
<verbatim>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.metadata.hive.hook.HiveHook</value>
</property>
</verbatim>
* Add the following property in hive-site.xml with the DGI endpoint for your set-up
<verbatim>
<property>
<name>hive.hook.dgi.url</name>
<value>http://localhost:21000/</value>
</property>
</verbatim>
* Add 'export HIVE_AUX_JARS_PATH=<dgi package>/hook/hive' in hive-env.sh
* Restart hive-server2
The following properties in hive-site.xml control the thread pool details (an example follows this list):
* hive.hook.dgi.minThreads - core number of threads. default: 5
* hive.hook.dgi.maxThreads - maximum number of threads. default: 5
* hive.hook.dgi.keepAliveTime - keep alive time of the threads in msecs. default: 10
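For example, to override the defaults, add properties like these to hive-site.xml (values illustrative):
<verbatim>
<property>
<name>hive.hook.dgi.minThreads</name>
<value>5</value>
</property>
<property>
<name>hive.hook.dgi.maxThreads</name>
<value>5</value>
</property>
<property>
<name>hive.hook.dgi.keepAliveTime</name>
<value>10</value>
</property>
</verbatim>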
......@@ -69,6 +69,7 @@ public class HiveHookIT {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, System.getProperty("user.dir") + "/target/metastore");
hiveConf.set(HiveMetaStoreBridge.DGI_URL_PROPERTY, DGI_URL);
hiveConf.set("javax.jdo.option.ConnectionURL", "jdbc:derby:./target/metastore_db;create=true");
hiveConf.set("debug", "true");
return hiveConf;
}
......
......@@ -19,11 +19,11 @@
######### Graph Database Configs #########
# Graph Storage
metadata.graph.storage.backend=berkeleyje
metadata.graph.storage.directory=./data/berkeley
metadata.graph.storage.directory=./target/data/berkeley
# Graph Search Index
metadata.graph.index.search.backend=elasticsearch
metadata.graph.index.search.directory=./data/es
metadata.graph.index.search.directory=./target/data/es
metadata.graph.index.search.elasticsearch.client-only=false
metadata.graph.index.search.elasticsearch.local-mode=true
......
......@@ -29,7 +29,7 @@ import org.apache.hadoop.metadata.repository.graph.GraphBackedMetadataRepository
import org.apache.hadoop.metadata.repository.graph.GraphBackedSearchIndexer;
import org.apache.hadoop.metadata.repository.graph.GraphProvider;
import org.apache.hadoop.metadata.repository.graph.TitanGraphProvider;
import org.apache.hadoop.metadata.repository.typestore.GraphTypeStore;
import org.apache.hadoop.metadata.repository.typestore.GraphBackedTypeStore;
import org.apache.hadoop.metadata.repository.typestore.ITypeStore;
import org.apache.hadoop.metadata.services.DefaultMetadataService;
import org.apache.hadoop.metadata.services.MetadataService;
......@@ -55,7 +55,7 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
// get the impl classes for the repo and the graph service
// this.graphServiceClass = gsp.getImplClass();
this.metadataRepoClass = GraphBackedMetadataRepository.class;
this.typeStore = GraphTypeStore.class;
this.typeStore = GraphBackedTypeStore.class;
this.metadataService = DefaultMetadataService.class;
this.discoveryService = GraphBackedDiscoveryService.class;
this.searchIndexer = GraphBackedSearchIndexer.class;
......
......@@ -27,6 +27,11 @@ import java.util.Map;
public interface DiscoveryService {
/**
* Full text search over the entity text index.
*
* @param query words to match against indexed entities
* @return response as a JSON string containing the query and matching guid/typeName rows
*/
String searchByFullText(String query) throws DiscoveryException;
/**
* Search using query DSL.
*
* @param dslQuery query in DSL format.
......
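For reference, a minimal sketch of how a caller might consume searchByFullText, assuming the JSON shape produced by GraphBackedDiscoveryService below (the class and helper are hypothetical):
<verbatim>
import org.apache.hadoop.metadata.discovery.DiscoveryService;
import org.json.JSONArray;
import org.json.JSONObject;

public class FullTextSearchClientSketch {
    // Prints guid -> typeName for every entity matching the query words
    static void printMatches(DiscoveryService discoveryService, String query) throws Exception {
        JSONObject json = new JSONObject(discoveryService.searchByFullText(query));
        JSONArray results = json.getJSONArray("results");
        for (int i = 0; i < results.length(); i++) {
            JSONObject row = results.getJSONObject(i);
            System.out.println(row.getString("guid") + " -> " + row.getString("typeName"));
        }
    }
}
</verbatim>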
......@@ -18,9 +18,15 @@
package org.apache.hadoop.metadata.discovery.graph;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.TitanGraphQuery;
import com.thinkaurelius.titan.core.TitanProperty;
import com.thinkaurelius.titan.core.TitanVertex;
import com.thinkaurelius.titan.core.attribute.Text;
import com.tinkerpop.blueprints.Vertex;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.metadata.discovery.DiscoveryException;
import org.apache.hadoop.metadata.discovery.DiscoveryService;
import org.apache.hadoop.metadata.query.Expressions;
......@@ -30,8 +36,12 @@ import org.apache.hadoop.metadata.query.GremlinQueryResult;
import org.apache.hadoop.metadata.query.GremlinTranslator;
import org.apache.hadoop.metadata.query.QueryParser;
import org.apache.hadoop.metadata.query.QueryProcessor;
import org.apache.hadoop.metadata.repository.Constants;
import org.apache.hadoop.metadata.repository.MetadataRepository;
import org.apache.hadoop.metadata.repository.graph.GraphProvider;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.apache.hadoop.metadata.typesystem.json.InstanceSerialization;
import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.util.Either;
......@@ -44,8 +54,11 @@ import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Graph backed implementation of Search.
......@@ -64,6 +77,23 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
this.graphPersistenceStrategy = new DefaultGraphPersistenceStrategy(metadataRepository);
}
@Override
public String searchByFullText(String query) throws DiscoveryException {
Iterator iterator = titanGraph.query().has(Constants.ENTITY_TEXT_PROPERTY_KEY, Text.CONTAINS, query).vertices().iterator();
JsonArray results = new JsonArray();
while (iterator.hasNext()) {
Vertex vertex = (Vertex) iterator.next();
JsonObject row = new JsonObject();
row.addProperty("guid", vertex.<String>getProperty(Constants.GUID_PROPERTY_KEY));
row.addProperty("typeName", vertex.<String>getProperty(Constants.ENTITY_TYPE_PROPERTY_KEY));
results.add(row);
}
JsonObject response = new JsonObject();
response.addProperty("query", query);
response.add("results", results);
return response.toString();
}
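For a query like "john", the response built above would look roughly like {"query":"john","results":[{"guid":"<some-guid>","typeName":"Person"}]} (guid illustrative).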
/**
* Search using query DSL.
*
......@@ -118,47 +148,48 @@ public class GraphBackedDiscoveryService implements DiscoveryService {
try {
Object o = engine.eval(gremlinQuery, bindings);
if (!(o instanceof List)) {
throw new DiscoveryException(
String.format("Cannot process gremlin result %s", o.toString()));
}
return extractResult(o);
} catch (ScriptException se) {
throw new DiscoveryException(se);
}
}
List l = (List) o;
List<Map<String, String>> result = new ArrayList<>();
for (Object r : l) {
Map<String, String> oRow = new HashMap<>();
if (r instanceof Map) {
@SuppressWarnings("unchecked")
Map<Object, Object> iRow = (Map) r;
for (Map.Entry e : iRow.entrySet()) {
Object k = e.getKey();
Object v = e.getValue();
oRow.put(k.toString(), v.toString());
}
} else if (r instanceof TitanVertex) {
Iterable<TitanProperty> ps = ((TitanVertex) r).getProperties();
for (TitanProperty tP : ps) {
String pName = tP.getPropertyKey().getName();
Object pValue = ((TitanVertex) r).getProperty(pName);
if (pValue != null) {
oRow.put(pName, pValue.toString());
}
}
private List<Map<String, String>> extractResult(Object o) throws DiscoveryException {
if (!(o instanceof List)) {
throw new DiscoveryException(String.format("Cannot process result %s", o.toString()));
}
} else if (r instanceof String) {
oRow.put("", r.toString());
} else {
throw new DiscoveryException(
String.format("Cannot process gremlin result %s", o.toString()));
List l = (List) o;
List<Map<String, String>> result = new ArrayList<>();
for (Object r : l) {
Map<String, String> oRow = new HashMap<>();
if (r instanceof Map) {
@SuppressWarnings("unchecked")
Map<Object, Object> iRow = (Map) r;
for (Map.Entry e : iRow.entrySet()) {
Object k = e.getKey();
Object v = e.getValue();
oRow.put(k.toString(), v.toString());
}
} else if (r instanceof TitanVertex) {
Iterable<TitanProperty> ps = ((TitanVertex) r).getProperties();
for (TitanProperty tP : ps) {
String pName = tP.getPropertyKey().getName();
Object pValue = ((TitanVertex) r).getProperty(pName);
if (pValue != null) {
oRow.put(pName, pValue.toString());
}
}
result.add(oRow);
} else if (r instanceof String) {
oRow.put("", r.toString());
} else {
throw new DiscoveryException(String.format("Cannot process result %s", o.toString()));
}
return result;
} catch (ScriptException se) {
throw new DiscoveryException(se);
result.add(oRow);
}
return result;
}
}
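Net effect of the hunk above: the gremlin result translation (map rows, TitanVertex properties, bare strings) moves out of the query-evaluation try block into the new extractResult(Object) helper; the translation logic itself is unchanged.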
......@@ -32,6 +32,8 @@ public final class Constants {
public static final String ENTITY_TYPE_PROPERTY_KEY = "typeName";
public static final String ENTITY_TYPE_INDEX = "type_index";
public static final String ENTITY_TEXT_PROPERTY_KEY = "entityText";
/**
* Properties for type store graph
*/
......
......@@ -36,9 +36,12 @@ import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.typesystem.ITypedStruct;
import org.apache.hadoop.metadata.typesystem.persistence.Id;
import org.apache.hadoop.metadata.typesystem.persistence.MapIds;
import org.apache.hadoop.metadata.typesystem.persistence.StructInstance;
import org.apache.hadoop.metadata.typesystem.types.AttributeInfo;
import org.apache.hadoop.metadata.typesystem.types.ClassType;
import org.apache.hadoop.metadata.typesystem.types.DataTypes;
import org.apache.hadoop.metadata.typesystem.types.EnumType;
import org.apache.hadoop.metadata.typesystem.types.EnumValue;
import org.apache.hadoop.metadata.typesystem.types.IDataType;
import org.apache.hadoop.metadata.typesystem.types.Multiplicity;
import org.apache.hadoop.metadata.typesystem.types.ObjectGraphWalker;
......@@ -67,6 +70,7 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
private static final Logger LOG =
LoggerFactory.getLogger(GraphBackedMetadataRepository.class);
private static final String FULL_TEXT_DELIMITER = " ";
private final AtomicInteger ID_SEQ = new AtomicInteger(0);
......@@ -422,7 +426,71 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
List<ITypedReferenceableInstance> newTypedInstances = discoverInstances(entityProcessor);
entityProcessor.createVerticesForClassTypes(newTypedInstances);
return addDiscoveredInstances(typedInstance, entityProcessor, newTypedInstances);
String guid = addDiscoveredInstances(typedInstance, entityProcessor, newTypedInstances);
addFullTextProperty(entityProcessor, newTypedInstances);
return guid;
}
private void addFullTextProperty(EntityProcessor entityProcessor, List<ITypedReferenceableInstance> newTypedInstances) throws MetadataException {
for (ITypedReferenceableInstance typedInstance : newTypedInstances) { // Traverse
Id id = typedInstance.getId();
Vertex instanceVertex = entityProcessor.idToVertexMap.get(id);
String fullText = getFullText(instanceVertex, true);
instanceVertex.setProperty(Constants.ENTITY_TEXT_PROPERTY_KEY, fullText);
}
}
private String getFullText(Vertex instanceVertex, boolean followReferences) throws MetadataException {
String guid = instanceVertex.getProperty(Constants.GUID_PROPERTY_KEY);
ITypedReferenceableInstance typedReference = graphToInstanceMapper.mapGraphToTypedInstance(guid, instanceVertex);
return getFullText(typedReference, followReferences);
}
private String getFullText(ITypedInstance typedInstance, boolean followReferences) throws MetadataException {
StringBuilder fullText = new StringBuilder();
for (AttributeInfo attributeInfo : typedInstance.fieldMapping().fields.values()) {
Object attrValue = typedInstance.get(attributeInfo.name);
if (attrValue == null) {
continue;
}
String attrFullText = null;
switch(attributeInfo.dataType().getTypeCategory()) {
case PRIMITIVE:
attrFullText = String.valueOf(attrValue);
break;
case ENUM:
attrFullText = ((EnumValue)attrValue).value;
break;
case ARRAY:
break;
case MAP:
break;
case CLASS:
if (followReferences) {
String refGuid = ((ITypedReferenceableInstance) attrValue).getId()._getId();
Vertex refVertex = getVertexForGUID(refGuid);
attrFullText = getFullText(refVertex, false);
}
break;
case STRUCT:
case TRAIT:
if (followReferences) {
attrFullText = getFullText((ITypedInstance) attrValue, false);
}
break;
}
if (attrFullText != null) {
fullText = fullText.append(FULL_TEXT_DELIMITER).append(attributeInfo.name)
.append(FULL_TEXT_DELIMITER).append(attrFullText);
}
}
return fullText.toString();
}
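For illustration, given the Person instance built in TestUtils in this commit (name John, address with street Stewart Drive and city Sunnyvale, department hr), the resulting entityText is a space-delimited run of attribute names and values, roughly: name John department ... name hr ... address street Stewart Drive city Sunnyvale (exact ordering follows the type's field map). Class references contribute one level of the referenced instance's text (followReferences is false on the recursive call), which is why the "john hr" query in the tests below matches the Person.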
/**
......@@ -518,6 +586,9 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
IDataType dataType) throws MetadataException {
LOG.debug("mapping attributeInfo {}", attributeInfo);
final String propertyName = typedInstance.getTypeName() + "." + attributeInfo.name;
if (typedInstance.get(attributeInfo.name) == null) {
return;
}
switch (dataType.getTypeCategory()) {
case PRIMITIVE:
......@@ -824,8 +895,7 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
AttributeInfo attributeInfo) throws MetadataException {
LOG.debug("mapping attributeInfo = " + attributeInfo);
final IDataType dataType = attributeInfo.dataType();
final String vertexPropertyName =
typedInstance.getTypeName() + "." + attributeInfo.name;
final String vertexPropertyName = typedInstance.getTypeName() + "." + attributeInfo.name;
switch (dataType.getTypeCategory()) {
case PRIMITIVE:
......@@ -833,6 +903,10 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
break; // add only if vertex has this attribute
case ENUM:
if (instanceVertex.getProperty(vertexPropertyName) == null) {
return;
}
typedInstance.setInt(attributeInfo.name,
instanceVertex.<Integer>getProperty(vertexPropertyName));
break;
......@@ -1060,8 +1134,7 @@ public class GraphBackedMetadataRepository implements MetadataRepository {
ITypedInstance typedInstance,
AttributeInfo attributeInfo) throws MetadataException {
LOG.debug("Adding primitive {} from vertex {}", attributeInfo, instanceVertex);
final String vertexPropertyName = typedInstance.getTypeName() + "." +
attributeInfo.name;
final String vertexPropertyName = typedInstance.getTypeName() + "." + attributeInfo.name;
if (instanceVertex.getProperty(vertexPropertyName) == null) {
return;
}
......
......@@ -95,9 +95,30 @@ public class GraphBackedSearchIndexer implements SearchIndexer {
createCompositeAndMixedIndex(Constants.TRAIT_NAMES_INDEX,
Constants.TRAIT_NAMES_PROPERTY_KEY, String.class, false, Cardinality.SET);
//Index for full text search
createVertexMixedIndex(Constants.ENTITY_TEXT_PROPERTY_KEY, String.class, Cardinality.SINGLE);
//Indexes for graph backed type system store
createTypeStoreIndexes();
LOG.info("Index creation for global keys complete.");
}
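The entityText mixed index created above is what serves the Text.CONTAINS predicate in GraphBackedDiscoveryService.searchByFullText; with the shipped configs, Titan delegates it to the Elasticsearch index backend.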
private void createTypeStoreIndexes() {
//Create unique index on typeName
createCompositeIndex(Constants.TYPENAME_PROPERTY_KEY, Constants.TYPENAME_PROPERTY_KEY, String.class,
true, Cardinality.SINGLE);
//Create index on vertex type + typeName
//todo doesn't work, review
TitanManagement management = titanGraph.getManagementSystem();
PropertyKey vertexType = management.makePropertyKey(Constants.VERTEX_TYPE_PROPERTY_KEY).dataType(String.class).make();
PropertyKey typeName = management.getPropertyKey(Constants.TYPENAME_PROPERTY_KEY);
management.buildIndex("byTypeName", Vertex.class).addKey(vertexType).addKey(typeName).buildCompositeIndex();
management.commit();
LOG.debug("Created composite index on {} and {}", Constants.VERTEX_TYPE_PROPERTY_KEY, Constants.TYPENAME_PROPERTY_KEY);
}
/**
* This is invoked upon adding a new type to the store.
*
......
......@@ -52,7 +52,8 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class GraphTypeStore implements ITypeStore {
public class GraphBackedTypeStore implements ITypeStore {
public static final String VERTEX_TYPE = "typeSystem";
private static final String PROPERTY_PREFIX = "type.";
public static final String SUPERTYPE_EDGE_LABEL = PROPERTY_PREFIX + ".supertype";
public static final String SUBTYPE_EDGE_LABEL = PROPERTY_PREFIX + ".subtype";
......@@ -60,12 +61,12 @@ public class GraphTypeStore implements ITypeStore {
private static final ImmutableList META_PROPERTIES = ImmutableList.of(Constants.VERTEX_TYPE_PROPERTY_KEY,
Constants.TYPE_CATEGORY_PROPERTY_KEY, Constants.TYPENAME_PROPERTY_KEY);
private static Logger LOG = LoggerFactory.getLogger(GraphTypeStore.class);
private static Logger LOG = LoggerFactory.getLogger(GraphBackedTypeStore.class);
private final TitanGraph titanGraph;
@Inject
public GraphTypeStore(GraphProvider<TitanGraph> graphProvider) {
public GraphBackedTypeStore(GraphProvider<TitanGraph> graphProvider) {
titanGraph = graphProvider.get();
}
......@@ -82,7 +83,7 @@ public class GraphTypeStore implements ITypeStore {
for (String typeName : typeNames) {
if (!coreTypes.contains(typeName)) {
IDataType dataType = typeSystem.getDataType(IDataType.class, typeName);
LOG.debug("Adding {}.{} to type store", dataType.getTypeCategory(), dataType.getName());
LOG.debug("Processing {}.{} in type store", dataType.getTypeCategory(), dataType.getName());
switch (dataType.getTypeCategory()) {
case ENUM:
storeInGraph((EnumType)dataType);
......@@ -207,7 +208,7 @@ public class GraphTypeStore implements ITypeStore {
@Override
public TypesDef restore() throws MetadataException {
//Get all vertices for type system
Iterator vertices = titanGraph.query().has(Constants.VERTEX_TYPE_PROPERTY_KEY, PROPERTY_PREFIX).vertices().iterator();
Iterator vertices = titanGraph.query().has(Constants.VERTEX_TYPE_PROPERTY_KEY, VERTEX_TYPE).vertices().iterator();
ImmutableList.Builder<EnumTypeDefinition> enums = ImmutableList.builder();
ImmutableList.Builder<StructTypeDefinition> structs = ImmutableList.builder();
......@@ -294,10 +295,9 @@ public class GraphTypeStore implements ITypeStore {
* @return vertex
*/
private Vertex findVertex(DataTypes.TypeCategory category, String typeName) {
LOG.debug("Finding vertex for ({} - {}), ({} - {})", Constants.TYPE_CATEGORY_PROPERTY_KEY, category,
Constants.TYPENAME_PROPERTY_KEY, typeName);
LOG.debug("Finding vertex for {}.{}", category, typeName);
Iterator results = titanGraph.query().has(Constants.VERTEX_TYPE_PROPERTY_KEY, PROPERTY_PREFIX)
Iterator results = titanGraph.query().has(Constants.VERTEX_TYPE_PROPERTY_KEY, VERTEX_TYPE)
.has(Constants.TYPENAME_PROPERTY_KEY, typeName).vertices().iterator();
Vertex vertex = null;
if (results != null && results.hasNext()) {
......@@ -312,7 +312,7 @@ public class GraphTypeStore implements ITypeStore {
if (vertex == null) {
LOG.debug("Adding vertex {}{}", PROPERTY_PREFIX, typeName);
vertex = titanGraph.addVertex(null);
vertex.setProperty(Constants.VERTEX_TYPE_PROPERTY_KEY, PROPERTY_PREFIX); //Mark as type vertex
vertex.setProperty(Constants.VERTEX_TYPE_PROPERTY_KEY, VERTEX_TYPE); //Mark as type vertex
vertex.setProperty(Constants.TYPE_CATEGORY_PROPERTY_KEY, category);
vertex.setProperty(Constants.TYPENAME_PROPERTY_KEY, typeName);
}
......
......@@ -37,6 +37,7 @@ import org.testng.Assert;
import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createClassTypeDef;
import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createOptionalAttrDef;
import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createRequiredAttrDef;
import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createStructTypeDef;
import static org.apache.hadoop.metadata.typesystem.types.utils.TypesUtil.createTraitTypeDef;
/**
......@@ -61,10 +62,13 @@ public final class TestUtils {
new EnumTypeDefinition("OrgLevel", new EnumValue("L1", 1), new EnumValue("L2", 2));
ts.defineEnumType(orgLevelEnum);
StructTypeDefinition addressDetails = createStructTypeDef("Address",
createRequiredAttrDef("street", DataTypes.STRING_TYPE),
createRequiredAttrDef("city", DataTypes.STRING_TYPE));
HierarchicalTypeDefinition<ClassType> deptTypeDef =
createClassTypeDef("Department", ImmutableList.<String>of(),
createRequiredAttrDef("name", DataTypes.STRING_TYPE),
createOptionalAttrDef("orgLevel", ts.getDataType(EnumType.class, "OrgLevel")),
new AttributeDefinition("employees",
String.format("array<%s>", "Person"), Multiplicity.COLLECTION, true,
"department")
......@@ -73,6 +77,8 @@ public final class TestUtils {
HierarchicalTypeDefinition<ClassType> personTypeDef = createClassTypeDef("Person",
ImmutableList.<String>of(),
createRequiredAttrDef("name", DataTypes.STRING_TYPE),
createOptionalAttrDef("orgLevel", ts.getDataType(EnumType.class, "OrgLevel")),
createOptionalAttrDef("address", "Address"),
new AttributeDefinition("department",
"Department", Multiplicity.REQUIRED, false, "employees"),
new AttributeDefinition("manager",
......@@ -92,7 +98,7 @@ public final class TestUtils {
createRequiredAttrDef("level", DataTypes.INT_TYPE)
);
ts.defineTypes(ImmutableList.<StructTypeDefinition>of(),
ts.defineTypes(ImmutableList.of(addressDetails),
ImmutableList.of(securityClearanceTypeDef),
ImmutableList.of(deptTypeDef, personTypeDef, managerTypeDef));
}
......@@ -101,12 +107,21 @@ public final class TestUtils {
Referenceable hrDept = new Referenceable("Department");
Referenceable john = new Referenceable("Person");
Referenceable jane = new Referenceable("Manager", "SecurityClearance");
Referenceable johnAddr = new Referenceable("Address");
Referenceable janeAddr = new Referenceable("Address");
hrDept.set("name", "hr");
john.set("name", "John");
john.set("department", hrDept);
johnAddr.set("street", "Stewart Drive");
johnAddr.set("city", "Sunnyvale");
john.set("address", johnAddr);
jane.set("name", "Jane");
jane.set("department", hrDept);
janeAddr.set("street", "Great Americal Parkway");
janeAddr.set("city", "Santa Clara");
jane.set("address", janeAddr);
john.set("manager", jane);
......
......@@ -291,4 +291,32 @@ public class GraphBackedDiscoveryServiceTest {
Assert.assertNotEquals(name, "null");
}
}
@Test
public void testFullTextSearch() throws Exception {
//person in hr department whose name is john
String response = discoveryService.searchByFullText("john hr");
Assert.assertNotNull(response);
JSONObject jsonResponse = new JSONObject(response);
JSONArray results = jsonResponse.getJSONArray("results");
Assert.assertEquals(results.length(), 1);
JSONObject row = (JSONObject) results.get(0);
Assert.assertEquals(row.get("typeName"), "Person");
//person in hr department who lives in santa clara
response = discoveryService.searchByFullText("hr santa clara");
Assert.assertNotNull(response);
jsonResponse = new JSONObject(response);
results = jsonResponse.getJSONArray("results");
Assert.assertEquals(results.length(), 1);
row = (JSONObject) results.get(0);
Assert.assertEquals(row.get("typeName"), "Manager");
//search for hr should return the hr department and its 2 employees
response = discoveryService.searchByFullText("hr");
Assert.assertNotNull(response);
jsonResponse = new JSONObject(response);
results = jsonResponse.getJSONArray("results");
Assert.assertEquals(results.length(), 3);
}
}
\ No newline at end of file
......@@ -195,7 +195,7 @@ public class EnumTest extends BaseTest {
"\tj : \t1\n" +
"\tk : \t1\n" +
"\tenum3 : \tCOMMITTED\n" +
"\tl : \t2014-12-10\n" +
"\tl : \t2014-12-11\n" +
"\tm : \t[1, 1]\n" +
"\tn : \t[1.1, 1.1]\n" +
"\to : \t{b=2.0, a=1.0}\n" +
......@@ -227,7 +227,7 @@ public class EnumTest extends BaseTest {
"\tj : \t1\n" +
"\tk : \t1\n" +
"\tenum3 : \tCOMMITTED\n" +
"\tl : \t2014-12-10\n" +
"\tl : \t2014-12-11\n" +
"\tm : \t[1, 1]\n" +
"\tn : \t[1.1, 1.1]\n" +
"\to : \t{b=2.0, a=1.0}\n" +
......@@ -264,7 +264,7 @@ public class EnumTest extends BaseTest {
"\tj : \t1\n" +
"\tk : \t1\n" +
"\tenum3 : \tCOMMITTED\n" +
"\tl : \t2014-12-10\n" +
"\tl : \t2014-12-11\n" +
"\tm : \t[1, 1]\n" +
"\tn : \t[1.1, 1.1]\n" +
"\to : \t{b=2.0, a=1.0}\n" +
......
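The expected date strings flip from 2014-12-10 to 2014-12-11 in these EnumTest assertions (and in StructTest below) because TypeSystem now formats dates in UTC (see the TypeSystem diff near the end of this commit) instead of the machine's local timezone.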
......@@ -58,7 +58,7 @@ public class StructTest extends BaseTest {
"\ti : \t1.0\n" +
"\tj : \t1\n" +
"\tk : \t1\n" +
"\tl : \t2014-12-10\n" +
"\tl : \t2014-12-11\n" +
"\tm : \t[1, 1]\n" +
"\tn : \t[1.1, 1.1]\n" +
"\to : \t{b=2.0, a=1.0}\n" +
......@@ -101,7 +101,7 @@ public class StructTest extends BaseTest {
"\ti : \t1.0\n" +
"\tj : \t1\n" +
"\tk : \t1\n" +
"\tl : \t2014-12-10\n" +
"\tl : \t2014-12-11\n" +
"\tm : \t[1, 1]\n" +
"\tn : \t[1.100000000000000088817841970012523233890533447265625, 1" +
".100000000000000088817841970012523233890533447265625]\n" +
......
......@@ -34,6 +34,7 @@ import org.apache.hadoop.metadata.typesystem.types.ClassType;
import org.apache.hadoop.metadata.typesystem.types.DataTypes;
import org.apache.hadoop.metadata.typesystem.types.EnumTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.HierarchicalTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.StructTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.TraitType;
import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
import org.testng.annotations.BeforeClass;
......@@ -44,7 +45,7 @@ import javax.inject.Inject;
import java.util.List;
@Guice(modules = RepositoryMetadataModule.class)
public class GraphTypeStoreTest {
public class GraphBackedTypeStoreTest {
@Inject
private GraphProvider<TitanGraph> graphProvider;
......@@ -85,7 +86,9 @@ public class GraphTypeStoreTest {
Assert.assertEquals(1, enumTypes.size());
//validate class
Assert.assertTrue(types.structTypesAsJavaList().isEmpty());
List<StructTypeDefinition> structTypes = types.structTypesAsJavaList();
Assert.assertEquals(1, structTypes.size());
List<HierarchicalTypeDefinition<ClassType>> classTypes = types.classTypesAsJavaList();
Assert.assertEquals(3, classTypes.size());
for (HierarchicalTypeDefinition<ClassType> classType : classTypes) {
......
......@@ -22,7 +22,7 @@ metadata.graph.storage.backend=inmemory
# Graph Search Index
metadata.graph.index.search.backend=elasticsearch
metadata.graph.index.search.directory=./data/es
metadata.graph.index.search.directory=./target/data/es
metadata.graph.index.search.elasticsearch.client-only=false
metadata.graph.index.search.elasticsearch.local-mode=true
......
......@@ -19,11 +19,11 @@
######### Graph Database Configs #########
# Graph Storage
metadata.graph.storage.backend=berkeleyje
metadata.graph.storage.directory=./data/berkeley
metadata.graph.storage.directory=./target/data/berkeley
# Graph Search Index
metadata.graph.index.search.backend=elasticsearch
metadata.graph.index.search.directory=./data/es
metadata.graph.index.search.directory=./target/data/es
metadata.graph.index.search.elasticsearch.client-only=false
metadata.graph.index.search.elasticsearch.local-mode=true
......
......@@ -35,13 +35,22 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.ConcurrentHashMap;
@Singleton
@InterfaceAudience.Private
public class TypeSystem {
private static final TypeSystem INSTANCE = new TypeSystem();
public static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
public static ThreadLocal<DateFormat> dateFormat = new ThreadLocal<DateFormat>() {
@Override
public DateFormat initialValue() {
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
return dateFormat;
}
};
private Map<String, IDataType> types;
private IdType idType;
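SimpleDateFormat is not thread-safe, so the ThreadLocal above gives each thread its own instance instead of the previously shared static one. A minimal sketch of the same pattern in Java 8 style (class name illustrative):
<verbatim>
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class PerThreadDateFormatSketch {
    // One DateFormat per thread; SimpleDateFormat instances must not be shared
    static final ThreadLocal<DateFormat> DATE_FORMAT = ThreadLocal.withInitial(() -> {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    });

    public static void main(String[] args) {
        System.out.println(DATE_FORMAT.get().format(new Date()));
    }
}
</verbatim>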
......@@ -277,7 +286,7 @@ public class TypeSystem {
}
public DateFormat getDateFormat() {
return dateFormat;
return dateFormat.get();
}
public boolean allowNullsInCollections() {
......
......@@ -26,6 +26,7 @@ import org.apache.hadoop.metadata.typesystem.types.EnumValue;
import org.apache.hadoop.metadata.typesystem.types.HierarchicalTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.IDataType;
import org.apache.hadoop.metadata.typesystem.types.Multiplicity;
import org.apache.hadoop.metadata.typesystem.types.StructTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.TraitType;
/**
......@@ -73,6 +74,10 @@ public class TypesUtil {
return new HierarchicalTypeDefinition<>(TraitType.class, name, superTypes, attrDefs);
}
public static StructTypeDefinition createStructTypeDef(String name, AttributeDefinition... attrDefs) {
return new StructTypeDefinition(name, attrDefs);
}
public static HierarchicalTypeDefinition<ClassType> createClassTypeDef(
String name, ImmutableList<String> superTypes, AttributeDefinition... attrDefs) {
return new HierarchicalTypeDefinition<>(ClassType.class, name, superTypes, attrDefs);
......
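Usage of the new createStructTypeDef helper mirrors the Address struct defined in TestUtils earlier in this commit (fragment; TypesUtil's helpers are statically imported there):
<verbatim>
StructTypeDefinition addressDetails = createStructTypeDef("Address",
        createRequiredAttrDef("street", DataTypes.STRING_TYPE),
        createRequiredAttrDef("city", DataTypes.STRING_TYPE));
</verbatim>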