Commit 17803eed by skoritala Committed by Madhan Neethiraj

ATLAS-3246: enhancements in free-text search functionality

parent 8b722eb8
......@@ -39,7 +39,7 @@ public final class Constants {
public static final String GUID_PROPERTY_KEY = encodePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "guid");
public static final String RELATIONSHIP_GUID_PROPERTY_KEY = encodePropertyKey(RELATIONSHIP_PROPERTY_KEY_PREFIX + GUID_PROPERTY_KEY);
public static final String HISTORICAL_GUID_PROPERTY_KEY = encodePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "historicalGuids");
public static final String FREETEXT_REQUEST_HANDLER = "/freetext";
/**
* Entity type name property key.
*/
......
......@@ -32,7 +32,7 @@ LIB = "lib"
CONF = "conf"
LOG = "logs"
WEBAPP = "server" + os.sep + "webapp"
SOLR_CONF_DIR = "conf" + os.sep + "solr"
CONFIG_SETS_CONF = "server" + os.sep + "solr" + os.sep + "configsets" + os.sep + "_default" + os.sep + "conf"
DATA = "data"
ATLAS_CONF = "ATLAS_CONF"
ATLAS_LOG = "ATLAS_LOG_DIR"
......@@ -112,7 +112,7 @@ def elasticsearchBinDir(dir):
return os.environ.get(SOLR_BIN, os.path.join(dir, "elasticsearch", BIN))
def solrConfDir(dir):
return os.environ.get(SOLR_CONF, os.path.join(dir, SOLR_CONF_DIR))
return os.environ.get(SOLR_CONF, os.path.join(dir, "solr", CONFIG_SETS_CONF))
def solrPort():
return os.environ.get(SOLR_PORT, DEFAULT_SOLR_PORT)
......
......@@ -524,20 +524,6 @@
class="solr.UUIDField"
indexed="true" />
<fieldType name="freetext" class="solr.TextField" omitNorms="true"
omitTermFreqAndPositions="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="16" />
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<dynamicField name="*_uuid" type="uuid" indexed="true" stored="true"/>
......@@ -545,16 +531,4 @@
<field name="ttl" type="string" indexed="true" stored="true" />
<field name="expire_at" type="date" indexed="true" stored="true" />
<field name="timestamp" type="date" indexed="true" stored="true" />
<field name="allt1s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt2s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt3s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt4s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt5s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt6s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt7s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt8s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt9s" type="freetext" multiValued="true" indexed="true" stored="false"/>
<field name="allt10s" type="freetext" multiValued="true" indexed="true" stored="false"/>
</schema>
......@@ -478,21 +478,6 @@
</lst>
</requestHandler>
<requestHandler name="/freetext" class="solr.SearchHandler">
<lst name="defaults">
<str name="defType">edismax</str>
<int name="rows">100</int>
<str name="lowercaseOperators">true</str>
<str name="qf">
allt10s^10 allt9s^9 allt8s^8 allt7s^7 allt6s^6 allt5s^5 allt4s^4 allt3s^3 allt2s^2 allt1s^1
</str>
<str name="hl.fl">*</str>
<str name="hl.requireFieldMatch">true</str>
<str name="lowercaseOperators">true</str>
<str name="facet.limit">5</str>
</lst>
</requestHandler>
<!--
The export request handler is used to export full sorted result sets.
......
......@@ -17,15 +17,18 @@
*/
package org.apache.atlas.repository.graphdb;
import java.util.Map;
/**
* Represents a graph client that works with the indices used by JanusGraph.
*/
public interface AtlasGraphIndexClient {
/**
* The implementers should create a mapping from source propertyName to mapping field name.
* @param indexName the name of index that needs to be worked with.
* @param sourcePropertyName the name of the source attribute.
* @param targetPropertyName the name of the target attribute to which the mapping is getting done.
* The implementers should apply the search weights for the passed in attributes.
* @param collectionName the name of the collection for which the search weight needs to be applied
* @param attributeName2SearchWeightMap the map containing search weights from attribute name to search weights.
*/
void createCopyField(String indexName, String sourcePropertyName, String targetPropertyName);
void applySearchWeight(String collectionName, Map<String, Integer> attributeName2SearchWeightMap);
}
......@@ -196,7 +196,7 @@ public class AtlasJanusGraph implements AtlasGraph<AtlasJanusVertex, AtlasJanusE
@Override
public AtlasGraphIndexClient getGraphIndexClient() throws AtlasException {
try {
return new AtlasJanusGraphSolrIndexClient();
return new AtlasJanusGraphSolrIndexClient(this);
} catch (Exception e) {
LOG.error("Error encountered in creating Graph Index Client.", e);
throw new AtlasException(e);
......
......@@ -17,45 +17,144 @@
*/
package org.apache.atlas.repository.graphdb.janus;
import com.google.common.annotations.VisibleForTesting;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.graphdb.AtlasGraphIndexClient;
import org.apache.atlas.repository.graphdb.AtlasGraphManagement;
import org.apache.atlas.repository.graphdb.AtlasPropertyKey;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
import org.apache.solr.client.solrj.request.V2Request;
import org.janusgraph.diskstorage.solr.Solr6Index;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
import java.util.*;
import static org.apache.atlas.repository.Constants.FREETEXT_REQUEST_HANDLER;
public class AtlasJanusGraphSolrIndexClient implements AtlasGraphIndexClient {
private static final Logger LOG = LoggerFactory.getLogger(AtlasJanusGraphSolrIndexClient.class);
private final SolrClient solrClient;
private final AtlasGraph graph;
public AtlasJanusGraphSolrIndexClient(AtlasGraph graph) {
// get solr client using same settings as that of Janus Graph
this.solrClient = Solr6Index.getSolrClient();
this.graph = graph;
public AtlasJanusGraphSolrIndexClient() throws Exception {
//well, this is temp hack to get solr client using same settings as that of Janus Graph
solrClient = Solr6Index.getSolrClient();
if(solrClient == null) {
LOG.warn("The indexing system is not solr based. Non SOLR based indexing systems are not supported yet.");
LOG.warn("Non SOLR index stores are not supported yet.");
}
}
@Override
public void createCopyField(String collectionName, String srcFieldName, String mappedCopyFieldName) {
if(solrClient == null) {
LOG.error("The indexing system is not solr based. Copy fields can not be created in non SOLR based indexing systems. This request will be treated as no op.");
public void applySearchWeight(String collectionName, Map<String, Integer> attributeName2SearchWeightMap) {
//1) try updating request handler
//2) if update fails, try creating request handler
try {
LOG.info("Attempting to update free text request handler {} for collection {}", FREETEXT_REQUEST_HANDLER, collectionName);
updateSearchWeights(collectionName, attributeName2SearchWeightMap);
LOG.info("Successfully updated free text request handler {} for collection {}..", FREETEXT_REQUEST_HANDLER, collectionName);
return;
} catch (Throwable t) {
LOG.warn("Error encountered in updating request handler {} for collection {}. Attempting to create one", FREETEXT_REQUEST_HANDLER, collectionName, t);
}
SchemaRequest.AddCopyField addCopyFieldRequest =
new SchemaRequest.AddCopyField(srcFieldName, Arrays.asList(mappedCopyFieldName));
SchemaResponse.UpdateResponse addCopyFieldResponse = null;
try {
addCopyFieldResponse = addCopyFieldRequest.process(solrClient, collectionName);
} catch (SolrServerException | IOException e) {
String msg = String.format("Error encountered in creating the copy field from %s to %s for collection %s.", srcFieldName, mappedCopyFieldName, collectionName);
LOG.error(msg);
throw new RuntimeException(msg, e);
LOG.info("Attempting to create free text request handler {} for collection {}", FREETEXT_REQUEST_HANDLER, collectionName);
createFreeTextRequestHandler(collectionName, attributeName2SearchWeightMap);
LOG.info("Successfully created free text request handler {} for collection {}", FREETEXT_REQUEST_HANDLER, collectionName);
} catch (Throwable t) {
String msg = String.format("Error encountered in creating the request handler '%s' for collection '%s'.", FREETEXT_REQUEST_HANDLER, collectionName);
LOG.error(msg, t);
throw new RuntimeException(msg, t);
}
}
/**
 * Updates the free-text request handler of the given collection with the supplied search weights.
 *
 * Any failure raised by the update is logged and rethrown wrapped in a {@link RuntimeException}
 * that keeps the original failure as its cause.
 *
 * @param collectionName                the collection whose request handler is updated
 * @param attributeName2SearchWeightMap search weight keyed by attribute name
 * @throws RuntimeException if the update fails for any reason
 */
private void updateSearchWeights(String collectionName, Map<String, Integer> attributeName2SearchWeightMap) {
    try {
        updateFreeTextRequestHandler(collectionName, attributeName2SearchWeightMap);
    } catch (Throwable failure) {
        final String errorMessage = String.format("Error encountered in updating the request handler '%s' for collection '%s'", FREETEXT_REQUEST_HANDLER, collectionName);

        LOG.error(errorMessage, failure);

        throw new RuntimeException(errorMessage, failure);
    }

    // only reached when the update call above returned normally
    LOG.info("Updated free text request handler for collection {}.", collectionName);
}
/**
 * Builds a string of {@code " <indexFieldName>^<weight>"} tokens, one per map entry.
 *
 * Each attribute name is resolved to its property key and then to the index field name
 * through the supplied management object. Every token is preceded by a single space, so a
 * non-empty result starts with a space; an empty map yields an empty string.
 *
 * @param management       used to look up property keys and index field names
 * @param indexName        name of the index the field names are resolved against
 * @param searchWeightsMap search weight keyed by attribute name
 * @return the concatenated tokens, in the iteration order of the map
 */
private String generateSearchWeightString(AtlasGraphManagement management, String indexName, Map<String, Integer> searchWeightsMap) {
    final StringBuilder weightedFields = new StringBuilder();

    for (Map.Entry<String, Integer> weightEntry : searchWeightsMap.entrySet()) {
        final AtlasPropertyKey key       = management.getPropertyKey(weightEntry.getKey());
        final String           fieldName = management.getIndexFieldName(indexName, key);

        weightedFields.append(" ");
        weightedFields.append(fieldName);
        weightedFields.append("^");
        weightedFields.append(weightEntry.getValue().intValue());
    }

    return weightedFields.toString();
}
/**
 * Sends an {@code update-requesthandler} command for the free-text request handler.
 *
 * @param collectionName                collection whose configuration is modified; also used as the
 *                                      index name when resolving index field names
 * @param attributeName2SearchWeightMap search weight keyed by attribute name
 * @throws IOException         propagated from the request
 * @throws SolrServerException propagated from the request
 */
private void updateFreeTextRequestHandler(String collectionName, Map<String, Integer> attributeName2SearchWeightMap) throws IOException, SolrServerException {
    // NOTE(review): the management object obtained from graph.getManagementSystem() is not
    // explicitly committed or rolled back here - confirm whether it has to be released.
    final String qfValue     = generateSearchWeightString(graph.getManagementSystem(), collectionName, attributeName2SearchWeightMap);
    final String requestBody = generatePayLoadForFreeText("update-requesthandler", FREETEXT_REQUEST_HANDLER, qfValue);

    performRequestHandlerAction(collectionName, solrClient, requestBody);
}
/**
 * Sends a {@code create-requesthandler} command for the free-text request handler.
 *
 * @param collectionName                collection whose configuration is modified; also used as the
 *                                      index name when resolving index field names
 * @param attributeName2SearchWeightMap search weight keyed by attribute name
 * @throws IOException         propagated from the request
 * @throws SolrServerException propagated from the request
 */
private void createFreeTextRequestHandler(String collectionName, Map<String, Integer> attributeName2SearchWeightMap) throws IOException, SolrServerException {
    // NOTE(review): the management object obtained from graph.getManagementSystem() is not
    // explicitly committed or rolled back here - confirm whether it has to be released.
    final String qfValue     = generateSearchWeightString(graph.getManagementSystem(), collectionName, attributeName2SearchWeightMap);
    final String requestBody = generatePayLoadForFreeText("create-requesthandler", FREETEXT_REQUEST_HANDLER, qfValue);

    performRequestHandlerAction(collectionName, solrClient, requestBody);
}
@VisibleForTesting
static String generatePayLoadForFreeText(String action, String handlerName, String qfValue) {
    // Builds the command body: a single top-level entry keyed by the (unquoted) action, whose
    // value describes a solr.SearchHandler named handlerName with edismax defaults and qfValue
    // as its 'qf'. The text is single-quoted and keeps its trailing comma, so it is not strict JSON.

    // 'defaults' section of the handler; contains the only placeholder for qfValue.
    final String defaultsSection = "{" +
            " 'defType': 'edismax' , " +
            " 'rows': 100 , " +
            " 'lowercaseOperators': true , " +
            " 'qf': '%s' , " +
            " 'hl.fl': '*' , " +
            " 'hl.requireFieldMatch': true , " +
            " 'lowercaseOperators': true , " +
            " }";

    // Full command: placeholders are, in order, action, handler name and (inside defaults) qf.
    final String commandTemplate = "{" +
            " %s : { " +
            " 'name' : '%s', " +
            " 'class': 'solr.SearchHandler' , " +
            " 'defaults': " + defaultsSection +
            " }" +
            "}";

    return String.format(commandTemplate, action, handlerName, qfValue);
}
/**
 * Posts the given payload to the {@code /collections/<collectionName>/config} path
 * using the supplied client.
 *
 * @param collectionName collection whose config path is targeted
 * @param solrClient     client the request is processed with
 * @param actionPayLoad  request body to send
 * @throws IOException         propagated from processing the request
 * @throws SolrServerException propagated from processing the request
 */
private void performRequestHandlerAction(String collectionName, SolrClient solrClient,
                                         String actionPayLoad) throws IOException, SolrServerException {
    final String configPath = String.format("/collections/%s/config", collectionName);

    final V2Request configRequest = new V2Request.Builder(configPath)
            .withMethod(SolrRequest.METHOD.POST)
            .withPayload(actionPayLoad)
            .build();

    // the response returned by process() is intentionally not inspected, as in the original
    configRequest.process(solrClient);
}
}
......@@ -261,7 +261,7 @@ public class AtlasStructDef extends AtlasBaseTypeDef implements Serializable {
public static class AtlasAttributeDef implements Serializable {
private static final long serialVersionUID = 1L;
public static final int DEFAULT_SEARCHWEIGHT = -1;
public static final int DEFAULT_SEARCHWEIGHT_FOR_STRINGS = 3;
public static final String SEARCH_WEIGHT_ATTR_NAME = "searchWeight";
public static final String ATTRDEF_OPTION_SOFT_REFERENCE = "isSoftReference";
private final String STRING_TRUE = "true";
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.repository.graph;
/**
 * Callback contract for components that want to be told that a change has happened.
 *
 * NOTE(review): judging by the name and package this is presumably fired on index / type-system
 * changes; the triggering side is not visible here - confirm against the code that invokes it.
 */
public interface IndexChangeListener {
/**
 * Invoked to notify the listener of a change. No details about the change are passed.
 */
void onChange();
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.repository.graph;
import org.apache.atlas.AtlasException;
import org.apache.atlas.discovery.SearchContext;
import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.graphdb.AtlasGraphIndexClient;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import static org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef.DEFAULT_SEARCHWEIGHT;
import static org.apache.atlas.repository.Constants.CLASSIFICATION_TEXT_KEY;
/**
This component goes through all entity type definitions and creates the free-text index
request handler in Solr. It is a no-op in deployments whose index store is not Solr based.
This component needs to be initialized after type definitions are completely fixed with the needed patches (Order 3 initialization).
*/
public class SolrIndexHelper implements IndexChangeListener {
    private static final Logger LOG = LoggerFactory.getLogger(SolrIndexHelper.class);

    /** Search weight used for string attributes that carry the default (unset) search weight. */
    public static final int DEFAULT_SEARCHWEIGHT_FOR_STRINGS = 3;

    private final AtlasTypeRegistry typeRegistry;

    /**
     * @param typeRegistry registry the entity type definitions are read from
     */
    public SolrIndexHelper(AtlasTypeRegistry typeRegistry) {
        this.typeRegistry = typeRegistry;
    }

    /**
     * Collects the search weights of all string attributes of all entity definitions and applies
     * them to the vertex index through the graph index client. Does nothing (beyond logging a
     * warning) when the index store is not Solr based.
     *
     * @throws RuntimeException if obtaining the index client fails with an {@link AtlasException},
     *                          or if an attribute declares an invalid search weight
     */
    @Override
    public void onChange() {
        LOG.info("SolrIndexHelper.onChange()");

        if (SearchContext.isIndexSolrBased()) {
            try {
                AtlasGraph            graph         = AtlasGraphProvider.getGraphInstance();
                AtlasGraphIndexClient indexClient   = graph.getGraphIndexClient();
                Map<String, Integer>  searchWeights = getAttributesWithSearchWeights();

                indexClient.applySearchWeight(Constants.VERTEX_INDEX, searchWeights);
            } catch (AtlasException e) {
                LOG.error("Error encountered in handling type system change notification.", e);

                throw new RuntimeException("Error encountered in handling type system change notification.", e);
            }
        } else {
            LOG.warn("Not a Solr based index store. Free text search is not supported");
        }
    }

    /**
     * Builds the attribute-name to search-weight map: the classification text key with weight 10,
     * plus one entry per string attribute of every entity definition in the type registry.
     */
    private Map<String, Integer> getAttributesWithSearchWeights() {
        final Map<String, Integer>       weightsByAttribute = new HashMap<>();
        final Collection<AtlasEntityDef> entityDefs         = typeRegistry.getAllEntityDefs();

        weightsByAttribute.put(CLASSIFICATION_TEXT_KEY, 10);

        if (CollectionUtils.isNotEmpty(entityDefs)) {
            for (AtlasEntityDef entityDef : entityDefs) {
                for (AtlasAttributeDef attributeDef : entityDef.getAttributeDefs()) {
                    registerSearchWeight(weightsByAttribute, entityDef, attributeDef);
                }
            }
        }

        return weightsByAttribute;
    }

    /**
     * Adds the search weight of a single attribute to the map; non-string attributes are ignored.
     *
     * @throws RuntimeException if the attribute declares a non-default weight that fails validation
     */
    private void registerSearchWeight(Map<String, Integer> weightsByAttribute, AtlasEntityDef entityDef, AtlasAttributeDef attributeDef) {
        if (!GraphBackedSearchIndexer.isStringAttribute(attributeDef)) {
            return;
        }

        final String attributeName  = GraphBackedSearchIndexer.getEncodedPropertyName(entityDef.getName(), attributeDef);
        final int    declaredWeight = attributeDef.getSearchWeight();
        final int    effectiveWeight;

        if (declaredWeight == DEFAULT_SEARCHWEIGHT) {
            // No weight given in the model: fall back to the default for strings so that the
            // string data stays searchable through free-text search.
            effectiveWeight = DEFAULT_SEARCHWEIGHT_FOR_STRINGS;
        } else if (GraphBackedSearchIndexer.isValidSearchWeight(declaredWeight)) {
            effectiveWeight = declaredWeight;
        } else {
            // the model supplied an explicit weight that does not pass validation
            String msg = String.format("Invalid search weight '%d' for attribute %s.%s", declaredWeight, entityDef.getName(), attributeName);

            LOG.error(msg);

            throw new RuntimeException(msg);
        }

        LOG.info("Applying search weight {} for attribute {}.{}", effectiveWeight, entityDef.getName(), attributeName);

        weightsByAttribute.put(attributeName, effectiveWeight);
    }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment