Concept "Find lowercase ngrams in HasEntity* and HasEntityAnnotations.HasAnnotationObjectValue"

From SMW CindyKate - Main
Component0741498519
Jump to: navigation, search

Content

HasElasticConceptType MappingAndQueryConfiguration

  1. A document contains a field HasEntityKeywords with the value Matterhorn.
  2. The document should be hit for query strings like:
    • Matter (6 characters; would not be hit if min_gram > 6 or max_gram < 6)
    • horn
    • Matter and something else
Index Settings | Index Mappings | Query Design
 ...
 // Custom analyzers; they are referenced by name from the field mapping
 // (index time) and from the multi_match queries (query time).
 analyzer: {
   // Standard tokenizer plus lowercasing. The queries on this page pass this
   // analyzer explicitly, so the search text is split into whole lowercase
   // words rather than n-grams.
   my_lowercase_analyzer: {
     type: 'custom',
     tokenizer: 'standard',
     filter: 'lowercase'
   },
   // Tokenizes with my_ngram_tokenizer and lowercases each gram. Assigned to
   // HasEntityKeywords in the index mapping.
   my_ngram_tokenizer_analyzer: {
     type: 'custom',
     tokenizer: 'my_ngram_tokenizer',
     filter: 'lowercase'
   }
 },
 // Custom tokenizers; my_ngram_tokenizer is used by
 // my_ngram_tokenizer_analyzer.
 tokenizer: {
   my_ngram_tokenizer: {
     type: 'ngram',
     // Grams of 4 to 10 characters are emitted, so the 10-character value
     // 'Matterhorn' produces, among others, the grams 'Matter' (6) and
     // 'horn' (4) from the example at the top of this page.
     // NOTE(review): max_gram - min_gram is 6. Newer Elasticsearch versions
     // limit this spread via the index-level setting index.max_ngram_diff
     // (default 1) -- confirm the elided index settings raise it to >= 6.
     min_gram: 4,
     max_gram: 10,
     // Character classes allowed inside a gram; per the Elasticsearch ngram
     // tokenizer documentation, text is split on any other character.
     token_chars: [
       'letter',
       'digit'
     ]
   }
 }
 ...
 ...
 // Mapping of the keyword field that the query searches with boost 10.
 HasEntityKeywords: {
   type: 'text',
   index: true,
   // Index-time analysis produces lowercase n-grams. No search_analyzer is
   // set here; the queries on this page pass their own analyzer instead.
   // NOTE(review): a query that omits `analyzer` would presumably n-gram the
   // search text as well -- confirm that is intended.
   analyzer: 'my_ngram_tokenizer_analyzer',
   // Store term vectors including positions, offsets and payloads.
   term_vector: 'with_positions_offsets_payloads'
 }
 ...
 ...
 // Search request body: a bool query with two `should` clauses, one over
 // top-level entity fields and one over the nested annotations.
 // sQueryTerm is the user's search string, defined outside this excerpt.
 query: {
   bool:{
     should: [
       {
         // Clause 1: top-level fields. Title and keywords carry a ^10 boost
         // relative to the content field.
         multi_match: {
           query: sQueryTerm,
           // Analyze the search text as whole lowercase words so they can be
           // compared against the lowercase grams stored at index time.
           analyzer: 'my_lowercase_analyzer',
           // NOTE(review): only the HasEntityKeywords mapping is shown on
           // this page; whether HasEntityContent and HasEntityTitle are also
           // n-gram indexed cannot be told from here -- confirm.
           fields: [
             'HasEntityContent',
             'HasEntityTitle^10',
             'HasEntityKeywords^10'
           ]
         }
       },
       {
         // Clause 2: the same search text against the annotation values,
         // wrapped in a nested query on the HasEntityAnnotations path.
         nested: {
           path: 'HasEntityAnnotations',
           query: {
             multi_match: {
               query: sQueryTerm,
               analyzer: 'my_lowercase_analyzer',
               // NOTE(review): the mapping of this nested field is not shown;
               // partial-word hits require it to use the n-gram analyzer.
               fields: [
                 'HasEntityAnnotations.HasAnnotationObjectValue'
               ]
             }
           }
         }
       }
     ]
   }
 }
 ...