from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry

from .models import PDFDocument


@registry.register_document
class PDFDocumentDocument(Document):
    """Elasticsearch document for ``PDFDocument`` rows.

    Declares two text fields explicitly (``title`` and ``content``) and
    lets django-elasticsearch-dsl map ``uploaded_at`` from the model.
    ``content`` is indexed with the custom ``portuguese`` analyzer that is
    defined in ``Index.settings`` below.

    NOTE: analysis settings are applied when the index is created, so a
    change to the analyzers/filters here only takes effect after the
    ``pdf_documents`` index is rebuilt and the data reindexed.
    """

    # No analyzer is given for ``title``; only ``content`` uses the custom
    # Portuguese analysis chain.
    title = fields.TextField()
    content = fields.TextField(analyzer='portuguese')

    class Index:
        name = 'pdf_documents'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'analysis': {
                'analyzer': {
                    # Analyzer referenced by the ``content`` field.
                    'portuguese': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': [
                            'lowercase',
                            # Stop words must be removed before folding and
                            # stemming: the ``_portuguese_`` list contains
                            # unstemmed, accented forms (e.g. "não"), which
                            # no longer match once a token has been folded
                            # or stemmed.
                            'stop',
                            'ascii_folding',
                            'portuguese_stemmer',
                            'portuguese_synonyms',
                        ],
                    },
                    # Same chain, but emits shingles instead of applying
                    # synonyms. Not referenced by any field in this file;
                    # presumably selected explicitly at query time -
                    # TODO confirm against the search code.
                    'portuguese_search': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': [
                            'lowercase',
                            # Same ordering rationale as in ``portuguese``.
                            'stop',
                            'ascii_folding',
                            'portuguese_stemmer',
                            'suggest_shingle',
                        ],
                    },
                },
                'filter': {
                    # Word n-grams of 2 to 3 tokens.
                    'suggest_shingle': {
                        'type': 'shingle',
                        'min_shingle_size': 2,
                        'max_shingle_size': 3,
                    },
                    # Built-in Portuguese stop word list.
                    'stop': {
                        'type': 'stop',
                        'stopwords': '_portuguese_',
                    },
                    'ascii_folding': {
                        'type': 'asciifolding',
                    },
                    'portuguese_stemmer': {
                        'type': 'stemmer',
                        'language': 'portuguese',
                    },
                    # The synonyms file is read by Elasticsearch itself, so
                    # ``synonyms.txt`` has to exist on the Elasticsearch
                    # nodes (path is resolved relative to their config
                    # directory), not in this Django project.
                    'portuguese_synonyms': {
                        'type': 'synonym',
                        'synonyms_path': 'synonyms.txt',
                        'expand': True,
                    },
                },
            },
        }

    class Django:
        model = PDFDocument
        # Model fields mapped automatically, in addition to the explicitly
        # declared ``title`` and ``content`` above.
        fields = ['uploaded_at']