Files
Diarios-Oficiais-ALEMS/diarios/documents.py

69 lines
2.3 KiB
Python

from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import PDFDocument
@registry.register_document
class PDFDocumentDocument(Document):
    """Search document for ``PDFDocument``, registered with the DSL registry.

    Declares two text fields (``title`` and ``content``), maps the model's
    ``uploaded_at`` field via ``Django.fields``, and stores everything in the
    ``pdf_documents`` index, whose settings define two custom analyzers
    (``portuguese`` and ``portuguese_search``) and the token filters they use.
    """

    # No analyzer is passed for the title.
    title = fields.TextField()
    # The name matches the custom 'portuguese' analyzer declared in
    # Index.settings below.
    # NOTE(review): 'portuguese' is also the name of a built-in Elasticsearch
    # analyzer; presumably the index-level definition takes precedence --
    # confirm against the created index's settings.
    content = fields.TextField(analyzer='portuguese')

    class Index:
        name = 'pdf_documents'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'analysis': {
                'analyzer': {
                    # Filter order matters: 'stop' runs right after
                    # 'lowercase' so tokens are compared with the
                    # '_portuguese_' stop list before folding or stemming
                    # alters them. Placed after those filters, accented or
                    # stemmed forms no longer match the list and slip
                    # through. An index created with the previous order has
                    # to be rebuilt for this to take effect.
                    # NOTE(review): 'ascii_folding' still precedes the
                    # stemmer, so the stemmer sees unaccented tokens --
                    # confirm this is intended.
                    'portuguese': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': [
                            'lowercase',
                            'stop',
                            'ascii_folding',
                            'portuguese_stemmer',
                            'portuguese_synonyms',
                        ],
                    },
                    # Same chain as 'portuguese', but ends with shingles
                    # instead of synonyms.
                    # NOTE(review): no field in this class references
                    # 'portuguese_search' -- verify whether it is used
                    # elsewhere (e.g. at query time) or was meant to be a
                    # search_analyzer.
                    'portuguese_search': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': [
                            'lowercase',
                            'stop',
                            'ascii_folding',
                            'portuguese_stemmer',
                            'suggest_shingle',
                        ],
                    },
                },
                'filter': {
                    # Emits 2- and 3-token shingles.
                    'suggest_shingle': {
                        'type': 'shingle',
                        'min_shingle_size': 2,
                        'max_shingle_size': 3,
                    },
                    # Custom filter named 'stop' configured with the
                    # predefined '_portuguese_' stop-word set.
                    'stop': {
                        'type': 'stop',
                        'stopwords': '_portuguese_',
                    },
                    'ascii_folding': {
                        'type': 'asciifolding',
                    },
                    'portuguese_stemmer': {
                        'type': 'stemmer',
                        'language': 'portuguese',
                    },
                    # The synonyms file is read by Elasticsearch, not by
                    # Django -- presumably resolved relative to the node's
                    # config directory; verify in the deployment.
                    'portuguese_synonyms': {
                        'type': 'synonym',
                        'synonyms_path': 'synonyms.txt',
                        'expand': True,
                    },
                },
            },
        }

    class Django:
        model = PDFDocument
        # Model fields mapped automatically, in addition to the explicit
        # 'title' and 'content' declarations above.
        fields = ['uploaded_at']