69 lines
2.3 KiB
Python
69 lines
2.3 KiB
Python
from django_elasticsearch_dsl import Document, fields
|
|
from django_elasticsearch_dsl.registries import registry
|
|
from .models import PDFDocument
|
|
|
|
@registry.register_document
class PDFDocumentDocument(Document):
    """Elasticsearch document definition for ``PDFDocument`` records.

    ``title`` and ``content`` are declared explicitly as text fields;
    ``uploaded_at`` is listed in ``Django.fields``. The index settings
    declare two custom Portuguese analyzers together with the token
    filters they are built from.

    Changing the analysis settings only affects newly created indices;
    an existing ``pdf_documents`` index has to be rebuilt to pick them up.
    """

    # No analyzer is given here, so the index default applies to titles.
    title = fields.TextField()
    # Uses the custom 'portuguese' analyzer declared in Index.settings below.
    content = fields.TextField(analyzer='portuguese')

    class Index:
        name = 'pdf_documents'
        settings = {
            # Single shard, no replicas.
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'analysis': {
                'analyzer': {
                    # Analyzer referenced by the ``content`` field.
                    'portuguese': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': [
                            'lowercase',
                            # Stopwords must be removed before folding and
                            # stemming: the '_portuguese_' list contains
                            # accented, unstemmed forms that would no longer
                            # match once the tokens have been transformed.
                            'stop',
                            'ascii_folding',
                            'portuguese_stemmer',
                            # Synonyms are applied last, on folded and
                            # stemmed tokens.
                            'portuguese_synonyms',
                        ],
                    },
                    # Shingle-producing variant. It is not referenced by the
                    # fields declared in this class.
                    'portuguese_search': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': [
                            'lowercase',
                            # Same ordering rationale as in 'portuguese'.
                            'stop',
                            'ascii_folding',
                            'portuguese_stemmer',
                            'suggest_shingle',
                        ],
                    },
                },
                'filter': {
                    # Emits 2- and 3-token shingles.
                    'suggest_shingle': {
                        'type': 'shingle',
                        'min_shingle_size': 2,
                        'max_shingle_size': 3,
                    },
                    # Built-in Portuguese stopword list.
                    'stop': {
                        'type': 'stop',
                        'stopwords': '_portuguese_',
                    },
                    'ascii_folding': {
                        'type': 'asciifolding',
                    },
                    'portuguese_stemmer': {
                        'type': 'stemmer',
                        'language': 'portuguese',
                    },
                    # The synonyms file is read by Elasticsearch itself, so
                    # 'synonyms.txt' must be available on the cluster nodes
                    # (path relative to the Elasticsearch config directory).
                    'portuguese_synonyms': {
                        'type': 'synonym',
                        'synonyms_path': 'synonyms.txt',
                        'expand': True,
                    },
                },
            },
        }

    class Django:
        # Model backing this document.
        model = PDFDocument
        # Model fields listed for the Django integration, in addition to
        # the explicitly declared ``title`` and ``content``.
        fields = ['uploaded_at']
|