"""Elasticsearch document definitions for PDF documents and official gazettes."""

from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry

from .models import DiarioOficial, PDFDocument


def _portuguese_index_settings():
    """Build the index settings shared by every document in this module.

    A new dict is returned on each call so that the indices never share
    (and can never accidentally mutate) the same nested settings objects.

    Returns:
        dict: shard/replica counts plus the custom ``portuguese`` and
        ``portuguese_search`` analyzers and the token filters they use.
    """
    return {
        "number_of_shards": 1,
        "number_of_replicas": 0,
        "analysis": {
            "analyzer": {
                # NOTE(review): "stop" runs after "ascii_folding" and the
                # stemmer, so accented or stemmed stopwords may no longer
                # match the _portuguese_ list. Elasticsearch's documented
                # built-in Portuguese analyzer applies stop before stemming.
                # The order is left untouched here because changing it
                # alters the analysis chain and requires a reindex.
                "portuguese": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "ascii_folding",
                        "portuguese_stemmer",
                        "stop",
                        "portuguese_synonyms",
                    ],
                },
                # NOTE(review): no field in this module references
                # "portuguese_search"; confirm it is used at query time
                # elsewhere before removing or changing it.
                "portuguese_search": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "ascii_folding",
                        "portuguese_stemmer",
                        "stop",
                        "suggest_shingle",
                    ],
                },
            },
            "filter": {
                "suggest_shingle": {
                    "type": "shingle",
                    "min_shingle_size": 2,
                    "max_shingle_size": 3,
                },
                "stop": {"type": "stop", "stopwords": "_portuguese_"},
                "ascii_folding": {"type": "asciifolding"},
                "portuguese_stemmer": {
                    "type": "stemmer",
                    "language": "portuguese",
                },
                "portuguese_synonyms": {
                    "type": "synonym",
                    "synonyms_path": "synonyms.txt",
                    "expand": True,
                },
            },
        },
    }


@registry.register_document
class PDFDocumentDocument(Document):
    """Search document for ``PDFDocument`` with per-page nested content."""

    title = fields.TextField()
    content = fields.TextField(analyzer="portuguese")
    # One nested entry per page, each with its number and analyzed text.
    pages = fields.NestedField(
        properties={
            "number": fields.IntegerField(),
            "content": fields.TextField(analyzer="portuguese"),
        }
    )

    class Index:
        name = "pdf_documents"
        settings = _portuguese_index_settings()

    class Django:
        model = PDFDocument
        fields = ["uploaded_at", "file"]


@registry.register_document
class DiarioOficialDocument(Document):
    """Search document for ``DiarioOficial`` (official gazette) records."""

    # Main fields; both are filled by the prepare_* methods below.
    title = fields.TextField()
    tipo = fields.KeywordField()

    # URL of the PDF file, when one is attached (see prepare_arquivo).
    arquivo = fields.TextField(attr="arquivo.url")

    # Nested field for pages, populated from the model's page_content.
    pages = fields.NestedField(
        properties={
            "number": fields.IntegerField(),
            "content": fields.TextField(analyzer="portuguese"),
        }
    )

    class Index:
        name = "diarios_oficiais"
        settings = _portuguese_index_settings()

    class Django:
        model = DiarioOficial
        fields = [
            "data",
            "numero",
            "link",
        ]

    def prepare_arquivo(self, instance):
        """Return the URL of the attached file, or ``None`` without one.

        Resolving ``arquivo.url`` directly fails when the file is missing:
        a Django file field with no file raises ``ValueError`` on ``.url``,
        which would abort indexing of the record. This assumes ``arquivo``
        is a Django file field; confirm against the model.
        """
        arquivo = getattr(instance, "arquivo", None)
        if not arquivo:
            return None
        return getattr(arquivo, "url", None)

    def prepare_tipo(self, instance):
        """Return the name of the related ``tipo``, or ``None`` if unset."""
        return instance.tipo.nome if instance.tipo else None

    def prepare_title(self, instance):
        """Return "<tipo name> <numero>", using 'Diário' when tipo is unset."""
        return f"{instance.tipo.nome if instance.tipo else 'Diário'} {instance.numero}"

    def prepare_pages(self, instance):
        """Return ``page_content`` as the nested pages, or ``[]`` when empty.

        Presumably ``page_content`` is already a list of dicts with
        ``number`` and ``content`` keys; verify against the model.
        """
        return instance.page_content or []