Remove o modelo PDFDocument

This commit is contained in:
root
2025-03-15 16:52:23 +01:00
parent f2e5cd73b7
commit 78e994eb6a
6 changed files with 337 additions and 127 deletions

View File

@ -1,18 +1,13 @@
from django.contrib import admin
from .models import PDFDocument, DiarioOficial, TipoDiarioOficial
from django.db import models
@admin.register(PDFDocument)
class PDFDocumentAdmin(admin.ModelAdmin):
pass
from .models import DiarioOficial, TipoDiarioOficial
@admin.register(DiarioOficial)
class DiarioOficialAdmin(admin.ModelAdmin):
    """Register ``DiarioOficial`` in the admin with default options."""
@admin.register(TipoDiarioOficial)
class TipoDiarioOficialAdmin(admin.ModelAdmin):
    """Register ``TipoDiarioOficial`` in the admin with default options."""

View File

@ -1,7 +1,9 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import DiarioOficial
@registry.register_document
class DiarioOficialDocument(Document):
tipo = fields.ObjectField(properties={
@ -46,7 +48,6 @@ class DiarioOficialDocument(Document):
'lei, legislação, norma',
'processo, procedimento, autos',
'contrato, acordo, convênio',
# Adicione mais sinônimos relevantes para o contexto legal
]
}
},

View File

@ -0,0 +1,16 @@
# Generated by Django 5.0.12 on 2025-03-15 15:51
from django.db import migrations
class Migration(migrations.Migration):
    """Delete the ``PDFDocument`` model from the ``diarios`` app."""

    # Must be applied after migration 0004 of the same app.
    dependencies = [
        ("diarios", "0004_remove_diariooficial_finalizado_and_more"),
    ]

    # The only schema change in this migration.
    operations = [migrations.DeleteModel(name="PDFDocument")]

View File

@ -1,43 +1,13 @@
import requests
import json
import os
from urllib.parse import urlparse
from django.core.files.base import ContentFile
from django.db import models
import PyPDF2
import json
from django.core.serializers.json import DjangoJSONEncoder
import requests
from babel.dates import format_date
class PDFDocument(models.Model):
title = models.CharField(max_length=255)
file = models.FileField(upload_to="pdfs/")
content = models.TextField(blank=True)
uploaded_at = models.DateTimeField(auto_now_add=True)
page_content = models.TextField(blank=True)
def __str__(self):
return self.title
def save(self, *args, **kwargs):
if self.file:
pdf = PyPDF2.PdfReader(self.file)
texto = []
pages_data = []
for i, pagina in enumerate(pdf.pages):
page_text = pagina.extract_text()
pages_data.append(
{
"number": i + 1,
"content": page_text,
}
)
texto.append(pagina.extract_text())
self.content = "\n".join(texto)
self.page_content = json.dumps(pages_data)
super().save(*args, **kwargs)
from django.core.files.base import ContentFile
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models
class TipoDiarioOficial(models.Model):

View File

@ -5,16 +5,69 @@
<h1>Busca de Diários Oficiais</h1>
<form method="GET" action="{% url 'search_diarios' %}" class="mb-4">
<div class="row mb-3">
<div class="col-md-12">
<div class="input-group">
<input type="text" name="q" class="form-control" value="{{ query }}" placeholder="Digite sua busca...">
<button type="submit" class="btn btn-primary">Buscar</button>
</div>
</div>
</div>
<div class="row mb-3">
<div class="col-md-4">
<label for="date_start" class="form-label">Data inicial:</label>
<input type="date" id="date_start" name="date_start" class="form-control" value="{{ date_start }}">
</div>
<div class="col-md-4">
<label for="date_end" class="form-label">Data final:</label>
<input type="date" id="date_end" name="date_end" class="form-control" value="{{ date_end }}">
</div>
<div class="col-md-4">
<label class="form-label">Tipo de correspondência:</label>
<div class="form-check">
<input class="form-check-input" type="radio" name="match_type" id="match_partial" value="partial" {% if match_type == 'partial' or not match_type %}checked{% endif %}>
<label class="form-check-label" for="match_partial">
Qualquer palavra
</label>
</div>
<div class="form-check">
<input class="form-check-input" type="radio" name="match_type" id="match_exact" value="exact" {% if match_type == 'exact' %}checked{% endif %}>
<label class="form-check-label" for="match_exact">
Todas as palavras (frase exata)
</label>
</div>
</div>
</div>
</form>
{% if error %}
<div class="alert alert-danger">
Erro na pesquisa: {{ error }}
</div>
{% endif %}
{% if query %}
<div class="mb-3">
<h2>Resultados para "{{ query }}"</h2>
<p>Encontrados {{ total }} resultados</p>
{% if did_you_mean %}
<div class="alert alert-info">
Você quis dizer: <a href="?q={{ did_you_mean }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}">{{ did_you_mean }}</a>?
</div>
{% endif %}
{% if search_suggestions %}
<div class="mt-3 mb-3">
<h5>Pesquisas relacionadas:</h5>
<div class="d-flex flex-wrap gap-2">
{% for suggestion in search_suggestions %}
<a href="?q={{ suggestion }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}" class="badge bg-light text-dark p-2 text-decoration-none">{{ suggestion }}</a>
{% endfor %}
</div>
</div>
{% endif %}
</div>
{% if results %}
@ -24,6 +77,9 @@
<div class="card-header">
<h5>{{ result.tipo }} nº {{ result.numero }}</h5>
<p class="text-muted">Data: {{ result.data }}</p>
{% if result.occurrences > 0 %}
<span class="badge bg-info">{{ result.occurrences }} ocorrências encontradas</span>
{% endif %}
</div>
<div class="card-body">
{% if result.highlight %}
@ -70,29 +126,50 @@
{% endfor %}
</div>
<!-- Paginação -->
<!-- Paginação aprimorada -->
{% if total_pages > 1 %}
<nav aria-label="Paginação">
<ul class="pagination">
{% if page > 1 %}
<li class="page-item">
<a class="page-link" href="?q={{ query }}&page={{ page|add:'-1' }}&size={{ size }}">Anterior</a>
<ul class="pagination justify-content-center">
<!-- Botão primeira página -->
<li class="page-item {% if page == 1 %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page=1&size={{ size }}">
<span aria-hidden="true">&laquo;&laquo;</span>
</a>
</li>
{% endif %}
{% for i in total_pages|ljust:"5" %}
{% if i > 0 and i <= total_pages %}
<!-- Botão página anterior -->
<li class="page-item {% if page == 1 %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ page|add:'-1' }}&size={{ size }}">
<span aria-hidden="true">&laquo;</span>
</a>
</li>
<!-- Mostrar apenas um conjunto de páginas ao redor da página atual -->
{% with ''|center:total_pages as range %}
{% for _ in range %}
{% with forloop.counter as i %}
{% if i >= page|add:'-2' and i <= page|add:'2' and i > 0 and i <= total_pages %}
<li class="page-item {% if i == page %}active{% endif %}">
<a class="page-link" href="?q={{ query }}&page={{ i }}&size={{ size }}">{{ i }}</a>
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ i }}&size={{ size }}">{{ i }}</a>
</li>
{% endif %}
{% endwith %}
{% endfor %}
{% endwith %}
{% if page < total_pages %}
<li class="page-item">
<a class="page-link" href="?q={{ query }}&page={{ page|add:'1' }}&size={{ size }}">Próxima</a>
<!-- Botão próxima página -->
<li class="page-item {% if page >= total_pages %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ page|add:'1' }}&size={{ size }}">
<span aria-hidden="true">&raquo;</span>
</a>
</li>
<!-- Botão última página -->
<li class="page-item {% if page >= total_pages %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ total_pages }}&size={{ size }}">
<span aria-hidden="true">&raquo;&raquo;</span>
</a>
</li>
{% endif %}
</ul>
</nav>
{% endif %}
@ -100,6 +177,14 @@
{% else %}
<div class="alert alert-info">
Nenhum resultado encontrado para a sua busca.
{% if date_start or date_end %}
<p class="mt-2">Tente expandir o período de busca ou remover os filtros de data.</p>
{% endif %}
{% if match_type == 'exact' %}
<p class="mt-2">Tente usar a opção "Qualquer palavra" para resultados mais abrangentes.</p>
{% endif %}
</div>
{% endif %}
{% endif %}
@ -119,6 +204,23 @@
padding: 2px;
border-radius: 2px;
}
.badge {
font-size: 0.85rem;
}
</style>
<script>
// Block form submission when the start date is later than the end date.
// Both values are compared as strings; this relies on the date inputs
// reporting ISO "YYYY-MM-DD" values, which sort chronologically.
document.querySelector('form').addEventListener('submit', (event) => {
    const startValue = document.getElementById('date_start').value;
    const endValue = document.getElementById('date_end').value;

    // An open-ended range (either field empty) is always allowed.
    const bothFilled = Boolean(startValue) && Boolean(endValue);
    if (!bothFilled || startValue <= endValue) {
        return;
    }

    event.preventDefault();
    alert('A data inicial não pode ser posterior à data final.');
});
</script>
{% endblock %}

View File

@ -1,40 +1,145 @@
from django.shortcuts import render
from elasticsearch_dsl import Q
from datetime import datetime
from .documents import DiarioOficialDocument
from elasticsearch.exceptions import RequestError
def search_diarios(request):
q = request.GET.get('q', '')
page = int(request.GET.get('page', 1))
size = int(request.GET.get('size', 10))
# Parâmetros de filtro de data
date_start = request.GET.get('date_start', '')
date_end = request.GET.get('date_end', '')
# Tipo de correspondência (exata ou parcial)
match_type = request.GET.get('match_type', 'partial') # 'exact' ou 'partial'
start = (page - 1) * size
end = start + size
results = []
total = 0
did_you_mean = None
search_suggestions = []
try:
if q:
# Busca principal com boost para relevância
# Construir a consulta base
search = DiarioOficialDocument.search()
# Determinar o tipo de consulta com base no match_type
if match_type == 'exact':
# Correspondência exata (frase exata)
query = Q(
'multi_match',
query=q,
fields=['content^3', 'tipo.nome^2', 'numero', 'pages.content'],
fuzziness='AUTO'
type='phrase'
)
else:
# Correspondência parcial (qualquer termo)
query = Q(
'multi_match',
query=q,
fields=['content^3', 'tipo.nome^2', 'numero', 'pages.content'],
fuzziness='AUTO',
operator='or' # Pelo menos um termo deve corresponder
)
# Pesquisa com highlighting
search = DiarioOficialDocument.search()
# Aplicar a consulta principal
search = search.query(query)
# Aplicar filtros de data se fornecidos
date_filters = []
if date_start:
try:
date_start_obj = datetime.strptime(date_start, '%Y-%m-%d')
date_filters.append(Q('range', data={'gte': date_start_obj}))
except ValueError:
pass # Ignorar datas inválidas
if date_end:
try:
date_end_obj = datetime.strptime(date_end, '%Y-%m-%d')
date_filters.append(Q('range', data={'lte': date_end_obj}))
except ValueError:
pass # Ignorar datas inválidas
if date_filters:
for date_filter in date_filters:
search = search.filter(date_filter)
# Configuração do highlighting
search = search.highlight('content', fragment_size=150, number_of_fragments=3)
search = search.highlight('pages.content', fragment_size=150, number_of_fragments=3)
# Paginação
total_search = search.count()
search = search[start:end]
# Executar a pesquisa
response = search.execute()
total = response.hits.total.value
# "Você quis dizer" - sugestão para termos com erros de digitação
if total < 3 and q: # Se poucos resultados, sugira correções
suggestion_search = DiarioOficialDocument.search()
suggestion_search = suggestion_search.suggest(
'phrase_suggestion',
q,
phrase={
'field': 'content',
'size': 5,
'highlight': {
'pre_tag': '<em>',
'post_tag': '</em>'
}
}
)
suggestion_result = suggestion_search.execute()
# Processe as sugestões
if hasattr(suggestion_result, 'suggest') and 'phrase_suggestion' in suggestion_result.suggest:
suggestions = suggestion_result.suggest['phrase_suggestion'][0]['options']
if suggestions:
for suggestion in suggestions:
if suggestion['text'].lower() != q.lower():
did_you_mean = suggestion['text']
break
# Gerar sugestões de pesquisa relacionadas
if q:
# Use a expansão de termos para sugerir pesquisas relacionadas
related_search = DiarioOficialDocument.search()
related_search = related_search.query(
'more_like_this',
fields=['content'],
like=q,
min_term_freq=1,
max_query_terms=12
)
related_search = related_search[:5] # Limite para 5 sugestões
try:
related_results = related_search.execute()
# Extraia termos relevantes dos resultados relacionados
for hit in related_results:
if hasattr(hit, 'content') and hit.content:
# Extraia alguns termos significativos do conteúdo
content_terms = hit.content.split()[:10] # Primeiros 10 termos
suggestion = ' '.join(content_terms)
if suggestion not in search_suggestions and suggestion != q:
search_suggestions.append(suggestion)
if len(search_suggestions) >= 5: # Limite para 5 sugestões
break
except:
# Ignore erros de sugestões relacionadas
pass
# Processar resultados
for hit in response:
# Adicionar destaque
highlight = ""
@ -42,9 +147,17 @@ def search_diarios(request):
if 'content' in hit.meta.highlight:
highlight = "...".join(hit.meta.highlight.content)
# Processando páginas com destaque
# Processar páginas com destaque
highlighted_pages = []
total_occurrences = 0
if hasattr(hit.meta, 'highlight') and 'pages.content' in hit.meta.highlight:
# Calcular o número total de ocorrências
for content in hit.meta.highlight['pages.content']:
# Contar o número de <em> tags, que representam termos destacados
total_occurrences += content.count('<em>')
# Processar os destaques por página
for i, content in enumerate(hit.meta.highlight['pages.content']):
# Encontre a página correspondente
page_number = i + 1 # Lógica simplificada, pode precisar de ajuste
@ -61,18 +174,31 @@ def search_diarios(request):
'data': hit.data,
'link': hit.link,
'highlight': highlight,
'highlighted_pages': highlighted_pages
'highlighted_pages': highlighted_pages,
'occurrences': total_occurrences
}
results.append(result)
except RequestError as e:
# Tratar erros de consulta do Elasticsearch
error_message = str(e)
return render(request, 'diarios/diarios_search.html', {
'error': error_message,
'query': q
})
context = {
'query': q,
'date_start': date_start,
'date_end': date_end,
'match_type': match_type,
'results': results,
'total': total,
'page': page,
'size': size,
'total_pages': (total + size - 1) // size if total > 0 else 0,
'did_you_mean': did_you_mean,
'search_suggestions': search_suggestions[:5] # Limite para 5 sugestões
}
return render(request, 'diarios/diarios_search.html', context)