Remove o modelo PDFDocument

This commit is contained in:
root
2025-03-15 16:52:23 +01:00
parent f2e5cd73b7
commit 78e994eb6a
6 changed files with 337 additions and 127 deletions

View File

@ -1,18 +1,13 @@
from django.contrib import admin
from .models import PDFDocument, DiarioOficial, TipoDiarioOficial
from django.db import models
@admin.register(PDFDocument)
class PDFDocumentAdmin(admin.ModelAdmin):
    """Expose ``PDFDocument`` in the Django admin with the default ``ModelAdmin`` options."""
from .models import DiarioOficial, TipoDiarioOficial
@admin.register(DiarioOficial)
class DiarioOficialAdmin(admin.ModelAdmin):
    """Expose ``DiarioOficial`` in the Django admin with the default ``ModelAdmin`` options."""
@admin.register(TipoDiarioOficial)
class TipoDiarioOficialAdmin(admin.ModelAdmin):
    """Expose ``TipoDiarioOficial`` in the Django admin with the default ``ModelAdmin`` options."""

View File

@ -1,7 +1,9 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import DiarioOficial
@registry.register_document
class DiarioOficialDocument(Document):
tipo = fields.ObjectField(properties={
@ -46,7 +48,6 @@ class DiarioOficialDocument(Document):
'lei, legislação, norma',
'processo, procedimento, autos',
'contrato, acordo, convênio',
# Adicione mais sinônimos relevantes para o contexto legal
]
}
},

View File

@ -0,0 +1,16 @@
# Generated by Django 5.0.12 on 2025-03-15 15:51
from django.db import migrations
class Migration(migrations.Migration):
    """Delete the ``PDFDocument`` model from the ``diarios`` app."""

    # Applied after migration 0004 of the same app.
    dependencies = [("diarios", "0004_remove_diariooficial_finalizado_and_more")]

    # Single operation: remove the model.
    operations = [migrations.DeleteModel(name="PDFDocument")]

View File

@ -1,43 +1,13 @@
import requests
import json
import os
from urllib.parse import urlparse
from django.core.files.base import ContentFile
from django.db import models
import PyPDF2
import json
from django.core.serializers.json import DjangoJSONEncoder
import requests
from babel.dates import format_date
class PDFDocument(models.Model):
    """Uploaded PDF stored together with the text extracted from it.

    On every ``save()`` with a file attached, the PDF is parsed and two
    derived fields are refreshed:

    * ``content`` -- the text of all pages joined with newlines.
    * ``page_content`` -- a JSON list of ``{"number": <1-based page>,
      "content": <page text>}`` objects.
    """

    title = models.CharField(max_length=255)
    file = models.FileField(upload_to="pdfs/")
    content = models.TextField(blank=True)
    uploaded_at = models.DateTimeField(auto_now_add=True)
    page_content = models.TextField(blank=True)

    def __str__(self):
        """Return the document title."""
        return self.title

    def save(self, *args, **kwargs):
        """Extract the PDF text into ``content``/``page_content``, then save.

        All positional and keyword arguments are forwarded unchanged to
        ``models.Model.save``.
        """
        if self.file:
            pdf = PyPDF2.PdfReader(self.file)
            # Extract each page exactly once: text extraction is the expensive
            # step, and both derived fields are built from the same strings.
            # ``or ""`` is defensive, so "\n".join() cannot fail should a page
            # yield no text object.
            page_texts = [pagina.extract_text() or "" for pagina in pdf.pages]
            self.content = "\n".join(page_texts)
            self.page_content = json.dumps(
                [
                    {"number": number, "content": page_text}
                    for number, page_text in enumerate(page_texts, start=1)
                ]
            )
        super().save(*args, **kwargs)
from django.core.files.base import ContentFile
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models
class TipoDiarioOficial(models.Model):

View File

@ -3,20 +3,73 @@
{% block content %}
<div class="container">
<h1>Busca de Diários Oficiais</h1>
<form method="GET" action="{% url 'search_diarios' %}" class="mb-4">
<div class="input-group">
<input type="text" name="q" class="form-control" value="{{ query }}" placeholder="Digite sua busca...">
<button type="submit" class="btn btn-primary">Buscar</button>
<div class="row mb-3">
<div class="col-md-12">
<div class="input-group">
<input type="text" name="q" class="form-control" value="{{ query }}" placeholder="Digite sua busca...">
<button type="submit" class="btn btn-primary">Buscar</button>
</div>
</div>
</div>
<div class="row mb-3">
<div class="col-md-4">
<label for="date_start" class="form-label">Data inicial:</label>
<input type="date" id="date_start" name="date_start" class="form-control" value="{{ date_start }}">
</div>
<div class="col-md-4">
<label for="date_end" class="form-label">Data final:</label>
<input type="date" id="date_end" name="date_end" class="form-control" value="{{ date_end }}">
</div>
<div class="col-md-4">
<label class="form-label">Tipo de correspondência:</label>
<div class="form-check">
<input class="form-check-input" type="radio" name="match_type" id="match_partial" value="partial" {% if match_type == 'partial' or not match_type %}checked{% endif %}>
<label class="form-check-label" for="match_partial">
Qualquer palavra
</label>
</div>
<div class="form-check">
<input class="form-check-input" type="radio" name="match_type" id="match_exact" value="exact" {% if match_type == 'exact' %}checked{% endif %}>
<label class="form-check-label" for="match_exact">
Todas as palavras (frase exata)
</label>
</div>
</div>
</div>
</form>
{% if error %}
<div class="alert alert-danger">
Erro na pesquisa: {{ error }}
</div>
{% endif %}
{% if query %}
<div class="mb-3">
<h2>Resultados para "{{ query }}"</h2>
<p>Encontrados {{ total }} resultados</p>
{% if did_you_mean %}
<div class="alert alert-info">
Você quis dizer: <a href="?q={{ did_you_mean }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}">{{ did_you_mean }}</a>?
</div>
{% endif %}
{% if search_suggestions %}
<div class="mt-3 mb-3">
<h5>Pesquisas relacionadas:</h5>
<div class="d-flex flex-wrap gap-2">
{% for suggestion in search_suggestions %}
<a href="?q={{ suggestion }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}" class="badge bg-light text-dark p-2 text-decoration-none">{{ suggestion }}</a>
{% endfor %}
</div>
</div>
{% endif %}
</div>
{% if results %}
<div class="search-results">
{% for result in results %}
@ -24,6 +77,9 @@
<div class="card-header">
<h5>{{ result.tipo }} nº {{ result.numero }}</h5>
<p class="text-muted">Data: {{ result.data }}</p>
{% if result.occurrences > 0 %}
<span class="badge bg-info">{{ result.occurrences }} ocorrências encontradas</span>
{% endif %}
</div>
<div class="card-body">
{% if result.highlight %}
@ -32,7 +88,7 @@
<div class="highlight-content">{{ result.highlight|safe }}</div>
</div>
{% endif %}
{% if result.highlighted_pages %}
<div class="highlighted-pages">
<h6>Páginas com o termo buscado:</h6>
@ -40,12 +96,12 @@
{% for page in result.highlighted_pages %}
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse"
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse"
data-bs-target="#page{{ result.id }}_{{ page.number }}">
Página {{ page.number }}
</button>
</h2>
<div id="page{{ result.id }}_{{ page.number }}" class="accordion-collapse collapse"
<div id="page{{ result.id }}_{{ page.number }}" class="accordion-collapse collapse"
data-bs-parent="#pagesAccordion{{ result.id }}">
<div class="accordion-body">
{{ page.content|safe }}
@ -56,7 +112,7 @@
</div>
</div>
{% endif %}
<div class="mt-3">
<a href="{{ result.link }}" target="_blank" class="btn btn-sm btn-outline-primary">
Ver Diário Online
@ -69,37 +125,66 @@
</div>
{% endfor %}
</div>
<!-- Paginação -->
<!-- Paginação aprimorada -->
{% if total_pages > 1 %}
<nav aria-label="Paginação">
<ul class="pagination">
{% if page > 1 %}
<li class="page-item">
<a class="page-link" href="?q={{ query }}&page={{ page|add:'-1' }}&size={{ size }}">Anterior</a>
<ul class="pagination justify-content-center">
<!-- Botão primeira página -->
<li class="page-item {% if page == 1 %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page=1&size={{ size }}">
<span aria-hidden="true">&laquo;&laquo;</span>
</a>
</li>
{% endif %}
{% for i in total_pages|ljust:"5" %}
{% if i > 0 and i <= total_pages %}
<li class="page-item {% if i == page %}active{% endif %}">
<a class="page-link" href="?q={{ query }}&page={{ i }}&size={{ size }}">{{ i }}</a>
</li>
{% endif %}
{% endfor %}
{% if page < total_pages %}
<li class="page-item">
<a class="page-link" href="?q={{ query }}&page={{ page|add:'1' }}&size={{ size }}">Próxima</a>
<!-- Botão página anterior -->
<li class="page-item {% if page == 1 %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ page|add:'-1' }}&size={{ size }}">
<span aria-hidden="true">&laquo;</span>
</a>
</li>
<!-- Mostrar apenas um conjunto de páginas ao redor da página atual -->
{% with ''|center:total_pages as range %}
{% for _ in range %}
{% with forloop.counter as i %}
{% if i >= page|add:'-2' and i <= page|add:'2' and i > 0 and i <= total_pages %}
<li class="page-item {% if i == page %}active{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ i }}&size={{ size }}">{{ i }}</a>
</li>
{% endif %}
{% endwith %}
{% endfor %}
{% endwith %}
<!-- Botão próxima página -->
<li class="page-item {% if page >= total_pages %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ page|add:'1' }}&size={{ size }}">
<span aria-hidden="true">&raquo;</span>
</a>
</li>
<!-- Botão última página -->
<li class="page-item {% if page >= total_pages %}disabled{% endif %}">
<a class="page-link" href="?q={{ query }}&date_start={{ date_start }}&date_end={{ date_end }}&match_type={{ match_type }}&page={{ total_pages }}&size={{ size }}">
<span aria-hidden="true">&raquo;&raquo;</span>
</a>
</li>
{% endif %}
</ul>
</nav>
{% endif %}
{% else %}
<div class="alert alert-info">
Nenhum resultado encontrado para a sua busca.
{% if date_start or date_end %}
<p class="mt-2">Tente expandir o período de busca ou remover os filtros de data.</p>
{% endif %}
{% if match_type == 'exact' %}
<p class="mt-2">Tente usar a opção "Qualquer palavra" para resultados mais abrangentes.</p>
{% endif %}
</div>
{% endif %}
{% endif %}
@ -112,13 +197,30 @@
padding: 2px;
border-radius: 2px;
}
.accordion-body em {
background-color: #ffeeba;
font-style: normal;
padding: 2px;
border-radius: 2px;
}
.badge {
font-size: 0.85rem;
}
</style>
<script>
// Block form submission when the chosen start date is later than the end date.
// Both inputs are type="date", so their values are yyyy-mm-dd strings and a
// plain string comparison orders them chronologically.
document.querySelector('form').addEventListener('submit', (event) => {
    const fromDate = document.getElementById('date_start').value;
    const toDate = document.getElementById('date_end').value;

    // Nothing to validate unless both bounds are filled in and out of order.
    if (!fromDate || !toDate || fromDate <= toDate) {
        return;
    }

    event.preventDefault();
    alert('A data inicial não pode ser posterior à data final.');
});
</script>
{% endblock %}

View File

@ -1,80 +1,206 @@
from django.shortcuts import render
from elasticsearch_dsl import Q
from datetime import datetime
from .documents import DiarioOficialDocument
from elasticsearch.exceptions import RequestError
_DEFAULT_PAGE_SIZE = 10
# Upper bound on the client-controlled ``size`` parameter so a single request
# cannot ask Elasticsearch for an arbitrarily large page.
_MAX_PAGE_SIZE = 100
_MAX_SUGGESTIONS = 5
_SEARCH_FIELDS = ('content^3', 'tipo.nome^2', 'numero', 'pages.content')
_SEARCH_TEMPLATE = 'diarios/diarios_search.html'


def _parse_positive_int(raw, default, maximum=None):
    """Return ``raw`` as an int >= 1 (capped at ``maximum``), else ``default``."""
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    if value < 1:
        return default
    if maximum is not None:
        value = min(value, maximum)
    return value


def _build_text_query(q, match_type):
    """Build the multi_match query: phrase match for 'exact', fuzzy OR otherwise."""
    if match_type == 'exact':
        return Q(
            'multi_match',
            query=q,
            fields=list(_SEARCH_FIELDS),
            type='phrase',
        )
    return Q(
        'multi_match',
        query=q,
        fields=list(_SEARCH_FIELDS),
        fuzziness='AUTO',
        operator='or',  # at least one term has to match
    )


def _date_range_filters(date_start, date_end):
    """Return range filters on ``data`` for each bound that parses as YYYY-MM-DD."""
    filters = []
    for raw, bound in ((date_start, 'gte'), (date_end, 'lte')):
        if not raw:
            continue
        try:
            parsed = datetime.strptime(raw, '%Y-%m-%d')
        except ValueError:
            # Invalid dates are ignored rather than failing the whole search.
            continue
        filters.append(Q('range', data={bound: parsed}))
    return filters


def _find_did_you_mean(q):
    """Return the first phrase suggestion that differs from ``q``, or ``None``."""
    suggestion_search = DiarioOficialDocument.search().suggest(
        'phrase_suggestion',
        q,
        phrase={
            'field': 'content',
            'size': 5,
            'highlight': {
                'pre_tag': '<em>',
                'post_tag': '</em>',
            },
        },
    )
    suggestion_result = suggestion_search.execute()
    if not hasattr(suggestion_result, 'suggest'):
        return None
    if 'phrase_suggestion' not in suggestion_result.suggest:
        return None
    entries = suggestion_result.suggest['phrase_suggestion']
    if not entries:
        # An empty entry list would make entries[0] raise IndexError.
        return None
    for option in entries[0]['options']:
        if option['text'].lower() != q.lower():
            return option['text']
    return None


def _find_related_searches(q):
    """Return up to five related-search strings; best effort, never raises."""
    import logging

    suggestions = []
    related_search = DiarioOficialDocument.search().query(
        'more_like_this',
        fields=['content'],
        like=q,
        min_term_freq=1,
        max_query_terms=12,
    )[:_MAX_SUGGESTIONS]
    try:
        for hit in related_search.execute():
            if hasattr(hit, 'content') and hit.content:
                # Use the first ten words of the related document as the suggestion.
                suggestion = ' '.join(hit.content.split()[:10])
                if suggestion not in suggestions and suggestion != q:
                    suggestions.append(suggestion)
            if len(suggestions) >= _MAX_SUGGESTIONS:
                break
    except Exception:
        # Related searches are optional: a failure here must not break the
        # main result page, but it is logged instead of silently discarded.
        logging.getLogger(__name__).warning(
            "Related-search lookup failed", exc_info=True
        )
    return suggestions


def _serialize_hit(hit):
    """Convert one search hit into the dict consumed by the results template."""
    highlight = ""
    highlighted_pages = []
    total_occurrences = 0
    if hasattr(hit.meta, 'highlight'):
        if 'content' in hit.meta.highlight:
            highlight = "...".join(hit.meta.highlight.content)
        if 'pages.content' in hit.meta.highlight:
            fragments = hit.meta.highlight['pages.content']
            for number, fragment in enumerate(fragments, start=1):
                # Each <em> tag marks one highlighted term.
                total_occurrences += fragment.count('<em>')
                # NOTE(review): ``number`` is the ordinal of the highlight
                # fragment, not necessarily the real page number -- the
                # original code flags this as a simplification.
                highlighted_pages.append({'number': number, 'content': fragment})
    return {
        'id': hit.id,
        'tipo': hit.tipo.nome if hasattr(hit, 'tipo') and hit.tipo else '',
        'numero': hit.numero,
        'data': hit.data,
        'link': hit.link,
        'highlight': highlight,
        'highlighted_pages': highlighted_pages,
        'occurrences': total_occurrences,
    }


def search_diarios(request):
    """Render the search page for ``DiarioOficialDocument``.

    Reads these GET parameters:

    * ``q`` -- search text; when empty no query is sent to Elasticsearch.
    * ``page`` / ``size`` -- 1-based page number and page size. Non-numeric
      or non-positive values fall back to 1 and 10; ``size`` is capped at 100.
    * ``date_start`` / ``date_end`` -- optional ``YYYY-MM-DD`` bounds applied
      to the ``data`` field; values that do not parse are ignored.
    * ``match_type`` -- ``'exact'`` for a phrase match; anything else is
      treated as ``'partial'`` (fuzzy, any term).

    Returns the rendered ``diarios/diarios_search.html`` response. When
    Elasticsearch rejects the request (``RequestError``) the same template is
    rendered with an ``error`` message and the submitted filters preserved.
    """
    q = request.GET.get('q', '')
    page = _parse_positive_int(request.GET.get('page'), 1)
    size = _parse_positive_int(
        request.GET.get('size'), _DEFAULT_PAGE_SIZE, _MAX_PAGE_SIZE
    )

    date_start = request.GET.get('date_start', '')
    date_end = request.GET.get('date_end', '')

    # Normalise so the template always has one radio option selected.
    match_type = request.GET.get('match_type', 'partial')
    if match_type != 'exact':
        match_type = 'partial'

    start = (page - 1) * size
    end = start + size

    context = {
        'query': q,
        'date_start': date_start,
        'date_end': date_end,
        'match_type': match_type,
        'results': [],
        'total': 0,
        'page': page,
        'size': size,
        'total_pages': 0,
        'did_you_mean': None,
        'search_suggestions': [],
    }

    if not q:
        return render(request, _SEARCH_TEMPLATE, context)

    try:
        search = DiarioOficialDocument.search().query(
            _build_text_query(q, match_type)
        )
        for date_filter in _date_range_filters(date_start, date_end):
            search = search.filter(date_filter)

        search = search.highlight('content', fragment_size=150, number_of_fragments=3)
        search = search.highlight('pages.content', fragment_size=150, number_of_fragments=3)

        response = search[start:end].execute()
        total = response.hits.total.value

        # With very few hits the query may contain a typo: offer a correction.
        did_you_mean = _find_did_you_mean(q) if total < 3 else None
        search_suggestions = _find_related_searches(q)
        results = [_serialize_hit(hit) for hit in response]
    except RequestError as e:
        # Keep the submitted filters in the context so the form stays filled in.
        context['error'] = str(e)
        return render(request, _SEARCH_TEMPLATE, context)

    context.update({
        'results': results,
        'total': total,
        'total_pages': (total + size - 1) // size if total > 0 else 0,
        'did_you_mean': did_you_mean,
        'search_suggestions': search_suggestions[:_MAX_SUGGESTIONS],
    })
    return render(request, _SEARCH_TEMPLATE, context)
def diario_detail(request, pk):