Files
Diarios-Oficiais-ALEMS/diarios/views.py

160 lines
5.5 KiB
Python

import json
import debugpy
from django.shortcuts import render
from elasticsearch_dsl import Search, Q
from elasticsearch_dsl.connections import connections
from django.conf import settings
import re
from .documents import PDFDocument
from django.http import JsonResponse
# Configure the default Elasticsearch connection when this module is imported.
# NOTE(review): the setting is wrapped in a list here, so ELASTICSEARCH_HOSTS
# is presumably a single host entry rather than a list of hosts -- confirm
# against the project settings.
connections.create_connection(hosts=[settings.ELASTICSEARCH_HOSTS])
def spellcheck_view(request):
    """Return spelling suggestions for the ``q`` GET parameter as JSON.

    Runs a phrase suggester named ``auto_correct`` against the ``suggest``
    field of the ``pdf_documents`` index and returns the option texts of the
    first suggestion entry.

    Args:
        request: The incoming Django request; only ``request.GET['q']`` is read.

    Returns:
        A ``JsonResponse`` of the form ``{"suggestions": [...]}``. The list is
        empty when ``q`` is missing or blank, or when the suggester returns
        nothing.
    """
    # A whitespace-only term has nothing to correct; skip the round trip.
    query = request.GET.get("q", "").strip()
    suggestions = []

    if query:
        search = Search(index="pdf_documents").suggest(
            "auto_correct",
            query,
            phrase={
                "field": "suggest",
                "size": 3,
                "gram_size": 3,
                "confidence": 2.0,
                "direct_generator": [
                    {"field": "suggest", "suggest_mode": "popular"},
                ],
            },
        )
        response = search.execute()

        # Both the ``suggest`` section and the named suggester may be absent,
        # and the entry list may be empty, so guard before indexing into it.
        suggest_section = getattr(response, "suggest", None)
        entries = getattr(suggest_section, "auto_correct", None)
        if entries:
            suggestions = [option.text for option in entries[0].options]

    return JsonResponse({"suggestions": suggestions})
def _parse_page(raw_page):
    """Return *raw_page* as a 1-based page number, falling back to 1."""
    try:
        page = int(raw_page)
    except (TypeError, ValueError):
        return 1
    return max(page, 1)


def _hit_page_text(hit):
    """Return the page text stored on *hit* as one string ('' when absent)."""
    pages = getattr(hit, 'pages', None)
    if pages is None:
        return ''
    content = getattr(pages, 'content', None)
    if isinstance(content, str):
        return content
    # NOTE(review): the index mapping is not visible from this module. If
    # ``pages`` is stored as a list of page objects, ``pages.content`` does not
    # exist, so collect the text of each page instead -- confirm the mapping.
    page_texts = (getattr(page, 'content', None) for page in pages)
    return '\n'.join(text for text in page_texts if isinstance(text, str))


def search_view(request):
    """Search the ``diarios_oficiais`` index and render the results page.

    Reads two GET parameters:

    * ``q`` -- the search text. Segments wrapped in double quotes are treated
      as exact phrases (``match_phrase`` with ``slop=0``); the remaining text
      is searched with a fuzzy ``multi_match`` over ``title`` and
      ``pages.content``. All clauses are OR-ed together.
    * ``page`` -- 1-based page number; invalid or non-positive values are
      treated as page 1.

    Args:
        request: The incoming Django request.

    Returns:
        The rendered ``diarios/search_results.html`` template. Its context
        holds the query, the result dicts, pagination data and
        ``has_exact_phrases``.
    """
    query = request.GET.get('q', '')  # search term from the URL
    page = _parse_page(request.GET.get('page', 1))
    per_page = 10
    results = []
    suggestions = []
    spelling_correction = None
    total_hits = 0
    # Must exist even when there is no query: the render context reads it.
    exact_phrases = []

    if query:
        # Quoted segments are exact phrases. Blank ones are dropped because an
        # empty string is a substring of everything and would flag every hit
        # as an exact match.
        exact_phrases = [
            phrase
            for phrase in re.findall(r'"([^"]*)"', query)
            if phrase.strip()
        ]

        # Strip every quoted segment from the free-text part of the query,
        # then collapse the leftover whitespace.
        cleaned_query = re.sub(r'"[^"]*"', '', query)
        cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip()

        search = Search(index='diarios_oficiais')
        queries = []

        # Free-text terms: fuzzy matching tolerates typos.
        if cleaned_query:
            queries.append(
                Q(
                    'multi_match',
                    query=cleaned_query,
                    fields=['title^3', 'pages.content^2'],
                    fuzziness='AUTO',
                    boost=2,
                )
            )

        # Quoted phrases: exact matching, no fuzziness and no word reordering.
        for phrase in exact_phrases:
            queries.append(
                Q(
                    'match_phrase',
                    pages__content={
                        'query': phrase,
                        'boost': 2,
                        'slop': 0,
                    },
                )
            )

        # Combine the clauses with OR. With no clauses the search is left
        # without a query, as before.
        if queries:
            search = search.query(
                Q('bool', should=queries, minimum_should_match=1)
            )

        # Highlight with enough context to be readable in the result list.
        search = search.highlight(
            'pages.content',
            fragment_size=300,
            number_of_fragments=2,
            pre_tags=['<mark>'],
            post_tags=['</mark>'],
        )

        # Pagination: ``page`` is guaranteed >= 1, so the slice is never
        # negative.
        start = (page - 1) * per_page
        search = search[start:start + per_page]

        response = search.execute()
        total_hits = response.hits.total.value

        # Lower-case the phrases once instead of once per hit.
        lowered_phrases = [phrase.lower() for phrase in exact_phrases]

        for hit in response:
            highlight = getattr(hit.meta, 'highlight', None)

            # Highlighted page fragments, or nothing when none came back.
            if highlight is not None and 'pages.content' in highlight:
                highlighted_content = ' ... '.join(highlight['pages.content'])
            else:
                highlighted_content = ""

            # Only ``pages.content`` is highlighted above, so this normally
            # falls back to the plain title.
            if highlight is not None and 'title' in highlight:
                highlighted_title = highlight['title'][0]
            else:
                highlighted_title = hit.title

            # Flag hits whose text or title literally contains a quoted phrase.
            is_exact_match = False
            if lowered_phrases:
                page_text = _hit_page_text(hit).lower()
                title_text = hit.title.lower()
                is_exact_match = any(
                    phrase in page_text or phrase in title_text
                    for phrase in lowered_phrases
                )

            results.append({
                'id': hit.meta.id,
                'title': hit.title,
                'highlighted_title': highlighted_title,
                'highlighted_content': highlighted_content,
                'data': hit.data,
                'numero': hit.numero,
                'link': hit.link,
                'finalizado': hit.finalizado,
                'is_exact_match': is_exact_match,
            })

    # Ceiling division; zero pages when there are no hits.
    total_pages = (total_hits + per_page - 1) // per_page if total_hits > 0 else 0

    return render(request, 'diarios/search_results.html', {
        'query': query,
        'results': results,
        'suggestions': suggestions[:5],  # cap at 5 suggestions
        'spelling_correction': spelling_correction,
        'total_hits': total_hits,
        'page': page,
        'total_pages': total_pages,
        'page_range': range(max(1, page - 2), min(total_pages + 1, page + 3)),
        'has_exact_phrases': bool(exact_phrases),
    })