import json
import re

import debugpy
from django.conf import settings
from django.http import JsonResponse
from django.shortcuts import render
from elasticsearch_dsl import Search, Q
from elasticsearch_dsl.connections import connections

from .documents import PDFDocument

# Elasticsearch connection setup (executed once, when this module is imported).
connections.create_connection(hosts=[settings.ELASTICSEARCH_HOSTS])


def spellcheck_view(request):
    """Return spelling suggestions for the ``q`` query parameter as JSON.

    Runs a phrase suggester named ``auto_correct`` against the
    ``pdf_documents`` index and responds with ``{"suggestions": [...]}``.
    The list is empty when ``q`` is missing/empty or when the response
    carries no suggestion options.
    """
    query = request.GET.get("q", "")
    suggestions = []

    if query:
        # NOTE(review): this view queries "pdf_documents" while search_view
        # queries "diarios_oficiais" -- confirm the two indices are intended.
        s = Search(index="pdf_documents").suggest(
            "auto_correct",
            query,
            phrase={
                "field": "suggest",
                "size": 3,
                "gram_size": 3,
                "confidence": 2.0,
                "direct_generator": [
                    {"field": "suggest", "suggest_mode": "popular"},
                ],
            },
        )
        response = s.execute()

        # Guard each level: a response without a "suggest" section or with an
        # empty entry list must yield no suggestions rather than an exception.
        suggest = getattr(response, "suggest", None)
        entries = getattr(suggest, "auto_correct", None)
        if entries:
            suggestions.extend(option.text for option in entries[0].options)

    return JsonResponse({"suggestions": suggestions})


def _parse_page(raw_page):
    """Convert the raw ``page`` parameter into a page number >= 1.

    Non-numeric or missing values fall back to the first page, and values
    below 1 are clamped, so the pagination slice never gets a negative start.
    """
    try:
        page = int(raw_page)
    except (TypeError, ValueError):
        return 1
    return max(page, 1)


def _split_query(query):
    """Split ``query`` into ``(exact_phrases, cleaned_query)``.

    ``exact_phrases`` holds the non-blank texts found between double quotes;
    ``cleaned_query`` is the remaining text with the quoted parts removed and
    whitespace runs collapsed to single spaces.
    """
    quoted = re.findall(r'"([^"]*)"', query)

    cleaned_query = query
    for phrase in quoted:
        cleaned_query = cleaned_query.replace(f'"{phrase}"', '')
    cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip()

    # Blank phrases (e.g. a literal "") are dropped: they add no search clause
    # and, being a substring of every text, would flag every hit as exact.
    exact_phrases = [phrase for phrase in quoted if phrase.strip()]
    return exact_phrases, cleaned_query


def _page_contents(pages):
    """Return the string ``content`` values found in a hit's ``pages`` field.

    Accepts either a single object exposing ``content`` or an iterable of
    such objects; anything without string content is ignored.
    """
    if pages is None:
        return []
    content = getattr(pages, 'content', None)
    if content is None:
        # NOTE(review): presumably ``pages`` is a list of page objects here --
        # confirm against the index mapping.
        contents = [getattr(page, 'content', None) for page in pages]
    else:
        contents = [content]
    return [text for text in contents if isinstance(text, str)]


def _is_exact_match(hit, exact_phrases):
    """Tell whether any exact phrase occurs (case-insensitively) in the hit.

    Both the hit's title and its page contents are checked.
    """
    if not exact_phrases:
        return False

    texts = _page_contents(getattr(hit, 'pages', None))
    title = getattr(hit, 'title', None)
    if isinstance(title, str):
        texts.append(title)

    lowered_texts = [text.lower() for text in texts]
    return any(
        phrase.lower() in text
        for phrase in exact_phrases
        for text in lowered_texts
    )


def search_view(request):
    """Render the paginated search results page for the ``q`` parameter.

    Text between double quotes is searched as an exact phrase
    (``match_phrase`` on ``pages.content``); the remaining text is searched
    with a fuzzy ``multi_match`` over ``title`` and ``pages.content``. The
    clauses are OR-ed together. The 1-based ``page`` parameter selects the
    result page; invalid values fall back to page 1.
    """
    query = request.GET.get('q', '')  # search term from the URL
    page = _parse_page(request.GET.get('page', 1))
    per_page = 10

    results = []
    # Never populated in this view; kept so the template context is unchanged.
    suggestions = []
    spelling_correction = None
    total_hits = 0
    # Bound up front so the render context is valid for an empty query too.
    exact_phrases = []

    if query:
        exact_phrases, cleaned_query = _split_query(query)

        search = Search(index='diarios_oficiais')
        queries = []

        # General terms: fuzzy matching tolerates typing mistakes.
        if cleaned_query:
            queries.append(
                Q(
                    'multi_match',
                    query=cleaned_query,
                    fields=['title^3', 'pages.content^2'],
                    fuzziness='AUTO',
                    boost=2,
                )
            )

        # Quoted phrases: exact matching, no fuzziness.
        queries.extend(
            Q(
                'match_phrase',
                pages__content={
                    'query': phrase,
                    'boost': 2,
                    'slop': 0,  # no flexibility in word order
                },
            )
            for phrase in exact_phrases
        )

        # Combine all clauses with OR (when there is at least one).
        if queries:
            search = search.query(
                Q('bool', should=queries, minimum_should_match=1)
            )

        # Highlight settings chosen to show more surrounding context.
        # NOTE(review): pre_tags/post_tags are empty strings, so matches are
        # not visually marked -- confirm no markup was lost here.
        # NOTE(review): only 'pages.content' is highlighted, so the title
        # highlight branch below always falls back to the plain title.
        search = search.highlight(
            'pages.content',
            fragment_size=300,
            number_of_fragments=2,
            pre_tags=[''],
            post_tags=[''],
        )

        # Pagination.
        start = (page - 1) * per_page
        search = search[start:start + per_page]

        response = search.execute()
        total_hits = response.hits.total.value

        for hit in response:
            highlight = getattr(hit.meta, 'highlight', None)

            # Highlighted content fragments, or an empty string if none.
            if hasattr(highlight, 'pages.content'):
                highlighted_content = ' ... '.join(highlight['pages.content'])
            else:
                highlighted_content = ""

            # Highlighted title, or the original title if none.
            if hasattr(highlight, 'title'):
                highlighted_title = highlight.title[0]
            else:
                highlighted_title = hit.title

            results.append({
                'id': hit.meta.id,
                'title': hit.title,
                'highlighted_title': highlighted_title,
                'highlighted_content': highlighted_content,
                'data': hit.data,
                'numero': hit.numero,
                'link': hit.link,
                'finalizado': hit.finalizado,
                'is_exact_match': _is_exact_match(hit, exact_phrases),
            })

    # Ceiling division; zero pages when there are no hits.
    total_pages = (total_hits + per_page - 1) // per_page if total_hits > 0 else 0

    return render(request, 'diarios/search_results.html', {
        'query': query,
        'results': results,
        'suggestions': suggestions[:5],  # at most 5 suggestions
        'spelling_correction': spelling_correction,
        'total_hits': total_hits,
        'page': page,
        'total_pages': total_pages,
        'page_range': range(max(1, page - 2), min(total_pages + 1, page + 3)),
        'has_exact_phrases': bool(exact_phrases),
    })