"""Search views backed by the ``pdf_documents`` Elasticsearch index."""

import re

from django.conf import settings
from django.http import JsonResponse
from django.shortcuts import render
from elasticsearch_dsl import Q, Search
from elasticsearch_dsl.connections import connections

# Configure the default Elasticsearch connection used by every Search below.
connections.create_connection(hosts=[settings.ELASTICSEARCH_HOSTS])

_INDEX = 'pdf_documents'
_RESULTS_PER_PAGE = 10
_SNIPPET_LENGTH = 300
# NOTE(review): Elasticsearch rejects from + size beyond index.max_result_window,
# whose documented default is 10,000 -- adjust if the index overrides it.
_MAX_RESULT_WINDOW = 10_000
_MAX_PAGE = _MAX_RESULT_WINDOW // _RESULTS_PER_PAGE

_QUOTED_PHRASE_RE = re.compile(r'"([^"]*)"')
_WHITESPACE_RE = re.compile(r'\s+')


def spellcheck_view(request):
    """Return phrase-suggester corrections for ``?q=`` as JSON.

    The response body is ``{"suggestions": [...]}``; the list is empty when
    the query is blank or Elasticsearch returns no ``auto_correct`` entry.
    """
    query = request.GET.get('q', '').strip()
    suggestions = []

    if query:
        search = Search(index=_INDEX).suggest(
            'auto_correct',
            query,
            phrase={
                'field': 'suggest',
                'size': 3,
                'gram_size': 3,
                'confidence': 2.0,
                'direct_generator': [{
                    'field': 'suggest',
                    'suggest_mode': 'popular',
                }],
            },
        )
        response = search.execute()

        # Guard both the missing "suggest" section and an empty entry list
        # so a sparse response yields [] instead of an exception.
        suggest = getattr(response, 'suggest', None)
        entries = getattr(suggest, 'auto_correct', None) or []
        if entries:
            suggestions = [option.text for option in entries[0].options]

    return JsonResponse({'suggestions': suggestions})


def _parse_page(raw_page):
    """Convert the raw ``page`` parameter to an int within ``1.._MAX_PAGE``."""
    try:
        page = int(raw_page)
    except (TypeError, ValueError):
        return 1
    return min(max(page, 1), _MAX_PAGE)


def _split_query(query):
    """Split *query* into ``(free_text, exact_phrases)``.

    Text wrapped in double quotes is removed from the free text and returned
    as exact phrases. Blank phrases (``""`` or only whitespace) are dropped,
    because an empty phrase would match every document.
    """
    quoted = _QUOTED_PHRASE_RE.findall(query)

    remainder = query
    for phrase in quoted:
        remainder = remainder.replace(f'"{phrase}"', '')
    remainder = _WHITESPACE_RE.sub(' ', remainder).strip()

    return remainder, [phrase for phrase in quoted if phrase.strip()]


def _build_clauses(cleaned_query, exact_phrases):
    """Build the ``should`` clauses for the free text and the exact phrases."""
    clauses = []

    if cleaned_query:
        # Fuzzy match on the free text to tolerate typos.
        clauses.append(
            Q('multi_match',
              query=cleaned_query,
              fields=['title^3', 'content^2', 'synonyms^1'],
              fuzziness='AUTO',
              boost=2)
        )
        clauses.append(
            Q('match', synonyms={'query': cleaned_query, 'boost': 0.5})
        )

    for phrase in exact_phrases:
        # slop=0: the words must appear adjacent and in the given order.
        clauses.append(
            Q('match_phrase', title={'query': phrase, 'boost': 3, 'slop': 0})
        )
        clauses.append(
            Q('match_phrase', content={'query': phrase, 'boost': 2, 'slop': 0})
        )

    return clauses


def _snippet(text):
    """Return *text* cut to ``_SNIPPET_LENGTH`` chars, with '...' if truncated."""
    if len(text) > _SNIPPET_LENGTH:
        return text[:_SNIPPET_LENGTH] + '...'
    return text


def _run_main_search(clauses, page, exact_phrases):
    """Execute the paginated main search; return ``(results, total_hits)``."""
    search = Search(index=_INDEX).query(
        Q('bool', should=clauses, minimum_should_match=1)
    )

    # NOTE(review): pre_tags/post_tags are empty strings, so highlighted
    # fragments carry no markup -- confirm this is intended (tags such as
    # <mark> may have been lost).
    search = search.highlight('content',
                              fragment_size=300,
                              number_of_fragments=2,
                              pre_tags=[''],
                              post_tags=[''])
    search = search.highlight('title',
                              fragment_size=300,
                              number_of_fragments=1,
                              pre_tags=[''],
                              post_tags=[''])

    start = (page - 1) * _RESULTS_PER_PAGE
    search = search[start:start + _RESULTS_PER_PAGE]

    response = search.execute()
    total_hits = response.hits.total.value

    lowered_phrases = [phrase.lower() for phrase in exact_phrases]
    results = []
    for hit in response:
        highlight = getattr(hit.meta, 'highlight', None)

        # Prefer the highlighted fragments; fall back to a plain snippet.
        if highlight is not None and hasattr(highlight, 'content'):
            highlighted_content = ' ... '.join(highlight.content)
        else:
            highlighted_content = _snippet(hit.content)

        if highlight is not None and hasattr(highlight, 'title'):
            highlighted_title = highlight.title[0]
        else:
            highlighted_title = hit.title

        is_exact_match = False
        if lowered_phrases:
            haystacks = (hit.content.lower(), hit.title.lower())
            is_exact_match = any(
                phrase in haystack
                for phrase in lowered_phrases
                for haystack in haystacks
            )

        results.append({
            'id': hit.meta.id,
            'title': hit.title,
            'highlighted_title': highlighted_title,
            'highlighted_content': highlighted_content,
            'uploaded_at': hit.uploaded_at,
            'score': hit.meta.score,
            'is_exact_match': is_exact_match,
            'is_related': False,
        })

    return results, total_hits


def _term_suggestions(cleaned_query, exact_phrases, build_correction):
    """Query the term suggester; return ``(suggestions, spelling_correction)``.

    ``spelling_correction`` is ``None`` unless *build_correction* is true and
    replacing each suggested token with its first option changes the query.
    """
    response = Search(index=_INDEX).suggest(
        'term_suggestion',
        cleaned_query,
        term={
            'field': 'content',
            'suggest_mode': 'popular',
            'size': 5,
        },
    ).execute()

    suggest = getattr(response, 'suggest', None)
    entries = getattr(suggest, 'term_suggestion', None) or []

    suggestions = [
        option.text for entry in entries for option in entry.options
    ]
    if not (suggestions and build_correction):
        return suggestions, None

    corrected_query = cleaned_query
    for entry in entries:
        if not entry.options:
            continue
        replacement = entry.options[0].text
        # A callable replacement inserts the text literally; a plain string
        # would have its backslashes interpreted as escapes by re.sub.
        corrected_query = re.sub(
            r'\b' + re.escape(entry.text) + r'\b',
            lambda _match, text=replacement: text,
            corrected_query,
            flags=re.IGNORECASE,
        )

    if corrected_query == cleaned_query:
        return suggestions, None

    # Re-attach the quoted phrases, which are never spell-corrected.
    parts = [corrected_query] + [f'"{phrase}"' for phrase in exact_phrases]
    return suggestions, ' '.join(parts).strip()


def _append_related(cleaned_query, results):
    """Append up to five ``more_like_this`` hits not already in *results*."""
    related = Search(index=_INDEX).query(
        'more_like_this',
        fields=['content', 'title'],
        like=cleaned_query,
        min_term_freq=1,
        max_query_terms=10,
        min_doc_freq=1,
    )[:5]

    seen_ids = {result['id'] for result in results}
    for hit in related.execute():
        if hit.meta.id in seen_ids:
            continue
        seen_ids.add(hit.meta.id)
        results.append({
            'id': hit.meta.id,
            'title': hit.title,
            'highlighted_title': hit.title,
            'highlighted_content': _snippet(hit.content),
            'uploaded_at': hit.uploaded_at,
            'score': hit.meta.score,
            'is_exact_match': False,
            'is_related': True,
        })


def search_view(request):
    """Render ``diarios/search_results.html`` for the ``?q=`` / ``?page=`` query.

    Double-quoted parts of the query are searched as exact phrases; the rest
    is searched with fuzzy matching. When the main search finds fewer than
    five hits, term suggestions are fetched (plus a corrected query when
    there are no hits at all); with fewer than three hits, related documents
    are appended to the results.

    A blank query, or one that contains only empty quotes, runs no search
    and renders an empty result page. An invalid ``page`` falls back to 1.
    """
    query = request.GET.get('q', '').strip()
    page = _parse_page(request.GET.get('page', 1))

    results = []
    suggestions = []
    spelling_correction = None
    total_hits = 0
    # Defined up front so the template context is valid for an empty query.
    exact_phrases = []

    if query:
        cleaned_query, exact_phrases = _split_query(query)
        clauses = _build_clauses(cleaned_query, exact_phrases)

        # Without clauses the search would degrade to match-all; skip it.
        if clauses:
            results, total_hits = _run_main_search(
                clauses, page, exact_phrases
            )

            # "Did you mean" suggestions only cover the unquoted text.
            if total_hits < 5 and cleaned_query:
                suggestions, spelling_correction = _term_suggestions(
                    cleaned_query,
                    exact_phrases,
                    build_correction=(total_hits == 0),
                )

            if total_hits < 3 and cleaned_query:
                _append_related(cleaned_query, results)

    if total_hits > 0:
        total_pages = (total_hits + _RESULTS_PER_PAGE - 1) // _RESULTS_PER_PAGE
    else:
        total_pages = 0

    return render(request, 'diarios/search_results.html', {
        'query': query,
        'results': results,
        'suggestions': suggestions[:5],  # show at most five suggestions
        'spelling_correction': spelling_correction,
        'total_hits': total_hits,
        'page': page,
        'total_pages': total_pages,
        'page_range': range(max(1, page - 2), min(total_pages + 1, page + 3)),
        'has_exact_phrases': bool(exact_phrases),
    })