adiciona o buscador e cria views e templates para ele

This commit is contained in:
root
2025-03-07 15:31:53 +01:00
parent 1cd93f7955
commit 3f5ac79051
18 changed files with 378 additions and 235 deletions

View File

@ -6,4 +6,5 @@ set -o nounset
python manage.py migrate python manage.py migrate
exec python manage.py runserver_plus 0.0.0.0:8005 exec python manage.py runserver_plus 0.0.0.0:8005

View File

@ -83,7 +83,7 @@ THIRD_PARTY_APPS = [
LOCAL_APPS = [ LOCAL_APPS = [
"diários_oficiais_alems.users", "diários_oficiais_alems.users",
"diarios", "diarios",
'django_elasticsearch_dsl', "django_elasticsearch_dsl",
] ]
# https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps # https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
@ -278,7 +278,9 @@ ACCOUNT_FORMS = {"signup": "diários_oficiais_alems.users.forms.UserSignupForm"}
# https://docs.allauth.org/en/latest/socialaccount/configuration.html # https://docs.allauth.org/en/latest/socialaccount/configuration.html
SOCIALACCOUNT_ADAPTER = "diários_oficiais_alems.users.adapters.SocialAccountAdapter" SOCIALACCOUNT_ADAPTER = "diários_oficiais_alems.users.adapters.SocialAccountAdapter"
# https://docs.allauth.org/en/latest/socialaccount/configuration.html # https://docs.allauth.org/en/latest/socialaccount/configuration.html
SOCIALACCOUNT_FORMS = {"signup": "diários_oficiais_alems.users.forms.UserSocialSignupForm"} SOCIALACCOUNT_FORMS = {
"signup": "diários_oficiais_alems.users.forms.UserSocialSignupForm"
}
# django-compressor # django-compressor
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# https://django-compressor.readthedocs.io/en/latest/quickstart/#installation # https://django-compressor.readthedocs.io/en/latest/quickstart/#installation
@ -288,8 +290,6 @@ STATICFILES_FINDERS += ["compressor.finders.CompressorFinder"]
# Elastic Search # Elastic Search
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
ELASTICSEARCH_DSL = { ELASTICSEARCH_DSL = {
'default': { "default": {"hosts": "http://elasticsearch:9200"}, # same as above
'hosts': 'http://elasticsearch:9200' # same as above
},
} }
ELASTICSEARCH_HOSTS="http://elasticsearch:9200" ELASTICSEARCH_HOSTS = "http://elasticsearch:9200"

View File

@ -30,7 +30,8 @@ CACHES = {
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#email-backend # https://docs.djangoproject.com/en/dev/ref/settings/#email-backend
EMAIL_BACKEND = env( EMAIL_BACKEND = env(
"DJANGO_EMAIL_BACKEND", default="django.core.mail.backends.console.EmailBackend", "DJANGO_EMAIL_BACKEND",
default="django.core.mail.backends.console.EmailBackend",
) )
# WhiteNoise # WhiteNoise
@ -66,7 +67,7 @@ if env("USE_DOCKER") == "yes":
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# This is a custom setting for RunServerPlus to fix reloader issue in Windows docker environment # This is a custom setting for RunServerPlus to fix reloader issue in Windows docker environment
# Werkzeug reloader type [auto, watchdog, or stat] # Werkzeug reloader type [auto, watchdog, or stat]
RUNSERVERPLUS_POLLER_RELOADER_TYPE = 'stat' RUNSERVERPLUS_POLLER_RELOADER_TYPE = "stat"
# If you have CPU and IO load issues, you can increase this poller interval e.g) 5 # If you have CPU and IO load issues, you can increase this poller interval e.g) 5
RUNSERVERPLUS_POLLER_RELOADER_INTERVAL = 1 RUNSERVERPLUS_POLLER_RELOADER_INTERVAL = 1

View File

@ -2,5 +2,5 @@ from django.apps import AppConfig
class DiariosConfig(AppConfig): class DiariosConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField' default_auto_field = "django.db.models.BigAutoField"
name = 'diarios' name = "diarios"

View File

@ -2,67 +2,66 @@ from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry from django_elasticsearch_dsl.registries import registry
from .models import PDFDocument from .models import PDFDocument
@registry.register_document @registry.register_document
class PDFDocumentDocument(Document): class PDFDocumentDocument(Document):
title = fields.TextField() title = fields.TextField()
content = fields.TextField(analyzer='portuguese') content = fields.TextField(analyzer="portuguese")
pages = fields.NestedField(
properties={
"number": fields.IntegerField(),
"content": fields.TextField(analyzer="portuguese"),
}
)
class Index: class Index:
name = 'pdf_documents' name = "pdf_documents"
settings = { settings = {
'number_of_shards': 1, "number_of_shards": 1,
'number_of_replicas': 0, "number_of_replicas": 0,
'analysis': { "analysis": {
'analyzer': { "analyzer": {
'portuguese': { "portuguese": {
'type': 'custom', "type": "custom",
'tokenizer': 'standard', "tokenizer": "standard",
'filter': [ "filter": [
'lowercase', "lowercase",
'ascii_folding', "ascii_folding",
'portuguese_stemmer', "portuguese_stemmer",
'stop', "stop",
'portuguese_synonyms', "portuguese_synonyms",
] ],
}, },
'portuguese_search': { "portuguese_search": {
'type': 'custom', "type": "custom",
'tokenizer': 'standard', "tokenizer": "standard",
'filter': [ "filter": [
'lowercase', "lowercase",
'ascii_folding', "ascii_folding",
'portuguese_stemmer', "portuguese_stemmer",
'stop', "stop",
'suggest_shingle', "suggest_shingle",
] ],
}
}, },
'filter': {
'suggest_shingle': {
'type': 'shingle',
'min_shingle_size': 2,
'max_shingle_size': 3
}, },
'stop': { "filter": {
'type': 'stop', "suggest_shingle": {
'stopwords': '_portuguese_' "type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 3,
},
"stop": {"type": "stop", "stopwords": "_portuguese_"},
"ascii_folding": {"type": "asciifolding"},
"portuguese_stemmer": {"type": "stemmer", "language": "portuguese"},
"portuguese_synonyms": {
"type": "synonym",
"synonyms_path": "synonyms.txt",
"expand": True,
}, },
'ascii_folding': {
'type': 'asciifolding'
}, },
'portuguese_stemmer': {
'type': 'stemmer',
'language': 'portuguese'
}, },
'portuguese_synonyms':{
'type': 'synonym',
'synonyms_path': 'synonyms.txt',
'expand': True
}
}
}
} }
class Django: class Django:
model = PDFDocument model = PDFDocument
fields = ['uploaded_at'] fields = ["uploaded_at", "file"]

View File

@ -7,18 +7,25 @@ class Migration(migrations.Migration):
initial = True initial = True
dependencies = [ dependencies = []
]
operations = [ operations = [
migrations.CreateModel( migrations.CreateModel(
name='PDFDocument', name="PDFDocument",
fields=[ fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), (
('title', models.CharField(max_length=255)), "id",
('file', models.FileField(upload_to='pdfs/')), models.BigAutoField(
('content', models.TextField(blank=True)), auto_created=True,
('uploaded_at', models.DateTimeField(auto_now_add=True)), primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("title", models.CharField(max_length=255)),
("file", models.FileField(upload_to="pdfs/")),
("content", models.TextField(blank=True)),
("uploaded_at", models.DateTimeField(auto_now_add=True)),
], ],
), ),
] ]

View File

@ -0,0 +1,18 @@
# Generated by Django 5.0.12 on 2025-03-07 13:47
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("diarios", "0001_initial"),
]
operations = [
migrations.AddField(
model_name="pdfdocument",
name="page_content",
field=models.TextField(blank=True),
),
]

View File

@ -1,12 +1,14 @@
from django.db import models from django.db import models
import PyPDF2 import PyPDF2
import json
class PDFDocument(models.Model): class PDFDocument(models.Model):
title = models.CharField(max_length=255) title = models.CharField(max_length=255)
file = models.FileField(upload_to='pdfs/') file = models.FileField(upload_to="pdfs/")
content = models.TextField(blank=True) content = models.TextField(blank=True)
uploaded_at = models.DateTimeField(auto_now_add=True) uploaded_at = models.DateTimeField(auto_now_add=True)
page_content = models.TextField(blank=True)
def __str__(self): def __str__(self):
return self.title return self.title
@ -15,9 +17,18 @@ class PDFDocument(models.Model):
if self.file: if self.file:
pdf = PyPDF2.PdfReader(self.file) pdf = PyPDF2.PdfReader(self.file)
texto = [] texto = []
for pagina in pdf.pages: pages_data = []
for i, pagina in enumerate(pdf.pages):
page_text = pagina.extract_text()
pages_data.append(
{
"number": i + 1,
"content": page_text,
}
)
texto.append(pagina.extract_text()) texto.append(pagina.extract_text())
self.content = '\n'.join(texto) self.content = "\n".join(texto)
self.page_content = json.dumps(pages_data)
super().save(*args, **kwargs) super().save(*args, **kwargs)

View File

@ -13,6 +13,5 @@ def extract_text(sender, instance, created, **kwargs):
text = [] text = []
for page in pdf.pages: for page in pdf.pages:
text.append(page.extract_text()) text.append(page.extract_text())
instance.content = '\n'.join(text) instance.content = "\n".join(text)
instance.save(update_fields=['content']) instance.save(update_fields=["content"])

View File

@ -46,7 +46,16 @@
.result-item:last-child { .result-item:last-child {
border-bottom: none; border-bottom: none;
} }
.pdf-link {
color: #e74c3c;
margin-left: 10px;
font-size: 0.8em;
text-decoration: none;
}
.result-title { .result-title {
display: flex;
align-items: center;
justify-content: space-between;
color: #1a0dab; color: #1a0dab;
font-weight: 500; font-weight: 500;
margin-bottom: 5px; margin-bottom: 5px;
@ -218,11 +227,22 @@ document.getElementById('suggestionsBox').addEventListener('click', function(e)
{% endif %} {% endif %}
</div> </div>
<h5 class="result-title"> <h5 class="result-title">
<a href="#">{{ result.highlighted_title|safe }}</a> <a href="{{ result.pdf_url }}" target="_blank">{{ result.highlighted_title|safe }}</a>
<a href="{{ result.pdf_url }}" target="_blank" class="pdf-link" title="Abrir PDF completo">
<i class="bi bi-file-pdf"></i>
</a>
</h5> </h5>
<div class="result-content">{{ result.highlighted_content|safe }}</div> <div class="result-content">{{ result.highlighted_content|safe }}</div>
<div class="result-meta"> <div class="result-meta">
<i class="bi bi-calendar-date"></i> {{ result.uploaded_at|date:"d/m/Y" }} <i class="bi bi-calendar-date"></i> {{ result.uploaded_at|date:"d/m/Y" }}
{% if result.matching_pages %}
<span class="ms-3">
<i class="bi bi-file-earmark-text"></i> Páginas encontradas:
{% for page in result.matching_pages %}
<a href="{{ result.pdf_url }}#page={{ page }}" target="_blank" class="badge bg-light text-dark">{{ page }}</a>
{% endfor %}
</span>
{% endif %}
</div> </div>
</div> </div>
{% endfor %} {% endfor %}

View File

@ -2,6 +2,6 @@ from django.urls import path
from .views import search_view, spellcheck_view from .views import search_view, spellcheck_view
urlpatterns = [ urlpatterns = [
path('pesquisa/', search_view, name='search_view'), path("pesquisa/", search_view, name="search_view"),
path('spellcheck/', spellcheck_view, name='spellcheck_view'), path("spellcheck/", spellcheck_view, name="spellcheck_view"),
] ]

View File

@ -1,9 +1,11 @@
import json
import debugpy
from django.shortcuts import render from django.shortcuts import render
from elasticsearch_dsl import Search, Q from elasticsearch_dsl import Search, Q
from elasticsearch_dsl.connections import connections from elasticsearch_dsl.connections import connections
from django.conf import settings from django.conf import settings
import re import re
from .documents import PDFDocument
from django.http import JsonResponse from django.http import JsonResponse
@ -12,33 +14,39 @@ connections.create_connection(hosts=[settings.ELASTICSEARCH_HOSTS])
def spellcheck_view(request): def spellcheck_view(request):
query = request.GET.get('q', '') query = request.GET.get("q", "")
suggestions = [] suggestions = []
if query: if query:
s = Search(index='pdf_documents') s = Search(index="pdf_documents")
s = s.suggest('auto_correct', query, s = s.suggest(
"auto_correct",
query,
phrase={ phrase={
'field': 'suggest', "field": "suggest",
'size': 3, "size": 3,
'gram_size': 3, "gram_size": 3,
'confidence': 2.0, "confidence": 2.0,
'direct_generator': [{ "direct_generator": [{"field": "suggest", "suggest_mode": "popular"}],
'field': 'suggest', },
'suggest_mode': 'popular' )
}]
})
response = s.execute() response = s.execute()
if hasattr(response.suggest, 'auto_correct'): if hasattr(response.suggest, "auto_correct"):
for option in response.suggest.auto_correct[0].options: for option in response.suggest.auto_correct[0].options:
suggestions.append(option.text) suggestions.append(option.text)
return JsonResponse({'suggestions': suggestions}) return JsonResponse({"suggestions": suggestions})
def search_view(request): def search_view(request):
query = request.GET.get('q', '') # Obtém o termo de pesquisa da URL query = request.GET.get("q", "") # Obtém o termo de pesquisa da URL
page = int(request.GET.get('page', 1)) page = request.GET.get("page", 1) # Obtém o valor de "page" (padrão: 1)
# Converte page para int
try:
page = int(page)
except ValueError:
page = 1 # Valor padrão em caso de erro
results = [] results = []
suggestions = [] suggestions = []
@ -53,13 +61,13 @@ def search_view(request):
# Remove os termos entre aspas da consulta principal # Remove os termos entre aspas da consulta principal
cleaned_query = query cleaned_query = query
for phrase in exact_phrases: for phrase in exact_phrases:
cleaned_query = cleaned_query.replace(f'"{phrase}"', '') cleaned_query = cleaned_query.replace(f'"{phrase}"', "")
# Remove espaços extras e pontuação desnecessária # Remove espaços extras e pontuação desnecessária
cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip() cleaned_query = re.sub(r"\s+", " ", cleaned_query).strip()
# Cria uma consulta no Elasticsearch # Cria uma consulta no Elasticsearch
search = Search(index='pdf_documents') search = Search(index="pdf_documents")
# Lista para armazenar todas as consultas # Lista para armazenar todas as consultas
queries = [] queries = []
@ -67,55 +75,65 @@ def search_view(request):
# Adiciona consulta para termos gerais (com fuzziness para tolerância a erros) # Adiciona consulta para termos gerais (com fuzziness para tolerância a erros)
if cleaned_query: if cleaned_query:
queries.append( queries.append(
Q('multi_match', Q(
"multi_match",
query=cleaned_query, query=cleaned_query,
fields=['title^3', 'content^2', 'synonyms^1'], fields=["title^3", "content^2", "synonyms^1"],
fuzziness='AUTO', fuzziness="AUTO",
boost=2,) boost=2,
) )
queries.append(
Q('match',
synonyms={
'query': cleaned_query,
'boost': 0.5
})
) )
queries.append(Q("match", synonyms={"query": cleaned_query, "boost": 0.5}))
# Adiciona consultas exatas para frases entre aspas (sem fuzziness) # Adiciona consultas exatas para frases entre aspas (sem fuzziness)
for phrase in exact_phrases: for phrase in exact_phrases:
if phrase.strip(): if phrase.strip():
# Consulta de frase exata para o título com peso alto # Consulta de frase exata para o título com peso alto
queries.append( queries.append(
Q('match_phrase', Q(
"match_phrase",
title={ title={
'query': phrase, "query": phrase,
'boost': 3, "boost": 3,
'slop': 0 # Sem flexibilidade na ordem das palavras "slop": 0, # Sem flexibilidade na ordem das palavras
}) },
)
) )
# Consulta de frase exata para o conteúdo com peso médio # Consulta de frase exata para o conteúdo com peso médio
queries.append( queries.append(
Q('match_phrase', Q(
"match_phrase",
content={ content={
'query': phrase, "query": phrase,
'boost': 2, "boost": 2,
'slop': 0 # Sem flexibilidade na ordem das palavras "slop": 0, # Sem flexibilidade na ordem das palavras
}) },
)
) )
# Combina as consultas com OR (se houver alguma) # Combina as consultas com OR (se houver alguma)
if queries: if queries:
search = search.query( search = search.query(Q("bool", should=queries, minimum_should_match=1))
Q('bool', should=queries, minimum_should_match=1)
)
# Configuração do highlight para mostrar mais contexto # Configuração do highlight para mostrar mais contexto
search = search.highlight('content', fragment_size=300, number_of_fragments=2, pre_tags=['<mark>'], post_tags=['</mark>']) search = search.highlight(
search = search.highlight('title', fragment_size=300, number_of_fragments=1, pre_tags=['<mark>'], post_tags=['</mark>']) "content",
fragment_size=300,
number_of_fragments=2,
pre_tags=["<mark>"],
post_tags=["</mark>"],
)
search = search.highlight(
"title",
fragment_size=300,
number_of_fragments=1,
pre_tags=["<mark>"],
post_tags=["</mark>"],
)
# Paginação # Paginação
search = search[(page-1)*per_page:page*per_page] search = search[(page - 1) * per_page : page * per_page]
# Executa a consulta # Executa a consulta
response = search.execute() response = search.execute()
@ -123,49 +141,90 @@ def search_view(request):
# Processa os resultados # Processa os resultados
for hit in response: for hit in response:
# Obter o objeto PDFDocument correspondente
try:
pdf_doc = PDFDocument.objects.get(id=hit.meta.id)
pdf_url = pdf_doc.file.url # URL do PDF
matching_pages = []
if hasattr(hit.meta, 'highlight') and hasattr(hit.meta.highlight, 'pages.content'):
for highlight in hit.meta.highlight['pages.content']:
page_matches = re.findall(r'page_(\d+)', highlight)
if page_matches:
matching_pages.append(int(page_matches[0]))
if not matching_pages and query:
if pdf_doc.page_content:
try:
page_data = json.loads(pdf_doc.page_content)
for page_d in page_data:
if query.lower() in page_d['content'].lower():
matching_pages.append(page_d['number'])
except json.JSONDecodeError as e:
logger.error(f"Erro ao decodificar JSON para o documento {pdf_doc.id}: {e}")
page_data = []
else:
page_data = []
matching_pages = sorted(list(set(matching_pages)))
except PDFDocument.DoesNotExist:
pdf_url = ""
matching_pages = []
# Extrai o conteúdo destacado ou usa o original # Extrai o conteúdo destacado ou usa o original
if hasattr(hit.meta, 'highlight') and hasattr(hit.meta.highlight, 'content'): if hasattr(hit.meta, "highlight") and hasattr(
highlighted_content = ' ... '.join(hit.meta.highlight.content) hit.meta.highlight, "content"
):
highlighted_content = " ... ".join(hit.meta.highlight.content)
else: else:
# Se não houver highlight, pegue os primeiros 300 caracteres # Se não houver highlight, pegue os primeiros 300 caracteres
highlighted_content = hit.content[:300] + '...' if len(hit.content) > 300 else hit.content highlighted_content = (
hit.content[:300] + "..."
if len(hit.content) > 300
else hit.content
)
# Extrai o título destacado ou usa o original # Extrai o título destacado ou usa o original
if hasattr(hit.meta, 'highlight') and hasattr(hit.meta.highlight, 'title'): if hasattr(hit.meta, "highlight") and hasattr(
hit.meta.highlight, "title"
):
highlighted_title = hit.meta.highlight.title[0] highlighted_title = hit.meta.highlight.title[0]
else: else:
highlighted_title = hit.title highlighted_title = hit.title
# Verifica se o resultado corresponde a uma frase exata # Verifica se o resultado corresponde a uma frase exata
is_exact_match = any(phrase.lower() in hit.content.lower() or is_exact_match = any(
phrase.lower() in hit.title.lower() phrase.lower() in hit.content.lower()
for phrase in exact_phrases) or phrase.lower() in hit.title.lower()
for phrase in exact_phrases
)
results.append({ results.append(
'id': hit.meta.id, {
'title': hit.title, "id": hit.meta.id,
'highlighted_title': highlighted_title, "title": hit.title,
'highlighted_content': highlighted_content, "highlighted_title": highlighted_title,
'uploaded_at': hit.uploaded_at, "highlighted_content": highlighted_content,
'score': hit.meta.score, "uploaded_at": hit.uploaded_at,
'is_exact_match': is_exact_match "score": hit.meta.score,
}) "is_exact_match": is_exact_match,
"pdf_url": pdf_url,
"matching_pages": matching_pages,
}
)
# Sugestões "Você quis dizer" (apenas para termos fora de aspas) # Sugestões "Você quis dizer" (apenas para termos fora de aspas)
if total_hits < 5 and cleaned_query: if total_hits < 5 and cleaned_query:
suggestion_search = Search(index='pdf_documents') suggestion_search = Search(index="pdf_documents")
suggestion_search = suggestion_search.suggest( suggestion_search = suggestion_search.suggest(
'term_suggestion', "term_suggestion",
cleaned_query, cleaned_query,
term={ term={"field": "content", "suggest_mode": "popular", "size": 5},
'field': 'content',
'suggest_mode': 'popular',
'size': 5
}
) )
suggestion_response = suggestion_search.execute() suggestion_response = suggestion_search.execute()
if hasattr(suggestion_response, 'suggest') and hasattr(suggestion_response.suggest, 'term_suggestion'): if hasattr(suggestion_response, "suggest") and hasattr(
suggestion_response.suggest, "term_suggestion"
):
for suggestion in suggestion_response.suggest.term_suggestion: for suggestion in suggestion_response.suggest.term_suggestion:
for option in suggestion.options: for option in suggestion.options:
suggestions.append(option.text) suggestions.append(option.text)
@ -173,15 +232,19 @@ def search_view(request):
# Cria uma correção ortográfica se necessário # Cria uma correção ortográfica se necessário
if suggestions and total_hits == 0: if suggestions and total_hits == 0:
corrected_query = cleaned_query corrected_query = cleaned_query
for suggestion_term in suggestion_response.suggest.term_suggestion: for (
suggestion_term
) in suggestion_response.suggest.term_suggestion:
if suggestion_term.options: if suggestion_term.options:
# Substitui palavras incorretas por sugestões # Substitui palavras incorretas por sugestões
word_to_replace = suggestion_term.text word_to_replace = suggestion_term.text
corrected_word = suggestion_term.options[0].text corrected_word = suggestion_term.options[0].text
corrected_query = re.sub(r'\b' + re.escape(word_to_replace) + r'\b', corrected_query = re.sub(
r"\b" + re.escape(word_to_replace) + r"\b",
corrected_word, corrected_word,
corrected_query, corrected_query,
flags=re.IGNORECASE) flags=re.IGNORECASE,
)
# Reconstrói a consulta original mantendo as frases entre aspas # Reconstrói a consulta original mantendo as frases entre aspas
if corrected_query != cleaned_query: if corrected_query != cleaned_query:
@ -192,44 +255,54 @@ def search_view(request):
# Busca por termos relacionados (apenas se houver poucos resultados) # Busca por termos relacionados (apenas se houver poucos resultados)
if total_hits < 3 and cleaned_query: if total_hits < 3 and cleaned_query:
related_terms = Search(index='pdf_documents') related_terms = Search(index="pdf_documents")
related_terms = related_terms.query( related_terms = related_terms.query(
'more_like_this', "more_like_this",
fields=['content', 'title'], fields=["content", "title"],
like=cleaned_query, like=cleaned_query,
min_term_freq=1, min_term_freq=1,
max_query_terms=10, max_query_terms=10,
min_doc_freq=1 min_doc_freq=1,
) )
related_terms = related_terms[:5] related_terms = related_terms[:5]
related_response = related_terms.execute() related_response = related_terms.execute()
for hit in related_response: for hit in related_response:
# Verifica se este documento já está nos resultados # Verifica se este documento já está nos resultados
if not any(r.get('id') == hit.meta.id for r in results): if not any(r.get("id") == hit.meta.id for r in results):
results.append({ results.append(
'id': hit.meta.id, {
'title': hit.title, "id": hit.meta.id,
'highlighted_title': hit.title, "title": hit.title,
'highlighted_content': hit.content[:300] + '...' if len(hit.content) > 300 else hit.content, "highlighted_title": hit.title,
'uploaded_at': hit.uploaded_at, "highlighted_content": (
'score': hit.meta.score, hit.content[:300] + "..."
'is_related': True if len(hit.content) > 300
}) else hit.content
),
"uploaded_at": hit.uploaded_at,
"score": hit.meta.score,
"is_related": True,
"pdf_url": pdf_url,
}
)
# Calcula a paginação # Calcula a paginação
total_pages = (total_hits + per_page - 1) // per_page if total_hits > 0 else 0 total_pages = (total_hits + per_page - 1) // per_page if total_hits > 0 else 0
# Renderiza o template com os resultados # Renderiza o template com os resultados
return render(request, 'diarios/search_results.html', { return render(
'query': query, request,
'results': results, "diarios/search_results.html",
'suggestions': suggestions[:5], # Limita a 5 sugestões {
'spelling_correction': spelling_correction, "query": query,
'total_hits': total_hits, "results": results,
'page': page, "suggestions": suggestions[:5], # Limita a 5 sugestões
'total_pages': total_pages, "spelling_correction": spelling_correction,
'page_range': range(max(1, page-2), min(total_pages+1, page+3)), "total_hits": total_hits,
'has_exact_phrases': bool(exact_phrases) "page": page,
}) "total_pages": total_pages,
"page_range": range(max(1, page - 2), min(total_pages + 1, page + 3)),
"has_exact_phrases": bool(exact_phrases),
},
)

View File

@ -3,6 +3,7 @@ To understand why this file is here, please read:
https://cookiecutter-django.readthedocs.io/en/latest/5-help/faq.html#why-is-there-a-django-contrib-sites-directory-in-cookiecutter-django https://cookiecutter-django.readthedocs.io/en/latest/5-help/faq.html#why-is-there-a-django-contrib-sites-directory-in-cookiecutter-django
""" """
from django.conf import settings from django.conf import settings
from django.db import migrations from django.db import migrations

View File

@ -32,7 +32,9 @@ class Migration(migrations.Migration):
( (
"last_login", "last_login",
models.DateTimeField( models.DateTimeField(
blank=True, null=True, verbose_name="last login", blank=True,
null=True,
verbose_name="last login",
), ),
), ),
( (
@ -42,7 +44,8 @@ class Migration(migrations.Migration):
help_text="Designates that this user has all permissions without explicitly assigning them.", help_text="Designates that this user has all permissions without explicitly assigning them.",
verbose_name="superuser status", verbose_name="superuser status",
), ),
),( ),
(
"username", "username",
models.CharField( models.CharField(
error_messages={ error_messages={
@ -60,7 +63,9 @@ class Migration(migrations.Migration):
( (
"email", "email",
models.EmailField( models.EmailField(
blank=True, max_length=254, verbose_name="email address", blank=True,
max_length=254,
verbose_name="email address",
), ),
), ),
( (
@ -82,13 +87,16 @@ class Migration(migrations.Migration):
( (
"date_joined", "date_joined",
models.DateTimeField( models.DateTimeField(
default=django.utils.timezone.now, verbose_name="date joined", default=django.utils.timezone.now,
verbose_name="date joined",
), ),
), ),
( (
"name", "name",
models.CharField( models.CharField(
blank=True, max_length=255, verbose_name="Name of User", blank=True,
max_length=255,
verbose_name="Name of User",
), ),
), ),
( (

View File

@ -14,7 +14,9 @@ class UserFactory(DjangoModelFactory[User]):
name = Faker("name") name = Faker("name")
@post_generation @post_generation
def password(self, create: bool, extracted: Sequence[Any], **kwargs): # noqa: FBT001 def password(
self, create: bool, extracted: Sequence[Any], **kwargs
): # noqa: FBT001
password = ( password = (
extracted extracted
if extracted if extracted

View File

@ -28,7 +28,7 @@ class UserUpdateView(LoginRequiredMixin, SuccessMessageMixin, UpdateView):
assert self.request.user.is_authenticated # type guard assert self.request.user.is_authenticated # type guard
return self.request.user.get_absolute_url() return self.request.user.get_absolute_url()
def get_object(self, queryset: QuerySet | None=None) -> User: def get_object(self, queryset: QuerySet | None = None) -> User:
assert self.request.user.is_authenticated # type guard assert self.request.user.is_authenticated # type guard
return self.request.user return self.request.user

View File

@ -31,3 +31,6 @@ django-debug-toolbar==5.0.1 # https://github.com/jazzband/django-debug-toolbar
django-extensions==3.2.3 # https://github.com/django-extensions/django-extensions django-extensions==3.2.3 # https://github.com/django-extensions/django-extensions
django-coverage-plugin==3.1.0 # https://github.com/nedbat/django_coverage_plugin django-coverage-plugin==3.1.0 # https://github.com/nedbat/django_coverage_plugin
pytest-django==4.10.0 # https://github.com/pytest-dev/pytest-django pytest-django==4.10.0 # https://github.com/pytest-dev/pytest-django
debugpy
black