diff --git a/compose/local/django/start b/compose/local/django/start
index bc81e2a..c5cabe5 100644
--- a/compose/local/django/start
+++ b/compose/local/django/start
@@ -6,4 +6,5 @@ set -o nounset
python manage.py migrate
+
exec python manage.py runserver_plus 0.0.0.0:8005
diff --git a/config/settings/base.py b/config/settings/base.py
index 68af005..25df074 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -83,7 +83,7 @@ THIRD_PARTY_APPS = [
LOCAL_APPS = [
"diários_oficiais_alems.users",
"diarios",
- 'django_elasticsearch_dsl',
+ "django_elasticsearch_dsl",
]
# https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
@@ -278,7 +278,9 @@ ACCOUNT_FORMS = {"signup": "diários_oficiais_alems.users.forms.UserSignupForm"}
# https://docs.allauth.org/en/latest/socialaccount/configuration.html
SOCIALACCOUNT_ADAPTER = "diários_oficiais_alems.users.adapters.SocialAccountAdapter"
# https://docs.allauth.org/en/latest/socialaccount/configuration.html
-SOCIALACCOUNT_FORMS = {"signup": "diários_oficiais_alems.users.forms.UserSocialSignupForm"}
+SOCIALACCOUNT_FORMS = {
+ "signup": "diários_oficiais_alems.users.forms.UserSocialSignupForm"
+}
# django-compressor
# ------------------------------------------------------------------------------
# https://django-compressor.readthedocs.io/en/latest/quickstart/#installation
@@ -288,8 +290,6 @@ STATICFILES_FINDERS += ["compressor.finders.CompressorFinder"]
# Elastic Search
# ------------------------------------------------------------------------------
ELASTICSEARCH_DSL = {
- 'default': {
- 'hosts': 'http://elasticsearch:9200' # same as above
- },
+ "default": {"hosts": "http://elasticsearch:9200"}, # same as above
}
-ELASTICSEARCH_HOSTS="http://elasticsearch:9200"
+ELASTICSEARCH_HOSTS = "http://elasticsearch:9200"
diff --git a/config/settings/local.py b/config/settings/local.py
index 1787e95..616c3a3 100644
--- a/config/settings/local.py
+++ b/config/settings/local.py
@@ -30,7 +30,8 @@ CACHES = {
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#email-backend
EMAIL_BACKEND = env(
- "DJANGO_EMAIL_BACKEND", default="django.core.mail.backends.console.EmailBackend",
+ "DJANGO_EMAIL_BACKEND",
+ default="django.core.mail.backends.console.EmailBackend",
)
# WhiteNoise
@@ -66,7 +67,7 @@ if env("USE_DOCKER") == "yes":
# ------------------------------------------------------------------------------
# This is a custom setting for RunServerPlus to fix reloader issue in Windows docker environment
# Werkzeug reloader type [auto, watchdog, or stat]
- RUNSERVERPLUS_POLLER_RELOADER_TYPE = 'stat'
+ RUNSERVERPLUS_POLLER_RELOADER_TYPE = "stat"
# If you have CPU and IO load issues, you can increase this poller interval e.g) 5
RUNSERVERPLUS_POLLER_RELOADER_INTERVAL = 1
diff --git a/config/urls.py b/config/urls.py
index c0786ff..c48dcde 100644
--- a/config/urls.py
+++ b/config/urls.py
@@ -20,7 +20,7 @@ urlpatterns = [
path("users/", include("diários_oficiais_alems.users.urls", namespace="users")),
path("accounts/", include("allauth.urls")),
# Your stuff: custom urls includes go here
- path("diarios/", include("diarios.urls")),
+ path("diarios/", include("diarios.urls")),
# Media files
*static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT),
]
diff --git a/diarios/apps.py b/diarios/apps.py
index 55164d7..8af1284 100644
--- a/diarios/apps.py
+++ b/diarios/apps.py
@@ -2,5 +2,5 @@ from django.apps import AppConfig
class DiariosConfig(AppConfig):
- default_auto_field = 'django.db.models.BigAutoField'
- name = 'diarios'
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "diarios"
diff --git a/diarios/documents.py b/diarios/documents.py
index 35bff2d..6fc178a 100644
--- a/diarios/documents.py
+++ b/diarios/documents.py
@@ -2,67 +2,66 @@ from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import PDFDocument
+
@registry.register_document
class PDFDocumentDocument(Document):
title = fields.TextField()
- content = fields.TextField(analyzer='portuguese')
-
- class Index:
- name = 'pdf_documents'
- settings = {
- 'number_of_shards': 1,
- 'number_of_replicas': 0,
- 'analysis': {
- 'analyzer': {
- 'portuguese': {
- 'type': 'custom',
- 'tokenizer': 'standard',
- 'filter': [
- 'lowercase',
- 'ascii_folding',
- 'portuguese_stemmer',
- 'stop',
- 'portuguese_synonyms',
- ]
- },
- 'portuguese_search': {
- 'type': 'custom',
- 'tokenizer': 'standard',
- 'filter': [
- 'lowercase',
- 'ascii_folding',
- 'portuguese_stemmer',
- 'stop',
- 'suggest_shingle',
- ]
- }
- },
- 'filter': {
- 'suggest_shingle': {
- 'type': 'shingle',
- 'min_shingle_size': 2,
- 'max_shingle_size': 3
- },
- 'stop': {
- 'type': 'stop',
- 'stopwords': '_portuguese_'
- },
- 'ascii_folding': {
- 'type': 'asciifolding'
- },
- 'portuguese_stemmer': {
- 'type': 'stemmer',
- 'language': 'portuguese'
- },
- 'portuguese_synonyms':{
- 'type': 'synonym',
- 'synonyms_path': 'synonyms.txt',
- 'expand': True
- }
- }
- }
+ content = fields.TextField(analyzer="portuguese")
+ pages = fields.NestedField(
+ properties={
+ "number": fields.IntegerField(),
+ "content": fields.TextField(analyzer="portuguese"),
}
-
+ )
+
+ class Index:
+ name = "pdf_documents"
+ settings = {
+ "number_of_shards": 1,
+ "number_of_replicas": 0,
+ "analysis": {
+ "analyzer": {
+ "portuguese": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "ascii_folding",
+ "portuguese_stemmer",
+ "stop",
+ "portuguese_synonyms",
+ ],
+ },
+ "portuguese_search": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "ascii_folding",
+ "portuguese_stemmer",
+ "stop",
+ "suggest_shingle",
+ ],
+ },
+ },
+ "filter": {
+ "suggest_shingle": {
+ "type": "shingle",
+ "min_shingle_size": 2,
+ "max_shingle_size": 3,
+ },
+ "stop": {"type": "stop", "stopwords": "_portuguese_"},
+ "ascii_folding": {"type": "asciifolding"},
+ "portuguese_stemmer": {"type": "stemmer", "language": "portuguese"},
+ "portuguese_synonyms": {
+ "type": "synonym",
+ "synonyms_path": "synonyms.txt",
+ "expand": True,
+ },
+ },
+ },
+ }
+
class Django:
model = PDFDocument
- fields = ['uploaded_at']
+        fields = ["uploaded_at", "file"]
+
+    def prepare_pages(self, instance):
+        """Return the value indexed into the nested ``pages`` field.
+
+        The model stores its per-page text as a JSON string in
+        ``page_content`` and exposes no ``pages`` attribute in this change,
+        so the nested field is filled from that JSON instead.
+
+        Args:
+            instance: The ``PDFDocument`` row being indexed.
+
+        Returns:
+            The decoded list of ``{"number": ..., "content": ...}`` entries,
+            or an empty list when ``page_content`` is blank.
+
+        Raises:
+            json.JSONDecodeError: If ``page_content`` holds invalid JSON.
+        """
+        import json  # local import: the module's import block is outside this hunk
+
+        if not instance.page_content:
+            return []
+        return json.loads(instance.page_content)
diff --git a/diarios/migrations/0001_initial.py b/diarios/migrations/0001_initial.py
index ec3516f..a146501 100644
--- a/diarios/migrations/0001_initial.py
+++ b/diarios/migrations/0001_initial.py
@@ -7,18 +7,25 @@ class Migration(migrations.Migration):
initial = True
- dependencies = [
- ]
+ dependencies = []
operations = [
migrations.CreateModel(
- name='PDFDocument',
+ name="PDFDocument",
fields=[
- ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
- ('title', models.CharField(max_length=255)),
- ('file', models.FileField(upload_to='pdfs/')),
- ('content', models.TextField(blank=True)),
- ('uploaded_at', models.DateTimeField(auto_now_add=True)),
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("title", models.CharField(max_length=255)),
+ ("file", models.FileField(upload_to="pdfs/")),
+ ("content", models.TextField(blank=True)),
+ ("uploaded_at", models.DateTimeField(auto_now_add=True)),
],
),
]
diff --git a/diarios/migrations/0002_pdfdocument_page_content.py b/diarios/migrations/0002_pdfdocument_page_content.py
new file mode 100644
index 0000000..8aafd71
--- /dev/null
+++ b/diarios/migrations/0002_pdfdocument_page_content.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.12 on 2025-03-07 13:47
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("diarios", "0001_initial"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="pdfdocument",
+ name="page_content",
+ field=models.TextField(blank=True),
+ ),
+ ]
diff --git a/diarios/models.py b/diarios/models.py
index d5391b9..1939812 100644
--- a/diarios/models.py
+++ b/diarios/models.py
@@ -1,12 +1,14 @@
from django.db import models
import PyPDF2
+import json
class PDFDocument(models.Model):
title = models.CharField(max_length=255)
- file = models.FileField(upload_to='pdfs/')
+ file = models.FileField(upload_to="pdfs/")
content = models.TextField(blank=True)
uploaded_at = models.DateTimeField(auto_now_add=True)
+ page_content = models.TextField(blank=True)
def __str__(self):
return self.title
@@ -15,9 +17,18 @@ class PDFDocument(models.Model):
if self.file:
pdf = PyPDF2.PdfReader(self.file)
texto = []
- for pagina in pdf.pages:
+ pages_data = []
+
+            for number, pagina in enumerate(pdf.pages, start=1):
+                # Extract each page's text once; it feeds both the per-page JSON
+                # entry and the full-document text list.
+                page_text = pagina.extract_text()
+                pages_data.append(
+                    {
+                        "number": number,
+                        "content": page_text,
+                    }
+                )
-                texto.append(pagina.extract_text())
+                texto.append(page_text)
- self.content = '\n'.join(texto)
+ self.content = "\n".join(texto)
+ self.page_content = json.dumps(pages_data)
super().save(*args, **kwargs)
-
diff --git a/diarios/signals.py b/diarios/signals.py
index bceff3c..ff262d7 100644
--- a/diarios/signals.py
+++ b/diarios/signals.py
@@ -13,6 +13,5 @@ def extract_text(sender, instance, created, **kwargs):
text = []
for page in pdf.pages:
text.append(page.extract_text())
- instance.content = '\n'.join(text)
- instance.save(update_fields=['content'])
-
+ instance.content = "\n".join(text)
+ instance.save(update_fields=["content"])
diff --git a/diarios/templates/diarios/search_results.html b/diarios/templates/diarios/search_results.html
index 86819f0..3a5b304 100644
--- a/diarios/templates/diarios/search_results.html
+++ b/diarios/templates/diarios/search_results.html
@@ -46,7 +46,16 @@
.result-item:last-child {
border-bottom: none;
}
+ .pdf-link {
+ color: #e74c3c;
+ margin-left: 10px;
+ font-size: 0.8em;
+ text-decoration: none;
+ }
.result-title {
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
color: #1a0dab;
font-weight: 500;
margin-bottom: 5px;
@@ -218,12 +227,23 @@ document.getElementById('suggestionsBox').addEventListener('click', function(e)
{% endif %}
{{ result.highlighted_content|safe }}
-
- {{ result.uploaded_at|date:"d/m/Y" }}
-
+
{% endfor %}
diff --git a/diarios/urls.py b/diarios/urls.py
index 0f0075f..9b66c7c 100644
--- a/diarios/urls.py
+++ b/diarios/urls.py
@@ -2,6 +2,6 @@ from django.urls import path
from .views import search_view, spellcheck_view
urlpatterns = [
- path('pesquisa/', search_view, name='search_view'),
- path('spellcheck/', spellcheck_view, name='spellcheck_view'),
+ path("pesquisa/", search_view, name="search_view"),
+ path("spellcheck/", spellcheck_view, name="spellcheck_view"),
]
diff --git a/diarios/views.py b/diarios/views.py
index 49ef3e3..a43babc 100644
--- a/diarios/views.py
+++ b/diarios/views.py
@@ -1,9 +1,14 @@
+import json
+import logging
from django.shortcuts import render
from elasticsearch_dsl import Search, Q
from elasticsearch_dsl.connections import connections
from django.conf import settings
import re
-
+from .models import PDFDocument
from django.http import JsonResponse
+
+# Module logger; search_view reports undecodable page JSON through it.
+logger = logging.getLogger(__name__)
@@ -12,224 +14,295 @@ connections.create_connection(hosts=[settings.ELASTICSEARCH_HOSTS])
def spellcheck_view(request):
- query = request.GET.get('q', '')
+ query = request.GET.get("q", "")
suggestions = []
-
+
if query:
- s = Search(index='pdf_documents')
- s = s.suggest('auto_correct', query,
- phrase={
- 'field': 'suggest',
- 'size': 3,
- 'gram_size': 3,
- 'confidence': 2.0,
- 'direct_generator': [{
- 'field': 'suggest',
- 'suggest_mode': 'popular'
- }]
- })
+ s = Search(index="pdf_documents")
+ s = s.suggest(
+ "auto_correct",
+ query,
+ phrase={
+ "field": "suggest",
+ "size": 3,
+ "gram_size": 3,
+ "confidence": 2.0,
+ "direct_generator": [{"field": "suggest", "suggest_mode": "popular"}],
+ },
+ )
response = s.execute()
-
- if hasattr(response.suggest, 'auto_correct'):
+
+ if hasattr(response.suggest, "auto_correct"):
for option in response.suggest.auto_correct[0].options:
suggestions.append(option.text)
-
- return JsonResponse({'suggestions': suggestions})
+
+ return JsonResponse({"suggestions": suggestions})
def search_view(request):
- query = request.GET.get('q', '') # Obtém o termo de pesquisa da URL
- page = int(request.GET.get('page', 1))
-
+ query = request.GET.get("q", "") # Obtém o termo de pesquisa da URL
+    page = request.GET.get("page", 1)  # Requested page number (defaults to 1)
+
+    # Non-numeric input falls back to page 1. Values below 1 are clamped to 1
+    # because the pagination slice built further down would otherwise get a
+    # negative start offset.
+    try:
+        page = max(1, int(page))
+    except ValueError:
+        page = 1
+
results = []
suggestions = []
spelling_correction = None
total_hits = 0
per_page = 10
-
+
if query:
# Processamento especial para termos entre aspas
exact_phrases = re.findall(r'"([^"]*)"', query)
-
+
# Remove os termos entre aspas da consulta principal
cleaned_query = query
for phrase in exact_phrases:
- cleaned_query = cleaned_query.replace(f'"{phrase}"', '')
-
+ cleaned_query = cleaned_query.replace(f'"{phrase}"', "")
+
# Remove espaços extras e pontuação desnecessária
- cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip()
-
+ cleaned_query = re.sub(r"\s+", " ", cleaned_query).strip()
+
# Cria uma consulta no Elasticsearch
- search = Search(index='pdf_documents')
-
+ search = Search(index="pdf_documents")
+
# Lista para armazenar todas as consultas
queries = []
-
+
# Adiciona consulta para termos gerais (com fuzziness para tolerância a erros)
if cleaned_query:
queries.append(
- Q('multi_match',
- query=cleaned_query,
- fields=['title^3', 'content^2', 'synonyms^1'],
- fuzziness='AUTO',
- boost=2,)
- )
- queries.append(
- Q('match',
- synonyms={
- 'query': cleaned_query,
- 'boost': 0.5
- })
- )
-
+ Q(
+ "multi_match",
+ query=cleaned_query,
+ fields=["title^3", "content^2", "synonyms^1"],
+ fuzziness="AUTO",
+ boost=2,
+ )
+ )
+ queries.append(Q("match", synonyms={"query": cleaned_query, "boost": 0.5}))
+
# Adiciona consultas exatas para frases entre aspas (sem fuzziness)
for phrase in exact_phrases:
if phrase.strip():
# Consulta de frase exata para o título com peso alto
queries.append(
- Q('match_phrase',
- title={
- 'query': phrase,
- 'boost': 3,
- 'slop': 0 # Sem flexibilidade na ordem das palavras
- })
+ Q(
+ "match_phrase",
+ title={
+ "query": phrase,
+ "boost": 3,
+ "slop": 0, # Sem flexibilidade na ordem das palavras
+ },
+ )
)
-
+
# Consulta de frase exata para o conteúdo com peso médio
queries.append(
- Q('match_phrase',
- content={
- 'query': phrase,
- 'boost': 2,
- 'slop': 0 # Sem flexibilidade na ordem das palavras
- })
+ Q(
+ "match_phrase",
+ content={
+ "query": phrase,
+ "boost": 2,
+ "slop": 0, # Sem flexibilidade na ordem das palavras
+ },
+ )
)
-
+
# Combina as consultas com OR (se houver alguma)
if queries:
- search = search.query(
- Q('bool', should=queries, minimum_should_match=1)
- )
-
+ search = search.query(Q("bool", should=queries, minimum_should_match=1))
+
# Configuração do highlight para mostrar mais contexto
- search = search.highlight('content', fragment_size=300, number_of_fragments=2, pre_tags=[''], post_tags=[''])
- search = search.highlight('title', fragment_size=300, number_of_fragments=1, pre_tags=[''], post_tags=[''])
-
+ search = search.highlight(
+ "content",
+ fragment_size=300,
+ number_of_fragments=2,
+ pre_tags=[""],
+ post_tags=[""],
+ )
+ search = search.highlight(
+ "title",
+ fragment_size=300,
+ number_of_fragments=1,
+ pre_tags=[""],
+ post_tags=[""],
+ )
+
# Paginação
- search = search[(page-1)*per_page:page*per_page]
-
+ search = search[(page - 1) * per_page : page * per_page]
+
# Executa a consulta
response = search.execute()
total_hits = response.hits.total.value
-
+
# Processa os resultados
for hit in response:
+ # Obter o objeto PDFDocument correspondente
+ try:
+ pdf_doc = PDFDocument.objects.get(id=hit.meta.id)
+ pdf_url = pdf_doc.file.url # URL do PDF
+
+ matching_pages = []
+ if hasattr(hit.meta, 'highlight') and hasattr(hit.meta.highlight, 'pages.content'):
+ for highlight in hit.meta.highlight['pages.content']:
+ page_matches = re.findall(r'page_(\d+)', highlight)
+ if page_matches:
+ matching_pages.append(int(page_matches[0]))
+ if not matching_pages and query:
+ if pdf_doc.page_content:
+ try:
+ page_data = json.loads(pdf_doc.page_content)
+ for page_d in page_data:
+ if query.lower() in page_d['content'].lower():
+ matching_pages.append(page_d['number'])
+ except json.JSONDecodeError as e:
+ logger.error(f"Erro ao decodificar JSON para o documento {pdf_doc.id}: {e}")
+ page_data = []
+ else:
+ page_data = []
+
+ matching_pages = sorted(list(set(matching_pages)))
+ except PDFDocument.DoesNotExist:
+ pdf_url = ""
+ matching_pages = []
+
# Extrai o conteúdo destacado ou usa o original
- if hasattr(hit.meta, 'highlight') and hasattr(hit.meta.highlight, 'content'):
- highlighted_content = ' ... '.join(hit.meta.highlight.content)
+ if hasattr(hit.meta, "highlight") and hasattr(
+ hit.meta.highlight, "content"
+ ):
+ highlighted_content = " ... ".join(hit.meta.highlight.content)
else:
# Se não houver highlight, pegue os primeiros 300 caracteres
- highlighted_content = hit.content[:300] + '...' if len(hit.content) > 300 else hit.content
-
+ highlighted_content = (
+ hit.content[:300] + "..."
+ if len(hit.content) > 300
+ else hit.content
+ )
+
# Extrai o título destacado ou usa o original
- if hasattr(hit.meta, 'highlight') and hasattr(hit.meta.highlight, 'title'):
+ if hasattr(hit.meta, "highlight") and hasattr(
+ hit.meta.highlight, "title"
+ ):
highlighted_title = hit.meta.highlight.title[0]
else:
highlighted_title = hit.title
-
+
# Verifica se o resultado corresponde a uma frase exata
- is_exact_match = any(phrase.lower() in hit.content.lower() or
- phrase.lower() in hit.title.lower()
- for phrase in exact_phrases)
-
- results.append({
- 'id': hit.meta.id,
- 'title': hit.title,
- 'highlighted_title': highlighted_title,
- 'highlighted_content': highlighted_content,
- 'uploaded_at': hit.uploaded_at,
- 'score': hit.meta.score,
- 'is_exact_match': is_exact_match
- })
-
- # Sugestões "Você quis dizer" (apenas para termos fora de aspas)
- if total_hits < 5 and cleaned_query:
- suggestion_search = Search(index='pdf_documents')
- suggestion_search = suggestion_search.suggest(
- 'term_suggestion',
- cleaned_query,
- term={
- 'field': 'content',
- 'suggest_mode': 'popular',
- 'size': 5
+ is_exact_match = any(
+ phrase.lower() in hit.content.lower()
+ or phrase.lower() in hit.title.lower()
+ for phrase in exact_phrases
+ )
+
+ results.append(
+ {
+ "id": hit.meta.id,
+ "title": hit.title,
+ "highlighted_title": highlighted_title,
+ "highlighted_content": highlighted_content,
+ "uploaded_at": hit.uploaded_at,
+ "score": hit.meta.score,
+ "is_exact_match": is_exact_match,
+ "pdf_url": pdf_url,
+ "matching_pages": matching_pages,
}
)
+
+ # Sugestões "Você quis dizer" (apenas para termos fora de aspas)
+ if total_hits < 5 and cleaned_query:
+ suggestion_search = Search(index="pdf_documents")
+ suggestion_search = suggestion_search.suggest(
+ "term_suggestion",
+ cleaned_query,
+ term={"field": "content", "suggest_mode": "popular", "size": 5},
+ )
suggestion_response = suggestion_search.execute()
-
- if hasattr(suggestion_response, 'suggest') and hasattr(suggestion_response.suggest, 'term_suggestion'):
+
+ if hasattr(suggestion_response, "suggest") and hasattr(
+ suggestion_response.suggest, "term_suggestion"
+ ):
for suggestion in suggestion_response.suggest.term_suggestion:
for option in suggestion.options:
suggestions.append(option.text)
-
+
# Cria uma correção ortográfica se necessário
if suggestions and total_hits == 0:
corrected_query = cleaned_query
- for suggestion_term in suggestion_response.suggest.term_suggestion:
+ for (
+ suggestion_term
+ ) in suggestion_response.suggest.term_suggestion:
if suggestion_term.options:
# Substitui palavras incorretas por sugestões
word_to_replace = suggestion_term.text
corrected_word = suggestion_term.options[0].text
- corrected_query = re.sub(r'\b' + re.escape(word_to_replace) + r'\b',
- corrected_word,
- corrected_query,
- flags=re.IGNORECASE)
-
+ corrected_query = re.sub(
+ r"\b" + re.escape(word_to_replace) + r"\b",
+ corrected_word,
+ corrected_query,
+ flags=re.IGNORECASE,
+ )
+
# Reconstrói a consulta original mantendo as frases entre aspas
if corrected_query != cleaned_query:
spelling_correction = corrected_query
for phrase in exact_phrases:
spelling_correction += f' "{phrase}"'
spelling_correction = spelling_correction.strip()
-
+
# Busca por termos relacionados (apenas se houver poucos resultados)
if total_hits < 3 and cleaned_query:
- related_terms = Search(index='pdf_documents')
+ related_terms = Search(index="pdf_documents")
related_terms = related_terms.query(
- 'more_like_this',
- fields=['content', 'title'],
+ "more_like_this",
+ fields=["content", "title"],
like=cleaned_query,
min_term_freq=1,
max_query_terms=10,
- min_doc_freq=1
+ min_doc_freq=1,
)
related_terms = related_terms[:5]
related_response = related_terms.execute()
-
+
for hit in related_response:
# Verifica se este documento já está nos resultados
- if not any(r.get('id') == hit.meta.id for r in results):
- results.append({
- 'id': hit.meta.id,
- 'title': hit.title,
- 'highlighted_title': hit.title,
- 'highlighted_content': hit.content[:300] + '...' if len(hit.content) > 300 else hit.content,
- 'uploaded_at': hit.uploaded_at,
- 'score': hit.meta.score,
- 'is_related': True
- })
-
+ if not any(r.get("id") == hit.meta.id for r in results):
+                    results.append(
+                        {
+                            "id": hit.meta.id,
+                            "title": hit.title,
+                            "highlighted_title": hit.title,
+                            "highlighted_content": (
+                                hit.content[:300] + "..."
+                                if len(hit.content) > 300
+                                else hit.content
+                            ),
+                            "uploaded_at": hit.uploaded_at,
+                            "score": hit.meta.score,
+                            "is_related": True,
+                            # Read the URL from this hit's own indexed "file" field.
+                            # The pdf_url local belongs to the primary-results loop:
+                            # it is unset when that loop never ran and otherwise
+                            # points at a different document.
+                            "pdf_url": getattr(hit, "file", ""),
+                        }
+                    )
+
# Calcula a paginação
total_pages = (total_hits + per_page - 1) // per_page if total_hits > 0 else 0
-
- # Renderiza o template com os resultados
- return render(request, 'diarios/search_results.html', {
- 'query': query,
- 'results': results,
- 'suggestions': suggestions[:5], # Limita a 5 sugestões
- 'spelling_correction': spelling_correction,
- 'total_hits': total_hits,
- 'page': page,
- 'total_pages': total_pages,
- 'page_range': range(max(1, page-2), min(total_pages+1, page+3)),
- 'has_exact_phrases': bool(exact_phrases)
- })
+ # Renderiza o template com os resultados
+ return render(
+ request,
+ "diarios/search_results.html",
+ {
+ "query": query,
+ "results": results,
+ "suggestions": suggestions[:5], # Limita a 5 sugestões
+ "spelling_correction": spelling_correction,
+ "total_hits": total_hits,
+ "page": page,
+ "total_pages": total_pages,
+ "page_range": range(max(1, page - 2), min(total_pages + 1, page + 3)),
+ "has_exact_phrases": bool(exact_phrases),
+ },
+ )
diff --git a/diários_oficiais_alems/contrib/sites/migrations/0003_set_site_domain_and_name.py b/diários_oficiais_alems/contrib/sites/migrations/0003_set_site_domain_and_name.py
index ab4649b..e24c352 100644
--- a/diários_oficiais_alems/contrib/sites/migrations/0003_set_site_domain_and_name.py
+++ b/diários_oficiais_alems/contrib/sites/migrations/0003_set_site_domain_and_name.py
@@ -3,6 +3,7 @@ To understand why this file is here, please read:
https://cookiecutter-django.readthedocs.io/en/latest/5-help/faq.html#why-is-there-a-django-contrib-sites-directory-in-cookiecutter-django
"""
+
from django.conf import settings
from django.db import migrations
diff --git a/diários_oficiais_alems/users/migrations/0001_initial.py b/diários_oficiais_alems/users/migrations/0001_initial.py
index 3f5a0a3..073ee0d 100644
--- a/diários_oficiais_alems/users/migrations/0001_initial.py
+++ b/diários_oficiais_alems/users/migrations/0001_initial.py
@@ -32,7 +32,9 @@ class Migration(migrations.Migration):
(
"last_login",
models.DateTimeField(
- blank=True, null=True, verbose_name="last login",
+ blank=True,
+ null=True,
+ verbose_name="last login",
),
),
(
@@ -42,7 +44,8 @@ class Migration(migrations.Migration):
help_text="Designates that this user has all permissions without explicitly assigning them.",
verbose_name="superuser status",
),
- ),(
+ ),
+ (
"username",
models.CharField(
error_messages={
@@ -60,7 +63,9 @@ class Migration(migrations.Migration):
(
"email",
models.EmailField(
- blank=True, max_length=254, verbose_name="email address",
+ blank=True,
+ max_length=254,
+ verbose_name="email address",
),
),
(
@@ -82,13 +87,16 @@ class Migration(migrations.Migration):
(
"date_joined",
models.DateTimeField(
- default=django.utils.timezone.now, verbose_name="date joined",
+ default=django.utils.timezone.now,
+ verbose_name="date joined",
),
),
(
"name",
models.CharField(
- blank=True, max_length=255, verbose_name="Name of User",
+ blank=True,
+ max_length=255,
+ verbose_name="Name of User",
),
),
(
diff --git a/diários_oficiais_alems/users/tests/factories.py b/diários_oficiais_alems/users/tests/factories.py
index dd40c80..f7b49f6 100644
--- a/diários_oficiais_alems/users/tests/factories.py
+++ b/diários_oficiais_alems/users/tests/factories.py
@@ -14,7 +14,9 @@ class UserFactory(DjangoModelFactory[User]):
name = Faker("name")
@post_generation
- def password(self, create: bool, extracted: Sequence[Any], **kwargs): # noqa: FBT001
+ def password(
+ self, create: bool, extracted: Sequence[Any], **kwargs
+ ): # noqa: FBT001
password = (
extracted
if extracted
diff --git a/diários_oficiais_alems/users/views.py b/diários_oficiais_alems/users/views.py
index dfb468e..a2c815c 100644
--- a/diários_oficiais_alems/users/views.py
+++ b/diários_oficiais_alems/users/views.py
@@ -28,7 +28,7 @@ class UserUpdateView(LoginRequiredMixin, SuccessMessageMixin, UpdateView):
assert self.request.user.is_authenticated # type guard
return self.request.user.get_absolute_url()
- def get_object(self, queryset: QuerySet | None=None) -> User:
+ def get_object(self, queryset: QuerySet | None = None) -> User:
assert self.request.user.is_authenticated # type guard
return self.request.user
diff --git a/requirements/local.txt b/requirements/local.txt
index 28c1947..0549181 100644
--- a/requirements/local.txt
+++ b/requirements/local.txt
@@ -31,3 +31,6 @@ django-debug-toolbar==5.0.1 # https://github.com/jazzband/django-debug-toolbar
django-extensions==3.2.3 # https://github.com/django-extensions/django-extensions
django-coverage-plugin==3.1.0 # https://github.com/nedbat/django_coverage_plugin
pytest-django==4.10.0 # https://github.com/pytest-dev/pytest-django
+
+debugpy  # https://github.com/microsoft/debugpy  TODO(review): unpinned, unlike the entries above
+black  # https://github.com/psf/black  TODO(review): unpinned, unlike the entries above