2025-03-14 17:36:14 +01:00
|
|
|
import json
import logging
import os
from urllib.parse import urlparse

import PyPDF2
import requests
from babel.dates import format_date
from django.core.files.base import ContentFile
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models
|
2025-03-07 07:10:13 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class PDFDocument(models.Model):
    """Uploaded PDF together with the text extracted from it.

    Whenever the instance is saved with a file set, two derived fields are
    recomputed from that file:

    * ``content``: the text of every page, joined with newlines.
    * ``page_content``: a JSON-encoded list with one
      ``{"number": <1-based page index>, "content": <page text>}`` entry
      per page.
    """

    title = models.CharField(max_length=255)
    file = models.FileField(upload_to="pdfs/")
    content = models.TextField(blank=True)
    uploaded_at = models.DateTimeField(auto_now_add=True)
    # JSON string (produced with json.dumps), not a JSONField.
    page_content = models.TextField(blank=True)

    def __str__(self):
        return self.title

    def save(self, *args, **kwargs):
        """Extract the PDF text into the derived fields, then save the row.

        All positional and keyword arguments are forwarded unchanged to
        ``models.Model.save``. Errors raised by PyPDF2 while reading the
        file are not caught here and propagate to the caller.
        """
        if self.file:
            pdf = PyPDF2.PdfReader(self.file)
            # Extract each page exactly once (text extraction is the costly
            # step) and reuse the result for both derived fields. Pages that
            # yield nothing are stored as "" so the join below cannot fail.
            page_texts = [page.extract_text() or "" for page in pdf.pages]

            self.content = "\n".join(page_texts)
            self.page_content = json.dumps(
                [
                    {"number": number, "content": text}
                    for number, text in enumerate(page_texts, start=1)
                ]
            )

        super().save(*args, **kwargs)
|
2025-03-07 16:01:08 +01:00
|
|
|
|
2025-03-14 17:36:14 +01:00
|
|
|
|
2025-03-07 16:01:08 +01:00
|
|
|
class TipoDiarioOficial(models.Model):
    """Named type that a ``DiarioOficial`` can reference; names are unique."""

    nome = models.CharField(unique=True, max_length=100)

    class Meta:
        verbose_name_plural = "Tipos de Diários Oficiais"

    def __str__(self):
        # The display form of a type is simply its name.
        return self.nome
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DiarioOficial(models.Model):
    """One issue of an official gazette.

    An issue can carry an uploaded PDF (``arquivo``), a URL to the PDF
    (``link``), or both. When only the link is set, ``save()`` tries to
    download the PDF, attach it to ``arquivo`` and store the text of each
    page in ``page_content``.
    """

    # Upper bound, in seconds, for the PDF download performed in save();
    # without it a stalled server would block save() indefinitely.
    _DOWNLOAD_TIMEOUT_SECONDS = 30

    data = models.DateField()
    arquivo = models.FileField(upload_to="diarios_oficiais/", blank=True, null=True)
    tipo = models.ForeignKey(
        TipoDiarioOficial,
        blank=True,
        null=True,
        on_delete=models.SET_NULL,
        related_name="diarios",
    )
    numero = models.CharField(max_length=20, unique=True)
    link = models.URLField(blank=True, null=True, unique=True)
    # List of {"number": <1-based page index>, "content": <page text>}
    # dicts, filled in by save(); pages without text are omitted.
    page_content = models.JSONField(encoder=DjangoJSONEncoder, blank=True, null=True)

    def save(self, *args, **kwargs):
        """Download and parse the linked PDF if needed, then save the row.

        The download only happens when ``link`` is set and ``arquivo`` is
        empty. It is best effort: any failure while downloading or parsing
        is logged and the instance is still saved with whatever data it
        has. All arguments are forwarded unchanged to ``models.Model.save``.
        """
        logger = logging.getLogger(__name__)

        # If there is a link but no file yet, download the PDF and extract
        # its content.
        if self.link and not self.arquivo:
            try:
                response = requests.get(
                    self.link, timeout=self._DOWNLOAD_TIMEOUT_SECONDS
                )
                response.raise_for_status()  # Reject non-success HTTP statuses.

                # Derive the file name from the URL path, falling back to
                # the issue number when the path has no final component.
                file_name = (
                    os.path.basename(urlparse(self.link).path)
                    or f"diario_{self.numero}.pdf"
                )

                # save=False: only attach the file; the row itself is
                # persisted by the super().save() call below.
                self.arquivo.save(file_name, ContentFile(response.content), save=False)

                pdf = PyPDF2.PdfReader(self.arquivo)
                pages_data = []
                for number, page in enumerate(pdf.pages, start=1):
                    page_text = page.extract_text()
                    if page_text:  # Skip pages with no extractable text.
                        pages_data.append({"number": number, "content": page_text})

                self.page_content = pages_data

            except requests.RequestException as e:
                logger.warning("Erro ao baixar o PDF: %s", e)
            # PdfReadError is defined in PyPDF2.errors; it is not exported
            # from the package root.
            except PyPDF2.errors.PdfReadError as e:
                logger.warning("Erro ao ler o PDF: %s", e)
            except Exception:
                # Deliberate catch-all so a bad PDF never blocks saving the
                # record; the traceback is kept in the log.
                logger.exception("Erro inesperado")

        super().save(*args, **kwargs)

    @property
    def data_formatada(self):
        """Return ``data`` formatted as a long date in the pt_BR locale."""
        return format_date(self.data, format="long", locale="pt_BR")

    @property
    def is_online(self):
        """Return True when the issue has a non-empty ``link``."""
        return bool(self.link)

    def __str__(self):
        # ``tipo`` is nullable, so the type name is left out when unset
        # instead of raising AttributeError on ``None.nome``.
        prefix = f"Diário {self.tipo.nome}" if self.tipo else "Diário"
        return f"{prefix} nº {self.numero}, {self.data_formatada}"

    class Meta:
        # NOTE(review): this duplicates unique=True on ``numero``; it is kept
        # unchanged here so that no schema migration is required.
        constraints = [models.UniqueConstraint(fields=["numero"], name="unique_numero")]
        verbose_name_plural = "Diários Oficiais"
|