Files
Diarios-Oficiais-ALEMS/diarios/models.py

35 lines
1014 B
Python

from django.db import models
import PyPDF2
import json
class PDFDocument(models.Model):
    """An uploaded PDF together with the text extracted from it.

    The text fields are (re)computed from ``file`` by :meth:`save`; callers
    are not expected to fill them in by hand.
    """

    # Human-readable title; also used as the string representation.
    title = models.CharField(max_length=255)
    # The PDF itself, stored under the "pdfs/" upload directory.
    file = models.FileField(upload_to="pdfs/")
    # Text of every page joined with "\n".
    content = models.TextField(blank=True)
    # Set once, when the row is first created.
    uploaded_at = models.DateTimeField(auto_now_add=True)
    # JSON-encoded list of {"number": <1-based page index>, "content": <text>}.
    page_content = models.TextField(blank=True)

    def __str__(self):
        """Return the document title."""
        return self.title

    def save(self, *args, **kwargs):
        """Extract the PDF text into the text fields, then persist the row.

        When ``file`` is set, the PDF is parsed on every call and both
        ``content`` and ``page_content`` are overwritten. When ``file`` is
        empty the text fields are left untouched. Positional and keyword
        arguments are forwarded unchanged to ``Model.save``.

        Exceptions raised by PyPDF2 while reading the file are not caught
        here and propagate to the caller.
        """
        if self.file:
            reader = PyPDF2.PdfReader(self.file)

            # Extract each page exactly once; extraction is the expensive
            # step. Coerce a missing result to "" so str.join cannot fail.
            page_texts = [page.extract_text() or "" for page in reader.pages]

            self.content = "\n".join(page_texts)
            self.page_content = json.dumps(
                [
                    {"number": number, "content": text}
                    for number, text in enumerate(page_texts, start=1)
                ]
            )

            # Parsing moved the stream position; rewind so the storage
            # backend copies the upload from its first byte.
            self.file.seek(0)

        super().save(*args, **kwargs)