import os
import sys
from pymongo import MongoClient
from bson import ObjectId
from pptx import Presentation
from utils import get_channel, get_logger, now
from config import MONGO
from data.lecture import insert_file_snippet
import base64
# [docs]
def png_to_base64(png_file_path):
    """Return the contents of a PNG file as Base64 text.

    The file is read in binary mode in one go and its bytes are
    Base64-encoded; no check is made that the content really is a PNG.

    Parameters:
        png_file_path: The relative or absolute path to the PNG image file.

    Returns:
        The Base64 encoding of the file as a ``str``. Failures are not
        raised: a missing file yields a fixed "not found" message and any
        other exception yields a message containing the exception text, so
        the returned string is an error description in those cases.
    """
    try:
        # Binary mode: the bytes are encoded as-is, no text decoding involved.
        with open(png_file_path, 'rb') as png_stream:
            raw_bytes = png_stream.read()
        b64_bytes = base64.b64encode(raw_bytes)
        # b64encode returns bytes; hand back text instead.
        return b64_bytes.decode('utf-8')
    except FileNotFoundError:
        # A missing file is reported through the return value.
        return "The specified file was not found. Please check the path is correct."
    except Exception as err:
        # Every other failure is likewise reported as text, not raised.
        return f"An error occurred during the conversion: {err!s}"
# [docs]
class SERVICE:
    """Service class for handling PowerPoint to text conversion tasks.

    A job is a document in the ``preclass.ppt2text`` MongoDB collection whose
    id is sent as the message body on the ``preclass-ppt2text`` RabbitMQ
    queue. :meth:`trigger` creates and enqueues a job, :meth:`callback`
    processes one message, and :meth:`launch_worker` runs the consumer loop.

    Static Attributes:
        _collection (MongoClient=preclass.ppt2text): MongoDB collection for storing job information

            .. :noindex:

        _queue_name (str): RabbitMQ queue name for the service. Default to `preclass-ppt2text`

            .. :noindex:

        _logger: Logger instance for the service

            .. :noindex:
    """

    # Created once, when the class body is executed (i.e. at import time).
    _collection = MongoClient(
        MONGO.HOST,
        MONGO.PORT
    ).preclass.ppt2text
    _queue_name = "preclass-ppt2text"
    _logger = get_logger(
        __name__=__name__,
        __file__=__file__,
    )

    @staticmethod
    def trigger(
        parent_service: str,
        lecture_id: ObjectId,
        parent_job_id: ObjectId
    ) -> ObjectId:
        """Trigger a new PPT to text conversion job.

        Inserts a job document into the collection and publishes its id (as a
        string) on the service queue.

        Args:
            parent_service (str): Name of the parent service
            lecture_id (ObjectId): MongoDB ObjectId of the lecture
            parent_job_id (ObjectId): MongoDB ObjectId of the parent job

        Returns:
            ObjectId: The ``_id`` of the inserted job document. Only the
            message body published to RabbitMQ is its string form.
        """
        connection, channel = get_channel(SERVICE._queue_name)
        try:
            SERVICE._logger.info("Pushing job to MONGO")
            job_id = SERVICE._collection.insert_one(
                dict(
                    parent_service=parent_service,
                    created_time=now(),
                    lecture_id=lecture_id,
                    parent_job_id=parent_job_id,
                )
            ).inserted_id
            SERVICE._logger.info("Pushing job to RabbitMQ")
            channel.basic_publish(
                exchange="",
                routing_key=SERVICE._queue_name,
                body=str(job_id)
            )
        finally:
            # Release the connection even when the insert or publish fails.
            connection.close()
        SERVICE._logger.info("Job pushed to RabbitMQ")
        return job_id

    @staticmethod
    def callback(ch, method, properties, body):
        """Process PowerPoint conversion jobs from the RabbitMQ queue.

        Loads the job document named by the message, runs the extraction on
        ``buffer/<lecture_id>/seed_file.pptx``, records ``completion_time`` on
        the job, publishes ``parent_job_id`` with ``parent_service`` as the
        routing key, and finally acknowledges the message.

        Args:
            ch: RabbitMQ channel
            method: RabbitMQ method frame
            properties: RabbitMQ properties
            body: Message body containing the job ID
        """
        job_id = ObjectId(body.decode())
        job = SERVICE._collection.find_one(dict(_id=job_id))
        if job is None:
            # No job document exists for this id, so the message can never be
            # processed. Acknowledge it to drop it instead of failing on the
            # subscripts below and leaving it unacknowledged.
            SERVICE._logger.error(
                f"PreClass PPT2TEXT Job {job_id} not found - dropping message"
            )
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return

        lecture_id = job["lecture_id"]
        parent_service = job["parent_service"]
        parent_job_id = job["parent_job_id"]
        SERVICE._logger.debug(f"Recieved PreClass PPT2TEXT Job - {lecture_id}")

        # NOTE(review): extract_text_from_ppt is neither defined nor imported
        # in the visible module source - confirm it is provided elsewhere,
        # otherwise this call raises NameError.
        extract_text_from_ppt(
            ppt_path=f"buffer/{lecture_id}/seed_file.pptx",
            png_path=f"buffer/{lecture_id}/pngs",
            lecture_id=lecture_id
        )

        SERVICE._collection.update_one(
            dict(_id=job_id),
            {"$set": dict(
                completion_time=now()
            )}
        )
        SERVICE._logger.info(f"Conversion Complete For {lecture_id}")

        # Hand the parent job id back to the parent service.
        parent_connection, parent_channel = get_channel(parent_service)
        try:
            parent_channel.basic_publish(
                exchange="",
                routing_key=parent_service,
                body=str(parent_job_id)
            )
        finally:
            # Release the connection even when the publish fails.
            parent_connection.close()

        # Acknowledge only after the parent has been notified.
        ch.basic_ack(delivery_tag=method.delivery_tag)

    @staticmethod
    def launch_worker():
        """Launch the worker to process PowerPoint conversion jobs.

        Starts consuming messages from the RabbitMQ queue and processes them
        with :meth:`callback` (manual acknowledgement).
        Can be terminated with CTRL+C.
        """
        try:
            _connection, channel = get_channel(SERVICE._queue_name)
            channel.basic_consume(
                queue=SERVICE._queue_name,
                on_message_callback=SERVICE.callback,
                auto_ack=False,
            )
            SERVICE._logger.info('Worker Launched. To exit press CTRL+C')
            channel.start_consuming()
        except KeyboardInterrupt:
            SERVICE._logger.warning('Shutting Off Worker')
            try:
                sys.exit(0)
            except SystemExit:
                # sys.exit raises SystemExit, which is caught right here, so
                # the process always ends through os._exit instead.
                os._exit(0)
if __name__ == "__main__":
    # Script entry point: announce start-up, then block in the consumer loop.
    # The banner names this service (ppt2text); it previously said PPTX2PDF.
    SERVICE._logger.warning("STARTING PRECLASS-PPT2TEXT SERVICE")
    SERVICE.launch_worker()