# Source code for routinepy.lib.scraper.parsers.pdf.exam_routine

from pathlib import Path

from loguru import logger

from routinepy.lib.api.enums import ProgramCode

from .exam_routine_006 import Program006ExamPdfParser



# [docs] -- documentation-link marker that appears to be left over from the rendered source listing
class BaseExamPdfParser:
    """
    Factory class that routes exam table extractions to program-specific extractors.

    This class provides the main interface for extracting raw exam table from PDF into
    a workable list for further processing.

    .. note::
        Specific program codes need to have their own implementation.

    .. seealso::
        For an example implementation, see the source code of :class:`Program006ExamPdfParser`.
    """


[docs]
    @staticmethod
    def extract_raw_tables(program_code: ProgramCode, path: Path) -> list:
        """
        Extracts and returns cleaned raw table data from a PDF file based on the specified program code.

        This method selects an appropriate parser for the given program code and uses it to extract
        table data from the provided PDF file.

        :param program_code: The program code (e.g., '006', '001') to determine the parser to use.
        :type program_code: ProgramCode
        :param path: The file path to the PDF file to be parsed.
        :type path: Path
        :return: A list of raw table data extracted from the PDF.
        :rtype: list

        .. note::
            - Specific program codes need to have their own implementation.

        .. warning::
            - Only :attr:`routinepy.lib.api.enums.ProgramCode.CSE_DAY` using :class:`Program006ExamPdfParser` is currently supported

        .. seealso::
            For an example implementation, see the source code of :class:`Program006ExamPdfParser`.
        """
        parser = None
        match program_code:
            case ProgramCode.CSE_Day:
                parser = Program006ExamPdfParser()

            case _:
                logger.warning(
                    f"Exam PDF parser for program {program_code.name} is not available"
                )
                return

        return parser.extract_raw_tables(path)