44 changes: 29 additions & 15 deletions cookbooks/paper_review/pipeline.py
@@ -25,6 +25,7 @@
     TexPackageInfo,
 )
 from cookbooks.paper_review.utils import encode_pdf_base64, load_pdf_bytes
+from openjudge.graders.schema import GraderError


 @dataclass
@@ -106,29 +107,38 @@ async def review_paper(
         if self.config.enable_correctness:
             logger.info("Running correctness detection...")
             correctness = await self.correctness_grader.aevaluate(pdf_data=pdf_data)
-            result.correctness = CorrectnessResult(
-                score=correctness.score,
-                reasoning=correctness.reason,
-                key_issues=correctness.metadata.get("key_issues", []),
-            )
+            if isinstance(correctness, GraderError):
+                logger.error(f"Correctness grader error: {correctness.error}")
+            else:
+                result.correctness = CorrectnessResult(
+                    score=correctness.score,
+                    reasoning=correctness.reason,
+                    key_issues=correctness.metadata.get("key_issues", []),
+                )
Comment on lines 109 to +117
Contributor


medium

While the error handling is a great addition for robustness, this pattern of calling aevaluate, checking for GraderError, and then processing the result is repeated multiple times in this file (for correctness, review, criticality, jailbreaking, and format graders).

To improve maintainability and reduce code duplication, consider refactoring this logic into a helper method. This method could accept the grader, its arguments, and a callback function to process a successful result.

For example, you could define a helper like this:

async def _run_grader(self, grader, on_success, *args, **kwargs):
    """Runs a grader, handles errors, and calls a success callback."""
    grader_name = grader.name.replace('_', ' ').capitalize()
    try:
        result = await grader.aevaluate(*args, **kwargs)
        if isinstance(result, GraderError):
            logger.error(f"{grader_name} grader error: {result.error}")
        else:
            on_success(result)
    except Exception as e:
        logger.error(f"An unexpected error occurred in {grader_name} grader: {e}", exc_info=True)

And then use it for the correctness check like this:

if self.config.enable_correctness:
    logger.info("Running correctness detection...")

    def on_success(res):
        result.correctness = CorrectnessResult(
            score=res.score,
            reasoning=res.reason,
            key_issues=res.metadata.get("key_issues", []),
        )

    await self._run_grader(
        self.correctness_grader, on_success, pdf_data=pdf_data
    )

This approach would make the review_paper and _run_safety_checks methods much cleaner and easier to maintain.
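
As a further illustration (not part of the original suggestion), here is a minimal sketch of how the same helper could cover the jailbreaking check in _run_safety_checks, assuming the grader accepts the same pdf_data keyword and exposes the metadata/reason fields used in this diff; the on_jailbreak callback name is illustrative:

def on_jailbreak(res):
    # Errors are already logged inside _run_grader; only successful results reach here
    if res.metadata.get("is_abuse"):
        issues.append(f"Jailbreaking detected: {res.reason}")

await self._run_grader(self.jailbreaking_grader, on_jailbreak, pdf_data=pdf_data)

The format check could follow the same shape with its own callback, though that callback would need nonlocal format_ok to rebind the flag.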


         if self.config.enable_review:
             logger.info("Running paper review...")
             review = await self.review_grader.aevaluate(pdf_data=pdf_data)
-            result.review = ReviewResult(score=review.score, review=review.reason)
+            if isinstance(review, GraderError):
+                logger.error(f"Review grader error: {review.error}")
+            else:
+                result.review = ReviewResult(score=review.score, review=review.reason)

         # Phase 3: Criticality verification
         if self.config.enable_criticality and result.correctness and result.correctness.score > 1:
             logger.info("Running criticality verification...")
             findings = self._format_findings(result.correctness)
             criticality = await self.criticality_grader.aevaluate(pdf_data=pdf_data, findings=findings)
-            from cookbooks.paper_review.schema import CriticalityIssues
-
-            result.criticality = CriticalityResult(
-                score=criticality.score,
-                reasoning=criticality.reason,
-                issues=CriticalityIssues(**criticality.metadata.get("issues", {})),
-            )
+            if isinstance(criticality, GraderError):
+                logger.error(f"Criticality grader error: {criticality.error}")
+            else:
+                from cookbooks.paper_review.schema import CriticalityIssues
+
+                result.criticality = CriticalityResult(
+                    score=criticality.score,
+                    reasoning=criticality.reason,
+                    issues=CriticalityIssues(**criticality.metadata.get("issues", {})),
+                )

         # Phase 4: BibTeX verification
         if self.config.enable_bib_verification and bib_path:
@@ -144,12 +154,16 @@ async def _run_safety_checks(self, pdf_data: str) -> Dict[str, Any]:

         # Jailbreaking check
         jailbreak_result = await self.jailbreaking_grader.aevaluate(pdf_data=pdf_data)
-        if jailbreak_result.metadata.get("is_abuse"):
+        if isinstance(jailbreak_result, GraderError):
+            logger.error(f"Jailbreaking grader error: {jailbreak_result.error}")
+        elif jailbreak_result.metadata.get("is_abuse"):
             issues.append(f"Jailbreaking detected: {jailbreak_result.reason}")

         # Format check
         format_result = await self.format_grader.aevaluate(pdf_data=pdf_data)
-        if format_result.score == 1:
+        if isinstance(format_result, GraderError):
+            logger.error(f"Format grader error: {format_result.error}")
+        elif format_result.score == 1:
             format_ok = False
             violations = format_result.metadata.get("violations", [])
             if violations: