Merged
120 changes: 75 additions & 45 deletions problemtools/verifyproblem.py
@@ -39,7 +39,7 @@
 from .version import add_version_arg
 
 from abc import ABC
-from typing import Any, Callable, ClassVar, Literal, Pattern, Match, ParamSpec, TypeVar
+from typing import Any, Callable, ClassVar, Literal, Pattern, Match, ParamSpec, TypeVar, cast
 from pydantic import ValidationError
 
 random.seed(42)
@@ -590,7 +590,7 @@ def check(self, context: Context) -> bool:
         if self.config['grading'] not in ['default', 'custom']:
             self.error('Invalid grading policy in testdata.yaml')
 
-        if self.config['grading'] == 'custom' and len(self._problem.graders._graders) == 0:
+        if self.config['grading'] == 'custom' and self._problem.graders._grader is None:
             self._problem.graders.fatal(f'{self} has custom grading but no custom graders provided')
         if self.config['grading'] == 'default' and Graders._default_grader is None:
             self._problem.graders.fatal(f'{self} has default grading but I could not find default grader')
Expand Down Expand Up @@ -1223,11 +1223,14 @@ class Graders(ProblemPart):
PART_NAME = 'grader'

def setup(self):
self._graders: list = run.find_programs(
graders: list = run.find_programs(
os.path.join(self.problem.probdir, 'graders'),
language_config=self.problem.language_config,
work_dir=self.problem.tmpdir,
)
if len(graders) > 1:
self.fatal('There is more than one custom grader')
self._grader = graders[0] if graders else None
return {}

     def __str__(self) -> str:
@@ -1238,75 +1241,102 @@ def check(self, context: Context) -> bool:
             return self._check_res
         self._check_res = True
 
-        if self.problem.is_pass_fail() and len(self._graders) > 0:
-            self.error('There are grader programs but the problem is pass-fail')
+        if self._grader:
+            if self.problem.is_pass_fail() and self._grader:
+                self.fatal('There is a grader but the problem is pass-fail')
 
-        for grader in self._graders:
-            success, msg = grader.compile()
+            success, msg = self._grader.compile()
             if not success:
-                self.fatal(f'Compile error for {grader}', msg)
+                self.fatal(f'Compile error for {self._grader}', msg)
         return self._check_res
 
     def grade(
         self, sub_results: list[SubmissionResult], testcasegroup: TestCaseGroup, shadow_result: bool = False
     ) -> tuple[Verdict, float | None]:
         if testcasegroup.config['grading'] == 'default':
-            graders = [self._default_grader]
+            if not self._default_grader:
+                self.fatal('Failed to locate default grader')
+                return ('JE', None)
+            grader = self._default_grader
         else:
-            graders = self._graders
+            if not self._grader:
+                self.fatal('Problem has grading: custom without any custom grader')
+                return ('JE', None)
+            grader = self._grader
Comment on lines +1257 to +1265

Contributor Author:
In TestCaseGroup, there are already checks for this. Do we want to assume that TestCaseGroup's check function must always be called before grade, so that these checks can be removed? If so, should we document that dependency?

The code there is:

if self.config['grading'] == 'custom' and self._problem.graders._grader is None:
    self._problem.graders.fatal(f'{self} has custom grading but no custom graders provided')
if self.config['grading'] == 'default' and Graders._default_grader is None:
    self._problem.graders.fatal(f'{self} has default grading but I could not find default grader')
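For illustration only (this guard is not part of the PR): if grade() were allowed to assume that check() has already run, the dependency could be stated explicitly instead of being re-checked. The skeleton below mirrors the PR's names, but the assert is hypothetical.

class Graders:
    def __init__(self) -> None:
        self._check_res: bool | None = None

    def check(self) -> bool:
        # Real validation elided; records that the checks ran.
        self._check_res = True
        return self._check_res

    def grade(self) -> None:
        # Fail loudly if the documented call order is violated.
        assert self._check_res is not None, 'Graders.check() must be called before grade()'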

Contributor:

Order of checks is still a bit of a mess. With the current state of the code base, I think it's better to be defensive and duplicate checks like this, particularly fatal checks: we won't spam the message twice when this happens, because a fatal check stops execution.
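A minimal sketch of the behaviour this reply relies on, assuming fatal() raises rather than returns (hypothetical names, not code from this diff): the first failing fatal check aborts the run, so a duplicate of the same check elsewhere can never fire a second time.

class FatalCheckError(Exception):
    pass

class Part:
    def fatal(self, msg: str) -> None:
        # Report once, then abort; duplicated fatal checks therefore
        # cannot produce duplicated output.
        print(f'FATAL in {type(self).__name__}: {msg}')
        raise FatalCheckError(msg)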


+        if not grader.compile()[0]:
+            self.fatal(f'Failed to compile grader {grader}', grader.compile()[1])
+            return ('JE', None)
 
         grader_input = ''.join([f'{r.verdict} {0 if r.score is None else r.score}\n' for r in sub_results])
         grader_output_re = r'^((AC)|(WA)|(TLE)|(RTE)|(JE))\s+-?[0-9.]+\s*$'
         verdict: Verdict = 'AC'
         score: float = 0
 
         if not sub_results:
-            self.info('No results on %s, so no graders ran' % (testcasegroup,))
+            self.info(f'No results on {testcasegroup}, so no grader ran')
             return (verdict, score)
 
         grader_flags = testcasegroup.config['grader_flags'].split()
         self.debug(f'Grading {len(sub_results)} results:\n{grader_input}')
         self.debug(f'Grader flags: {grader_flags}')
 
-        for grader in graders:
-            if grader is not None and grader.compile()[0]:
-                fd, infile = tempfile.mkstemp()
-                os.close(fd)
-                fd, outfile = tempfile.mkstemp()
-                os.close(fd)
+        infile_path = outfile_path = errfile_path = None
+        try:
+            # Create input and output files for grader
+            # We do it in this awkward way because the files need to be closed before reading/writing
+            with tempfile.NamedTemporaryFile(mode='w', delete=False) as infile:
+                infile.write(grader_input)
+                infile_path = infile.name
+
+            with tempfile.NamedTemporaryFile(delete=False) as outfile:
+                outfile_path = outfile.name
+
+            with tempfile.NamedTemporaryFile(delete=False) as errfile:
+                errfile_path = errfile.name
 
-                open(infile, 'w').write(grader_input)
+            status, runtime = grader.run(infile_path, outfile_path, errfile_path, args=grader_flags)
 
-                status, runtime = grader.run(infile, outfile, args=grader_flags)
+            with open(outfile_path, 'r') as fh:
+                grader_output = fh.read()
 
-                grader_output = open(outfile, 'r').read()
-                os.remove(infile)
-                os.remove(outfile)
+            with open(errfile_path, 'r') as errfile:
+                stderr_content = errfile.read()

-                if not os.WIFEXITED(status) or os.WEXITSTATUS(status) != 0:
-                    self.error(f'Judge error: exit code {os.WEXITSTATUS(status)} for grader {grader}, expected 0')
-                    self.debug(f'Grader input: {grader_input}\n')
-                    return ('JE', None)
+            if not os.WIFEXITED(status):
+                self.error(f'Judge error: {grader} crashed')
+                self.debug(f'Grader input:\n{grader_input}')
+                return ('JE', None)
+            ret = os.WEXITSTATUS(status)
+            if ret != 0:
+                self.error(f'Judge error: exit code {ret} for grader {grader}, expected 0')
+                self.error(f'Grader stderr:\n{stderr_content}\n')
+                self.debug(f'Grader input:\n{grader_input}')
+                return ('JE', None)
 
-                if not re.match(grader_output_re, grader_output):
-                    self.error('Judge error: invalid format of grader output')
-                    self.debug(f'Output must match: "{grader_output_re}"')
-                    self.debug(f'Output was: "{grader_output}"')
-                    return ('JE', None)
-
-                verdict_str, score_str = grader_output.split()
-                verdict = verdict_str  # type: ignore
-                score = float(score_str)
-        # TODO: check that all graders give same result
-
-        if not shadow_result:
-            self.debug(f'Grade on {testcasegroup} is {verdict} ({score})')
-
-        return (verdict, score)
+            if not re.match(grader_output_re, grader_output):
+                self.error('Judge error: invalid format of grader output')
+                self.debug(f'Output must match: "{grader_output_re}"')
+                self.debug(f'Output was: "{grader_output}"')
+                return ('JE', None)
+
+            verdict_str, score_str = grader_output.split()
+            # Make mypy happy by explicitly using cast
+            verdict = cast(Verdict, verdict_str)
+            score = float(score_str)
+
+            if not shadow_result:
+                self.debug(f'Grade on {testcasegroup} is {verdict} ({score})')
+
+            return (verdict, score)
+        except Exception as e:
+            self.error(f'Grader failed with exception {e}')
+            return ('JE', None)
+        finally:
+            for path in [infile_path, outfile_path, errfile_path]:
+                if path:
+                    try:
+                        os.remove(path)
+                    except OSError:
+                        pass


 class OutputValidators(ProblemPart):
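A note on the temp-file handling introduced above: as the in-diff comment says, the files must be closed before another process reads or writes them, which is why the new code uses NamedTemporaryFile(delete=False) and removes the files in a finally block. A self-contained sketch of the same pattern follows; it is a generic helper written for illustration, not code from this PR.

import os
import subprocess
import tempfile

def run_with_tempfiles(cmd: list[str], stdin_text: str) -> str:
    """Run cmd with stdin_text on stdin, returning its stdout."""
    in_path = out_path = None
    try:
        # delete=False lets us close each file so the child process can open it.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
            f.write(stdin_text)
            in_path = f.name
        with tempfile.NamedTemporaryFile(delete=False) as f:
            out_path = f.name
        with open(in_path) as fin, open(out_path, 'w') as fout:
            subprocess.run(cmd, stdin=fin, stdout=fout, check=True)
        with open(out_path) as fout:
            return fout.read()
    finally:
        # Cleanup runs on every path, including exceptions.
        for path in (in_path, out_path):
            if path:
                try:
                    os.remove(path)
                except OSError:
                    pass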