diff --git a/cms/db/submission.py b/cms/db/submission.py index 10647e1d11..e4a8c5a741 100644 --- a/cms/db/submission.py +++ b/cms/db/submission.py @@ -326,8 +326,7 @@ class SubmissionResult(Base): nullable=True) # The output from the sandbox (to allow localization the first item - # of the list is a format string, possibly containing some "%s", - # that will be filled in using the remaining items of the list). + # of the list is a message ID, and the rest are format parameters). compilation_text: list[str] = Column( ARRAY(String), nullable=False, @@ -758,9 +757,8 @@ class Evaluation(Base): nullable=True) # The output from the grader, usually "Correct", "Time limit", ... - # (to allow localization the first item of the list is a format - # string, possibly containing some "%s", that will be filled in - # using the remaining items of the list). + # (to allow localization the first item of the list is a message ID, and + # the rest are format parameters). text: list[str] = Column( ARRAY(String), nullable=False, diff --git a/cms/db/usertest.py b/cms/db/usertest.py index 6c9e8a2a9a..92598db3ec 100644 --- a/cms/db/usertest.py +++ b/cms/db/usertest.py @@ -276,8 +276,7 @@ class UserTestResult(Base): nullable=True) # The output from the sandbox (to allow localization the first item - # of the list is a format string, possibly containing some "%s", - # that will be filled in using the remaining items of the list). + # of the list is a message ID, and the rest are format parameters). compilation_text: list[str] = Column( ARRAY(String), nullable=False, diff --git a/cms/grading/Sandbox.py b/cms/grading/Sandbox.py index 7867006902..5066351fda 100644 --- a/cms/grading/Sandbox.py +++ b/cms/grading/Sandbox.py @@ -1289,11 +1289,15 @@ def get_exit_status(self) -> str: return self.EXIT_TIMEOUT_WALL else: return self.EXIT_TIMEOUT + elif 'cg-oom-killed' in self.log: + # OOM killer was activated in the sandbox. It killed either the + # main process (in which case the exit status is SG) or a + # subprocess (in which case the main process gets to decide how to + # handle it, but probably RE). In both cases, we want to + # "root-cause" the verdict as "memory limit exceeded". + return self.EXIT_MEM_LIMIT elif 'SG' in status_list: - if 'cg-oom-killed' in self.log: - return self.EXIT_MEM_LIMIT - else: - return self.EXIT_SIGNAL + return self.EXIT_SIGNAL elif 'RE' in status_list: return self.EXIT_NONZERO_RETURN # OK status is not reported in the log file, it's implicit. diff --git a/cms/grading/__init__.py b/cms/grading/__init__.py index 8d8a230cc0..37ba55e668 100644 --- a/cms/grading/__init__.py +++ b/cms/grading/__init__.py @@ -25,6 +25,9 @@ import logging +from markupsafe import Markup, escape + +from cms.grading.steps.messages import MESSAGE_REGISTRY from cms.locale import DEFAULT_TRANSLATION, Translation from .language import Language, CompiledLanguage @@ -61,9 +64,10 @@ def format_status_text( A status text is the content of SubmissionResult.compilation_text, Evaluation.text and UserTestResult.(compilation|evaluation)_text. - It is a list whose first element is a string with printf-like - placeholders and whose other elements are the data to use to fill - them. + It is a list whose first element is a message ID identifying a + MessageCollection and a message in it, and whose other elements are the + data to use to fill them. If the message ID begins with "custom:", it is + used as a string directly without attempting to look up a message. 
The first element will be translated using the given translator (or the identity function, if not given), completed with the data and returned. @@ -78,9 +82,21 @@ def format_status_text( if not isinstance(status, list): raise TypeError("Invalid type: %r" % type(status)) - # The empty msgid corresponds to the headers of the pofile. - text = _(status[0]) if status[0] != '' else '' - return text % tuple(status[1:]) + if status[0] == '': + return '' + elif status[0].startswith("custom:"): + return status[0].removeprefix("custom:") % tuple(status[1:]) + else: + message = MESSAGE_REGISTRY.get(status[0]) + msg_text = _(message.message) % tuple(status[1:]) + if message.inline_help: + # XXX: is this the best place for this? + help = Markup( + f' ' + ) + return msg_text + help + else: + return msg_text except Exception: logger.error("Unexpected error when formatting status " "text: %r", status, exc_info=True) diff --git a/cms/grading/scoretypes/abc.py b/cms/grading/scoretypes/abc.py index f36542606e..4000e5c30a 100644 --- a/cms/grading/scoretypes/abc.py +++ b/cms/grading/scoretypes/abc.py @@ -36,7 +36,6 @@ from cms import FEEDBACK_LEVEL_RESTRICTED from cms.db import SubmissionResult -from cms.grading.steps import EVALUATION_MESSAGES from cms.locale import Translation, DEFAULT_TRANSLATION from cms.server.jinja2_toolbox import GLOBAL_ENVIRONMENT from jinja2 import Template @@ -435,7 +434,7 @@ def compute_score(self, submission_result): tc_score, parameter) time_limit_was_exceeded = False - if evaluations[tc_idx].text == [EVALUATION_MESSAGES.get("timeout").message]: + if evaluations[tc_idx].text == ["evaluation:timeout"]: time_limit_was_exceeded = True testcases.append({ diff --git a/cms/grading/steps/compilation.py b/cms/grading/steps/compilation.py index 3b56f2eaad..e149ca2552 100644 --- a/cms/grading/steps/compilation.py +++ b/cms/grading/steps/compilation.py @@ -42,7 +42,7 @@ def N_(message: str): return message -COMPILATION_MESSAGES = MessageCollection([ +COMPILATION_MESSAGES = MessageCollection("compilation", [ HumanMessage("success", N_("Compilation succeeded"), N_("Your submission successfully compiled to an " @@ -54,14 +54,20 @@ def N_(message: str): N_("Compilation timed out"), N_("Your submission exceeded the time limit while compiling. " "This might be caused by an excessive use of C++ " - "templates, for example.")), + "templates, for example."), + inline_help=True), + HumanMessage("memorylimit", + N_("Compilation memory limit exceeded"), + N_("Your submission exceeded the memory limit while compiling. " + "This might be caused by an excessive use of C++ " + "templates, or too large global variables, for example."), + inline_help=True), HumanMessage("signal", - N_("Compilation killed with signal %s (could be triggered " - "by violating memory limits)"), + N_("Compilation killed with signal %s"), N_("Your submission was killed with the specified signal. 
" - "Among other things, this might be caused by exceeding " - "the memory limit for the compilation, and in turn by an " - "excessive use of C++ templates, for example.")), + "This might be caused by a bug in the compiler, " + "for example."), + inline_help=True), ]) @@ -86,8 +92,9 @@ def compilation_step( executable, False if not, None if success is False; * text: a human readable, localized message to inform contestants of the status; it is either an empty list (for no message) or a - list of strings were the second to the last are formatting - arguments for the first, or None if success is False; + list of strings were the first is a message ID and the rest are + format arguments for that message, if the message takes any; or + None if success is False; * stats: a dictionary with statistics about the compilation, or None if success is False. @@ -118,14 +125,14 @@ def compilation_step( if exit_status == Sandbox.EXIT_OK: # Execution finished successfully and the executable was generated. logger.debug("Compilation successfully finished.") - text = [COMPILATION_MESSAGES.get("success").message] + text = ["compilation:success"] return True, True, text, stats elif exit_status == Sandbox.EXIT_NONZERO_RETURN: # Error in compilation: no executable was generated, and we return # an error to the user. logger.debug("Compilation failed.") - text = [COMPILATION_MESSAGES.get("fail").message] + text = ["compilation:fail"] return True, False, text, stats elif exit_status == Sandbox.EXIT_TIMEOUT or \ @@ -133,7 +140,14 @@ def compilation_step( # Timeout: we assume it is the user's fault, and we return the error # to them. logger.debug("Compilation timed out.") - text = [COMPILATION_MESSAGES.get("timeout").message] + text = ["compilation:timeout"] + return True, False, text, stats + + elif exit_status == Sandbox.EXIT_MEM_LIMIT: + # Memory limit: we assume it is the user's fault, and we return the + # error to them. + logger.debug("Compilation memory limit exceeded.") + text = ["compilation:memorylimit"] return True, False, text, stats elif exit_status == Sandbox.EXIT_SIGNAL: @@ -141,7 +155,7 @@ def compilation_step( # we return the error to them. signal = stats["signal"] logger.debug("Compilation killed with signal %s.", signal) - text = [COMPILATION_MESSAGES.get("signal").message, str(signal)] + text = ["compilation:signal", str(signal)] return True, False, text, stats elif exit_status == Sandbox.EXIT_SANDBOX_ERROR: diff --git a/cms/grading/steps/evaluation.py b/cms/grading/steps/evaluation.py index adbe507b32..3a434b3776 100644 --- a/cms/grading/steps/evaluation.py +++ b/cms/grading/steps/evaluation.py @@ -42,7 +42,7 @@ def N_(message: str): return message -EVALUATION_MESSAGES = MessageCollection([ +EVALUATION_MESSAGES = MessageCollection("evaluation", [ HumanMessage("success", N_("Output is correct"), N_("Your submission ran and gave the correct answer")), @@ -56,27 +56,38 @@ def N_(message: str): HumanMessage("nooutput", N_("Evaluation didn't produce file %s"), N_("Your submission ran, but did not write on the " - "correct output file")), + "correct output file"), + inline_help=True), HumanMessage("timeout", N_("Execution timed out"), - N_("Your submission used too much CPU time.")), + N_("Your submission used too much CPU time."), + inline_help=True), HumanMessage("walltimeout", N_("Execution timed out (wall clock limit exceeded)"), N_("Your submission used too much total time. This might " "be triggered by undefined code, or buffer overflow, " "for example. 
Note that in this case the CPU time " "visible in the submission details might be much smaller " - "than the time limit.")), + "than the time limit."), + inline_help=True), HumanMessage("memorylimit", N_("Memory limit exceeded"), - N_("Your submission used too much memory.")), + N_("Your submission used too much memory."), + inline_help=True), HumanMessage("signal", N_("Execution killed by signal"), - N_("The evaluation was killed by a signal.")), + N_("The evaluation was killed by a signal."), + inline_help=True), HumanMessage("returncode", N_("Execution failed because the return code was nonzero"), N_("Your submission failed because it exited with a return " - "code different from 0.")), + "code different from 0."), + inline_help=True), +]) +# This message is stored separately because we don't want to show it on the help page. +EXECUTION_MESSAGES = MessageCollection("execution", [ + HumanMessage("success", N_("Execution completed successfully"), ""), + # all other user test messages are shared with the regular evaluation messages. ]) @@ -270,27 +281,26 @@ def human_evaluation_message(stats: StatsDict) -> list[str]: stats: execution statistics for an evaluation step. - return: a list of strings composing the message (where - strings from the second to the last are formatting arguments for the - first); or an empty list if no message should be passed to - contestants. + return: a list of strings composing the message (where the first is a message + ID and the rest are format arguments for the message); or an empty list + if no message should be passed to contestants. """ exit_status = stats['exit_status'] if exit_status == Sandbox.EXIT_TIMEOUT: - return [EVALUATION_MESSAGES.get("timeout").message] + return ["evaluation:timeout"] elif exit_status == Sandbox.EXIT_TIMEOUT_WALL: - return [EVALUATION_MESSAGES.get("walltimeout").message] + return ["evaluation:walltimeout"] elif exit_status == Sandbox.EXIT_SIGNAL: - return [EVALUATION_MESSAGES.get("signal").message] + return ["evaluation:signal"] elif exit_status == Sandbox.EXIT_SANDBOX_ERROR: # Contestants won't see this, the submission will still be evaluating. return [] elif exit_status == Sandbox.EXIT_MEM_LIMIT: - return [EVALUATION_MESSAGES.get("memorylimit").message] + return ["evaluation:memorylimit"] elif exit_status == Sandbox.EXIT_NONZERO_RETURN: # Don't tell which code: would be too much information! - return [EVALUATION_MESSAGES.get("returncode").message] + return ["evaluation:returncode"] elif exit_status == Sandbox.EXIT_OK: return [] else: diff --git a/cms/grading/steps/messages.py b/cms/grading/steps/messages.py index c3d62f72a1..ab52bd8da3 100644 --- a/cms/grading/steps/messages.py +++ b/cms/grading/steps/messages.py @@ -37,28 +37,31 @@ class HumanMessage: """ - def __init__(self, shorthand: str, message: str, help_text: str): + def __init__(self, shorthand: str, message: str, help_text: str, inline_help: bool = False): """Initialization. shorthand: what to call this message in the code. message: the message itself. help_text: a longer explanation for the help page. + inline_help: Whether to show a help tooltip for this message whenever it is shown. 
""" self.shorthand = shorthand self.message = message self.help_text = help_text + self.inline_help = inline_help class MessageCollection: """Represent a collection of messages, with error checking.""" - def __init__(self, messages: list[HumanMessage] | None = None): + def __init__(self, namespace: str, messages: list[HumanMessage] | None = None): self._messages: dict[str, HumanMessage] = {} self._ordering: list[str] = [] if messages is not None: for message in messages: self.add(message) + MESSAGE_REGISTRY.add(namespace, self) def add(self, message: HumanMessage): if message.shorthand in self._messages: @@ -81,3 +84,28 @@ def all(self) -> list[HumanMessage]: for shorthand in self._ordering: ret.append(self._messages[shorthand]) return ret + +class MessageRegistry: + """Represents a collection of message collections, organized by a namespace + prefix. This is a singleton that is automatically populated by + MessageCollection.""" + + def __init__(self): + self._namespaces: dict[str, MessageCollection] = {} + + def add(self, namespace: str, collection: MessageCollection): + if namespace in self._namespaces: + logger.error(f"Trying to register duplicate namespace {namespace}") + return + self._namespaces[namespace] = collection + + def get(self, message_id: str) -> HumanMessage: + if ':' not in message_id: + raise KeyError(f"Invalid message ID {message_id}") + namespace, message = message_id.split(':', 1) + if namespace not in self._namespaces: + raise KeyError(f"Message namespace {namespace} not found") + collection = self._namespaces[namespace] + return collection.get(message) + +MESSAGE_REGISTRY = MessageRegistry() diff --git a/cms/grading/steps/trusted.py b/cms/grading/steps/trusted.py index 7c6513876e..7278dd1989 100644 --- a/cms/grading/steps/trusted.py +++ b/cms/grading/steps/trusted.py @@ -40,7 +40,6 @@ from cms import config from cms.grading.Sandbox import Sandbox -from .evaluation import EVALUATION_MESSAGES from .utils import generic_step from .stats import StatsDict @@ -117,13 +116,15 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]: # If the text starts with translate, the manager is asking us to # use a stock message, that can be translated. if text.startswith("translate:"): - remaining = text[len("translate:"):].strip() + remaining = text.removeprefix("translate:").strip() if remaining in ["success", "partial", "wrong"]: - text = EVALUATION_MESSAGES.get(remaining).message + text = "evaluation:" + remaining else: remaining = remaining[:15] # to avoid logging lots of text logger.warning("Manager asked to translate text, but string " "'%s' is not recognized." 
% remaining) + else: + text = "custom:" + text return outcome, [text] diff --git a/cms/grading/steps/whitediff.py b/cms/grading/steps/whitediff.py index 395bcf8420..79d4a141a3 100644 --- a/cms/grading/steps/whitediff.py +++ b/cms/grading/steps/whitediff.py @@ -30,8 +30,6 @@ from cms.grading.Sandbox import Sandbox -from .evaluation import EVALUATION_MESSAGES - logger = logging.getLogger(__name__) @@ -130,9 +128,9 @@ def white_diff_fobj_step( """ if _white_diff(output_fobj, correct_output_fobj): - return 1.0, [EVALUATION_MESSAGES.get("success").message] + return 1.0, ["evaluation:success"] else: - return 0.0, [EVALUATION_MESSAGES.get("wrong").message] + return 0.0, ["evaluation:wrong"] def white_diff_step( @@ -156,5 +154,4 @@ def white_diff_step( sandbox.get_file(correct_output_filename) as res_file: return white_diff_fobj_step(out_file, res_file) else: - return 0.0, [ - EVALUATION_MESSAGES.get("nooutput").message, output_filename] + return 0.0, ["evaluation:nooutput", output_filename] diff --git a/cms/grading/tasktypes/Batch.py b/cms/grading/tasktypes/Batch.py index d23ab04e51..f08ee44dec 100644 --- a/cms/grading/tasktypes/Batch.py +++ b/cms/grading/tasktypes/Batch.py @@ -39,11 +39,6 @@ logger = logging.getLogger(__name__) -# Dummy function to mark translatable string. -def N_(message): - return message - - class Batch(TaskType): """Task type class for a unique standalone submission source, with comparator (or not). @@ -336,8 +331,7 @@ def _execution_step(self, job, file_cacher): # Check that the output file was created if not sandbox.file_exists(self._actual_output): outcome = 0.0 - text = [N_("Evaluation didn't produce file %s"), - self._actual_output] + text = ["evaluation:nooutput", self._actual_output] if job.get_output: job.user_output = None @@ -352,7 +346,7 @@ def _execution_step(self, job, file_cacher): # If just asked to execute, fill text and set dummy outcome. if job.only_execution: outcome = 0.0 - text = [N_("Execution completed successfully")] + text = ["execution:success"] # Otherwise prepare to evaluate the output file. else: diff --git a/cms/grading/tasktypes/BatchAndOutput.py b/cms/grading/tasktypes/BatchAndOutput.py index c82f8e0522..e37accae14 100644 --- a/cms/grading/tasktypes/BatchAndOutput.py +++ b/cms/grading/tasktypes/BatchAndOutput.py @@ -29,11 +29,6 @@ logger = logging.getLogger(__name__) -# Dummy function to mark translatable string. -def N_(message): - return message - - class BatchAndOutput(Batch): """Task type class for a task that is a combination of Batch and OutputOnly. @@ -115,7 +110,7 @@ def compile(self, job, file_cacher): # This submission did not have any source files, skip compilation job.success = True job.compilation_success = True - job.text = [N_("No compilation needed")] + job.text = ["outputonly:nocompile"] job.plus = {} return @@ -145,7 +140,7 @@ def evaluate(self, job, file_cacher): if output_file_params is None and outcome is None: job.success = True job.outcome = "0.0" - job.text = [N_("File not submitted")] + job.text = ["outputonly:nofile"] job.plus = {} return diff --git a/cms/grading/tasktypes/Communication.py b/cms/grading/tasktypes/Communication.py index 60f53a993b..2c954fb788 100644 --- a/cms/grading/tasktypes/Communication.py +++ b/cms/grading/tasktypes/Communication.py @@ -45,11 +45,6 @@ logger = logging.getLogger(__name__) -# Dummy function to mark translatable string. -def N_(message): - return message - - class Communication(TaskType): """Task type class for tasks with a fully admin-controlled process. 
@@ -405,7 +400,7 @@ def evaluate(self, job, file_cacher): # If just asked to execute, fill text and set dummy outcome. elif job.only_execution: outcome = 0.0 - text = [N_("Execution completed successfully")] + text = ["execution:success"] # If the user sandbox detected some problem (timeout, ...), # the outcome is 0.0 and the text describes that problem. diff --git a/cms/grading/tasktypes/OutputOnly.py b/cms/grading/tasktypes/OutputOnly.py index 7a5e2e3e00..becd3a19df 100644 --- a/cms/grading/tasktypes/OutputOnly.py +++ b/cms/grading/tasktypes/OutputOnly.py @@ -33,11 +33,6 @@ logger = logging.getLogger(__name__) -# Dummy function to mark translatable string. -def N_(message): - return message - - class OutputOnly(TaskType): """Task type class for output only tasks, with submission composed of testcase_number text files, to be evaluated diffing or using a @@ -107,7 +102,7 @@ def compile(self, job, file_cacher): # No compilation needed. job.success = True job.compilation_success = True - job.text = [N_("No compilation needed")] + job.text = ["outputonly:nocompile"] job.plus = {} def evaluate(self, job, file_cacher): @@ -119,7 +114,7 @@ def evaluate(self, job, file_cacher): if user_output_filename not in job.files: job.success = True job.outcome = "0.0" - job.text = [N_("File not submitted")] + job.text = ["outputonly:nofile"] job.plus = {} return diff --git a/cms/grading/tasktypes/TwoSteps.py b/cms/grading/tasktypes/TwoSteps.py index fbe0ee5ce4..127092f1f7 100644 --- a/cms/grading/tasktypes/TwoSteps.py +++ b/cms/grading/tasktypes/TwoSteps.py @@ -41,11 +41,6 @@ logger = logging.getLogger(__name__) -# Dummy function to mark translatable string. -def N_(message): - return message - - class TwoSteps(TaskType): """Task type class for tasks where the user must submit two files with a function each; the first function compute some data, that @@ -313,8 +308,7 @@ def evaluate(self, job, file_cacher): # Check that the output file was created if not second_sandbox.file_exists(TwoSteps.OUTPUT_FILENAME): outcome = 0.0 - text = [N_("Evaluation didn't produce file %s"), - TwoSteps.OUTPUT_FILENAME] + text = ["evaluation:nooutput", TwoSteps.OUTPUT_FILENAME] if job.get_output: job.user_output = None @@ -329,7 +323,7 @@ def evaluate(self, job, file_cacher): # If just asked to execute, fill text and set dummy outcome. if job.only_execution: outcome = 0.0 - text = [N_("Execution completed successfully")] + text = ["execution:success"] # Otherwise evaluate the output file. else: diff --git a/cms/grading/tasktypes/util.py b/cms/grading/tasktypes/util.py index 609d7c5ac6..52ae183f1e 100644 --- a/cms/grading/tasktypes/util.py +++ b/cms/grading/tasktypes/util.py @@ -39,8 +39,8 @@ from cms.grading.Job import CompilationJob, EvaluationJob, Job from cms.grading.Sandbox import Sandbox from cms.grading.language import Language -from cms.grading.steps import EVALUATION_MESSAGES, checker_step, \ - white_diff_fobj_step +from cms.grading.steps.messages import HumanMessage, MessageCollection +from cms.grading.steps import checker_step, white_diff_fobj_step logger = logging.getLogger(__name__) @@ -49,6 +49,17 @@ EVAL_USER_OUTPUT_FILENAME = "user_output.txt" +# Dummy function to mark translatable string. +def N_(message: str): + return message + + +# These messages are used by both BatchAndOutput and OutputOnly. 
+OUTPUT_ONLY_MESSAGES = MessageCollection("outputonly", [ + HumanMessage("nocompile", N_("No compilation needed"), ""), + HumanMessage("nofile", N_("File not submitted"), ""), +]) + def create_sandbox(file_cacher: FileCacher, name: str | None = None) -> Sandbox: """Create a sandbox, and return it. @@ -251,8 +262,7 @@ def eval_output( # as if the file did not exist. if not os.path.exists(user_output_path) \ or os.path.islink(user_output_path): - return True, 0.0, [EVALUATION_MESSAGES.get("nooutput").message, - user_output_filename] + return True, 0.0, ["evaluation:nooutput", user_output_filename] if checker_codename is not None: if not check_manager_present(job, checker_codename): diff --git a/cmstestsuite/Tests.py b/cmstestsuite/Tests.py index 03870f6d49..67baea705f 100644 --- a/cmstestsuite/Tests.py +++ b/cmstestsuite/Tests.py @@ -196,6 +196,11 @@ languages=(LANG_CPP,), checks=[CheckCompilationFail()]), + Test('compile-memory-limit', + task=batch_fileio, filenames=['compile-memory-limit.%l'], + languages=(LANG_CPP,), + checks=[CheckCompilationFail()]), + # Various timeout conditions. Test('timeout-cputime', diff --git a/cmstestsuite/code/compile-memory-limit.cpp b/cmstestsuite/code/compile-memory-limit.cpp new file mode 100644 index 0000000000..4548845394 --- /dev/null +++ b/cmstestsuite/code/compile-memory-limit.cpp @@ -0,0 +1,9 @@ +#ifdef EVAL // this file can accidentally OOM editors/language servers... +#define a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +#define b a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a +#define c b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b +#define d c c c c c c c c c c c c c c c c c c c c c c c c c c c c c c c c +#define e d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d +const char* x = e e; +#endif +int main() { return 0; }
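
Illustrative usage sketch of the new status-text convention: stored texts now begin with a "namespace:shorthand" message ID, or with a "custom:" prefix for free-form checker output, and format_status_text() resolves the ID through MESSAGE_REGISTRY. The snippet below is a sketch, not part of the patch's test suite; it assumes a checkout with this change applied and that cms.grading.steps has already been imported (importing it instantiates the stock MessageCollections, which register themselves), and the signal number and checker string are invented sample data.

    from cms.grading import format_status_text
    import cms.grading.steps  # assumed to register the stock message collections on import

    # Stock message ID plus its format argument (the signal number is sample data).
    # Because the "signal" message sets inline_help=True, the returned Markup also
    # carries the inline help snippet appended by format_status_text().
    print(format_status_text(["compilation:signal", "11"]))

    # Free-form checker output is stored with the "custom:" prefix and is
    # formatted directly, without a registry lookup.
    print(format_status_text(["custom:Wrong answer on query %s", "3"]))
    # -> "Wrong answer on query 3"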