feat(rubric-auto-grading): enhance usage experience #8009

Open · wants to merge 4 commits into master
app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json
@@ -3,31 +3,16 @@
   "type": "object",
   "properties": {
     "category_grades": {
-      "type": "array",
-      "items": {
-        "type": "object",
-        "properties": {
-          "category_id": {
-            "type": "integer",
-            "description": "The ID of the rubric category, must be one of the listed categories for the rubric"
-          },
-          "criterion_id": {
-            "type": "integer",
-            "description": "The ID of the criterion within the rubric category, must be one of the listed criteria for the rubric category"
-          },
-          "explanation": {
-            "type": "string",
-            "description": "An explanation for why the criterion was selected"
-          }
-        },
-        "required": ["category_id", "criterion_id", "explanation"],
-        "additionalProperties": false
+      "type": "object",
+      "properties": {
       },
-      "description": "A list of criterions selected for each rubric category with explanations"
+      "required": [],
+      "additionalProperties": false,
+      "description": "A mapping of categories to their selected criterion and explanation"
     },
     "overall_feedback": {
       "type": "string",
-      "description": "General feedback about the student's response, provided in HTML format and focused on how the student can improve according to the rubric"
+      "description": "General feedback about the student's response, provided in HTML format"
     }
   },
   "required": ["category_grades", "overall_feedback"],
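Note: category_grades is now an empty object in the checked-in schema; its per-category fields are generated at request time by RubricLlmService#generate_dynamic_schema (see that file's diff below). As a hypothetical illustration, the parsed response for a rubric with a single category (ID 12) whose selected criterion (ID 34) carries a grade of 5 would look roughly like this; all IDs and strings are invented:

    # Hypothetical structured output under the new mapping shape:
    {
      'category_grades' => {
        'category_12' => {
          'criterion_id_with_grade' => 'criterion_34_grade_5',
          'explanation' => 'The response cites two primary sources.'
        }
      },
      'overall_feedback' => '<p>Strong use of evidence; consider addressing counterarguments.</p>'
    }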
app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json
@@ -1,5 +1,5 @@
 {
   "_type": "prompt",
-  "input_variables": ["format_instructions"],
-  "template": "You are an expert grading assistant for educational assessments.\nYour task is to grade a student's response to a rubric-based question.\nYou will be provided with:\n1. The teacher's instructions\n\n2. The question details\n3. The rubric categories and criteria\n4. The student's response\nYou must analyze how well the student's response meets each rubric category's criteria\nand provide feedback accordingly.\n\nIf teacher instruction is provided, you must follow it. This may include question context, model answers, or desired feedback tone.\n\nThe `overall_feedback` field **must be written in HTML** to support rich text rendering. It should provide actionable suggestions for improvement when appropriate, or acknowledge strengths if the response is good.\n\n{format_instructions}"
+  "input_variables": ["question_title", "question_description", "rubric_categories", "custom_prompt"],
+  "template": "You are an expert grading assistant for educational assessments.\nYour task is to grade a student's response to a rubric-based question. You will be given:\n1. Teacher instructions:\n{custom_prompt}\n2. Question details:\n{question_title}\n{question_description}\n3. Rubric categories and criteria:\n{rubric_categories}\nThe student's answer will be provided as the content of the user message (might be blank).\nYou must analyze how well the student's response meets each rubric category's criteria and provide feedback accordingly.\nThe `overall_feedback` field **must be written in HTML** to support rich text rendering. Unless teacher instructions specify otherwise, it should compliment students if their responses are good, or provide actionable suggestions for improvement if they are weak."
 }
@@ -1,5 +1,5 @@
 {
   "_type": "prompt",
-  "input_variables": ["question_title", "question_description", "rubric_categories", "answer_text", "custom_prompt"],
-  "template": "TEACHER INSTRUCTIONS:\n{custom_prompt}\n\nQUESTION:\n{question_title}\n{question_description}\n\nRUBRIC CATEGORIES:\n{rubric_categories}\n\nSTUDENT RESPONSE:\n{answer_text}"
+  "input_variables": ["answer_text"],
+  "template": "<ANSWER>{answer_text}</ANSWER>"
 }
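With this change the user message carries only the student's answer, wrapped in <ANSWER> tags; the grading context (teacher instructions, question, rubric) moves into the system prompt. A minimal sketch of the rendered message, assuming Langchain.rb's Prompt#format as used elsewhere in this PR; the file path follows the system prompt's naming and the answer text is invented:

    user_prompt = Langchain::Prompt.load_from_path(
      file_path: 'app/services/course/assessment/answer/prompts/rubric_auto_grading_user_prompt.json'
    )
    user_prompt.format(answer_text: 'Light reactions produce ATP and NADPH.')
    # => "<ANSWER>Light reactions produce ATP and NADPH.</ANSWER>"

One plausible motivation for the split: the answer is untrusted input, and delimiting it in its own user message makes it less likely to be read as grading instructions.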
app/services/course/assessment/answer/rubric_auto_grading_service.rb
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
-class Course::Assessment::Answer::RubricAutoGradingService <
-  Course::Assessment::Answer::AutoGradingService
+class Course::Assessment::Answer::RubricAutoGradingService < Course::Assessment::Answer::AutoGradingService # rubocop:disable Metrics/ClassLength
   def evaluate(answer)
     answer.correct, grade, messages, feedback = evaluate_answer(answer.actable)
     answer.auto_grading.result = { messages: messages }
@@ -12,23 +11,22 @@ def evaluate(answer)
 
   # Grades the given answer.
   #
-  # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer specified by the
+  # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer specified.
   # @return [Array<(Boolean, Integer, Object, String)>] The correct status, grade, messages to be
   # assigned to the grading, and feedback for the draft post.
   def evaluate_answer(answer)
     question = answer.question.actable
     llm_service = Course::Assessment::Answer::RubricLlmService.new
     llm_response = llm_service.evaluate(question, answer)
-    process_llm_grading_response(question, answer, llm_response)
+    process_llm_grading_response(answer, llm_response)
   end
 
   # Processes the LLM response into grades and feedback, and updates the answer.
-  # @param [Course::Assessment::Question::RubricBasedResponse] question The question to be graded.
   # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer to update.
   # @param [Hash] llm_response The parsed LLM response containing grading information
   # @return [Array<(Boolean, Integer, Object, String)>] The correct status, grade, and feedback messages.
-  def process_llm_grading_response(question, answer, llm_response)
-    category_grades = process_category_grades(question, llm_response)
+  def process_llm_grading_response(answer, llm_response)
+    category_grades = llm_response['category_grades']
 
     # For rubric-based questions, update the answer's selections and grade to database
     update_answer_selections(answer, category_grades)
@@ -38,28 +36,6 @@ def process_llm_grading_response(question, answer, llm_response)
     [true, grade, ['success'], llm_response['overall_feedback']]
   end
 
-  # Processes category grades from LLM response into a structured format
-  # @param [Course::Assessment::Question::RubricBasedResponse] question The question to be graded.
-  # @param [Hash] llm_response The parsed LLM response with category grades
-  # @return [Array<Hash>] Array of processed category grades.
-  def process_category_grades(question, llm_response)
-    category_lookup = question.categories.without_bonus_category.includes(:criterions).index_by(&:id)
-    llm_response['category_grades'].filter_map do |category_grade|
-      category = category_lookup[category_grade['category_id']]
-      next unless category
-
-      criterion = category.criterions.find { |c| c.id == category_grade['criterion_id'] }
-      next unless criterion
-
-      {
-        category_id: category_grade['category_id'],
-        criterion_id: criterion&.id,
-        grade: criterion&.grade,
-        explanation: category_grade['explanation']
-      }
-    end
-  end
-
   # Updates the answer's selections and total grade based on the graded categories.
   #
   # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer to update.
@@ -134,6 +110,22 @@ def save_draft_post(submission_question, answer, post)
     end
   end
 
+  # Updates an existing AI-generated draft post with new feedback
+  # @param [Course::Discussion::Post] post The existing post to update
+  # @param [Course::Assessment::Answer] answer The answer
+  # @param [String] feedback The new feedback text
+  # @return [void]
+  def update_existing_draft_post(post, answer, feedback)
+    post.class.transaction do
+      post.update!(
+        text: feedback,
+        updater: User.system,
+        title: answer.submission.assessment.title
+      )
+      post.topic.mark_as_pending
+    end
+  end
+
   # Creates a subscription for the discussion topic of the answer post
   # @param [Course::Assessment::Answer] answer The answer to create the subscription for
   # @param [Course::Discussion::Topic] discussion_topic The discussion topic to subscribe to
@@ -148,15 +140,30 @@ def create_topic_subscription(discussion_topic, answer)
     end
   end
 
-  # Creates AI-generated draft feedback post for the answer
-  # @param [Course::Assessment::Answer] answer The answer to create the post for
+  # Finds the latest AI-generated draft post for the submission question
+  # @param [Course::Assessment::SubmissionQuestion] submission_question The submission question
+  # @return [Course::Discussion::Post, nil] The latest AI-generated draft post or nil if none exists
+  def find_existing_ai_draft_post(submission_question)
+    submission_question.posts.
+      where(is_ai_generated: true, workflow_state: 'draft').
+      last
+  end
+
+  # Creates or updates AI-generated draft feedback post for the answer
+  # @param [Course::Assessment::Answer] answer The answer to create/update the post for
   # @param [String] feedback The feedback text to include in the post
   # @return [void]
   def create_ai_generated_draft_post(answer, feedback)
     submission_question = answer.submission.submission_questions.find_by(question_id: answer.question_id)
     return unless submission_question
 
-    post = build_draft_post(submission_question, answer, feedback)
-    save_draft_post(submission_question, answer, post)
+    existing_post = find_existing_ai_draft_post(submission_question)
+
+    if existing_post
+      update_existing_draft_post(existing_post, answer, feedback)
+    else
+      post = build_draft_post(submission_question, answer, feedback)
+      save_draft_post(submission_question, answer, post)
+    end
   end
 end
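The find-or-update path makes regrading idempotent: re-running auto-grading refreshes the existing AI draft instead of stacking a new post per run. A rough sketch of the intended behaviour, assuming an answer that gets graded twice:

    service = Course::Assessment::Answer::RubricAutoGradingService.new
    service.evaluate(answer) # first run: builds and saves a new draft post
    service.evaluate(answer) # regrade: finds that draft, updates its text in place,
                             # and marks its topic as pending review again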
app/services/course/assessment/answer/rubric_llm_service.rb (123 changes: 98 additions & 25 deletions)
@@ -1,11 +1,6 @@
 # frozen_string_literal: true
-class Course::Assessment::Answer::RubricLlmService
-  @output_schema = JSON.parse(
-    File.read('app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json')
-  )
-  @output_parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(
-    @output_schema
-  )
+class Course::Assessment::Answer::RubricLlmService # rubocop:disable Metrics/ClassLength
+  MAX_RETRIES = 1
   @system_prompt = Langchain::Prompt.load_from_path(
     file_path: 'app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json'
   )
@@ -15,7 +10,7 @@ class Course::Assessment::Answer::RubricLlmService
   @llm = LANGCHAIN_OPENAI
 
   class << self
-    attr_reader :system_prompt, :user_prompt, :output_schema, :output_parser
+    attr_reader :system_prompt, :user_prompt
     attr_accessor :llm
   end
 
@@ -25,30 +20,61 @@ class << self
   # @param [Course::Assessment::Answer::RubricBasedResponse] answer The student's answer.
   # @return [Hash] The LLM's evaluation response.
   def evaluate(question, answer)
-    formatted_system_prompt = self.class.system_prompt.format
-    formatted_user_prompt = self.class.user_prompt.format(
+    formatted_system_prompt = self.class.system_prompt.format(
       question_title: question.title,
       question_description: question.description,
       rubric_categories: format_rubric_categories(question),
-      answer_text: answer.answer_text,
       custom_prompt: question.ai_grading_custom_prompt
     )
+    formatted_user_prompt = self.class.user_prompt.format(
+      answer_text: answer.answer_text
+    )
     messages = [
       { role: 'system', content: formatted_system_prompt },
       { role: 'user', content: formatted_user_prompt }
     ]
-    response = self.class.llm.chat(
-      messages: messages,
-      response_format: {
-        type: 'json_schema',
-        json_schema: {
-          name: 'rubric_grading_output',
-          strict: true,
-          schema: self.class.output_schema
-        }
-      }
-    ).completion
-    parse_llm_response(response)
+    dynamic_schema = generate_dynamic_schema(question)
+    output_parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(dynamic_schema)
+    llm_response = call_llm_with_retries(messages, dynamic_schema, output_parser)
+    llm_response['category_grades'] = process_category_grades(llm_response['category_grades'])
+    llm_response
   end
+
+  # Generates dynamic JSON schema with separate fields for each category
+  # @param [Course::Assessment::Question::RubricBasedResponse] question The question to be graded.
+  # @return [Hash] Dynamic JSON schema with category-specific fields
+  def generate_dynamic_schema(question)
+    dynamic_schema = JSON.parse(
+      File.read('app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json')
+    )
+    question.categories.without_bonus_category.includes(:criterions).each do |category|
+      field_name = "category_#{category.id}"
+      dynamic_schema['properties']['category_grades']['properties'][field_name] =
+        build_category_schema(category, field_name)
+      dynamic_schema['properties']['category_grades']['required'] << field_name
+    end
+    dynamic_schema
+  end
+
+  def build_category_schema(category, field_name)
+    criterion_ids_with_grades = category.criterions.map { |c| "criterion_#{c.id}_grade_#{c.grade}" }
+    {
+      'type' => 'object',
+      'properties' => {
+        'criterion_id_with_grade' => {
+          'type' => 'string',
+          'enum' => criterion_ids_with_grades,
+          'description' => "Selected criterion for #{field_name}"
+        },
+        'explanation' => {
+          'type' => 'string',
+          'description' => "Explanation for selected criterion in #{field_name}"
+        }
+      },
+      'required' => ['criterion_id_with_grade', 'explanation'],
+      'additionalProperties' => false,
+      'description' => "Selected criterion and explanation for #{field_name} #{category.name}"
+    }
+  end
 
   # Formats rubric categories for inclusion in the LLM prompt
@@ -68,14 +94,61 @@ def format_rubric_categories(question)
     end.join("\n\n")
   end
 
-  # Parses LLM response with retry logic for handling parsing failures
+  # Processes the category grades from the LLM response
+  # @param [Hash] category_grades The category grades from the LLM response
+  # @return [Array<Hash>] An array of hashes with category_id, criterion_id, grade, and explanation
+  def process_category_grades(category_grades)
+    category_grades.map do |field_name, category_grade|
+      criterion_id, grade = category_grade['criterion_id_with_grade'].match(/criterion_(\d+)_grade_(\d+)/).captures
+      {
+        category_id: field_name.match(/category_(\d+)/).captures.first.to_i,
+        criterion_id: criterion_id.to_i,
+        grade: grade.to_i,
+        explanation: category_grade['explanation']
+      }
+    end
+  end
+
+  # Parses LLM response with OutputFixingParser for handling parsing failures
   # @param [String] response The raw LLM response to parse
+  # @param [Langchain::OutputParsers::StructuredOutputParser] output_parser The parser to use
   # @return [Hash] The parsed response as a structured hash
-  def parse_llm_response(response)
+  def parse_llm_response(response, output_parser)
     fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm(
       llm: self.class.llm,
-      parser: self.class.output_parser
+      parser: output_parser
     )
     fix_parser.parse(response)
   end
+
+  # Calls LLM with retry mechanism for parsing failures
+  # @param [Array] messages The messages to send to LLM
+  # @param [Hash] schema The JSON schema for response format
+  # @param [Langchain::OutputParsers::StructuredOutputParser] output_parser The parser for LLM response
+  # @return [Hash] The parsed LLM response
+  def call_llm_with_retries(messages, schema, output_parser)
+    retries = 0
+    begin
+      response = self.class.llm.chat(
+        messages: messages,
+        response_format: {
+          type: 'json_schema',
+          json_schema: {
+            name: 'rubric_grading_response',
+            strict: true,
+            schema: schema
+          }
+        }
+      ).completion
+      output_parser.parse(response)
+    rescue Langchain::OutputParsers::OutputParserException
+      if retries < MAX_RETRIES
+        retries += 1
+        retry
+      else
+        # If parsing fails after retries, use OutputFixingParser fallback
+        parse_llm_response(response, output_parser)
+      end
+    end
+  end
 end
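Encoding each criterion as an enum value of the form criterion_<id>_grade_<grade> lets the JSON schema itself restrict the model to valid criterion choices, which is why the old post-hoc filtering in the auto-grading service's process_category_grades could be dropped. A hypothetical round trip for a category with ID 7 whose chosen criterion 22 is worth 3 points; IDs and text are invented:

    # build_category_schema would emit an enum such as
    #   ['criterion_21_grade_0', 'criterion_22_grade_3']
    # and process_category_grades decodes the model's pick back into IDs:
    Course::Assessment::Answer::RubricLlmService.new.process_category_grades(
      'category_7' => {
        'criterion_id_with_grade' => 'criterion_22_grade_3',
        'explanation' => 'Ideas are well organised.'
      }
    )
    # => [{ category_id: 7, criterion_id: 22, grade: 3, explanation: 'Ideas are well organised.' }]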
@@ -41,7 +41,9 @@ json.categoryGrades answer.selections.includes(:criterion).map do |selection|
 end
 
 posts = answer.submission.submission_questions.find_by(question_id: answer.question_id)&.discussion_topic&.posts
-ai_generated_comment = posts&.select(&:is_ai_generated)&.last
+ai_generated_comment = posts&.select do |post|
+  post.is_ai_generated && post.workflow_state == 'draft'
+end&.last
 if ai_generated_comment
   json.aiGeneratedComment do
     json.partial! ai_generated_comment
@@ -19,7 +19,9 @@ export default function (state = {}, action) {
       ...state,
       [topicId]: {
         ...state[topicId],
-        postIds: [...state[topicId].postIds, postId],
+        postIds: state[topicId].postIds.includes(postId)
+          ? state[topicId].postIds
+          : [...state[topicId].postIds, postId],
       },
     };
   }