diff --git a/app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json b/app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json index a0f0e4d8f85..2595f72067c 100644 --- a/app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json +++ b/app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json @@ -3,31 +3,16 @@ "type": "object", "properties": { "category_grades": { - "type": "array", - "items": { - "type": "object", - "properties": { - "category_id": { - "type": "integer", - "description": "The ID of the rubric category, must be one of the listed categories for the rubric" - }, - "criterion_id": { - "type": "integer", - "description": "The ID of the criterion within the rubric category, must be one of the listed criteria for the rubric category" - }, - "explanation": { - "type": "string", - "description": "An explanation for why the criterion was selected" - } - }, - "required": ["category_id", "criterion_id", "explanation"], - "additionalProperties": false + "type": "object", + "properties": { }, - "description": "A list of criterions selected for each rubric category with explanations" + "required": [], + "additionalProperties": false, + "description": "A mapping of categories to their selected criterion and explanation" }, "overall_feedback": { "type": "string", - "description": "General feedback about the student's response, provided in HTML format and focused on how the student can improve according to the rubric" + "description": "General feedback about the student's response, provided in HTML format" } }, "required": ["category_grades", "overall_feedback"], diff --git a/app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json b/app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json index 6d65e6f0181..b0cf16daae4 100644 --- a/app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json +++ b/app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json @@ -1,5 +1,5 @@ { "_type": "prompt", - "input_variables": ["format_instructions"], - "template": "You are an expert grading assistant for educational assessments.\nYour task is to grade a student's response to a rubric-based question.\nYou will be provided with:\n1. The teacher's instructions\n\n2. The question details\n3. The rubric categories and criteria\n4. The student's response\nYou must analyze how well the student's response meets each rubric category's criteria\nand provide feedback accordingly.\n\nIf teacher instruction is provided, you must follow it. This may include question context, model answers, or desired feedback tone.\n\nThe `overall_feedback` field **must be written in HTML** to support rich text rendering. It should provide actionable suggestions for improvement when appropriate, or acknowledge strengths if the response is good.\n\n{format_instructions}" + "input_variables": ["question_title", "question_description", "rubric_categories", "custom_prompt"], + "template": "You are an expert grading assistant for educational assessments.\nYour task is to grade a student's response to a rubric-based question. You will be given:\n1. Teacher instructions:\n{custom_prompt}\n2. Question details:\n{question_title}\n{question_description}\n3. 
Rubric categories and criteria:\n{rubric_categories}\nThe student's answer will be provided as the content of the user message (might be blank).\nYou must analyze how well the student's response meets each rubric category's criteria and provide feedback accordingly.\nThe `overall_feedback` field **must be written in HTML** to support rich text rendering. Unless teacher instructions specify otherwise, it should compliment students if their responses are good, or provide actionable suggestions for improvement if they are weak." } diff --git a/app/services/course/assessment/answer/prompts/rubric_auto_grading_user_prompt.json b/app/services/course/assessment/answer/prompts/rubric_auto_grading_user_prompt.json index ed65a89ef9c..a241df5441b 100644 --- a/app/services/course/assessment/answer/prompts/rubric_auto_grading_user_prompt.json +++ b/app/services/course/assessment/answer/prompts/rubric_auto_grading_user_prompt.json @@ -1,5 +1,5 @@ { "_type": "prompt", - "input_variables": ["question_title", "question_description", "rubric_categories", "answer_text", "custom_prompt"], - "template": "TEACHER INSTRUCTIONS:\n{custom_prompt}\n\nQUESTION:\n{question_title}\n{question_description}\n\nRUBRIC CATEGORIES:\n{rubric_categories}\n\nSTUDENT RESPONSE:\n{answer_text}" + "input_variables": ["answer_text"], + "template": "{answer_text}" } diff --git a/app/services/course/assessment/answer/rubric_auto_grading_service.rb b/app/services/course/assessment/answer/rubric_auto_grading_service.rb index ea9a84c37d7..b5dfad93963 100644 --- a/app/services/course/assessment/answer/rubric_auto_grading_service.rb +++ b/app/services/course/assessment/answer/rubric_auto_grading_service.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -class Course::Assessment::Answer::RubricAutoGradingService < - Course::Assessment::Answer::AutoGradingService +class Course::Assessment::Answer::RubricAutoGradingService < Course::Assessment::Answer::AutoGradingService # rubocop:disable Metrics/ClassLength def evaluate(answer) answer.correct, grade, messages, feedback = evaluate_answer(answer.actable) answer.auto_grading.result = { messages: messages } @@ -12,23 +11,22 @@ def evaluate(answer) # Grades the given answer. # - # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer specified by the + # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer specified. # @return [Array<(Boolean, Integer, Object, String)>] The correct status, grade, messages to be # assigned to the grading, and feedback for the draft post. def evaluate_answer(answer) question = answer.question.actable llm_service = Course::Assessment::Answer::RubricLlmService.new llm_response = llm_service.evaluate(question, answer) - process_llm_grading_response(question, answer, llm_response) + process_llm_grading_response(answer, llm_response) end # Processes the LLM response into grades and feedback, and updates the answer. - # @param [Course::Assessment::Question::RubricBasedResponse] question The question to be graded. # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer to update. # @param [Hash] llm_response The parsed LLM response containing grading information # @return [Array<(Boolean, Integer, Object, String)>] The correct status, grade, and feedback messages. 
- def process_llm_grading_response(question, answer, llm_response) - category_grades = process_category_grades(question, llm_response) + def process_llm_grading_response(answer, llm_response) + category_grades = llm_response['category_grades'] # For rubric-based questions, update the answer's selections and grade to database update_answer_selections(answer, category_grades) @@ -38,28 +36,6 @@ def process_llm_grading_response(question, answer, llm_response) [true, grade, ['success'], llm_response['overall_feedback']] end - # Processes category grades from LLM response into a structured format - # @param [Course::Assessment::Question::RubricBasedResponse] question The question to be graded. - # @param [Hash] llm_response The parsed LLM response with category grades - # @return [Array] Array of processed category grades. - def process_category_grades(question, llm_response) - category_lookup = question.categories.without_bonus_category.includes(:criterions).index_by(&:id) - llm_response['category_grades'].filter_map do |category_grade| - category = category_lookup[category_grade['category_id']] - next unless category - - criterion = category.criterions.find { |c| c.id == category_grade['criterion_id'] } - next unless criterion - - { - category_id: category_grade['category_id'], - criterion_id: criterion&.id, - grade: criterion&.grade, - explanation: category_grade['explanation'] - } - end - end - # Updates the answer's selections and total grade based on the graded categories. # # @param [Course::Assessment::Answer::RubricBasedResponse] answer The answer to update. @@ -134,6 +110,22 @@ def save_draft_post(submission_question, answer, post) end end + # Updates an existing AI-generated draft post with new feedback + # @param [Course::Discussion::Post] post The existing post to update + # @param [Course::Assessment::Answer] answer The answer + # @param [String] feedback The new feedback text + # @return [void] + def update_existing_draft_post(post, answer, feedback) + post.class.transaction do + post.update!( + text: feedback, + updater: User.system, + title: answer.submission.assessment.title + ) + post.topic.mark_as_pending + end + end + # Creates a subscription for the discussion topic of the answer post # @param [Course::Assessment::Answer] answer The answer to create the subscription for # @param [Course::Discussion::Topic] discussion_topic The discussion topic to subscribe to @@ -148,15 +140,30 @@ def create_topic_subscription(discussion_topic, answer) end end - # Creates AI-generated draft feedback post for the answer - # @param [Course::Assessment::Answer] answer The answer to create the post for + # Finds the latest AI-generated draft post for the submission question + # @param [Course::Assessment::SubmissionQuestion] submission_question The submission question + # @return [Course::Discussion::Post, nil] The latest AI-generated draft post or nil if none exists + def find_existing_ai_draft_post(submission_question) + submission_question.posts. + where(is_ai_generated: true, workflow_state: 'draft'). 
+ last + end + + # Creates or updates AI-generated draft feedback post for the answer + # @param [Course::Assessment::Answer] answer The answer to create/update the post for # @param [String] feedback The feedback text to include in the post # @return [void] def create_ai_generated_draft_post(answer, feedback) submission_question = answer.submission.submission_questions.find_by(question_id: answer.question_id) return unless submission_question - post = build_draft_post(submission_question, answer, feedback) - save_draft_post(submission_question, answer, post) + existing_post = find_existing_ai_draft_post(submission_question) + + if existing_post + update_existing_draft_post(existing_post, answer, feedback) + else + post = build_draft_post(submission_question, answer, feedback) + save_draft_post(submission_question, answer, post) + end end end diff --git a/app/services/course/assessment/answer/rubric_llm_service.rb b/app/services/course/assessment/answer/rubric_llm_service.rb index 76555624cbf..c4513e66f44 100644 --- a/app/services/course/assessment/answer/rubric_llm_service.rb +++ b/app/services/course/assessment/answer/rubric_llm_service.rb @@ -1,11 +1,6 @@ # frozen_string_literal: true -class Course::Assessment::Answer::RubricLlmService - @output_schema = JSON.parse( - File.read('app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json') - ) - @output_parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema( - @output_schema - ) +class Course::Assessment::Answer::RubricLlmService # rubocop:disable Metrics/ClassLength + MAX_RETRIES = 1 @system_prompt = Langchain::Prompt.load_from_path( file_path: 'app/services/course/assessment/answer/prompts/rubric_auto_grading_system_prompt.json' ) @@ -15,7 +10,7 @@ class Course::Assessment::Answer::RubricLlmService @llm = LANGCHAIN_OPENAI class << self - attr_reader :system_prompt, :user_prompt, :output_schema, :output_parser + attr_reader :system_prompt, :user_prompt attr_accessor :llm end @@ -25,30 +20,61 @@ class << self # @param [Course::Assessment::Answer::RubricBasedResponse] answer The student's answer. # @return [Hash] The LLM's evaluation response. def evaluate(question, answer) - formatted_system_prompt = self.class.system_prompt.format - formatted_user_prompt = self.class.user_prompt.format( + formatted_system_prompt = self.class.system_prompt.format( question_title: question.title, question_description: question.description, rubric_categories: format_rubric_categories(question), - answer_text: answer.answer_text, custom_prompt: question.ai_grading_custom_prompt ) + formatted_user_prompt = self.class.user_prompt.format( + answer_text: answer.answer_text + ) messages = [ { role: 'system', content: formatted_system_prompt }, { role: 'user', content: formatted_user_prompt } ] - response = self.class.llm.chat( - messages: messages, - response_format: { - type: 'json_schema', - json_schema: { - name: 'rubric_grading_output', - strict: true, - schema: self.class.output_schema + dynamic_schema = generate_dynamic_schema(question) + output_parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(dynamic_schema) + llm_response = call_llm_with_retries(messages, dynamic_schema, output_parser) + llm_response['category_grades'] = process_category_grades(llm_response['category_grades']) + llm_response + end + + # Generates dynamic JSON schema with separate fields for each category + # @param [Course::Assessment::Question::RubricBasedResponse] question The question to be graded. 
+ # @return [Hash] Dynamic JSON schema with category-specific fields + def generate_dynamic_schema(question) + dynamic_schema = JSON.parse( + File.read('app/services/course/assessment/answer/prompts/rubric_auto_grading_output_format.json') + ) + question.categories.without_bonus_category.includes(:criterions).each do |category| + field_name = "category_#{category.id}" + dynamic_schema['properties']['category_grades']['properties'][field_name] = + build_category_schema(category, field_name) + dynamic_schema['properties']['category_grades']['required'] << field_name + end + dynamic_schema + end + + def build_category_schema(category, field_name) + criterion_ids_with_grades = category.criterions.map { |c| "criterion_#{c.id}_grade_#{c.grade}" } + { + 'type' => 'object', + 'properties' => { + 'criterion_id_with_grade' => { + 'type' => 'string', + 'enum' => criterion_ids_with_grades, + 'description' => "Selected criterion for #{field_name}" + }, + 'explanation' => { + 'type' => 'string', + 'description' => "Explanation for selected criterion in #{field_name}" } - } - ).completion - parse_llm_response(response) + }, + 'required' => ['criterion_id_with_grade', 'explanation'], + 'additionalProperties' => false, + 'description' => "Selected criterion and explanation for #{field_name} #{category.name}" + } end # Formats rubric categories for inclusion in the LLM prompt @@ -68,14 +94,61 @@ def format_rubric_categories(question) end.join("\n\n") end - # Parses LLM response with retry logic for handling parsing failures + # Processes the category grades from the LLM response + # @param [Hash] category_grades The category grades from the LLM response + # @return [Array] An array of hashes with category_id, criterion_id, grade, and explanation + def process_category_grades(category_grades) + category_grades.map do |field_name, category_grade| + criterion_id, grade = category_grade['criterion_id_with_grade'].match(/criterion_(\d+)_grade_(\d+)/).captures + { + category_id: field_name.match(/category_(\d+)/).captures.first.to_i, + criterion_id: criterion_id.to_i, + grade: grade.to_i, + explanation: category_grade['explanation'] + } + end + end + + # Parses LLM response with OutputFixingParser for handling parsing failures # @param [String] response The raw LLM response to parse + # @param [Langchain::OutputParsers::StructuredOutputParser] output_parser The parser to use # @return [Hash] The parsed response as a structured hash - def parse_llm_response(response) + def parse_llm_response(response, output_parser) fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm( llm: self.class.llm, - parser: self.class.output_parser + parser: output_parser ) fix_parser.parse(response) end + + # Calls LLM with retry mechanism for parsing failures + # @param [Array] messages The messages to send to LLM + # @param [Hash] schema The JSON schema for response format + # @param [Langchain::OutputParsers::StructuredOutputParser] output_parser The parser for LLM response + # @return [Hash] The parsed LLM response + def call_llm_with_retries(messages, schema, output_parser) + retries = 0 + begin + response = self.class.llm.chat( + messages: messages, + response_format: { + type: 'json_schema', + json_schema: { + name: 'rubric_grading_response', + strict: true, + schema: schema + } + } + ).completion + output_parser.parse(response) + rescue Langchain::OutputParsers::OutputParserException + if retries < MAX_RETRIES + retries += 1 + retry + else + # If parsing fails after retries, use OutputFixingParser fallback + 
parse_llm_response(response, output_parser) + end + end + end end diff --git a/app/views/course/assessment/answer/rubric_based_responses/_rubric_based_response.json.jbuilder b/app/views/course/assessment/answer/rubric_based_responses/_rubric_based_response.json.jbuilder index b58e2b3a8da..868810e402a 100644 --- a/app/views/course/assessment/answer/rubric_based_responses/_rubric_based_response.json.jbuilder +++ b/app/views/course/assessment/answer/rubric_based_responses/_rubric_based_response.json.jbuilder @@ -41,7 +41,9 @@ json.categoryGrades answer.selections.includes(:criterion).map do |selection| end posts = answer.submission.submission_questions.find_by(question_id: answer.question_id)&.discussion_topic&.posts -ai_generated_comment = posts&.select(&:is_ai_generated)&.last +ai_generated_comment = posts&.select do |post| + post.is_ai_generated && post.workflow_state == 'draft' +end&.last if ai_generated_comment json.aiGeneratedComment do json.partial! ai_generated_comment diff --git a/client/app/bundles/course/assessment/submission/reducers/topics.js b/client/app/bundles/course/assessment/submission/reducers/topics.js index aa763ca5bcd..ad8f18fa089 100644 --- a/client/app/bundles/course/assessment/submission/reducers/topics.js +++ b/client/app/bundles/course/assessment/submission/reducers/topics.js @@ -19,7 +19,9 @@ export default function (state = {}, action) { ...state, [topicId]: { ...state[topicId], - postIds: [...state[topicId].postIds, postId], + postIds: state[topicId].postIds.includes(postId) + ? state[topicId].postIds + : [...state[topicId].postIds, postId], }, }; } diff --git a/spec/services/course/assessment/answer/rubric_auto_grading_service_spec.rb b/spec/services/course/assessment/answer/rubric_auto_grading_service_spec.rb index 32777d59005..c13578ffa0d 100644 --- a/spec/services/course/assessment/answer/rubric_auto_grading_service_spec.rb +++ b/spec/services/course/assessment/answer/rubric_auto_grading_service_spec.rb @@ -58,67 +58,28 @@ { 'category_grades' => [ { - 'category_id' => question.categories.first.id, - 'criterion_id' => question.categories.first.criterions.last.id, - 'grade' => question.categories.first.criterions.last.grade, - 'explanation' => '1st selection explanation' + category_id: question.categories.first.id, + criterion_id: question.categories.first.criterions.last.id, + grade: question.categories.first.criterions.last.grade, + explanation: '1st selection explanation' }, { - 'category_id' => question.categories.second.id, - 'criterion_id' => question.categories.second.criterions.last.id, - 'grade' => question.categories.second.criterions.last.grade, - 'explanation' => '2nd selection explanation' + category_id: question.categories.second.id, + criterion_id: question.categories.second.criterions.last.id, + grade: question.categories.second.criterions.last.grade, + explanation: '2nd selection explanation' } ], 'overall_feedback' => 'overall feedback' } end - it 'processes category grades' do - result = subject.send(:process_llm_grading_response, question, answer.actable, valid_response) - expect(result[0]).to be true - expect(result[1]).to eq(question.categories.first.criterions.last.grade + - question.categories.second.criterions.last.grade) - expect(result[2]).to contain_exactly('success') - expect(result[3]).to eq('overall feedback') - end it 'updates answer selections' do expect(answer.actable).to receive(:assign_params).with(hash_including(:selections_attributes)) - subject.send(:process_llm_grading_response, question, answer.actable, valid_response) 
+ subject.send(:process_llm_grading_response, answer.actable, valid_response) end end end - describe '#process_category_grades' do - let(:category) { question.categories.first } - let(:criterion) { category.criterions.first } - let(:llm_response) do - { - 'category_grades' => [ - { - 'category_id' => category.id, - 'criterion_id' => criterion.id, - 'explanation' => 'selection explanation' - } - ] - } - end - it 'processes category grades correctly' do - result = subject.send(:process_category_grades, question, llm_response) - expect(result.size).to eq(1) - expect(result.first[:category_id]).to eq(category.id) - expect(result.first[:criterion_id]).to eq(criterion.id) - expect(result.first[:grade]).to eq(criterion.grade) - expect(result.first[:explanation]).to eq('selection explanation') - end - it 'ignores non-existent categories' do - llm_response['category_grades'] << { 'category_id' => -1, 'criterion_id' => -1 } - llm_response['category_grades'] << { 'category_id' => category.id, 'criterion_id' => -1 } - result = subject.send(:process_category_grades, question, llm_response) - expect(result.size).to eq(1) - expect(result.first[:category_id]).to eq(category.id) - end - end - describe '#update_answer_selections' do let(:category_grades) do [ @@ -222,6 +183,21 @@ end end + describe '#update_existing_draft_post' do + let(:submission_question) do + create(:course_assessment_submission_question, submission: submission, question: question.acting_as) + end + let(:existing_post) do + create(:course_discussion_post, topic: submission_question.acting_as, text: 'draft post', is_ai_generated: true, + workflow_state: 'draft') + end + it 'updates the existing post with new feedback' do + expect(existing_post).to receive(:update!) + expect(existing_post.topic).to receive(:mark_as_pending) + subject.send(:update_existing_draft_post, existing_post, answer, 'new draft post') + end + end + describe '#create_topic_subscription' do let(:discussion_topic) { create(:course_discussion_topic) } it 'ensures the student and group managers are subscribed' do @@ -234,6 +210,38 @@ end end + describe '#find_existing_ai_draft_post' do + let(:submission_question) do + create(:course_assessment_submission_question, submission: submission, question: question.acting_as) + end + + context 'when there are no AI-generated draft posts' do + it 'returns nil' do + result = subject.send(:find_existing_ai_draft_post, submission_question) + expect(result).to be_nil + end + end + + context 'when there are AI-generated draft posts' do + let!(:older_ai_draft_post) do + create(:course_discussion_post, topic: submission_question.acting_as, is_ai_generated: true, + workflow_state: 'draft', created_at: 1.hour.ago) + end + let!(:newer_ai_draft_post) do + create(:course_discussion_post, topic: submission_question.acting_as, is_ai_generated: true, + workflow_state: 'draft', created_at: 30.minutes.ago) + end + let!(:ai_published_post) do + create(:course_discussion_post, topic: submission_question.acting_as, is_ai_generated: true, + workflow_state: 'published') + end + it 'returns the most recent AI-generated draft post' do + result = subject.send(:find_existing_ai_draft_post, submission_question) + expect(result).to eq(newer_ai_draft_post) + end + end + end + describe '#create_ai_generated_draft_post' do let(:submission_question) do create(:course_assessment_submission_question, submission: submission, question: question.acting_as) @@ -243,17 +251,39 @@ double(find_by: submission_question) ) end - it 'creates a AI-gernerated draft post' do - 
expect do - subject.send(:create_ai_generated_draft_post, answer, 'draft post') - end.to change { Course::Discussion::Post.count }.by(1) - post = Course::Discussion::Post.last - expect(post.text).to eq('draft post') - expect(post.is_ai_generated).to be true - expect(post.workflow_state).to eq('draft') - expect(post.title).to eq(answer.submission.assessment.title) - expect(post.topic.pending_staff_reply).to be true + + context 'when no existing AI-generated draft post exists' do + it 'creates a new AI-generated draft post' do + expect do + subject.send(:create_ai_generated_draft_post, answer, 'draft post') + end.to change { Course::Discussion::Post.count }.by(1) + post = Course::Discussion::Post.last + expect(post.text).to eq('draft post') + expect(post.is_ai_generated).to be true + expect(post.workflow_state).to eq('draft') + expect(post.title).to eq(answer.submission.assessment.title) + expect(post.topic.pending_staff_reply).to be true + end + end + + context 'when an existing AI-generated draft post exists' do + let!(:existing_post) do + create(:course_discussion_post, topic: submission_question.acting_as, text: 'draft post', + is_ai_generated: true, workflow_state: 'draft') + end + it 'updates the existing post instead of creating a new one' do + expect do + subject.send(:create_ai_generated_draft_post, answer, 'updated draft post') + end.not_to(change { Course::Discussion::Post.count }) + existing_post.reload + expect(existing_post.text).to eq('updated draft post') + expect(existing_post.is_ai_generated).to be true + expect(existing_post.workflow_state).to eq('draft') + expect(existing_post.title).to eq(answer.submission.assessment.title) + expect(existing_post.topic.pending_staff_reply).to be true + end end + context 'when no submission question exists' do before do allow(answer.submission).to receive(:submission_questions).and_return( diff --git a/spec/services/course/assessment/answer/rubric_llm_service_spec.rb b/spec/services/course/assessment/answer/rubric_llm_service_spec.rb index c78bf7e50b3..3d9c6f5bbbb 100644 --- a/spec/services/course/assessment/answer/rubric_llm_service_spec.rb +++ b/spec/services/course/assessment/answer/rubric_llm_service_spec.rb @@ -20,13 +20,15 @@ expect(subject).to receive(:format_rubric_categories).with(question).and_call_original result = subject.evaluate(question, answer) expect(result).to be_a(Hash) - expect(result['category_grades']).to be_an(Array) - result['category_grades'].each do |grade| - category = categories.find { |c| c.id == grade['category_id'] } - expect(category).to be_present - criterion = category.criterions.find { |c| c.id == grade['criterion_id'] } + category_grades = result['category_grades'] + expect(category_grades).to be_a(Array) + categories.each do |category| + category_grade = category_grades.find { |cg| cg[:category_id] == category.id } + expect(category_grade).to be_present + criterion = category.criterions.find { |c| c.id == category_grade[:criterion_id] } expect(criterion).to be_present - expect(grade['explanation']).to include('Mock explanation for category') + expect(category_grade[:grade]).to eq(criterion.grade) + expect(category_grade[:explanation]).to eq("Mock explanation for category_#{category.id}") end expect(result['overall_feedback']).to include('Mock overall feedback') end @@ -49,34 +51,47 @@ describe '#parse_llm_response' do let(:valid_json) do + category_fields = categories.map do |category| + "\"category_#{category.id}\": { + \"criterion_id_with_grade\": + 
\"criterion_#{category.criterions.first.id}_grade_#{category.criterions.first.grade}\", + \"explanation\": \"selection explanation\" + }" + end.join(',') + <<~JSON { - "category_grades": [ - { - "category_id": #{categories.first.id}, - "criterion_id": #{categories.first.criterions.first.id}, - "explanation": "selection explanation" - } - ], + "category_grades": { #{category_fields} }, "overall_feedback": "overall feedback" } JSON end let(:invalid_json) { '{ "category_grades": [{ "missing": "closing bracket" }' } + + let(:output_parser) do + schema = subject.generate_dynamic_schema(question) + Langchain::OutputParsers::StructuredOutputParser.from_json_schema(schema) + end + context 'with valid JSON' do it 'returns the parsed output' do - result = subject.parse_llm_response(valid_json) + result = subject.parse_llm_response(valid_json, output_parser) expect(result).to eq(JSON.parse(valid_json)) end end context 'with invalid JSON' do it 'attempts to fix and parse the response' do - result = subject.parse_llm_response(invalid_json) - expect(result['category_grades']).to be_an(Array) - result['category_grades'].each do |grade| - expect(grade['category_id']).to be_a(Integer) - expect(grade['criterion_id']).to be_a(Integer) - expect(grade['explanation']).to be_a(String) + result = subject.parse_llm_response(invalid_json, output_parser) + categories.each do |category| + field_name = "category_#{category.id}" + expect(result['category_grades'][field_name]).to be_present + criterion_id_with_grade = result['category_grades'][field_name]['criterion_id_with_grade'] + expect(criterion_id_with_grade).to match(/criterion_(\d+)_grade_(\d+)/) + criterion_id, grade = criterion_id_with_grade.match(/criterion_(\d+)_grade_(\d+)/).captures + criterion = category.criterions.find { |c| c.id == criterion_id.to_i } + expect(criterion).to be_present + expect(grade.to_i).to eq(criterion.grade) + expect(result['category_grades'][field_name]['explanation']).to be_a(String) end expect(result['overall_feedback']).to be_a(String) end diff --git a/spec/support/stubs/langchain/llm_stubs.rb b/spec/support/stubs/langchain/llm_stubs.rb index 9cd550b5071..5a8b0856190 100644 --- a/spec/support/stubs/langchain/llm_stubs.rb +++ b/spec/support/stubs/langchain/llm_stubs.rb @@ -25,61 +25,62 @@ def chat(messages: [], **_kwargs) private def rubric_grading_request?(user_message) - user_message.include?('Category ID:') && user_message.include?('Criterion ID:') && user_message.include?('Grade:') + user_message.include?('') && user_message.include?('') end def output_fixing_request?(user_message) user_message.include?('JSON Schema') end - def handle_output_fixing(_user_message) - # only fix rubric grading output for now + def handle_output_fixing(user_message) + schema = parse_json_schema(user_message) + category_grades = {} + category_properties = schema['properties']['category_grades']['properties'] + category_properties.each do |category_name, category_schema| + category_grades[category_name] = { + 'criterion_id_with_grade' => category_schema['properties']['criterion_id_with_grade']['enum'].first, + 'explanation' => "Mock explanation for #{category_name}" + } + end mock_response = { - 'category_grades' => [ - { - 'category_id' => 1, - 'criterion_id' => 1, - 'explanation' => 'Mock explanation for category 1' - } - ], + 'category_grades' => category_grades, 'overall_feedback' => 'Mock overall feedback' } - MockChatResponse.new(mock_response.to_json) end def handle_rubric_grading(user_message) category_ids = user_message.scan(/Category ID: 
(\d+)/).flatten.map(&:to_i)
-      criterion_ids = extract_random_criterion_ids(user_message)
+      criterion_ids_with_grades = extract_random_criterion(user_message)
 
-      category_grades = category_ids.zip(criterion_ids).map do |category_id, criterion_id|
-        {
-          'category_id' => category_id,
-          'criterion_id' => criterion_id,
-          'explanation' => "Mock explanation for category #{category_id}"
+      # Nest the mocked selections under 'category_grades' so the response
+      # satisfies the strict dynamic schema the service requests.
+      mock_response = { 'category_grades' => {}, 'overall_feedback' => 'Mock overall feedback' }
+      category_ids.zip(criterion_ids_with_grades).each do |category_id, criterion_id_with_grade|
+        mock_response['category_grades']["category_#{category_id}"] = {
+          'criterion_id_with_grade' => criterion_id_with_grade,
+          'explanation' => "Mock explanation for category_#{category_id}"
         }
       end
-
-      mock_response = {
-        'category_grades' => category_grades,
-        'overall_feedback' => 'Mock overall feedback'
-      }
-
       MockChatResponse.new(mock_response.to_json)
     end
 
-    def extract_random_criterion_ids(user_message)
+    # Picks one random criterion per category section, encoded in the
+    # 'criterion_<id>_grade_<grade>' form expected by the schema's enum values.
+    def extract_random_criterion(user_message)
       category_sections = user_message.split(/(?=Category ID: \d+)/).reject(&:empty?)
 
       category_sections.filter_map do |section|
-        criterion_ids = section.scan(/- \[Grade: \d+(?:\.\d+)?, Criterion ID: (\d+)\]/)
-
-        next if criterion_ids.empty?
-
-        criterion_ids.sample.first.to_i
+        criterion = section.scan(/- \[Grade: (\d+(?:\.\d+)?), Criterion ID: (\d+)\]/).sample
+        "criterion_#{criterion[1]}_grade_#{criterion[0]}" if criterion
       end
     end
-  end
 
+    def parse_json_schema(user_message)
+      json_match = user_message.match(/```json\s*(.*?)\s*```/m)
+      JSON.parse(json_match[1])
+    end
+  end
 
 STUBBED_LANGCHAIN_OPENAI = OpenAiStub.new.freeze
 end
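
A minimal standalone sketch of the criterion-encoding contract introduced above, using hypothetical IDs (category 7; criterions 21 and 22 with grades 0 and 5). The real enum values come from generate_dynamic_schema and build_category_schema; the decode step below mirrors the regexes in process_category_grades:

# Enum values as build_category_schema would emit them for one category
# (hypothetical criterion IDs 21 and 22, with grades 0 and 5).
enum = ['criterion_21_grade_0', 'criterion_22_grade_5']

# A structured response conforming to that schema fragment.
category_grades = {
  'category_7' => {
    'criterion_id_with_grade' => enum.last,
    'explanation' => 'Fully addresses the rubric.'
  }
}

# Decode step, mirroring process_category_grades: recover the integer IDs
# and grade that were packed into the enum string.
decoded = category_grades.map do |field_name, grade_info|
  criterion_id, grade = grade_info['criterion_id_with_grade'].match(/criterion_(\d+)_grade_(\d+)/).captures
  {
    category_id: field_name.match(/category_(\d+)/).captures.first.to_i,
    criterion_id: criterion_id.to_i,
    grade: grade.to_i,
    explanation: grade_info['explanation']
  }
end

p decoded
# e.g. [{:category_id=>7, :criterion_id=>22, :grade=>5, :explanation=>"Fully addresses the rubric."}]

Packing the criterion ID and grade into a per-category enum lets the strict json_schema response format reject invalid selections outright, rather than validating free-form integer IDs after the fact as the removed process_category_grades in the auto-grading service did.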