diff --git a/dingo/model/llm/compare/llm_html_extract_compare_v3.py b/dingo/model/llm/compare/llm_html_extract_compare_v3.py index 9abfeee9..202294d6 100644 --- a/dingo/model/llm/compare/llm_html_extract_compare_v3.py +++ b/dingo/model/llm/compare/llm_html_extract_compare_v3.py @@ -215,7 +215,7 @@ def process_response(cls, response: str) -> EvalDetail: tmp_type = "EXTRACTION_EQUAL" result.status = response_model.score != 1 - result.label = [f"{tmp_type}.{response_model.name}"] + result.label = [f"{tmp_type}"] result.reason = [json.dumps(response_json, ensure_ascii=False)] return result diff --git a/test/scripts/model/llm/test_llm_html_extract_compare_v3.py b/test/scripts/model/llm/test_llm_html_extract_compare_v3.py index b2546ff1..308635aa 100644 --- a/test/scripts/model/llm/test_llm_html_extract_compare_v3.py +++ b/test/scripts/model/llm/test_llm_html_extract_compare_v3.py @@ -75,7 +75,7 @@ def test_score_1_prompt_better(self): ) result = LLMHtmlExtractCompareV3.process_response(raw) assert result.metric == "LLMHtmlExtractCompareV3" - assert result.label == ["PROMPT_BETTER.Error_Content_Coverage"] + assert result.label == ["PROMPT_BETTER"] assert result.status is False parsed = json.loads(result.reason[0]) assert parsed["score"] == 1 @@ -86,7 +86,7 @@ def test_score_2_content_better(self): ensure_ascii=False, ) result = LLMHtmlExtractCompareV3.process_response(raw) - assert result.label == ["CONTENT_BETTER.Error_Formula"] + assert result.label == ["CONTENT_BETTER"] assert result.status is True def test_score_0_extraction_equal(self): @@ -95,7 +95,7 @@ def test_score_0_extraction_equal(self): ensure_ascii=False, ) result = LLMHtmlExtractCompareV3.process_response(raw) - assert result.label == ["EXTRACTION_EQUAL.None"] + assert result.label == ["EXTRACTION_EQUAL"] assert result.status is True def test_json_fenced_with_markdown(self): @@ -111,7 +111,7 @@ def test_redacted_thinking_appended_to_reason(self): '{"score": 2, "name": "Error_Table", "reason": "Brief."}' ) result = LLMHtmlExtractCompareV3.process_response(body) - assert "CONTENT_BETTER.Error_Table" == result.label[0] + assert "CONTENT_BETTER" == result.label[0] parsed = json.loads(result.reason[0]) assert "internal" in parsed["reason"]