witw78

psas_a_evaluation.py

Apr 20th, 2025
17
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.72 KB | None | 0 0
  1. import os
  2. import json
  3. from sentence_transformers import SentenceTransformer
  4. from sklearn.metrics.pairwise import cosine_similarity
  5. import torch
  6. import numpy as np
  7. from scipy import stats
  8.  
  9. # 读取 JSON 文件
  10. def read_json_file(file_path):
  11.     try:
  12.         with open(file_path, 'r', encoding='utf-8') as file:
  13.             data = json.load(file)
  14.         return data
  15.     except FileNotFoundError:
  16.         print(f"错误: 文件 {file_path} 未找到。")
  17.         return []
  18.     except json.JSONDecodeError:
  19.         print(f"错误: 无法解析 {file_path} 中的 JSON 数据。")
  20.         return []
  21.  
  22. # 从模型输出中提取答案
  23. def extract_answers(model_output):
  24.     # 这里可以使用更复杂的方法来提取答案,目前简单返回输出
  25.     return model_output
  26.  
  27. # 验证语义一致性
  28. def verify_semantic_consistency(extracted_answer, correct_answer, model):
  29.     # 使用 SentenceTransformer 计算语义相似度
  30.     embeddings = model.encode([extracted_answer, correct_answer])
  31.     similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
  32.     return similarity
  33.  
  34. # 计算得分和详细信息
  35. def calculate_score(data, embedding_model):
  36.     total_score = 0
  37.     question_details = []
  38.     for i, item in enumerate(data):
  39.         question = item["question_structure"]
  40.         correct_answers = item["answer"]
  41.         model_output = item["model_output"]
  42.         extracted_answers = extract_answers(model_output)
  43.         sub_question_details = []
  44.         for j, correct_answer in enumerate(correct_answers):
  45.             if j < len(extracted_answers):
  46.                 extracted_answer = extracted_answers[j]
  47.                 similarity = verify_semantic_consistency(extracted_answer, correct_answer, embedding_model)
  48.                 score = 1 if similarity > 0.7 else 0
  49.                 sub_question_details.append({
  50.                     "sub_question_index": j + 1,
  51.                     "correct_answer": correct_answer,
  52.                     "extracted_answer": extracted_answer,
  53.                     "similarity": similarity,
  54.                     "is_correct": similarity > 0.7,
  55.                     "score": score
  56.                 })
  57.                 total_score += score
  58.         question_details.append({
  59.             "question_index": i + 1,
  60.             "question_structure": question,
  61.             "sub_question_details": sub_question_details
  62.         })
  63.     return total_score, question_details
  64.  
  65. if torch.cuda.is_available():
  66.     device = torch.device("cuda")
  67.     print(f"Using GPU: {torch.cuda.get_device_name(0)}")
  68. else:
  69.     device = torch.device("cpu")
  70.     print("GPU is not available, using CPU instead.")
  71.  
  72. # 将数据转换为 Markdown 格式
  73. def convert_to_markdown(original_results, modified_results):
  74.     markdown = "# 模型对比结果\n\n"
  75.     similarity_diffs = []
  76.     original_similarities = []
  77.     modified_similarities = []
  78.  
  79.     markdown += "### 子问题相似度差值详情\n"
  80.     for i, question in enumerate(original_results["question_details"]):
  81.         markdown += f"- **问题编号**: {question['question_index']}\n"
  82.         markdown += f"  - **问题结构**: {question['question_structure']}\n"
  83.         markdown += "  - **子问题详情**:\n"
  84.         for j, sub_question in enumerate(question["sub_question_details"]):
  85.             modified_sub_question = modified_results["question_details"][i]["sub_question_details"][j]
  86.             similarity_diff = sub_question["similarity"] - modified_sub_question["similarity"]
  87.             similarity_diffs.append(similarity_diff)
  88.             original_similarities.append(sub_question["similarity"])
  89.             modified_similarities.append(modified_sub_question["similarity"])
  90.             markdown += f"    - **子问题编号**: {sub_question['sub_question_index']}\n"
  91.             markdown += f"      - **子问题相似度差值(原始 - 修改后)**: {similarity_diff:.4f}\n"
  92.         markdown += "\n"
  93.  
  94.     # 原始模型子问题相似度统计分析
  95.     if original_similarities:
  96.         original_mean = np.mean(original_similarities)
  97.         original_median = np.median(original_similarities)
  98.         original_std = np.std(original_similarities)
  99.         original_min = np.min(original_similarities)
  100.         original_max = np.max(original_similarities)
  101.         original_skewness = stats.skew(original_similarities)
  102.         original_kurtosis = stats.kurtosis(original_similarities)
  103.  
  104.         markdown += "### 原始模型子问题相似度统计分析\n"
  105.         markdown += f"- **均值**: {original_mean:.4f}\n"
  106.         markdown += f"- **中位数**: {original_median:.4f}\n"
  107.         markdown += f"- **标准差**: {original_std:.4f}\n"
  108.         markdown += f"- **最小值**: {original_min:.4f}\n"
  109.         markdown += f"- **最大值**: {original_max:.4f}\n"
  110.         markdown += f"- **偏度**: {original_skewness:.4f}\n"
  111.         markdown += f"- **峰度**: {original_kurtosis:.4f}\n"
  112.  
  113.     # 修改后模型子问题相似度统计分析
  114.     if modified_similarities:
  115.         modified_mean = np.mean(modified_similarities)
  116.         modified_median = np.median(modified_similarities)
  117.         modified_std = np.std(modified_similarities)
  118.         modified_min = np.min(modified_similarities)
  119.         modified_max = np.max(modified_similarities)
  120.         modified_skewness = stats.skew(modified_similarities)
  121.         modified_kurtosis = stats.kurtosis(modified_similarities)
  122.  
  123.         markdown += "### 修改后模型子问题相似度统计分析\n"
  124.         markdown += f"- **均值**: {modified_mean:.4f}\n"
  125.         markdown += f"- **中位数**: {modified_median:.4f}\n"
  126.         markdown += f"- **标准差**: {modified_std:.4f}\n"
  127.         markdown += f"- **最小值**: {modified_min:.4f}\n"
  128.         markdown += f"- **最大值**: {modified_max:.4f}\n"
  129.         markdown += f"- **偏度**: {modified_skewness:.4f}\n"
  130.         markdown += f"- **峰度**: {modified_kurtosis:.4f}\n"
  131.  
  132.     # 子问题相似度差值统计分析
  133.     if similarity_diffs:
  134.         mean_diff = np.mean(similarity_diffs)
  135.         median_diff = np.median(similarity_diffs)
  136.         std_diff = np.std(similarity_diffs)
  137.         min_diff = np.min(similarity_diffs)
  138.         max_diff = np.max(similarity_diffs)
  139.         skewness = stats.skew(similarity_diffs)
  140.         kurtosis = stats.kurtosis(similarity_diffs)
  141.  
  142.         markdown += "### 子问题相似度差值统计分析\n"
  143.         markdown += f"- **均值**: {mean_diff:.4f}\n"
  144.         markdown += f"- **中位数**: {median_diff:.4f}\n"
  145.         markdown += f"- **标准差**: {std_diff:.4f}\n"
  146.         markdown += f"- **最小值**: {min_diff:.4f}\n"
  147.         markdown += f"- **最大值**: {max_diff:.4f}\n"
  148.         markdown += f"- **偏度**: {skewness:.4f}\n"
  149.         markdown += f"- **峰度**: {kurtosis:.4f}\n"
  150.  
  151.     return markdown
  152.  
  153. # 主函数
  154. def main():
  155.     original_file_path = '/home/xie.zhongwei/original_model_results.json'
  156.     modified_file_path = '/home/xie.zhongwei/modified_model_results.json'
  157.     output_result_path = 'comparison_results.md'
  158.  
  159.     original_data = read_json_file(original_file_path)
  160.     modified_data = read_json_file(modified_file_path)
  161.  
  162.     if original_data and modified_data:
  163.         # 加载 SentenceTransformer 模型并指定使用 GPU
  164.         embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
  165.  
  166.         original_score, original_details = calculate_score(original_data, embedding_model)
  167.         modified_score, modified_details = calculate_score(modified_data, embedding_model)
  168.  
  169.         original_results = {
  170.             "total_score": original_score,
  171.             "question_details": original_details
  172.         }
  173.         modified_results = {
  174.             "total_score": modified_score,
  175.             "question_details": modified_details
  176.         }
  177.  
  178.         # 转换 bool_ 为 bool 以避免 JSON 序列化问题
  179.         def convert_bool_to_python_bool(result):
  180.             for question in result["question_details"]:
  181.                 for sub_question in question["sub_question_details"]:
  182.                     if isinstance(sub_question["is_correct"], np.bool_):
  183.                         sub_question["is_correct"] = bool(sub_question["is_correct"])
  184.             return result
  185.  
  186.         original_results = convert_bool_to_python_bool(original_results)
  187.         modified_results = convert_bool_to_python_bool(modified_results)
  188.  
  189.         # 转换为 Markdown 格式
  190.         markdown_content = convert_to_markdown(original_results, modified_results)
  191.  
  192.         with open(output_result_path, 'w', encoding='utf-8') as f:
  193.             f.write(markdown_content)
  194.  
  195.         print(f"对比结果已保存到 {output_result_path}")
  196.         print(f"原始模型总得分: {original_score}")
  197.         print(f"修改后模型总得分: {modified_score}")
  198.  
  199. if __name__ == "__main__":
  200.     main()
  201.    
Add Comment
Please, Sign In to add comment