# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import json
import os

import numpy as np
import torch
from scipy import stats
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
def read_json_file(file_path):
    """Load and return the JSON document stored at ``file_path``.

    Args:
        file_path: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON value, or an empty list when the file does not
        exist or does not contain valid JSON. An error message is printed
        in both failure cases.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"错误: 文件 {file_path} 未找到。")
    except json.JSONDecodeError:
        print(f"错误: 无法解析 {file_path} 中的 JSON 数据。")
    # Reaching this point means one of the handlers above fired; the empty
    # list is the "no data" value returned to the caller.
    return []
def extract_answers(model_output):
    """Return the answers contained in ``model_output``.

    This is currently a pass-through: the model output is returned
    unchanged. A more elaborate extraction step can be substituted here.

    NOTE(review): calculate_score indexes the returned value once per
    sub-question. If ``model_output`` is a plain string, that indexing
    yields single characters -- confirm the input schema provides a
    sequence of answers.
    """
    return model_output
def verify_semantic_consistency(extracted_answer, correct_answer, model):
    """Return the cosine similarity between the embeddings of two answers.

    Args:
        extracted_answer: Answer taken from the model output.
        correct_answer: Reference answer to compare against.
        model: Object whose ``encode`` method maps a list of texts to one
            embedding per text (a SentenceTransformer in this script).

    Returns:
        The cosine similarity of the two embeddings as a built-in ``float``.
    """
    # Both texts go through a single encode call.
    extracted_vec, correct_vec = model.encode([extracted_answer, correct_answer])
    similarity = cosine_similarity([extracted_vec], [correct_vec])[0][0]
    # Convert the NumPy scalar so that comparisons on the result produce
    # plain ``bool`` values and the number itself is JSON-serialisable.
    return float(similarity)
def calculate_score(data, embedding_model, threshold=0.7):
    """Score every question in ``data`` against its reference answers.

    Args:
        data: Iterable of dicts, each providing the keys
            ``"question_structure"``, ``"answer"`` (reference answers) and
            ``"model_output"``.
        embedding_model: Embedding model forwarded to
            ``verify_semantic_consistency``.
        threshold: A sub-question counts as correct when its similarity is
            strictly greater than this value. Defaults to 0.7.

    Returns:
        A ``(total_score, question_details)`` tuple. ``total_score`` is the
        number of correct sub-questions; ``question_details`` holds one
        dict per question with its 1-based index, its structure and the
        per-sub-question results.
    """
    total_score = 0
    question_details = []
    for question_number, item in enumerate(data, start=1):
        question = item["question_structure"]
        correct_answers = item["answer"]
        extracted_answers = extract_answers(item["model_output"])

        sub_question_details = []
        # zip stops at the shorter sequence, so reference answers without a
        # matching extracted answer are skipped and add nothing to the score.
        answer_pairs = zip(correct_answers, extracted_answers)
        for sub_number, (correct_answer, extracted_answer) in enumerate(answer_pairs, start=1):
            similarity = verify_semantic_consistency(
                extracted_answer, correct_answer, embedding_model
            )
            # bool() strips a possible NumPy bool so the record stays
            # JSON-serialisable.
            is_correct = bool(similarity > threshold)
            score = 1 if is_correct else 0
            sub_question_details.append({
                "sub_question_index": sub_number,
                "correct_answer": correct_answer,
                "extracted_answer": extracted_answer,
                "similarity": similarity,
                "is_correct": is_correct,
                "score": score,
            })
            total_score += score

        question_details.append({
            "question_index": question_number,
            "question_structure": question,
            "sub_question_details": sub_question_details,
        })
    return total_score, question_details
# Select the compute device once at import time: CUDA when PyTorch reports
# it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("GPU is not available, using CPU instead.")
def _format_stats_section(title, values):
    """Return the Markdown lines of one descriptive-statistics section."""
    return [
        f"### {title}\n",
        f"- **均值**: {np.mean(values):.4f}\n",
        f"- **中位数**: {np.median(values):.4f}\n",
        f"- **标准差**: {np.std(values):.4f}\n",
        f"- **最小值**: {np.min(values):.4f}\n",
        f"- **最大值**: {np.max(values):.4f}\n",
        f"- **偏度**: {stats.skew(values):.4f}\n",
        f"- **峰度**: {stats.kurtosis(values):.4f}\n",
    ]


def convert_to_markdown(original_results, modified_results):
    """Render a Markdown report comparing two sets of scoring results.

    Args:
        original_results: Dict with a ``"question_details"`` list for the
            original model. Each entry provides ``"question_index"``,
            ``"question_structure"`` and ``"sub_question_details"``; each
            sub-question provides ``"sub_question_index"`` and
            ``"similarity"``.
        modified_results: Same structure for the modified model.

    Returns:
        The report as one string: the per-sub-question similarity
        differences (original minus modified), followed by descriptive
        statistics of the original similarities, the modified
        similarities and their differences. A statistics section is
        omitted when it has no values.

    Questions and sub-questions are paired by position. A sub-question
    that has no counterpart in ``modified_results`` is left out of the
    report and of all statistics instead of raising ``IndexError``.
    """
    parts = ["# 模型对比结果\n\n", "### 子问题相似度差值详情\n"]
    similarity_diffs = []
    original_similarities = []
    modified_similarities = []

    modified_questions = modified_results["question_details"]
    for position, question in enumerate(original_results["question_details"]):
        # The leading spaces nest each bullet under its parent list item.
        parts.append(f"- **问题编号**: {question['question_index']}\n")
        parts.append(f"  - **问题结构**: {question['question_structure']}\n")
        parts.append("  - **子问题详情**:\n")

        if position < len(modified_questions):
            modified_subs = modified_questions[position]["sub_question_details"]
        else:
            # No counterpart question: the header is still emitted, but
            # there is nothing to pair the sub-questions with.
            modified_subs = []

        # zip stops at the shorter list, which drops unpaired sub-questions.
        for sub_question, modified_sub_question in zip(
            question["sub_question_details"], modified_subs
        ):
            original_similarity = sub_question["similarity"]
            modified_similarity = modified_sub_question["similarity"]
            similarity_diff = original_similarity - modified_similarity
            similarity_diffs.append(similarity_diff)
            original_similarities.append(original_similarity)
            modified_similarities.append(modified_similarity)
            parts.append(f"    - **子问题编号**: {sub_question['sub_question_index']}\n")
            parts.append(f"      - **子问题相似度差值(原始 - 修改后)**: {similarity_diff:.4f}\n")
        parts.append("\n")

    sections = (
        ("原始模型子问题相似度统计分析", original_similarities),
        ("修改后模型子问题相似度统计分析", modified_similarities),
        ("子问题相似度差值统计分析", similarity_diffs),
    )
    for title, values in sections:
        if values:
            parts.extend(_format_stats_section(title, values))
    return "".join(parts)
def _coerce_numpy_bools(result):
    """Replace NumPy ``is_correct`` flags in ``result`` with built-in bools.

    The dict is modified in place and also returned.
    """
    for question in result["question_details"]:
        for sub_question in question["sub_question_details"]:
            if isinstance(sub_question["is_correct"], np.bool_):
                sub_question["is_correct"] = bool(sub_question["is_correct"])
    return result


def main(
    original_file_path='/home/xie.zhongwei/original_model_results.json',
    modified_file_path='/home/xie.zhongwei/modified_model_results.json',
    output_result_path='comparison_results.md',
):
    """Score both result files and write the Markdown comparison report.

    Args:
        original_file_path: JSON results of the original model.
        modified_file_path: JSON results of the modified model.
        output_result_path: Destination of the Markdown report.

    Nothing is scored or written when either input file is missing,
    invalid or empty.
    """
    original_data = read_json_file(original_file_path)
    modified_data = read_json_file(modified_file_path)
    if not (original_data and modified_data):
        # read_json_file has already printed any load error; without both
        # datasets there is nothing to compare.
        return

    # Load the SentenceTransformer model on the module-level device.
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

    original_score, original_details = calculate_score(original_data, embedding_model)
    modified_score, modified_details = calculate_score(modified_data, embedding_model)

    # Convert NumPy bools to built-in bools to avoid JSON serialisation
    # problems with the result dicts.
    original_results = _coerce_numpy_bools({
        "total_score": original_score,
        "question_details": original_details,
    })
    modified_results = _coerce_numpy_bools({
        "total_score": modified_score,
        "question_details": modified_details,
    })

    markdown_content = convert_to_markdown(original_results, modified_results)
    with open(output_result_path, 'w', encoding='utf-8') as f:
        f.write(markdown_content)

    print(f"对比结果已保存到 {output_result_path}")
    print(f"原始模型总得分: {original_score}")
    print(f"修改后模型总得分: {modified_score}")
# Run the comparison only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
# Add Comment
# Please sign in to add a comment.