4272개의 document 중 200여 개를 추출할 예정
(답안지에 사용되지 않은 document는 ColBERT 학습에 사용)
100개 구성 계획
30개는 biology
biology_ko_mmlu_sampled = [2519, 1840, 3454, 4185, 1700, 1139, 3491, 391, 3857, 2639, 4166, 1807, 3088, 3922, 4242]
2519 4abe123a-58b6-447b-9fb9-2a0a7d79251e
1840 4f5d4778-5103-424a-93b1-c072269626a9
3454 a13249a3-95a7-4d13-93b5-1e45a3f54545
4185 7a8ec242-870e-4fc4-8201-5ca2b8785ed7
1700 b8f659fa-05f1-4dba-a884-5e3fd292d908
1139 eddbe367-798e-4c82-8635-b515a2ab4693
3491 6a8877ea-9960-4841-9f5a-c330215579ae
391 e9cb6a69-c160-4884-b3ad-a1e5eee307a9
3857 58d7a691-9b4b-4c4c-81d8-4d0083077b12
2639 bf1091d6-2ca3-484c-b566-4d3dc40a2de6
4166 99c3aa04-52b6-47f8-b19b-b6c1cfcfb0ae
1807 962bbde9-f012-4aec-ad61-6544939f7d09
3088 a76a5516-48ed-4301-b312-fc7383375726
3922 772df8d7-47bc-4470-9574-75c32828d510
4242 a5c1fa26-cfe5-4698-9e9c-38650e993310
biology_ko_ai2_arc_sampled = [3004, 2692, 3099, 1137, 375, 810, 4103, 300, 1587, 928, 3684, 326, 1845, 3371, 3420]
3004 02f79eed-d588-4f99-ac88-780855664a22
2692 bc4f6f00-2d17-48bb-a90e-0514f60e2749
3099 478f68b0-ad2f-4af4-add2-75bc9916e050
1137 1675b40a-14c0-4fb0-b7e7-3484868d803b
375 9372c34d-4ff9-4ad6-a67c-e5ddf65033d0
810 bd12b839-2dc5-4bf9-8b7e-b43f0f4614cb
4103 6d0b6315-ff03-449d-b2ef-6887d94b7ff6
300 4a967aaa-f247-47e3-817b-eb3ab3a3eb5e
1587 3a6f491b-228e-40c4-a805-af9a523ecbd0
928 2e778b23-3814-4720-bdad-827b7c4e1172
3684 c1c53840-1706-49f4-9508-6a654fdfa1ba
326 2b0016b4-f2de-449b-bfe9-a4d5e50c6d65
1845 dc398ded-03b2-4113-8ae6-9161cb5a545c
3371 29f939e1-a784-40fc-a31b-139fdaceec66
3420 56b91ae0-cb76-46e2-a725-8d4bafcca24e
20개는 physics
physics_ko_mmlu_sampled = [675, 2202, 2761, 4050, 3764, 1598, 2146, 3455, 3667, 2774]
675 d25d6442-f858-4133-a746-745829063024
2202 c52640c4-273d-4cc9-a82c-07dec6f7cc7a
2761 cb4a3d86-988e-4425-b295-bbe34317c5cb
4050 5a78deb2-ae7c-4e5f-b7cf-fbcc3d566b53
3764 71d79704-262e-4eb0-8ebc-7c9e597ea1c9
1598 807f29a3-0c6c-4949-a0d2-e3d63d188f35
2146 185008d2-6091-4beb-94e4-d847d70fdea6
3455 12fa6f99-e1e6-449e-96bb-63cc1353790f
3667 5b15d690-101d-412d-86e6-f47ab0f4eb8f
2774 46c4ef9d-42db-4798-8b51-38782d2c0308
physics_ko_ai2_arc_sampled = [2689, 2437, 1816, 3309, 3694, 3477, 3151, 4246, 3795, 3426]
2689 6dcf1f6e-36a9-4606-b43d-9a6b456ab0bb
2437 55187155-b988-4c66-bce7-895d185a74f4
1816 9c74e281-6107-4be9-a837-bdb5a5d3beb8
3309 17919183-4177-4edb-a6d5-f2dc9456dbb7
3694 1dea74f9-4137-4aaf-9410-044f3973efd1
3477 6b779a5a-85dc-44ce-9707-d94132b06ddd
3151 c92af751-03da-479e-8362-6a181bd591af
4246 aeb28496-0a80-4978-97b5-484f36b0272c
3795 a395b38f-af11-409e-9c17-89a21f4a9521
3426 0d18a619-d45c-4450-afd5-5821c0853505
computer_science 10개
computer_science_ko_mmlu_sampled = [283, 3067, 2428, 4034, 273]
283 cced57be-474e-4cb2-a751-95981374d7b5
3067 21ea48b8-b053-4b2d-a1d8-d3ad15e548d7
2428 7fc8b55a-f05d-4857-aebd-933e42897567
4034 753646b2-0cc4-4d71-94a1-8201b3cd0659
273 0b5888eb-e3d9-44ed-9c9c-1b88101dd694
computer_science_ko_ai2_arc = list(ko_ai2_arc_df[ko_ai2_arc_df["category"] == "computer_science"].index)
[299, 1030, 2262, 2857]
299 cc45ca16-77e2-4bb7-9e3d-35b4c10a71de
1030 c9887727-0e1e-41fc-a844-fd38d791873c
2262 9e49d5a6-1d12-4864-992d-c074d4bea569
2857 70915d8f-c7b9-436f-9ba6-e298b91b8d01
computer_security
computer_security_ko_mmlu_sampled = [1164, 3996, 3613, 1288, 2211]
1164 e02813a8-d5e1-4324-a4c3-54a3ba72c903
3996 709ffcb8-27c0-4ee2-92ca-514425d27492
3613 749b55df-5b6b-4dc0-bd42-354ce61052f3
1288 e31ae7ee-4911-46ea-a828-2cd63407e324
2211 59b393d4-bcb7-4978-a9cb-52a7efe38c1a
computer_security_ko_ai2_arc = list(ko_ai2_arc_df[ko_ai2_arc_df["category"] == "computer_security"].index)
[769, 1001, 2471, 3732]
769 ca7ab9d1-a540-4c52-a6a1-688772f258d8
1001 42dc8596-196c-4286-858c-164c63287779
2471 e5e569a2-aad5-46c8-b71a-6cb021f732e3
3732 f7cd4fb3-d9cf-471e-9d23-dc8372d356ef
추후 진행 예정 chemistry 10개
valid.jsonl의 형태
{
"eval_id" : eval_id,
"msg" : [{"role" : "user", "content" : content}]
}
valid_answer.jsonl의 형태
{
"eval_id" : eval_id,
"standalone_query" : content,
"topk" : []
}