Merge pull request #20 from kinfey/main

Update 02.AITools Samples
microsoft · Aug 11, 2024 · 1c0c25e · 1c0c25e
2 parents c5a5c77 + eaf5ab3
commit 1c0c25e
Show file tree

Hide file tree

Showing 17 changed files with 1,884 additions and 108 deletions.
diff --git a/src/02.AIToolsSolutionE2E/olive-config.json b/src/02.AIToolsSolutionE2E/olive-config.json
diff --git a/src/02.AIToolsSolutionE2E/qa_e2e/datasets/TruthfulQA.csv b/src/02.AIToolsSolutionE2E/qa_e2e/datasets/TruthfulQA.csv
diff --git a/src/02.AIToolsSolutionE2E/qa_e2e/datasets/Truthful_QA_datasets.ipynb b/src/02.AIToolsSolutionE2E/qa_e2e/datasets/Truthful_QA_datasets.ipynb
@@ -0,0 +1,117 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "csvfile = open('./TruthfulQA.csv', 'r')\n",
+    "jsonfile = open('./TruthfulQA.json', 'w')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fieldnames = (\"Type\",\"Category\",\"Question\",\"Best Answer\",\"Correct Answers\",\"Incorrect Answers\",\"Source\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reader = csv.DictReader(csvfile, fieldnames)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "i = 0\n",
+    "for row in reader:\n",
+    "    if i > 0:\n",
+    "        # print(row)\n",
+    "        try:\n",
+    "            # print(row)\n",
+    "            # json.loads(row)\n",
+    "            json.dump(row, jsonfile, ensure_ascii=False)\n",
+    "            jsonfile.write('\\n')\n",
+    "            i += 1\n",
+    "        except ValueError:\n",
+    "            continue\n",
+    "    if i == 0:\n",
+    "        i += 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # i = 1\n",
+    "data = []\n",
+    "with open('./TruthfulQA.json', 'r',encoding=\"utf8\") as file:\n",
+    "    for line in file:\n",
+    "        try:\n",
+    "            data.append(json.loads(line))\n",
+    "        except ValueError:\n",
+    "            continue\n",
+    "        i+=1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "with open('datasets.json', 'w',encoding=\"utf8\") as f:\n",
+    "    for i in range(len(data)):\n",
+    "        if i >0:\n",
+    "            json.dump(data[i], f, ensure_ascii=False)\n",
+    "            f.write('\\n')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pydev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/02.AIToolsSolutionE2E/qa_e2e/docs/01.Installation.md b/src/02.AIToolsSolutionE2E/qa_e2e/docs/01.Installation.md
@@ -0,0 +1,56 @@
+# **Installation**
+
+***Note:*** This setup differs slightly from AI Tools: I chose Azure ML Service as the backend and ONNX Runtime for the samples.
+
+## **1. Set Python Env**
+
+```bash
+
+
+conda create -n slmopsenv python==3.10.12
+
+conda activate slmopsenv
+
+
+```
+
+## **2. Install Python Library**
+
+
+```bash
+
+pip install -r requirements.txt
+
+```
+
+
+## **3. Structure**
+
+
+
+```md
+
+|--📁 QA_E2E
+    |-📁 datasets
+    |-📁 fine-tuning
+    |-📁 inferences
+    |-📁 models-cache
+
+```
+
+**📁 datasets** - Stores the data used for fine-tuning as a JSON file
+
+**📁 fine-tuning** - Stores Microsoft Olive settings in **olive-config.json** and saves a cache of related steps
+
+**📁 inferences** - For running inference with the models, and the test results
+
+**📁 models-cache** - Stores the fine-tuned Microsoft Phi-3 mini models
+
+
+
+
+
+
+
+
+
diff --git a/src/02.AIToolsSolutionE2E/qa_e2e/docs/02.PrepareDatasets.md b/src/02.AIToolsSolutionE2E/qa_e2e/docs/02.PrepareDatasets.md
@@ -0,0 +1,85 @@
+# **Prepare your QA datasets**
+
+We want to inject [TruthfulQA's data](https://github.com/sylinrl/TruthfulQA) into Phi-3-mini. The first step is to import TruthfulQA's KOL data.
+
+
+**Note:** Please create your notebook **(Truthful_QA_datasets.ipynb)** in the datasets folder
+
+
+### **1. Load the data from CSV and save it as JSON**
+
+
+```python
+
+import csv
+import json
+
+csvfile = open('./TruthfulQA.csv', 'r')
+jsonfile = open('./TruthfulQA.json', 'w')
+
+fieldnames = ("Type","Category","Question","Best Answer","Correct Answers","Incorrect Answers","Source")
+
+reader = csv.DictReader(csvfile, fieldnames)
+
+i = 0
+for row in reader:
+    if i > 0:
+        # print(row)
+        try:
+            # print(row)
+            # json.loads(row)
+            json.dump(row, jsonfile, ensure_ascii=False)
+            jsonfile.write('\n')
+            i += 1
+        except ValueError:
+            continue
+    if i == 0:
+        i += 1
+
+
+```
+
+### **2. Clean your data**
+
+
+
+```python
+
+
+data = []
+with open('./TruthfulQA.json', 'r',encoding="utf8") as file:
+    for line in file:
+        try:
+            data.append(json.loads(line))
+        except ValueError:
+            continue
+        i+=1
+
+
+```
+
+### **3. Save your data**
+
+
+
+```python
+
+
+import json
+
+with open('datasets.json', 'w',encoding="utf8") as f:
+    for i in range(len(data)):
+        if i >0:
+            json.dump(data[i], f, ensure_ascii=False)
+            f.write('\n')
+
+
+```
+
+
+
+### **Congratulations!**  
+
+Your data has been successfully loaded. Next, you need to configure your data and the related algorithms through Microsoft Olive: [E2E_LoRA&QLoRA_Config_With_Olive.md](./03.E2E_LoRA&QLoRA_Config_With_Olive.md)
+
+