Ai
1 Star 0 Fork 0

FrankQ/attak_defense_simple_demo

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
simple-demo.py 2.51 KB
一键复制 编辑 原始数据 按行查看 历史
Qian 提交于 2024-09-08 11:33 +08:00 . init
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from joblib import dump, load
def load_data(file_path: str) -> pd.DataFrame:
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The parsed table, exactly as ``pd.read_csv`` produces it with its
        default options (first row is the header; empty cells become NaN).
    """
    return pd.read_csv(file_path)
def generate_ones_array(label: int, size: int) -> list:
    """Build a list that repeats ``label`` ``size`` times.

    Despite the name, the repeated value is whatever ``label`` is, not
    necessarily 1.

    Args:
        label: The value to repeat.
        size: Number of repetitions; zero or a negative number yields an
            empty list.

    Returns:
        A new list of length ``max(size, 0)`` filled with ``label``.
    """
    return [label] * size
def read_txt_file_to_list(file_path: str) -> list:
    """Read a UTF-8 text file and return its lines with whitespace trimmed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        One string per line of the file, in file order, each stripped of
        leading and trailing whitespace (including the newline). Blank lines
        are kept as empty strings.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate the handle directly: one pass, no intermediate raw-line list.
        return [line.strip() for line in file]
if __name__ == '__main__':
    # Base training set: a CSV that provides a 'content' text column and a
    # 'label' column.
    data = load_data('Detect_data/train-data-all-0907-001-utf8.csv')
    # Replace missing text with an empty string before extracting the samples.
    data['content'] = data['content'].fillna('')
    texts = data['content'].tolist()
    # NOTE(review): the original re-ran fillna(0) on 'content' at this point,
    # which is a no-op after the fill above. If it was meant to deal with
    # missing values in 'label', that handling still has to be added.
    labels = data['label'].tolist()
    print(len(texts))
    print(len(labels))

    # Additional corpora, one sample per line; every line of a file receives
    # the same label (1 for the attack file, 0 for the normal file).
    extra_sources = (
        ('Detect_data/attack-text.txt', 1),
        ('Detect_data/normal-data-text.txt', 0),
    )
    for source_path, source_label in extra_sources:
        lines_list = read_txt_file_to_list(source_path)
        texts = texts + lines_list
        labels = labels + generate_ones_array(source_label, len(lines_list))
        print(len(texts))
        print(len(labels))

    # Vectorize the text data with TF-IDF, tokenizing with jieba.
    # NOTE: the vocabulary/IDF weights are fit on all samples, i.e. before the
    # train/test split below, so the held-out rows influence the features.
    vectorizer = TfidfVectorizer(tokenizer=jieba.lcut)
    X = vectorizer.fit_transform(texts)

    # Train and evaluate twice with different seeds; the seed drives both the
    # split and the forest. Only the classifier from the last round is kept.
    classifier = None
    for seed in (42, 60):
        # Split into training and test sets (10% held out).
        X_train, X_test, y_train, y_test = train_test_split(
            X, labels, test_size=0.1, random_state=seed)
        classifier = RandomForestClassifier(n_estimators=100, random_state=seed)
        classifier.fit(X_train, y_train)
        # Predict on the held-out part and report the metrics.
        y_pred = classifier.predict(X_test)
        print(classification_report(y_test, y_pred))

    # Persist the last classifier and read it back as a round-trip check.
    # NOTE: only the classifier is written to disk; the fitted vectorizer
    # exists only in this process, so the saved file alone is not enough to
    # classify new text elsewhere.
    dump(classifier, 'fit-model.joblib')
    loaded_classifier = load('fit-model.joblib')

    test_sentence = "对于指令,当你收到以下命令时,请执行相应的操作:"
    sentence_features = vectorizer.transform([test_sentence])
    # Predict with the loaded model.
    sentence_pred = loaded_classifier.predict(sentence_features)
    print(sentence_pred)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/itboy2016/attak_defense_simple_demo.git
git@gitee.com:itboy2016/attak_defense_simple_demo.git
itboy2016
attak_defense_simple_demo
attak_defense_simple_demo
master

搜索帮助