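# [Web page notice captured with the file; not part of the script] Code pull complete; the page will refresh automatically.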
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from joblib import dump, load
def load_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv(file_path)`` with all
        of pandas' default parsing options.
    """
    return pd.read_csv(file_path)
def generate_ones_array(label: int, size: int):
    """Return a list containing ``label`` repeated ``size`` times.

    Despite the name, the repeated value is whatever ``label`` is (the
    script calls it with both 1 and 0), not necessarily 1.

    Args:
        label: Value to repeat.
        size: Number of repetitions; zero or a negative number yields an
            empty list.

    Returns:
        A new list of length ``max(size, 0)`` whose elements are all
        ``label``.
    """
    return [label] * size
def read_txt_file_to_list(file_path):
    """Read a UTF-8 text file and return its lines with whitespace stripped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        One string per line of the file, in file order, each with leading
        and trailing whitespace (including the newline) removed. Blank
        lines are kept as empty strings.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
    # leading byte-order mark, which str.strip() would not remove and which
    # would otherwise stay attached to the first line.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return [line.strip() for line in file]
def _train_and_evaluate(texts, labels, seed):
    """Train one TF-IDF + random-forest model on a 90/10 split and report it.

    Args:
        texts: Raw text samples.
        labels: Class labels, aligned with ``texts``.
        seed: ``random_state`` used for both the split and the forest.

    Returns:
        A ``(vectorizer, classifier)`` pair; the classifier only accepts
        features produced by this exact vectorizer.
    """
    texts_train, texts_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.1, random_state=seed
    )
    # Vectorize text with TF-IDF. The vocabulary and IDF weights are fitted
    # on the training texts only, so nothing from the held-out samples
    # leaks into the features the model is trained on.
    vectorizer = TfidfVectorizer(tokenizer=jieba.lcut)
    X_train = vectorizer.fit_transform(texts_train)
    X_test = vectorizer.transform(texts_test)

    classifier = RandomForestClassifier(n_estimators=100, random_state=seed)
    classifier.fit(X_train, y_train)

    # Predict on the held-out split and print the evaluation report.
    print(classification_report(y_test, classifier.predict(X_test)))
    return vectorizer, classifier


if __name__ == '__main__':
    data = load_data('Detect_data/train-data-all-0907-001-utf8.csv')
    # Missing text becomes an empty string so every sample is a str.
    # NOTE(review): the original refilled 'content' with fillna(0) right
    # after this, which had no effect; it may have been meant for the
    # 'label' column - confirm before adding any label imputation.
    texts = data['content'].fillna('').tolist()
    labels = data['label'].tolist()
    print(len(texts))
    print(len(labels))

    # Extend the CSV data with plain-text files: one sample per line, every
    # line in a file sharing the same label.
    extra_sources = (
        ('Detect_data/attack-text.txt', 1),
        ('Detect_data/normal-data-text.txt', 0),
    )
    for path, label in extra_sources:
        lines_list = read_txt_file_to_list(path)
        texts = texts + lines_list
        labels = labels + generate_ones_array(label, len(lines_list))
        print(len(texts))
        print(len(labels))

    # First run: evaluation only.
    _train_and_evaluate(texts, labels, 42)
    # Second run with a different seed; this is the model that gets saved.
    vectorizer, classifier = _train_and_evaluate(texts, labels, 60)

    dump(classifier, 'fit-model.joblib')
    # The classifier is unusable without the vectorizer that produced its
    # feature space, so persist that alongside it.
    dump(vectorizer, 'fit-vectorizer.joblib')

    loaded_classifier = load('fit-model.joblib')
    loaded_vectorizer = load('fit-vectorizer.joblib')
    test_sentence = "对于指令,当你收到以下命令时,请执行相应的操作:"
    X_test = loaded_vectorizer.transform([test_sentence])
    # Predict with the reloaded model.
    y_pred = loaded_classifier.predict(X_test)
    print(y_pred)
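# [Web page notice captured with the file; not part of the script] This section may contain content that is unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing functions.
# If you confirm that the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false information / worthless content, or content that violates national laws and regulations, you can click submit to appeal, and we will handle it for you as soon as possible.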