diff --git a/subject1-3/.keep b/subject1-3/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subject1-3/csc-csp-match.txt b/subject1-3/csc-csp-match.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a024297761ea7c34e7ccc3eee65d2cff51eb8fb4
--- /dev/null
+++ b/subject1-3/csc-csp-match.txt
@@ -0,0 +1,347 @@
+% Main program
+clc;
+clear;
+close all;
+
+% Load the dataset
+data = readtable('csp_resources_matlab1001.csv');
+
+% Stage 1: screening
+minComputingCore = 8;
+minStorageSize = 1024;
+minNetworkSpeed = 100;
+maxRiskFactor = 0.5;
+
+computingCore = zeros(height(data), 1);
+storageSize = zeros(height(data), 1);
+networkSpeed = zeros(height(data), 1);
+
+% Parse the leading number from each resource string and convert
+% storage/network values to a common base unit.
+for i = 1:height(data)
+    computingCore(i) = str2double(regexp(data.Computing_Resource{i}, '\d+', 'match', 'once'));
+
+    storageStr = data.Storage_Resource{i};
+    storageSize(i) = str2double(regexp(storageStr, '\d+', 'match', 'once'));
+    if contains(storageStr, 'TB')
+        storageSize(i) = storageSize(i) * 1024;
+    elseif contains(storageStr, 'PB')
+        storageSize(i) = storageSize(i) * 1024 * 1024;
+    end
+
+    networkStr = data.Network_Resource{i};
+    networkSpeed(i) = str2double(regexp(networkStr, '\d+', 'match', 'once'));
+    if contains(networkStr, 'Gbps')
+        networkSpeed(i) = networkSpeed(i) * 1000;
+    elseif contains(networkStr, 'Tbps')
+        networkSpeed(i) = networkSpeed(i) * 1000 * 1000;
+    end
+end
+
+condition = (computingCore >= minComputingCore) & ...
+            (storageSize >= minStorageSize) & ...
+            (networkSpeed >= minNetworkSpeed) & ...
+            (data.Risk_Factor <= maxRiskFactor);
+
+filteredData = data(condition, :);
+computingCore = computingCore(condition);
+storageSize = storageSize(condition);
+networkSpeed = networkSpeed(condition);
+
+if isempty(filteredData)
+    fprintf('No records satisfy the screening conditions.\n');
+    return;
+end
+
+% Rule-matching parameter settings
+U_com_min = min(computingCore);
+U_com_max = max(computingCore);
+U_net_min = min(networkSpeed);
+U_net_max = max(networkSpeed);
+U_sto_min = min(storageSize);
+U_sto_max = max(storageSize);
+
+num_experiments = 30;
+max_output_size = 100;
+% 1 = maximize the objective, -1 = minimize it
+obj_type = [1, -1, 1, 1, -1, -1];
+
+% Subjective (user-given) weights; only their relative magnitudes
+% matter for the ranking, so they need not sum to 1.
+user_given_weights = [0.2, 0.1, 0.1, 0.2, 0.1, 0.1];
+
+% Initialize result tables for the evaluation metrics
+subjective_final = table();
+entropy_final = table();
+
+% Subjective weights + linear weighted sum
+for output_size = 1:max_output_size
+    total_time = 0;
+    total_non_dominated_count = 0;
+    total_ranking_entropy = 0;
+
+    % Metric accumulators
+    total_reputation = 0;
+    total_risk = 0;
+    total_qos = 0;
+    total_relevance = 0;
+    total_energy = 0;
+    total_cost = 0;
+
+    for exp = 1:num_experiments
+        tic;
+        idx = (computingCore >= U_com_min) & (computingCore <= U_com_max) & ...
+              (networkSpeed >= U_net_min) & (networkSpeed <= U_net_max) & ...
+              (storageSize >= U_sto_min) & (storageSize <= U_sto_max);
+        S = filteredData(idx, :);
+
+        objective_data = table2array(S(:, {'Reputation_Score', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'cost'}));
+        [F, ~] = fast_non_dominated_sort(objective_data, obj_type);
+        first_front_indices = F{1};
+        M = S(first_front_indices, :);  % first Pareto front of S (not used downstream)
+
+        evaluation_data = table2array(S(:, {'Reputation_Score', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'cost'}));
+        min_val = min(evaluation_data);
+        max_val = max(evaluation_data);
+        range_val = max_val - min_val;
+        range_val(range_val == 0) = eps;  % guard against division by zero for constant columns
+        normalized_data = zeros(size(evaluation_data));
+
+        % Min-max normalization, flipping minimization objectives so
+        % that larger normalized values are always better.
+        for j = 1:size(evaluation_data, 2)
+            if obj_type(j) == 1
+                normalized_data(:, j) = (evaluation_data(:, j) - min_val(j)) / range_val(j);
+            else
+                normalized_data(:, j) = (max_val(j) - evaluation_data(:, j)) / range_val(j);
+            end
+        end
+
+        match_scores = user_given_weights * normalized_data';
+        match_scores = match_scores(:);
+        S.Match_Score = match_scores;
+        sorted_S = sortrows(S, 'Match_Score', 'descend');
+        selected_S = sorted_S(1:min(output_size, height(sorted_S)), :);
+
+        total_time = total_time + toc;
+
+        objective_data_selected = table2array(selected_S(:, {'Reputation_Score', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'cost'}));
+        [F_selected, ~] = fast_non_dominated_sort(objective_data_selected, obj_type);
+        first_front_indices_selected = F_selected{1};
+        count = length(first_front_indices_selected);
+        total_non_dominated_count = total_non_dominated_count + count;
+
+        if ~isempty(first_front_indices_selected)
+            selected_data = objective_data_selected(first_front_indices_selected, :);
+            total_reputation = total_reputation + mean(selected_data(:,1));
+            total_risk = total_risk + mean(selected_data(:,2));
+            total_qos = total_qos + mean(selected_data(:,3));
+            total_relevance = total_relevance + mean(selected_data(:,4));
+            total_energy = total_energy + mean(selected_data(:,5));
+            total_cost = total_cost + mean(selected_data(:,6));
+        end
+
+        ranks = 1:height(selected_S);
+        p_ranks = ranks / sum(ranks);
+        total_ranking_entropy = total_ranking_entropy - sum(p_ranks .* log(p_ranks + eps));
+    end
+
+    % Compute all statistics
+    avg_time = total_time / num_experiments;
+    avg_non_dominated = total_non_dominated_count / num_experiments;
+    avg_entropy = total_ranking_entropy / num_experiments;
+    avg_reputation = total_reputation / num_experiments;
+    avg_risk = total_risk / num_experiments;
+    avg_qos = total_qos / num_experiments;
+    avg_relevance = total_relevance / num_experiments;
+    avg_energy = total_energy / num_experiments;
+    avg_cost = total_cost / num_experiments;
+
+    % Build a row containing all statistics
+    row = table(output_size, avg_time, avg_non_dominated, avg_entropy, ...
+        avg_reputation, avg_risk, avg_qos, avg_relevance, avg_energy, avg_cost, ...
+        'VariableNames', {'Output_Size', 'Average_Time', 'Average_Non_Dominated', 'Average_Entropy', ...
+        'Reputation', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'Cost'});
+
+    % Append to the final results
+    if output_size == 1
+        subjective_final = row;
+    else
+        subjective_final = [subjective_final; row];
+    end
+end
+
+% Entropy weight method + fuzzy membership
+for output_size = 1:max_output_size
+    total_time = 0;
+    total_non_dominated_count = 0;
+    total_ranking_entropy = 0;
+
+    % Metric accumulators
+    total_reputation = 0;
+    total_risk = 0;
+    total_qos = 0;
+    total_relevance = 0;
+    total_energy = 0;
+    total_cost = 0;
+
+    for exp = 1:num_experiments
+        tic;
+        idx = (computingCore >= U_com_min) & (computingCore <= U_com_max) & ...
+              (networkSpeed >= U_net_min) & (networkSpeed <= U_net_max) & ...
+              (storageSize >= U_sto_min) & (storageSize <= U_sto_max);
+        S = filteredData(idx, :);
+
+        objective_data = table2array(S(:, {'Reputation_Score', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'cost'}));
+        [F, ~] = fast_non_dominated_sort(objective_data, obj_type);
+        first_front_indices = F{1};
+        M = S(first_front_indices, :);  % first Pareto front of S (not used downstream)
+
+        evaluation_data = table2array(S(:, {'Reputation_Score', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'cost'}));
+        min_val = min(evaluation_data);
+        max_val = max(evaluation_data);
+        range_val = max_val - min_val;
+        range_val(range_val == 0) = eps;  % guard against division by zero for constant columns
+        normalized_data = zeros(size(evaluation_data));
+
+        for j = 1:size(evaluation_data, 2)
+            if obj_type(j) == 1
+                normalized_data(:, j) = (evaluation_data(:, j) - min_val(j)) / range_val(j);
+            else
+                normalized_data(:, j) = (max_val(j) - evaluation_data(:, j)) / range_val(j);
+            end
+        end
+
+        % Fuzzify the normalized scores with a triangular membership
+        % function peaking at 0.5.
+        R = zeros(size(normalized_data));
+        for i = 1:size(normalized_data, 1)
+            for j = 1:size(normalized_data, 2)
+                R(i, j) = triangular_membership(normalized_data(i, j), 0, 0.5, 1);
+            end
+        end
+
+        % Entropy weight method: criteria whose columns are more
+        % dispersed (lower entropy e_j) receive larger weights
+        % w_j = (1 - e_j) / sum_k(1 - e_k).
+        [n, ~] = size(normalized_data);
+        p = normalized_data ./ repmat(sum(normalized_data), n, 1);
+        e = -sum(p .* log(p + eps), 1) / log(n);
+        weights = (1 - e) / sum(1 - e);
+
+        match_scores = weights * R';
+        match_scores = match_scores(:);
+        S.Match_Score = match_scores;
+        sorted_S = sortrows(S, 'Match_Score', 'descend');
+        selected_S = sorted_S(1:min(output_size, height(sorted_S)), :);
+
+        total_time = total_time + toc;
+
+        objective_data_selected = table2array(selected_S(:, {'Reputation_Score', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'cost'}));
+        [F_selected, ~] = fast_non_dominated_sort(objective_data_selected, obj_type);
+        first_front_indices_selected = F_selected{1};
+        count = length(first_front_indices_selected);
+        total_non_dominated_count = total_non_dominated_count + count;
+
+        if ~isempty(first_front_indices_selected)
+            selected_data = objective_data_selected(first_front_indices_selected, :);
+            total_reputation = total_reputation + mean(selected_data(:,1));
+            total_risk = total_risk + mean(selected_data(:,2));
+            total_qos = total_qos + mean(selected_data(:,3));
+            total_relevance = total_relevance + mean(selected_data(:,4));
+            total_energy = total_energy + mean(selected_data(:,5));
+            total_cost = total_cost + mean(selected_data(:,6));
+        end
+
+        ranks = 1:height(selected_S);
+        p_ranks = ranks / sum(ranks);
+        total_ranking_entropy = total_ranking_entropy - sum(p_ranks .* log(p_ranks + eps));
+    end
+
+    % Compute all statistics
+    avg_time = total_time / num_experiments;
+    avg_non_dominated = total_non_dominated_count / num_experiments;
+    avg_entropy = total_ranking_entropy / num_experiments;
+    avg_reputation = total_reputation / num_experiments;
+    avg_risk = total_risk / num_experiments;
+    avg_qos = total_qos / num_experiments;
+    avg_relevance = total_relevance / num_experiments;
+    avg_energy = total_energy / num_experiments;
+    avg_cost = total_cost / num_experiments;
+
+    % Build a row containing all statistics
+    row = table(output_size, avg_time, avg_non_dominated, avg_entropy, ...
+        avg_reputation, avg_risk, avg_qos, avg_relevance, avg_energy, avg_cost, ...
+        'VariableNames', {'Output_Size', 'Average_Time', 'Average_Non_Dominated', 'Average_Entropy', ...
+        'Reputation', 'Risk_Factor', 'QOS', 'Data_Relevance', 'Energy_Consumption', 'Cost'});
+
+    % Append to the final results
+    if output_size == 1
+        entropy_final = row;
+    else
+        entropy_final = [entropy_final; row];
+    end
+end
+
+% Write the results to CSV files
+writetable(subjective_final, 'subjective_metrics1001.csv');
+writetable(entropy_final, 'entropy_metrics1001.csv');
+
+% Helper functions
+% Fast non-dominated sorting (NSGA-II style): F{k} holds the row
+% indices of the k-th Pareto front of P.
+function [F, ranks] = fast_non_dominated_sort(P, obj_type)
+    [N, ~] = size(P);
+    S = cell(N, 1);
+    n = zeros(N, 1);
+    ranks = zeros(N, 1);
+    F = cell(1);
+
+    for p = 1:N
+        S{p} = [];
+        n(p) = 0;
+        for q = 1:N
+            if dominates(P(p, :), P(q, :), obj_type)
+                S{p} = [S{p}, q];
+            elseif dominates(P(q, :), P(p, :), obj_type)
+                n(p) = n(p) + 1;
+            end
+        end
+        if n(p) == 0
+            ranks(p) = 1;
+            F{1} = [F{1}, p];
+        end
+    end
+
+    i = 1;
+    while ~isempty(F{i})
+        Q = [];
+        for p = F{i}
+            for q = S{p}
+                n(q) = n(q) - 1;
+                if n(q) == 0
+                    ranks(q) = i + 1;
+                    Q = [Q, q];
+                end
+            end
+        end
+        i = i + 1;
+        F{i} = Q;
+    end
+end
+
+% p dominates q iff p is no worse in every objective and strictly
+% better in at least one (obj_type: 1 = maximize, -1 = minimize).
+function dom = dominates(p, q, obj_type)
+    dom = true;
+    has_improvement = false;
+    for j = 1:length(obj_type)
+        if obj_type(j) == 1
+            if p(j) < q(j)
+                dom = false;
+                break;
+            elseif p(j) > q(j)
+                has_improvement = true;
+            end
+        else
+            if p(j) > q(j)
+                dom = false;
+                break;
+            elseif p(j) < q(j)
+                has_improvement = true;
+            end
+        end
+    end
+    dom = dom && has_improvement;
+end
+
+% Triangular membership function with support (a, c) and peak at b.
+function membership = triangular_membership(x, a, b, c)
+    if x <= a || x >= c
+        membership = 0;
+    elseif x <= b
+        membership = (x - a) / (b - a);
+    else
+        membership = (c - x) / (c - b);
+    end
+end
\ No newline at end of file
diff --git a/subject_1-1/.keep b/subject_1-1/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subject_1-1/feature selection.py b/subject_1-1/feature selection.py
new file mode 100644
index 0000000000000000000000000000000000000000..32f72bb3815d0a8e895cc7a0c49fc29586e80bb3
--- /dev/null
+++ b/subject_1-1/feature selection.py
@@ -0,0 +1,141 @@
+import numpy as np
+from sklearn.svm import LinearSVC
+from sklearn.linear_model import Ridge
+
+
+class reweightEG:
+
+    def __init__(self, alpha=1, idx_group=None, n_group=None, crit=5 * 10 ** -4, n_iter=10 ** 6, verbose=0):
+
+        self.coef = None
+        self.idx = None
+        self.alpha = alpha
+        self.idx_group = idx_group
+        self.n_group = n_group
+        self.loss_func = None
+        # convergence threshold on the change of w between iterations
+        self.alpharit = crit
+        self.n_iter = n_iter
+        self.verbose = verbose
+        self.converged = False
+
+        if n_iter < 1:
+            raise ValueError('At least one iteration is required.')
+
+        if idx_group is None and n_group is None:
+            raise ValueError('n_group must be specified if idx_group is None.')
+
+    def _compute_G(self, w, feat_group):
+        # flatten w to 1-D (row-major order)
+        w = np.ravel(w)
+        # number of groups
+        n_group = len(self.idx_group_new)
+        n_feature = w.shape[0]
+
+        G_diag = np.zeros(n_feature)
+        w_group_norm = np.empty(n_group)
+        for group_counter in range(n_group):
+            w_group = w[self.idx_group_new[group_counter]]
+            # group regularization term: L1 norm of the group's weights
+            w_group_norm[group_counter] = np.linalg.norm(w_group, ord=1)
+
+        # avoid division by zero for all-zero groups
+        w_group_norm[np.where(w_group_norm == 0)[0]] = 10 ** -9
+
+        w_abs = np.abs(w)
+        for feature_counter in range(n_feature):
+            G_diag[feature_counter] = np.sqrt(w_abs[feature_counter] / w_group_norm[feat_group[feature_counter]])
+
+        return G_diag
+
+    def _compute_X_tran(self, X, G_diag):
+        # np.diag builds a diagonal matrix from the vector G_diag
+        return np.dot(X, np.diag(G_diag))
+
+    def _compute_w_tran(self, X_tran, y):
+        # refit the weight vector on the reweighted design matrix
+        w = 0
+        if self.loss_func == 'hinge':
+            clf = LinearSVC(fit_intercept=False, C=self.alpha)
+            clf.fit(X_tran, y)
+            w = clf.coef_
+        elif self.loss_func == 'square':
+            clf = Ridge(alpha=self.alpha, fit_intercept=False, tol=10 ** -9)
+            clf.fit(X_tran, y)
+            w = clf.coef_
+
+        return np.ravel(w)
+
+    def _create_rand_group(self, n_feature):
+        self.idx_group = np.zeros((self.n_group, n_feature))
+        # random permutation of the feature indices
+        idx = np.random.permutation(n_feature)
+        # split the permuted indices into n_group roughly equal parts
+        idx = np.array_split(idx, self.n_group)
+        for sub_counter, sub_idx in enumerate(idx):
+            self.idx_group[sub_counter, sub_idx] = 1
+
+    def _l12_norm(self, X, y):
+        if len(X.shape) == 1:
+            n_sample = X.shape[0]
+            n_feature = 1
+        else:
+            n_sample, n_feature = X.shape
+
+        # hinge loss for binary targets, squared loss otherwise
+        if len(np.unique(y)) == 2:
+            self.loss_func = 'hinge'
+        else:
+            self.loss_func = 'square'
+
+        if self.idx_group is None:
+            self._create_rand_group(n_feature)
+
+        self.idx_group_new = []
+        feat_group = {}
+        for group_counter in range(self.idx_group.shape[0]):
+            # indices of the nonzero entries, i.e. the features in this group
+            temp = np.nonzero(self.idx_group[group_counter, :])[0]
+            self.idx_group_new.append(temp)
+            for idx_feature in temp:
+                feat_group[idx_feature] = group_counter
+
+        w = np.ones(n_feature) / n_feature
+        G_diag = self._compute_G(w, feat_group)
+        X_tran = self._compute_X_tran(X, G_diag)
+        w_tran = self._compute_w_tran(X_tran, y)
+
+        counter = 0
+        while True:
+            counter += 1
+
+            w_pre = w.copy()
+            w = np.multiply(w_tran, G_diag)
+
+            G_diag = self._compute_G(w, feat_group)
+            X_tran = self._compute_X_tran(X, G_diag)
+            w_tran = self._compute_w_tran(X_tran, y)
+
+            # L2 norm of the weight change, used as the convergence criterion
+            temp = np.linalg.norm(w_pre - w)
+            if self.verbose == 1:
+                print('iteration: %d, criteria: %.4f.' % (counter, temp))
+
+            if temp <= self.alpharit or counter >= self.n_iter:
+                break
+
+        self.coef = w
+        self.idx = np.where(np.abs(w) > 10 ** -3)[0]
+        self.coef[np.where(np.abs(w) <= 10 ** -3)] = 0
+
+        if counter < self.n_iter:
+            self.converged = True
+
+    def fit(self, X, y):
+        self._l12_norm(X, y)
+
+    def predict(self, X):
+        if self.loss_func == 'hinge':
+            # np.dot: matrix-vector product; np.sign gives the class label
+            return np.ravel(np.sign(np.dot(X, self.coef)))
+        else:
+            return np.ravel(np.dot(X, self.coef))
\ No newline at end of file
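
A minimal usage sketch for reweightEG, assuming the class above is in scope; the synthetic data, seed, and parameter values are illustrative assumptions, not part of the module. A binary ±1 target routes fitting through the hinge-loss (LinearSVC) path, and model.idx then holds the indices of the selected features.

    import numpy as np

    # Synthetic binary problem: 100 samples, 20 features, only the first
    # 5 features carry signal (illustrative assumption).
    rng = np.random.RandomState(0)
    X = rng.randn(100, 20)
    w_true = np.zeros(20)
    w_true[:5] = 1.0
    y = np.sign(X.dot(w_true) + 0.1 * rng.randn(100))

    # 4 random feature groups are created internally via _create_rand_group.
    model = reweightEG(alpha=1, n_group=4, verbose=0)
    model.fit(X, y)

    print('selected features:', model.idx)   # indices with |w| > 1e-3
    print('converged:', model.converged)
    print('training accuracy:', np.mean(model.predict(X) == y))

Passing an explicit idx_group (an n_group x n_feature 0/1 matrix) instead of n_group fixes the grouping across runs, since _create_rand_group partitions the features randomly.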