\section{上手实践}

% \subsection{非深度迁移}

在众多的非深度迁移学习方法中,我们选择最经典的迁移方法之一、发表于IEEE TNN 2011的TCA(Transfer Component Analysis)~\cite{pan2011domain}方法进行实践。为了便于学习,我们同时用Matlab和Python实现了此代码。代码的链接为\url{https://github.com/jindongwang/transferlearning/tree/master/code/traditional/TCA}。下面我们对代码进行简单讲解。

\subsubsection{Matlab}

\textbf{1. 数据获取}

TCA的实验使用迁移学习研究中最常用的Office+Caltech数据集,它包含amazon、webcam、dslr、caltech四个子域的图像特征。读者可以在前文给出的代码仓库中找到数据的下载地址。
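
以Python为例,下载好的数据可以用Scipy直接加载为特征矩阵和标注向量。下面是一个简单示意,其中的文件路径与字段名(feas、label)与本节最后的Python测试代码保持一致:

\begin{lstlisting}
import scipy.io

# Load one domain: 'feas' is the feature matrix, 'label' is the label vector
src_domain = scipy.io.loadmat('data/amazon.mat')
Xs, Ys = src_domain['feas'], src_domain['label']
\end{lstlisting}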

\textbf{2. 算法精炼}

TCA主要进行边缘分布自适应,即通过最小化源域和目标域之间的MMD(最大均值差异)距离来学习变换矩阵$\mathbf{A}$。通过整理化简,TCA最终的求解目标是:
\begin{equation}
\label{equ-eigen}
\left(\mathbf{X} \mathbf{M} \mathbf{X}^\top + \lambda \mathbf{I}\right) \mathbf{A} = \mathbf{X} \mathbf{H} \mathbf{X}^\top \mathbf{A} \Phi
\end{equation}

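其中$\Phi$是由拉格朗日乘子构成的对角矩阵。按照下文代码中的记号(这里是我们对原文推导的简要归纳),记$n = n_s + n_t$,则MMD矩阵$\mathbf{M}$与中心矩阵$\mathbf{H}$分别为
\begin{equation*}
\mathbf{M} = \mathbf{e}\mathbf{e}^\top, \quad \mathbf{e} = \begin{bmatrix} \frac{1}{n_s}\mathbf{1}_{n_s} \\ -\frac{1}{n_t}\mathbf{1}_{n_t} \end{bmatrix}, \quad \mathbf{H} = \mathbf{I}_n - \frac{1}{n}\mathbf{1}\mathbf{1}^\top
\end{equation*}
(代码中还将$\mathbf{M}$除以了它的Frobenius范数以作归一化)。这样,求解式(\ref{equ-eigen})就变成了一个广义特征值分解问题:取最小的$dim$个特征值所对应的特征向量,即可组成变换矩阵$\mathbf{A}$。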

\textbf{3. 编写代码}

我们直接给出精炼后的源码:

\begin{lstlisting}[title=TCA方法的Matlab实现, frame=shadowbox]
function [X_src_new,X_tar_new,A] = TCA(X_src,X_tar,options)
% This is the implementation of Transfer Component Analysis.
% Reference: Sinno Pan et al. Domain Adaptation via Transfer Component Analysis. TNN 2011.

% Inputs:
%%% X_src   : source feature matrix, ns * n_feature
%%% X_tar   : target feature matrix, nt * n_feature
%%% options : option struct
%%%%% lambda      : regularization parameter
%%%%% dim         : dimensionality after adaptation (dim <= n_feature)
%%%%% kernel_type : kernel name, choose from 'primal' | 'linear' | 'rbf'
%%%%% gamma       : bandwidth for rbf kernel, can be omitted for other kernels

% Outputs:
%%% X_src_new : transformed source feature matrix, ns * dim
%%% X_tar_new : transformed target feature matrix, nt * dim
%%% A         : transformation matrix, n_feature * dim for 'primal', (ns + nt) * dim otherwise

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Set options
lambda = options.lambda;
dim = options.dim;
kernel_type = options.kernel_type;
gamma = options.gamma;

%% Calculate
X = [X_src',X_tar'];
X = X*diag(sparse(1./sqrt(sum(X.^2))));   % normalize each sample to unit L2 norm
[m,n] = size(X);
ns = size(X_src,1);
nt = size(X_tar,1);

% Construct the MMD matrix M
e = [1/ns*ones(ns,1);-1/nt*ones(nt,1)];
M = e * e';
M = M / norm(M,'fro');

% Centering matrix H
H = eye(n) - 1/n*ones(n,n);
if strcmp(kernel_type,'primal')
    % Solve the generalized eigendecomposition problem, then normalize
    [A,~] = eigs(X*M*X'+lambda*eye(m),X*H*X',dim,'SM');
    Z = A' * X;
    Z = Z * diag(sparse(1./sqrt(sum(Z.^2))));
    X_src_new = Z(:,1:ns)';
    X_tar_new = Z(:,ns+1:end)';
else
    % Kernelized version: use the kernel matrix K instead of X
    K = TCA_kernel(kernel_type,X,[],gamma);
    [A,~] = eigs(K*M*K'+lambda*eye(n),K*H*K',dim,'SM');
    Z = A' * K;
    Z = Z*diag(sparse(1./sqrt(sum(Z.^2))));
    X_src_new = Z(:,1:ns)';
    X_tar_new = Z(:,ns+1:end)';
end
end

% With Fast Computation of the RBF kernel matrix
% Modified by Mingsheng Long
% 2013(c)
% Mingsheng Long (longmingsheng@gmail.com), 2013
function K = TCA_kernel(ker,X,X2,gamma)

switch ker
    case 'linear'
        if isempty(X2)
            K = X'*X;
        else
            K = X'*X2;
        end
    case 'rbf'
        % Fast computation of pairwise squared Euclidean distances
        n1sq = sum(X.^2,1);
        n1 = size(X,2);
        if isempty(X2)
            D = (ones(n1,1)*n1sq)' + ones(n1,1)*n1sq - 2*X'*X;
        else
            n2sq = sum(X2.^2,1);
            n2 = size(X2,2);
            D = (ones(n2,1)*n1sq)' + ones(n1,1)*n2sq - 2*X'*X2;
        end
        K = exp(-gamma*D);
    otherwise
        error(['Unsupported kernel ' ker])
end
end
\end{lstlisting}

我们将TCA方法包装成函数$\mathrm{TCA}$。代码分别实现了无核('primal')与有核两个版本:无核版本直接在原始特征上求解,有核版本则先计算核矩阵再求解。注意到TCA是一个无监督迁移方法,不需要接受标注(label)作为参数。因此,函数共接受3个输入参数:

\begin{itemize}
	\item $\mathrm{X_{src}}$ : 源域的特征,大小为$n_s \times m$
	\item $\mathrm{X_{tar}}$ : 目标域的特征,大小为$n_t \times m$
	\item $\mathrm{options}$ : 参数结构体,它包含:
	\begin{itemize}
		\item $\lambda$ : 平衡参数,可以自由给出
		\item $dim$ : 算法最终选择将数据降到多少维
		\item $kernel\_type$ : 选择的核类型,可以选择RBF核、线性核、或无核('primal')
		\item $\gamma$ : 如果选择RBF核,那么它的宽度为$\gamma$
	\end{itemize}
\end{itemize}

函数的输出包含3项:
\begin{itemize}
	\item $X_{srcnew}$ : 经过TCA变换后的源域特征
	\item $X_{tarnew}$ : 经过TCA变换后的目标域特征
	\item $A$ : 最终的变换矩阵
\end{itemize}

\textbf{4. 测试算法}

我们使用如下的代码对TCA算法进行测试。由于TCA只负责特征变换,我们在变换后的特征上使用1近邻(1NN)作为基础分类器:它没有额外的超参数,分类精度可以比较直接地反映变换后特征的迁移效果:

\begin{lstlisting}
options.gamma = 2;          % bandwidth for the rbf kernel (unused for 'linear')
options.kernel_type = 'linear';
options.lambda = 1.0;
options.dim = 20;
[X_src_new,X_tar_new,A] = TCA(Xs,Xt,options);

% Use 1NN to predict the labels of the target domain
knn_model = fitcknn(X_src_new,Ys,'NumNeighbors',1);
Y_tar_pseudo = knn_model.predict(X_tar_new);
acc = length(find(Y_tar_pseudo==Yt))/length(Yt);
fprintf('Acc=%0.4f\n',acc);
\end{lstlisting}

结果显示如下:
\begin{lstlisting}
Acc=0.4499
\end{lstlisting}

\subsubsection{Python}

与Matlab代码类似,我们也可以用Python对TCA进行实现。除了Numpy和Scipy两个强大的科学计算库之外,代码还依赖Scikit-learn提供的核函数与近邻分类器。Python版本的TCA代码如下:

\begin{lstlisting}[title=TCA方法的Python实现, frame=shadowbox]

import numpy as np
import scipy.io
import scipy.linalg
import sklearn.metrics
from sklearn.neighbors import KNeighborsClassifier


def kernel(ker, X1, X2, gamma):
    K = None
    if not ker or ker == 'primal':
        K = X1
    elif ker == 'linear':
        if X2 is not None:
            K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T, np.asarray(X2).T)
        else:
            K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
    elif ker == 'rbf':
        if X2 is not None:
            K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, np.asarray(X2).T, gamma)
        else:
            K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, None, gamma)
    return K


class TCA:
    def __init__(self, kernel_type='primal', dim=30, lamb=1, gamma=1):
        '''
        Init func
        :param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
        :param dim: dimension after transfer
        :param lamb: lambda value in equation
        :param gamma: kernel bandwidth for rbf kernel
        '''
        self.kernel_type = kernel_type
        self.dim = dim
        self.lamb = lamb
        self.gamma = gamma

    def fit(self, Xs, Xt):
        '''
        Transform Xs and Xt
        :param Xs: ns * n_feature, source feature
        :param Xt: nt * n_feature, target feature
        :return: Xs_new and Xt_new after TCA
        '''
        X = np.hstack((Xs.T, Xt.T))
        X /= np.linalg.norm(X, axis=0)
        m, n = X.shape
        ns, nt = len(Xs), len(Xt)
        # Construct the MMD matrix M and the centering matrix H
        e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones((nt, 1))))
        M = e * e.T
        M = M / np.linalg.norm(M, 'fro')
        H = np.eye(n) - 1 / n * np.ones((n, n))
        K = kernel(self.kernel_type, X, None, gamma=self.gamma)
        n_eye = m if self.kernel_type == 'primal' else n
        # Solve the generalized eigendecomposition problem
        a, b = np.linalg.multi_dot([K, M, K.T]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot([K, H, K.T])
        w, V = scipy.linalg.eig(a, b)
        # Take the eigenvectors with the smallest eigenvalues to form A
        ind = np.argsort(w)
        A = V[:, ind[:self.dim]]
        Z = np.dot(A.T, K)
        Z /= np.linalg.norm(Z, axis=0)
        Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T
        return Xs_new, Xt_new

    def fit_predict(self, Xs, Ys, Xt, Yt):
        '''
        Transform Xs and Xt, then make predictions on target using 1NN
        :param Xs: ns * n_feature, source feature
        :param Ys: ns * 1, source label
        :param Xt: nt * n_feature, target feature
        :param Yt: nt * 1, target label
        :return: Accuracy and predicted labels on the target domain
        '''
        Xs_new, Xt_new = self.fit(Xs, Xt)
        clf = KNeighborsClassifier(n_neighbors=1)
        clf.fit(Xs_new, Ys.ravel())
        y_pred = clf.predict(Xt_new)
        acc = sklearn.metrics.accuracy_score(Yt, y_pred)
        return acc, y_pred


if __name__ == '__main__':
    domains = ['caltech.mat', 'amazon.mat', 'webcam.mat', 'dslr.mat']
    for i in [2]:
        for j in [3]:
            if i != j:
                src, tar = 'data/' + domains[i], 'data/' + domains[j]
                src_domain, tar_domain = scipy.io.loadmat(src), scipy.io.loadmat(tar)
                Xs, Ys, Xt, Yt = src_domain['feas'], src_domain['label'], tar_domain['feas'], tar_domain['label']
                tca = TCA(kernel_type='linear', dim=30, lamb=1, gamma=1)
                acc, ypre = tca.fit_predict(Xs, Ys, Xt, Yt)
                print(acc)

\end{lstlisting}
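
上述代码最后的main部分给出了在Office+Caltech数据集的webcam到dslr这一迁移任务上调用TCA的完整流程。如果读者想在自己的数据上使用TCA,只需按照下面的方式调用(这只是一个简单的调用示意,其中的Xs、Ys、Xt、Yt需要替换成读者自己的数据):

\begin{lstlisting}
# Feature transformation only
tca = TCA(kernel_type='linear', dim=30, lamb=1, gamma=1)
Xs_new, Xt_new = tca.fit(Xs, Xt)

# Feature transformation, then 1NN classification on the target domain
acc, y_pred = tca.fit_predict(Xs, Ys, Xt, Yt)
\end{lstlisting}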

\textbf{5. 小结}

通过以上过程,我们分别使用Matlab代码和Python代码对经典的TCA方法进行了实验,完成了一个迁移学习任务。其他的非深度迁移学习方法均可以参考上面的过程。值得庆幸的是,许多论文的作者都公布了他们的文章代码,以方便我们进行接下来的研究。读者可以从Github~\footnote{\url{https://github.com/jindongwang/transferlearning/tree/master/code}}或者相关作者的网站上获取其他许多方法的代码。

% \subsection{深度网络的finetune}
%