Skip to content

Commit 9c79048

Browse files
committed
add: TCA
1 parent 712c89c commit 9c79048

2 files changed

Lines changed: 168 additions & 120 deletions

File tree

src/chaps/ch10_practice.tex

Lines changed: 154 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ \section{上手实践}
4747

4848
%\subsection{非深度迁移}
4949

50-
在众多的非深度迁移学习方法中,我们选择发表于ICCV-13的JDA(Joint Adaptation Network)~\cite{long2013transfer}方法进行实践。实验平台为普通机器上的Matlab软件。
50+
在众多的非深度迁移学习方法中,我们选择最经典的迁移方法之一、发表于IEEE TNN 2011的TCA(Transfer Component Analysis)~\cite{pan2011domain}方法进行实践。为了便于学习,我们同时用Matlab和Python实现了此代码。代码的链接为\url{https://github.com/jindongwang/transferlearning/tree/master/code/traditional/TCA}。下面我们对代码进行简单讲解。
51+
52+
\subsubsection{Matlab}
5153

5254
\textbf{1. 数据获取}
5355

@@ -69,12 +71,11 @@ \section{上手实践}
6971

7072
\textbf{2. 算法精炼}
7173

72-
JDA主要进行边缘分布和条件分布的自适应。通过整理化简,JDA最终的求解目标是:
73-
74+
TCA主要进行边缘分布自适应。通过整理化简,TCA最终的求解目标是:
7475
\begin{equation}
7576
\label{equ-eigen}
7677
\begin{split}
77-
\left(\mathbf{X} \sum_{c=0}^{C} \mathbf{M}_c \mathbf{X}^\top + \lambda \mathbf{I}\right) \mathbf{A} =\mathbf{X} \mathbf{H} \mathbf{X}^\top \mathbf{A} \Phi
78+
\left(\mathbf{X} \mathbf{M} \mathbf{X}^\top + \lambda \mathbf{I}\right) \mathbf{A} =\mathbf{X} \mathbf{H} \mathbf{X}^\top \mathbf{A} \Phi
7879
\end{split}
7980
\end{equation}
8081

@@ -92,105 +93,59 @@ \section{上手实践}
9293

9394
\textbf{3. 编写代码}
9495

95-
我们参考JDA开源的代码,直接给出精炼后的源码
96+
我们直接给出精炼后的源码
9697

97-
\begin{lstlisting}[title=JDA方法的Matlab实现, frame=shadowbox]
98-
function [acc,acc_ite,A] = MyJDA(X_src,Y_src,X_tar,Y_tar,options)
99-
% This is the implementation of Joint Distribution Adaptation.
100-
% Reference: Mingsheng Long et al. Transfer feature learning with joint distribution adaptation. ICCV 2013.
98+
\begin{lstlisting}[title=TCA方法的Matlab实现, frame=shadowbox]
99+
function [X_src_new,X_tar_new,A] = TCA(X_src,X_tar,options)
100+
% This is the implementation of Transfer Component Analysis.
101+
% Reference: Sinno Pan et al. Domain Adaptation via Transfer Component Analysis. TNN 2011.
101102

102-
% Inputs:
103-
%%% X_src : source feature matrix, ns * n_feature
104-
%%% Y_src : source label vector, ns * 1
105-
%%% X_tar : target feature matrix, nt * n_feature
106-
%%% Y_tar : target label vector, nt * 1
107-
%%% options : option struct
108-
%%%%% lambda : regularization parameter
109-
%%%%% dim : dimension after adaptation, dim <= n_feature
110-
%%%%% kernel_tpye : kernel name, choose from 'primal' | 'linear' | 'rbf'
111-
%%%%% gamma : bandwidth for rbf kernel, can be missed for other kernels
112-
%%%%% T : n_iterations, T >= 1. T <= 10 is suffice
113-
114-
% Outputs:
115-
%%% acc : final accuracy using knn, float
116-
%%% acc_ite : list of all accuracies during iterations
117-
%%% A : final adaptation matrix, (ns + nt) * (ns + nt)
118-
119-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
103+
% Inputs:
104+
%%% X_src : source feature matrix, ns * n_feature
105+
%%% X_tar : target feature matrix, nt * n_feature
106+
%%% options : option struct
107+
%%%%% lambda : regularization parameter
108+
%%%%% dim : dimensionality after adaptation (dim <= n_feature)
109+
%%%%% kernel_type : kernel name, choose from 'primal' | 'linear' | 'rbf'
110+
%%%%% gamma : bandwidth for rbf kernel, can be missed for other kernels
111+
112+
% Outputs:
113+
%%% X_src_new : transformed source feature matrix, ns * dim
114+
%%% X_tar_new : transformed target feature matrix, nt * dim
115+
%%% A : adaptation matrix, (ns + nt) * (ns + nt)
116+
117+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
120118

121119
%% Set options
122120
lambda = options.lambda;
123121
dim = options.dim;
124122
kernel_type = options.kernel_type;
125-
gamma = options.gamma;
126-
T = options.T;
127-
128-
acc_ite = [];
129-
Y_tar_pseudo = [];
130-
%% Iteration
131-
for i = 1 : T
132-
[Z,A] = JDA_core(X_src,Y_src,X_tar,Y_tar_pseudo,options);
133-
%normalization for better classification performance
134-
Z = Z*diag(sparse(1./sqrt(sum(Z.^2))));
135-
Zs = Z(:,1:size(X_src,1));
136-
Zt = Z(:,size(X_src,1)+1:end);
137-
138-
knn_model = fitcknn(Zs',Y_src,'NumNeighbors',1);
139-
Y_tar_pseudo = knn_model.predict(Zt');
140-
acc = length(find(Y_tar_pseudo==Y_tar))/length(Y_tar);
141-
fprintf('JDA+NN=%0.4f\n',acc);
142-
acc_ite = [acc_ite;acc];
143-
end
144-
145-
end
146-
147-
function [Z,A] = JDA_core(X_src,Y_src,X_tar,Y_tar_pseudo,options)
148-
%% Set options
149-
lambda = options.lambda; %% lambda for the regularization
150-
dim = options.dim; %% dim is the dimension after adaptation, dim <= m
151-
kernel_type = options.kernel_type; %% kernel_type is the kernel name, primal|linear|rbf
152-
gamma = options.gamma; %% gamma is the bandwidth of rbf kernel
123+
gamma = options.gamma;
153124

154-
%% Construct MMD matrix
125+
%% Calculate
155126
X = [X_src',X_tar'];
156127
X = X*diag(sparse(1./sqrt(sum(X.^2))));
157128
[m,n] = size(X);
158129
ns = size(X_src,1);
159130
nt = size(X_tar,1);
160131
e = [1/ns*ones(ns,1);-1/nt*ones(nt,1)];
161-
C = length(unique(Y_src));
162-
163-
%%% M0
164-
M = e * e' * C; %multiply C for better normalization
165-
166-
%%% Mc
167-
N = 0;
168-
if ~isempty(Y_tar_pseudo) && length(Y_tar_pseudo)==nt
169-
for c = reshape(unique(Y_src),1,C)
170-
e = zeros(n,1);
171-
e(Y_src==c) = 1 / length(find(Y_src==c));
172-
e(ns+find(Y_tar_pseudo==c)) = -1 / length(find(Y_tar_pseudo==c));
173-
e(isinf(e)) = 0;
174-
N = N + e*e';
175-
end
176-
end
177-
178-
M = M + N;
132+
M = e * e';
179133
M = M / norm(M,'fro');
180-
181-
%% Centering matrix H
182-
H = eye(n) - 1/n * ones(n,n);
183-
184-
%% Calculation
134+
H = eye(n)-1/(n)*ones(n,n);
185135
if strcmp(kernel_type,'primal')
186136
[A,~] = eigs(X*M*X'+lambda*eye(m),X*H*X',dim,'SM');
187-
Z = A'*X;
137+
Z = A' * X;
138+
Z = Z * diag(sparse(1./sqrt(sum(Z.^2))));
139+
X_src_new = Z(:,1:ns)';
140+
X_tar_new = Z(:,ns+1:end)';
188141
else
189-
K = kernel_jda(kernel_type,X,[],gamma);
142+
K = TCA_kernel(kernel_type,X,[],gamma);
190143
[A,~] = eigs(K*M*K'+lambda*eye(n),K*H*K',dim,'SM');
191-
Z = A'*K;
144+
Z = A' * K;
145+
Z = Z*diag(sparse(1./sqrt(sum(Z.^2))));
146+
X_src_new = Z(:,1:ns)';
147+
X_tar_new = Z(:,ns+1:end)';
192148
end
193-
194149
end
195150

196151
% With Fast Computation of the RBF kernel matrix
@@ -210,8 +165,7 @@ \section{上手实践}
210165
% Modified by Mingsheng Long
211166
% 2013(c)
212167
% Mingsheng Long (longmingsheng@gmail.com), 2013
213-
214-
function K = kernel_jda(ker,X,X2,gamma)
168+
function K = TCA_kernel(ker,X,X2,gamma)
215169

216170
switch ker
217171
case 'linear'
@@ -252,17 +206,14 @@ \section{上手实践}
252206

253207
\end{lstlisting}
254208

255-
我们将JDA方法包装成函数$\mathrm{MyJDA}$函数共接受5个输入参数
209+
我们将TCA方法包装成函数$\mathrm{TCA}$注意到TCA是一个无监督迁移方法,不需要接受label作为参数。因此,函数共接受3个输入参数
256210

257211
\begin{itemize}
258212
\item $\mathrm{X_{src}}$: 源域的特征,大小为$n_s \times m$
259-
\item $\mathrm{Y_{src}}$: 源域的标注,大小为$n_s \times 1$
260213
\item $\mathrm{X_{tar}}$: 目标域的特征,大小为$n_t \times m$
261-
\item $\mathrm{Y_{tar}}$: 目标域的标注,大小为$n_t \times 1$
262214
\item $\mathrm{options}$: 参数结构体,它包含:
263215
\begin{itemize}
264-
\item $\lambda$c: 平衡参数,可以自由给出
265-
\item $T$: 算法迭代次数
216+
\item $\lambda$: 平衡参数,可以自由给出
266217
\item $dim$: 算法最终选择将数据降到多少维
267218
\item $kernel type$: 选择的核类型,可以选择RBF、线性、或无核
268219
\item $\gamma$: 如果选择RBF核,那么它的宽度为$\gamma$
@@ -271,42 +222,138 @@ \section{上手实践}
271222

272223
函数的输出包含3项:
273224
\begin{itemize}
274-
\item $acc$: 算法的精度
275-
\item $acc_{iter}$: 算法每次迭代的精度,是一个一维数据
225+
\item $X_{srcnew}$: TCA后的源域
226+
\item $X_{tarnew}$: TCA后的目标域
276227
\item $A$: 最终的变换矩阵
277228
\end{itemize}
278229

279230
\textbf{4. 测试算法}
280231

281-
我们使用如下的代码对JDA算法进行测试
232+
我们使用如下的代码对TCA算法进行测试
282233

283234
\begin{lstlisting}
284-
options.T = 10; % #iterations, default=10
285235
options.gamma = 2; % the parameter for kernel
286236
options.kernel_type = 'linear';
287237
options.lambda = 1.0;
288238
options.dim = 20;
289-
[Acc,Acc_iter,A] = MyJDA(Xs,Ys,Xt,Yt,options);
290-
disp(Acc);
239+
[X_src_new,X_tar_new,A] = TCA(Xs,Xt,options);
240+
241+
% Use knn to predict the target label
242+
knn_model = fitcknn(X_src_new,Ys,'NumNeighbors',1);
243+
Y_tar_pseudo = knn_model.predict(X_tar_new);
244+
acc = length(find(Y_tar_pseudo==Yt))/length(Yt);
245+
fprintf('Acc=%0.4f\n',acc);
291246
\end{lstlisting}
292247

293248
结果显示如下:
294249
\begin{lstlisting}
295-
Iteration [ 1]:BDA+NN=0.4499
296-
Iteration [ 2]:BDA+NN=0.4342
297-
Iteration [ 3]:BDA+NN=0.4395
298-
Iteration [ 4]:BDA+NN=0.4363
299-
Iteration [ 5]:BDA+NN=0.4395
300-
Iteration [ 6]:BDA+NN=0.4468
301-
Iteration [ 7]:BDA+NN=0.4457
302-
Iteration [ 8]:BDA+NN=0.4489
303-
Iteration [ 9]:BDA+NN=0.4509
304-
Iteration [10]:BDA+NN=0.4551
250+
Acc=0.4499
251+
\end{lstlisting}
252+
253+
\subsubsection{Python}
254+
255+
与Matlab代码类似,我们也可以用Python对TCA进行实现,其主要依赖于Numpy、Scipy以及Scikit-learn等强大的科学计算库。Python版本的TCA代码如下:
256+
257+
\begin{lstlisting}[title=TCA方法的Python实现, frame=shadowbox]
258+
259+
import numpy as np
260+
import scipy.io
261+
import scipy.linalg
262+
import sklearn.metrics
263+
from sklearn.neighbors import KNeighborsClassifier
264+
265+
266+
def kernel(ker, X1, X2, gamma):
267+
K = None
268+
if not ker or ker == 'primal':
269+
K = X1
270+
elif ker == 'linear':
271+
if X2 is not None:
272+
K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T, np.asarray(X2).T)
273+
else:
274+
K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
275+
elif ker == 'rbf':
276+
if X2 is not None:
277+
K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, np.asarray(X2).T, gamma)
278+
else:
279+
K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, None, gamma)
280+
return K
281+
282+
283+
class TCA:
284+
def __init__(self, kernel_type='primal', dim=30, lamb=1, gamma=1):
285+
'''
286+
Init func
287+
:param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
288+
:param dim: dimension after transfer
289+
:param lamb: lambda value in equation
290+
:param gamma: kernel bandwidth for rbf kernel
291+
'''
292+
self.kernel_type = kernel_type
293+
self.dim = dim
294+
self.lamb = lamb
295+
self.gamma = gamma
296+
297+
def fit(self, Xs, Xt):
298+
'''
299+
Transform Xs and Xt
300+
:param Xs: ns * n_feature, source feature
301+
:param Xt: nt * n_feature, target feature
302+
:return: Xs_new and Xt_new after TCA
303+
'''
304+
X = np.hstack((Xs.T, Xt.T))
305+
X /= np.linalg.norm(X, axis=0)
306+
m, n = X.shape
307+
ns, nt = len(Xs), len(Xt)
308+
e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones((nt, 1))))
309+
M = e * e.T
310+
M = M / np.linalg.norm(M, 'fro')
311+
H = np.eye(n) - 1 / n * np.ones((n, n))
312+
K = kernel(self.kernel_type, X, None, gamma=self.gamma)
313+
n_eye = m if self.kernel_type == 'primal' else n
314+
a, b = np.linalg.multi_dot([K, M, K.T]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot([K, H, K.T])
315+
w, V = scipy.linalg.eig(a, b)
316+
ind = np.argsort(w)
317+
A = V[:, ind[:self.dim]]
318+
Z = np.dot(A.T, K)
319+
Z /= np.linalg.norm(Z, axis=0)
320+
Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T
321+
return Xs_new, Xt_new
322+
323+
def fit_predict(self, Xs, Ys, Xt, Yt):
324+
'''
325+
Transform Xs and Xt, then make predictions on target using 1NN
326+
:param Xs: ns * n_feature, source feature
327+
:param Ys: ns * 1, source label
328+
:param Xt: nt * n_feature, target feature
329+
:param Yt: nt * 1, target label
330+
:return: Accuracy and predicted_labels on the target domain
331+
'''
332+
Xs_new, Xt_new = self.fit(Xs, Xt)
333+
clf = KNeighborsClassifier(n_neighbors=1)
334+
clf.fit(Xs_new, Ys.ravel())
335+
y_pred = clf.predict(Xt_new)
336+
acc = sklearn.metrics.accuracy_score(Yt, y_pred)
337+
return acc, y_pred
338+
339+
340+
if __name__ == '__main__':
341+
domains = ['caltech.mat', 'amazon.mat', 'webcam.mat', 'dslr.mat']
342+
for i in [2]:
343+
for j in [3]:
344+
if i != j:
345+
src, tar = 'data/' + domains[i], 'data/' + domains[j]
346+
src_domain, tar_domain = scipy.io.loadmat(src), scipy.io.loadmat(tar)
347+
Xs, Ys, Xt, Yt = src_domain['feas'], src_domain['label'], tar_domain['feas'], tar_domain['label']
348+
tca = TCA(kernel_type='linear', dim=30, lamb=1, gamma=1)
349+
acc, ypre = tca.fit_predict(Xs, Ys, Xt, Yt)
350+
print(acc)
351+
305352
\end{lstlisting}
306353

307354
\textbf{5. 小结}
308355

309-
通过以上过程,我们使用Matlab代码对JDA方法进行了实验,完成了一个迁移学习任务。其他的非深度迁移学习方法,均可以参考上面的过程。值得庆幸的是,许多论文的作者都公布了他们的文章代码,以方便我们进行接下来的研究。读者可以从Github~\footnote{\url{https://github.com/jindongwang/transferlearning/tree/master/code}}或者相关作者的网站上获取其他许多方法的代码。
356+
通过以上过程,我们分别使用Matlab代码和Python代码对经典的TCA方法进行了实验,完成了一个迁移学习任务。其他的非深度迁移学习方法,均可以参考上面的过程。值得庆幸的是,许多论文的作者都公布了他们的文章代码,以方便我们进行接下来的研究。读者可以从Github~\footnote{\url{https://github.com/jindongwang/transferlearning/tree/master/code}}或者相关作者的网站上获取其他许多方法的代码。
310357

311358
%\subsection{深度网络的finetune}
312359
%

src/main.toc

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,17 @@
6969
\contentsline {subsubsection}{\numberline {9.4.2}核心方法}{52}{subsubsection.9.4.2}%
7070
\contentsline {subsubsection}{\numberline {9.4.3}小结}{55}{subsubsection.9.4.3}%
7171
\contentsline {section}{\numberline {10}上手实践}{56}{section.10}%
72-
\contentsline {section}{\numberline {11}迁移学习前沿}{62}{section.11}%
73-
\contentsline {subsection}{\numberline {11.1}机器智能与人类经验结合迁移}{62}{subsection.11.1}%
74-
\contentsline {subsection}{\numberline {11.2}传递式迁移学习}{62}{subsection.11.2}%
75-
\contentsline {subsection}{\numberline {11.3}终身迁移学习}{63}{subsection.11.3}%
76-
\contentsline {subsection}{\numberline {11.4}在线迁移学习}{64}{subsection.11.4}%
77-
\contentsline {subsection}{\numberline {11.5}迁移强化学习}{65}{subsection.11.5}%
78-
\contentsline {subsection}{\numberline {11.6}迁移学习的可解释性}{65}{subsection.11.6}%
79-
\contentsline {section}{\numberline {12}总结语}{66}{section.12}%
80-
\contentsline {section}{\numberline {13}附录}{67}{section.13}%
81-
\contentsline {subsection}{\numberline {13.1}迁移学习相关的期刊和会议}{67}{subsection.13.1}%
82-
\contentsline {subsection}{\numberline {13.2}迁移学习研究学者}{67}{subsection.13.2}%
83-
\contentsline {subsection}{\numberline {13.3}迁移学习资源汇总}{70}{subsection.13.3}%
84-
\contentsline {subsection}{\numberline {13.4}迁移学习常用算法及数据资源}{71}{subsection.13.4}%
72+
\contentsline {subsubsection}{\numberline {10.0.1}Matlab}{56}{subsubsection.10.0.1}%
73+
\contentsline {section}{\numberline {11}迁移学习前沿}{61}{section.11}%
74+
\contentsline {subsection}{\numberline {11.1}机器智能与人类经验结合迁移}{61}{subsection.11.1}%
75+
\contentsline {subsection}{\numberline {11.2}传递式迁移学习}{61}{subsection.11.2}%
76+
\contentsline {subsection}{\numberline {11.3}终身迁移学习}{62}{subsection.11.3}%
77+
\contentsline {subsection}{\numberline {11.4}在线迁移学习}{63}{subsection.11.4}%
78+
\contentsline {subsection}{\numberline {11.5}迁移强化学习}{64}{subsection.11.5}%
79+
\contentsline {subsection}{\numberline {11.6}迁移学习的可解释性}{64}{subsection.11.6}%
80+
\contentsline {section}{\numberline {12}总结语}{65}{section.12}%
81+
\contentsline {section}{\numberline {13}附录}{66}{section.13}%
82+
\contentsline {subsection}{\numberline {13.1}迁移学习相关的期刊和会议}{66}{subsection.13.1}%
83+
\contentsline {subsection}{\numberline {13.2}迁移学习研究学者}{66}{subsection.13.2}%
84+
\contentsline {subsection}{\numberline {13.3}迁移学习资源汇总}{69}{subsection.13.3}%
85+
\contentsline {subsection}{\numberline {13.4}迁移学习常用算法及数据资源}{70}{subsection.13.4}%

0 commit comments

Comments
 (0)