function [array_gain_con,id_BS_conv] = training_hierarchy_conv(Codebook,h,SNR,Nt,conv_encoder_conf)
% TRAINING_HIERARCHY_CONV  Beam training whose per-stage hard decisions are
% decoded with a Viterbi decoder for a convolutional code.
%
%   Inputs
%     Codebook          - matrix indexed as Codebook(2*k_iter-1,:) and
%                         Codebook(2*k_iter,:); each row is combined
%                         element-wise with a 1-by-Nt mask, so rows are used
%                         as length-Nt 0/1 masks.
%     h                 - channel vector; used as h.' * (Nt-by-4 matrix), so
%                         it must have Nt elements in a column.
%     SNR               - linear SNR; noise standard deviation is sqrt(1/SNR).
%     Nt                - number of antennas / narrow beams.
%     conv_encoder_conf - struct with fields
%                           n, k, N  : code parameters (n outputs, k inputs
%                                      per block, (N-1)*k state bits),
%                           A        : cell array, A{j} is the tap row vector
%                                      of output bit j (multiplied with the
%                                      [input bits, state bits] vector),
%                           trailing : true -> trace back from state 1 and
%                                      drop the last (N-1)*k decoded bits;
%                                      false -> trace back from the state
%                                      with minimum loss.
%
%   Outputs
%     id_BS_conv        - 1-based column index of the selected narrow beam.
%     array_gain_con    - noise-free |h.' * beam|^2 of the selected beam.
%
%   NOTE(review): the measurement stage reads exactly two Codebook rows and
%   produces exactly two received bits per block, and the path cache is
%   extended by one bit per block. This looks like it assumes n == 2 and
%   k == 1 -- confirm against the callers before using other codes.
%
%   Relies on helpers defined elsewhere: to_binary, from_binary,
%   generate_widebeam.

    sigma = sqrt(1/SNR);

    % Narrow-beam codebook: column c is exp(1j*pi*(0:Nt-1)'*codeword(c))/sqrt(Nt).
    nnt = (0:1:Nt-1)';
    codeword_BS = -1+1/Nt:2/Nt:1-1/Nt;
    %codeword_BS = linspace(-1,1,Nt);
    codebook_BS = exp(1j*pi*nnt*codeword_BS)/sqrt(Nt);

    %% Build the trellis tables of the convolutional code.
    n = conv_encoder_conf.n;
    k = conv_encoder_conf.k;
    N = conv_encoder_conf.N;
    gen_taps = conv_encoder_conf.A;
    N_inner_state_bits = (N-1)*k;
    N_status = 2^((N-1)*k);
    N_choices = 2^k;
    NextStat = zeros(N_status, N_choices);  % State codes always start from 1.
    ConvOutput = cell(N_status, N_choices); % Encoder output in binary form.
    for ns = 1:N_status
        for nc = 1:N_choices
            % Register content: new input bits followed by current state bits.
            merged = [to_binary(nc-1,k),to_binary(ns-1,N_inner_state_bits)];
            % Next state keeps the first N_inner_state_bits of the register.
            NextStat(ns, nc) = from_binary(merged(1:N_inner_state_bits))+1;
            % Output bit j is the mod-2 inner product with tap vector j.
            o_block = false([1,n]);
            for o_iter=1:n
                o_block(o_iter) = logical(mod(merged*(gen_taps{o_iter}.'),2));
            end
            ConvOutput{ns, nc} = o_block;
        end
    end

    %% Viterbi decode. Hard decode.
    % Total number of coded bits; also used as the initial loss of every
    % state other than state 1 so that paths starting there are penalised.
    loss_cap = (log2(Nt)-1)/k*n;
    losses = loss_cap*([0;ones(N_status-1,1)]);
    N_input_blocks = loss_cap/n;
    best_path_choices = cell(N_input_blocks, 1);
    for iter=1:N_input_blocks
        best_path_choices{iter} = zeros(N_status, 2);   % (choice, last_state)
    end
    % path_cache_new(s): integer built from the inputs chosen along the
    % survivor path that currently ends in state s.
    path_cache_new=zeros(N_status,1);
    w_BS = zeros(Nt,4);
    for k_iter = 1:N_input_blocks
        % Two codebook rows per stage, each used together with its complement.
        idx_ori1=Codebook(2*k_iter-1,:);
        idx_ori2=Codebook(2*k_iter,:);

        % Restrict the probing beams to the direction ranges that are still
        % reachable through one of the survivor paths.
        possible_dir=zeros(1,Nt);
        delta=Nt/(2^(k_iter-1));
        for st=1:N_status
            possible_dir(path_cache_new(st)*delta+1:(path_cache_new(st)+1)*delta)=1;
        end
        index11=find((~idx_ori1) & possible_dir);
        index12=find(idx_ori1 & possible_dir);
        index21=find((~idx_ori2) & possible_dir);
        index22=find(idx_ori2 & possible_dir);
        w_BS(:,1)=generate_widebeam(Nt,index11);
        w_BS(:,2)=generate_widebeam(Nt,index12);
        w_BS(:,3)=generate_widebeam(Nt,index21);
        w_BS(:,4)=generate_widebeam(Nt,index22);

        % Noisy power measurement of the four beams.
        noise = sigma*(randn(1,4)+1i*randn(1,4))/sqrt(2);
        temp =sqrt(1/2)*h.'*w_BS+noise;
        rx_power=abs(temp).^2;

        % Hard decision, one bit per beam pair: 0 only when the complement
        % beam is strictly stronger. Each bit is decided independently so a
        % tie in one pair cannot change the decision of the other pair.
        received_block = [~(rx_power(1,1)>rx_power(1,2)), ...
                          ~(rx_power(1,3)>rx_power(1,4))];

        % Add-compare-select over all trellis branches.
        new_losses = NaN(N_status, 1);   % NaN means not updated.
        for ns = 1:N_status
            for nc = 1:N_choices
                o_block = ConvOutput{ns, nc};
                % Hamming distance, so t_loss>=0 as Viterbi requires.
                t_loss = sum(xor(o_block, received_block));
                t_ind = NextStat(ns, nc);
                candidate = losses(ns)+t_loss;
                if isnan(new_losses(t_ind)) || new_losses(t_ind) > candidate
                    new_losses(t_ind) = candidate;
                    best_path_choices{k_iter}(t_ind, 1) = nc-1;
                    best_path_choices{k_iter}(t_ind, 2) = ns;
                end
            end
        end

        % Update the best paths: extend each survivor by its chosen input.
        losses = new_losses;
        path_cache_last = path_cache_new;
        for ns = 1:N_status
            chosen_input = best_path_choices{k_iter}(ns, 1);
            last_sta = best_path_choices{k_iter}(ns, 2);
            path_cache_new(ns,1)= path_cache_last(last_sta,1)*2+chosen_input;
        end
    end

    %% Generate bit predictions using best_path_choices.
    decoded_bits = false([1, N_input_blocks*k]);

    if conv_encoder_conf.trailing
        % Trailing mode: trace back from state 1.
        p=1;
    else
        % Abrupt stopping: trace back from the state with minimum loss.
        [~, p]=min(losses);
    end

    for iter=N_input_blocks:-1:1
        o_index = k*(iter-1)+1;
        status = best_path_choices{iter}(p, :);
        decoded_bits(o_index:o_index+k-1) = flip(to_binary(status(1),k));
        p = status(2);
    end

    if conv_encoder_conf.trailing
        % Drop the last N_inner_state_bits decoded bits.
        decoded_bits = decoded_bits(1:N_input_blocks*k-N_inner_state_bits);
    end

    %% Final stage: compare the two narrow beams addressed by the decoded index.
    id=from_binary(decoded_bits);
    id1 = id*2+1;
    id2 = id*2+2;
    noise = sigma*(randn(1,1)+1i*randn(1,1))/sqrt(2);
    temp1 = h.'*codebook_BS(:,id1)+noise;
    a1=abs(temp1)^2;
    noise = sigma*(randn(1,1)+1i*randn(1,1))/sqrt(2);
    temp2 = h.'*codebook_BS(:,id2)+noise;
    a2=abs(temp2)^2;
    % if/else so that both outputs are always assigned.
    if a1>=a2
        id_BS_conv=id1;
    else
        id_BS_conv=id2;
    end
    % Reported gain is the noise-free gain of the selected beam.
    array_gain_con=abs(h.'*codebook_BS(:,id_BS_conv))^2;
end