🎤 DSP 项目4 - DTW孤立词语音识别代码模板

📌 项目说明:
⚠️ 提交清单:ex1_preemphasis.m、ex2_enframe.m、ex3_mfcc.m、ex4_dtw.m
✅ 实验流程:
  1. 按顺序完成 ex1-ex4,填写 ? 处的代码
  2. 运行 train_template.m 生成 template.mat
  3. 运行 ex0_all.m 测试(先用test目录,再用train目录对比)
  4. 运行 test_Project4.p 检查代码,生成加密成绩
📄 ex1_preemphasis.m(预加重滤波)
function y = ex1_preemphasis(x, alpha)
%EX1_PREEMPHASIS 预加重滤波
%   y = ex1_preemphasis(x, alpha)
%   公式: y[n] = x[n] - alpha * x[n-1]
%
%   输入:
%       x     - 输入语音信号
%       alpha - 预加重系数,默认0.97
%   输出:
%       y     - 预加重后的信号

if nargin < 2
    alpha = 0.97;
end

x = x(:);

% TODO: 使用filter函数实现预加重
% 提示: b = [1, -alpha], a = 1
y = filter(?, 1, x);

end
📄 ex2_enframe.m(信号分帧)
function frames = ex2_enframe(x, frame_len, frame_shift)
%EX2_ENFRAME 将信号分帧
%   frames = ex2_enframe(x, frame_len, frame_shift)
%
%   输入:
%       x           - 输入信号
%       frame_len   - 帧长(样本点数)
%       frame_shift - 帧移(样本点数)
%   输出:
%       frames      - 帧矩阵 [frame_len x num_frames]

x = x(:);
N = length(x);

% TODO: 计算帧数
% 公式: num_frames = floor((N - frame_len) / frame_shift) + 1
num_frames = ?;

if num_frames < 1
    error('信号太短,无法分帧');
end

frames = zeros(frame_len, num_frames);

for i = 1:num_frames
    % TODO: 计算第i帧的起始索引
    % 公式: start_idx = (i - 1) * frame_shift + 1
    start_idx = ?;
    frames(:, i) = x(start_idx : start_idx + frame_len - 1);
end

end
📄 ex3_mfcc.m(MFCC特征提取)
function [mfcc_all, mfcc_base] = ex3_mfcc(x, fs, params)
%EX3_MFCC 计算MFCC特征(含Delta和Delta-Delta)
%   输出: mfcc_all [36 x num_frames] = 12 MFCC + 12 Delta + 12 Delta-Delta

%% 参数默认值
if nargin < 3, params = struct(); end
if ~isfield(params, 'frame_len_ms'), params.frame_len_ms = 25; end
if ~isfield(params, 'frame_shift_ms'), params.frame_shift_ms = 10; end
if ~isfield(params, 'pre_emphasis'), params.pre_emphasis = 0.97; end
if ~isfield(params, 'nfft'), params.nfft = 2048; end
if ~isfield(params, 'num_filters'), params.num_filters = 32; end
if ~isfield(params, 'num_ceps'), params.num_ceps = 12; end
if ~isfield(params, 'lifter_coef'), params.lifter_coef = 22; end
if ~isfield(params, 'delta_N'), params.delta_N = 2; end

%% Step 1: 预加重
x_emph = ex1_preemphasis(x, params.pre_emphasis);

%% Step 2: 分帧
frame_len = round(params.frame_len_ms * fs / 1000);
frame_shift = round(params.frame_shift_ms * fs / 1000);
frames = ex2_enframe(x_emph, frame_len, frame_shift);
num_frames = size(frames, 2);

%% Step 3: 加窗(汉宁窗)
% TODO: 创建周期性汉宁窗
win = ?;

% TODO: 对每帧应用窗函数(使用repmat或bsxfun)
frames_win = ?;

%% Step 4: FFT 和功率谱
nfft = params.nfft;

% TODO: 计算FFT
X = ?;

% TODO: 计算功率谱(只取正频率部分)
% 公式: P = (1/nfft) * |X(1:nfft/2+1)|^2
pow_spec = ?;

%% Step 5: Mel滤波器组
num_filters = params.num_filters;
mel_bank = create_mel_filterbank(fs, nfft, num_filters, 0, fs/2);
mel_energy = mel_bank * pow_spec;
mel_energy = max(mel_energy, eps);  % 避免log(0)

%% Step 6: 取对数
log_mel = log(mel_energy);

%% Step 7: DCT变换
% TODO: 对log_mel进行DCT变换
dct_coef = ?;

% TODO: 取第2到(num_ceps+1)阶系数
num_ceps = params.num_ceps;
mfcc_base = ?;

%% Step 8: 升倒谱(Liftering)
L = params.lifter_coef;
n = (1:num_ceps)';
lifter = 1 + (L/2) * sin(pi * n / L);
mfcc_base = mfcc_base .* repmat(lifter, 1, num_frames);

%% Step 9: Delta 和 Delta-Delta
delta = compute_delta(mfcc_base, params.delta_N);
delta_delta = compute_delta(delta, params.delta_N);

%% 合并输出
mfcc_all = [mfcc_base; delta; delta_delta];

end

%% ==================== 辅助函数 ====================

function mel_bank = create_mel_filterbank(fs, nfft, num_filters, low_freq, high_freq)
%CREATE_MEL_FILTERBANK 创建Mel三角滤波器组

hz2mel = @(f) 2595 * log10(1 + f/700);
mel2hz = @(m) 700 * (10.^(m/2595) - 1);

low_mel = hz2mel(low_freq);
high_mel = hz2mel(high_freq);

mel_points = linspace(low_mel, high_mel, num_filters + 2);
hz_points = mel2hz(mel_points);

bin_points = floor(hz_points / fs * nfft) + 1;
bin_points = min(bin_points, nfft/2 + 1);
bin_points = max(bin_points, 1);

num_bins = nfft/2 + 1;
mel_bank = zeros(num_filters, num_bins);

for i = 1:num_filters
    left = bin_points(i);
    center = bin_points(i + 1);
    right = bin_points(i + 2);
    
    for k = left:center
        if center ~= left
            mel_bank(i, k) = (k - left) / (center - left);
        end
    end
    
    for k = center:right
        if right ~= center
            mel_bank(i, k) = (right - k) / (right - center);
        end
    end
end

end

function delta = compute_delta(feat, N)
%COMPUTE_DELTA 计算差分系数

[num_ceps, num_frames] = size(feat);
delta = zeros(num_ceps, num_frames);
denom = 2 * sum((1:N).^2);

for t = 1:num_frames
    numerator = zeros(num_ceps, 1);
    for n = 1:N
        t_plus = min(t + n, num_frames);
        t_minus = max(t - n, 1);
        numerator = numerator + n * (feat(:, t_plus) - feat(:, t_minus));
    end
    delta(:, t) = numerator / denom;
end

end
📄 ex4_dtw.m(DTW动态规整)
function [dist, D, path] = ex4_dtw(template, test, normalize_flag)
%EX4_DTW 动态时间规整算法
%   [dist, D, path] = ex4_dtw(template, test, normalize_flag)
%
%   递推公式: D(i,j) = d(i,j) + min{D(i-1,j), D(i,j-1), D(i-1,j-1)}

if nargin < 3
    normalize_flag = true;
end

[dim1, n] = size(template);
[dim2, m] = size(test);

if dim1 ~= dim2
    error('特征维度不匹配');
end

% 均值归一化
if normalize_flag
    template = template - repmat(mean(template, 2), 1, n);
    test = test - repmat(mean(test, 2), 1, m);
end

%% 计算局部距离矩阵
local_dist = zeros(n, m);
for i = 1:n
    for j = 1:m
        % TODO: 计算欧氏距离
        local_dist(i, j) = ?;
    end
end

%% 初始化累积距离矩阵
% TODO: 创建(n+1)x(m+1)的矩阵,初始化为inf
D = ?;

% TODO: 设置边界条件 D(1,1) = 0
?;

%% 动态规划填充
for i = 1:n
    for j = 1:m
        cost = local_dist(i, j);
        
        % TODO: 获取三个方向的累积距离
        d1 = ?;  % 从上方 D(i, j+1)
        d2 = ?;  % 从左方 D(i+1, j)
        d3 = ?;  % 从对角 D(i, j)
        
        % TODO: 递推公式
        D(i + 1, j + 1) = ?;
    end
end

% 最终DTW距离
dist = D(n + 1, m + 1);

% 路径回溯(可选)
if nargout >= 3
    path = backtrack(D);
else
    path = [];
end

end

function path = backtrack(D)
%BACKTRACK 路径回溯
[n_plus1, m_plus1] = size(D);
path = zeros(n_plus1 + m_plus1, 2);
k = 0;
i = n_plus1 - 1;
j = m_plus1 - 1;

while i > 0 && j > 0
    k = k + 1;
    path(k, :) = [i, j];
    
    if i == 1 && j == 1
        break;
    end
    
    candidates = [];
    if i > 1 && j > 1
        candidates = [candidates; i-1, j-1, D(i, j)];
    end
    if i > 1
        candidates = [candidates; i-1, j, D(i, j+1)];
    end
    if j > 1
        candidates = [candidates; i, j-1, D(i+1, j)];
    end
    
    if isempty(candidates)
        break;
    end
    
    [~, idx] = min(candidates(:, 3));
    i = candidates(idx, 1);
    j = candidates(idx, 2);
end

path = flipud(path(1:k, :));
end