字典树（Trie）

字典树简介

字典树（\(Trie\)，又叫前缀树）是一种 树形数据结构，常用于高效地存储和查找字符串集合。它的核心思想是：公共前缀只存储一次，不同字符串在前缀相同的部分共享路径。

定义

数据结构约定：

\(tr[u][c]\)：从节点 \(u\) 经过字符 \(c\) 到达的子节点编号；\(0\) 表示不存在。
\(poss[u]\)：有多少字符串路径经过节点 \(u\)（包含在此结束的字符串）。
\(end[u]\)：有多少字符串在节点 \(u\) 结束。
\(idx\)：当前分配到的节点编号（根节点编号为 \(0\)）。

int idx = 0;
vector<vector<int>> tr(N, vector<int>(26, 0));
vector<int> poss(N, 0), end(N, 0);

插入操作

从根 \(p = 0\) 出发；
将字符 \(x\) 映射为 \(c = x – ‘a’\)；
若边不存在，则新建节点：\(tr[p][c] = ++idx\)；
沿边走到子节点：\(p = tr[p][c]\)；
走到实际节点后记录经过数：\(++poss[p]\)；
完成一串后，标记单词结尾：\(++end[p]\)。

auto insert = [&](string s) -> void
{
    int p = 0;
    for (auto x : s)
    {
        int c = x - 'a';
        if (!tr[p][c])
            tr[p][c] = ++idx;
        p = tr[p][c];
        ++poss[p];
    }

    ++end[p];
};

字典树的典型应用

1.询问字符串出现次数

题目链接：835. Trie字符串统计 – AcWing题库

题目描述：

维护一个字符串集合，支持两种操作：

I x 向集合中插入一个字符串 \(x\)；
Q x 询问一个字符串在集合中出现了多少次。

共有 \(N\) 个操作，所有输入的字符串总长度不超过 \(10^5\)，字符串仅包含小写英文字母。

思路：

用一棵字典树存所有插入的字符串，根是 \(0\)。

插入：从根按字符往下走；没有边就新建节点；走到的每个节点 \(poss++\)；最后节点 \(end++\)（记录这个词出现了一次）。

查询：同样按字符往下走；途中断边就返回 \(0\)；能走到最后就返回该节点的 \(end\)，即出现次数。

关键代码：

void miyan()
{
    int n;
    cin >> n;

    int idx = 0;
    vector<vector<int>> tr(N, vector<int>(26, 0));
    vector<int> poss(N, 0), end(N, 0);
    auto insert = [&](string s) -> void
    {
        int p = 0;
        for (auto x : s)
        {
            int c = x - 'a';
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];
            ++poss[p];
        }

        ++end[p];
    };

    auto query = [&](string s) -> int
    {
        int p = 0;
        for (auto x : s)
        {
            int c = x - 'a';
            if (!tr[p][c])
                return 0;
            p = tr[p][c];
        }

        return end[p];
    };

    while (n--)
    {
        char op;
        string s;
        cin >> op >> s;

        if (op == 'I')
            insert(s);
        else
            cout << query(s) << endl;
    }
}

2.1前缀匹配计数

题目链接：接头密匙_牛客题霸_牛客网

题目描述：

给定两组密匙序列：\(m\) 个密匙 \(b\) 与 \(n\) 个密匙 \(a\)。定义两把密匙一致，当且仅当：

密匙 \(b\) 的长度不超过密匙 \(a\) 的长度；
对任意 \(0 \le i < \text{length}(b)-1\)，有\(b[i+1]-b[i]=a[i+1]-a[i]\)。

现给定所有 \(b\) 与 \(a\)，返回长度为 \(m\) 的数组 \(ans\)，其中 \(ans[i]\) 表示与第 \(i\) 个 \(b\) 一致的 \(a\) 的个数。
数组元素总数不超过 \(10^5\)，且 \(1 \le m,n \le 1000\)。

思路：

将每个序列转为相邻差分序列 \(diff(x)\)。由定义可知：\(diff(b)\) 是 \(diff(a)\) 的前缀。

做法：把所有 \(diff(a)\) 串插入一棵 \(Trie\)，在节点上用 \(poss\) 记录“经过该节点的路径数”。查询时，把 \(diff(b)\) 串在 \(Trie\) 中走到结尾，答案就是该结点的 \(poss\)（有多少 \(diff(a)\) 以它为前缀）。

由于差分可能是多位数或负数，先把每个差分转成字符并用分隔符 \(\text{#}\) 串联；字符集包含 0-9、-、# 共 12 类，映射到 \(0..11\)。

关键代码：

class Solution
{
public:
    vector<int> countConsistentKeys(vector<vector<int>> &b, vector<vector<int>> &a)
    {
        int n = a.size(), m = b.size();
        const int N = 5e5 + 10;
        int idx = 0;
        vector tr(N, vector<int>(12, 0));
        vector<int> poss(N, 0), end(N, 0);

        auto get = [](char c) -> int
        {
            if (c <= '9' && c >= '0')
                return c - '0';
            else if (c == '-')
                return 10;
            return 11;
        };

        auto insert = [&](string s) -> void
        {
            int p = 0;
            for (auto x : s)
            {
                int c = get(x);
                if (!tr[p][c])
                    tr[p][c] = ++idx;
                p = tr[p][c];
                ++poss[p];
            }

            ++end[p];
        };

        auto query = [&](string s) -> int
        {
            int p = 0;
            for (auto x : s)
            {
                int c = get(x);
                if (!tr[p][c])
                    return 0;
                p = tr[p][c];
            }

            return poss[p];
        };

        for (int i = 0; i < n; ++i)
        {
            string s;
            for (int j = 1; j < a[i].size(); ++j)
                s += to_string(a[i][j] - a[i][j - 1]) + '#';

            insert(s);
        }

        vector<int> ans(m, 0);
        for (int i = 0; i < m; ++i)
        {
            string s;
            for (int j = 1; j < b[i].size(); ++j)
                s += to_string(b[i][j] - b[i][j - 1]) + '#';

            ans[i] = query(s);
        }

        return ans;
    }
};

2.2前缀匹配计数

题目链接：P2922 [USACO08DEC] Secret Message G – 洛谷

题目描述：

有 \(M\) 条被截获的二进制消息（只知道前 \(b_i\) 位）与 \(N\) 条暗号（只知道前 \(c_j\) 位）。对每个暗号 \(j\)，统计有多少消息与其前缀一致：即两者前缀长度取二者较短，且对应位完全相同。输入保证位总数 \(\sum b_i + \sum c_j \le 5\times 10^5\)。

思路：

将所有消息都插入到字典树中。

定义：

\(end[u]\)：有多少消息恰好在节点 \(u\) 结束（消息长度 ≤ 暗号长度时，用来数“消息是暗号前缀”）。
\(poss[u]\)：有多少消息经过节点 \(u\)（暗号长度 ≤ 消息长度且暗号能走完整时，用来数“消息以暗号为前缀”）。

查询一条暗号串 \(s\)：从根节点开始向下查找，沿途累计 \(end\)（统计“更短或等长消息作为 \(s\) 的前缀”）；若 \(s\) 全部走完，则再加上 \(poss[p] – end[p]\)（统计“以 \(s\) 为前缀的更长消息”，并去掉等长重复）。若中途断边，则只能得到沿途 \(end\) 的和。

关键代码：

void miyan()
{
    int m, n;
    cin >> m >> n;

    int idx = 0;
    vector tr(N, array<int, 2>{0, 0});
    vector<int> poss(N, 0), end(N, 0);
    auto insert = [&](string s) -> void
    {
        int p = 0;
        for (int i = 0; s[i]; ++i)
        {
            int c = s[i] - '0';
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];
            ++poss[p];
        }

        ++end[p];
    };

    auto query = [&](string s) -> int
    {
        int ans = 0;
        int p = 0;
        int cnt = 0;
        for (int i = 0; s[i]; ++i)
        {
            int c = s[i] - '0';
            if (!tr[p][c])
                break;
            p = tr[p][c];
            ans += end[p];
            ++cnt;
        }

        if (cnt == s.size())
            ans += poss[p] - end[p];
        return ans;
    };

    for (int i = 0; i < m; ++i)
    {
        int k;
        cin >> k;
        string s;
        while (k--)
        {
            char c;
            cin >> c;
            s += c;
        }

        insert(s);
    }

    for (int i = 0; i < n; ++i)
    {
        int k;
        cin >> k;
        string s;
        while (k--)
        {
            char c;
            cin >> c;
            s += c;
        }

        cout << query(s) << endl;
    }
}

3.动态前缀覆盖检测

题目链接：https://atcoder.jp/contests/abc403/tasks/abc403_e

题目描述：

有两个多重集合：字符串集 \(X\) 和 \(Y\)，初始均为空。依次处理 \(Q\) 次操作：

若 \(T_i=1\)：把字符串 \(S_i\) 插入集合 \(X\)；
若 \(T_i=2\)：把字符串 \(S_i\) 插入集合 \(Y\)。

每次操作后，输出当前 \(Y\) 中不以 \(X\) 中任意字符串为前缀的字符串数量（多重集计数）。

思路：

在 \(Trie\) 里插 \(X\)：走到终点把该节点打 \(flag=1\)；同时把曾经过这个终点的所有 \(Y\)（保存在 \(pos[终点]\) 里）标记作废，\(ans–\)，并清空该 \(pos\)。

在 \(Trie\) 里插 \(Y\)：先把 \(ans++\)，沿路把这条 \(Y\) 的编号丢进每个节点的 \(pos[u]\)；若途中遇到 \(flag=1\)，立刻把这条 \(Y\) 作废一次，\(ans–\)。

用 \(st[id]\) 防重复作废；\(ans\) 始终表示 “\(Y\) 中不以任何 \(X\) 为前缀” 的数量。

关键代码：

void miyan()
{
    int q;
    cin >> q;

    ll ans = 0;
    ll idx = 0, id = 0;
    vector tr(N, vector<int>(26, 0));
    vector<int> st(N, 0);
    vector<int> flag(N, 0);
    vector<vector<int>> pos(N);
    auto insert1 = [&](string s) -> void
    {
        int p = 0;
        for (int i = 0; s[i]; ++i)
        {
            int c = s[i] - 'a';
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];
        }

        flag[p] = 1;

        for (auto x : pos[p])
            if (!st[x])
                st[x] = 1, --ans;

        pos[p].clear();
    };

    auto insert2 = [&](string s) -> void
    {
        ++ans;
        ++id;

        int p = 0;
        for (int i = 0; s[i]; ++i)
        {
            int c = s[i] - 'a';
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];

            pos[p].push_back(id);

            if (!st[id] && flag[p])
                st[id] = 1, --ans;
        }
    };

    while (q--)
    {
        int op;
        string s;
        cin >> op >> s;

        if (op == 1)
            insert1(s);
        else
            insert2(s);

        cout << ans << endl;
    }
}

4.两数最大异或值

题目链接：https://www.luogu.com.cn/problem/P4551

题目描述：

给定 \(N\) 个整数，从中选出两个进行按位异或，问能得到的最大结果是多少。输入约束：\(1\le N\le 10^5\)，\(0\le A_i<2^{31}\)。

思路：

\(01\)字典树 + 贪心。

把所有数的二进制表示（最高到第 \(30\) 位）插入一棵 \(01\) 字典树：

插入：从高位到低位（\(30\)→\(0\)）逐位添加到字典树中。
查询：想让异或值最大，就尽量在每一位走相反位。对某个数 \(x\) 的第 \(i\) 位 \(c\in\{0,1\}\)，若当前节点存在 \(!c\) 边，就走那条边并把答案第 \(i\) 位置 \(1\)；否则只能走 \(c\) 边。

把所有数都插入后，再对每个数做一次查询，取最大值即为答案。
总体贪心思路：异或要想某一位为 \(1\)，就让两数在该位不同；由于高位值更大，从高位往低位贪心即可。

关键代码：

struct Trie
{
    int idx;
    vector<vector<int>> tr;

    Trie(int _N)
    {
        int n = _N + 10;
        idx = 0;
        tr.resize(n, vector<int>(2, 0));
    }

    void insert(int x)
    {
        int p = 0;
        for (int i = 30; ~i; i--)
        {
            int c = x >> i & 1;
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];
        }
    }

    int query(int x)
    {
        int ans = 0;
        int p = 0;
        for (int i = 30; ~i; i--)
        {
            int c = x >> i & 1;
            if (tr[p][!c])
            {
                ans += (1 << i);
                p = tr[p][!c];
            }
            else
                p = tr[p][c];
        }
        return ans;
    }
};

void miyan()
{
    int n;
    cin >> n;

    Trie trie(n * 32);

    vector<int> a(n);
    for (int i = 0; i < n; ++i)
    {
        cin >> a[i];
        trie.insert(a[i]);
    }

    ll ans = 0;
    for (int i = 0; i < n; ++i)
        ans = max(ans, 1ll * trie.query(a[i]));

    cout << ans << endl;
}

5.树上最大异或路径

题目链接：https://www.luogu.com.cn/problem/P10471

题目描述：

给定一棵 \(n\) 个点的带权树（\(0 \le\) 边权 \(< 2^{31}\)）。定义两点间异或路径为它们唯一路径上所有边权的按位异或。求所有两点间异或路径的最大值。

思路：

\(01\)字典树 + 贪心。

定义根节点为 \(1\)，定义

\(\mathrm{val}[u]=\bigoplus_{e\in \mathrm{root}\to u} w(e)\)

即根到 \(u\) 路径的边权异或。则对任意两点 \(u,v\)：

\(\text{pathXor}(u,v)=\text{val}[u]\ \oplus\ \text{val}[v]\)

因为根到 \(u\)、根到 \(v\) 的路径在 \(LCA\) 之前相同，异或两次抵消，剩下的就是 \(u\to v\) 的边权异或。

于是问题转化为：给定数组 \(\{\text{val}[1],\dots,\text{val}[n]\}\)，最大化两两异或 \(\text{val}[i]\oplus \text{val}[j]\)。这就是经典的两数最大异或，用 \(01\)字典树从高位到低位贪心匹配相反位即可。

关键代码：

struct Trie
{
    int idx;
    vector<vector<int>> tr;

    Trie(int _N)
    {
        int n = _N + 10;
        idx = 0;
        tr.resize(n, vector<int>(2, 0));
    }

    void insert(int x)
    {
        int p = 0;
        for (int i = 30; ~i; i--)
        {
            int c = x >> i & 1;
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];
        }
    }

    int query(int x)
    {
        int ans = 0;
        int p = 0;
        for (int i = 30; ~i; i--)
        {
            int c = x >> i & 1;
            if (tr[p][!c])
            {
                ans += (1 << i);
                p = tr[p][!c];
            }
            else
                p = tr[p][c];
        }
        return ans;
    }
};

void miyan()
{
    int n;
    cin >> n;
    vector<vector<pii>> g(n + 1);
    for (int i = 1; i < n; ++i)
    {
        int u, v, w;
        cin >> u >> v >> w;
        g[u].push_back({v, w});
        g[v].push_back({u, w});
    }

    Trie trie(n * 32);

    vector<int> val(n + 1, 0);
    auto dfs = [&](auto &&self, int u, int f, int res) -> void
    {
        for (auto [v, w] : g[u])
        {
            if (v == f)
                continue;

            val[v] = res ^ w;
            trie.insert(val[v]);
            self(self, v, u, val[v]);
        }
    };

    dfs(dfs, 1, 0, 0);

    ll ans = 0;
    for (int i = 1; i <= n; ++i)
        ans = max(ans, 1ll * trie.query(val[i]));

    cout << ans << endl;
}

6.统计两两字符串最长公共前缀和

题目链接：码蹄集OJ-黛玉葬花

题目描述：

给定 \(n\) 个仅含小写字母的字符串 \(s_1,\dots,s_n\)。定义两串的“宿命相似度”为它们的最长公共前缀长度 \(\mathrm{prefix}(s,t)\)。求

\(\Big(\sum_{i=1}^n\sum_{j=1}^n \mathrm{prefix}(s_i,s_j)\Big)\ \bmod\ 998244353\)

所有字符串总长 \(\le 10^6\)。

思路：

对于固定的一个字符串 \(s\)，设它的前缀为 \(s[1..1], s[1..2], \ldots, s[1..|s|]\)。\(\mathrm{prefix}(s, t)\) 恰好等于：有多少个前缀 \(s[1..k]\) 同时也是 \(t\) 的前缀。于是

\(\sum_{t}\mathrm{prefix}(s,t)=\sum_{k=1}^{|s|}\bigl|{\,t \mid s[1..k]\preceq t\,}\bigr|\)

因此，把所有字符串插入字典树，并在每个节点维护一个计数 \(poss[u] =\) “有多少字符串经过该节点”（即有多少串拥有该节点所代表的前缀）。
那么：

插入一个字符串：沿路把经过节点的 \(poss\) 递增；
查询一个字符串 \(s\)：沿着 \(s\) 的路径，把每一层节点的 \(poss\) 相加，就是 \(\sum_t \mathrm{prefix}(s,t)\)。

最后把所有字符串的查询结果求和，即为答案，对 \(MOD\) 取模。

关键代码：

struct Trie
{
    int idx;
    vector<vector<int>> tr;
    vector<int> poss;
    vector<int> ed;

    Trie(int _N)
    {
        int n = _N;
        idx = 0;
        tr.resize(n, vector<int>(63, 0));
        poss.resize(n, 0);
        ed.resize(n, 0);
    }

    void insert(string s)
    {
        int p = 0;
        for (int i = 0; i < s.size(); i++)
        {
            int c = s[i] - 'a';
            if (!tr[p][c])
                tr[p][c] = ++idx;
            p = tr[p][c];
            ++poss[p];
        }
        ++ed[p];
    }

    ll query(string s)
    {
        ll ans = 0;
        int p = 0;
        for (int i = 0; i < s.size(); i++)
        {
            int c = s[i] - 'a';
            p = tr[p][c];
            ans = (ans + poss[p]) % MOD;
        }
        return ans;
    }
};

void miyan()
{
    int n;
    cin >> n;

    Trie trie(N);
    vector<string> s(n);
    for (int i = 0; i < n; ++i)
    {
        cin >> s[i];
        trie.insert(s[i]);
    }

    ll ans = 0;
    for (auto x : s)
        ans = (ans + trie.query(x)) % MOD;

    cout << ans << endl;
}