Trie (Prefix Tree) Data Structure

A Trie (prefix tree) is a tree-like data structure used to efficiently store and retrieve strings. Each node represents a character, and paths from root to nodes represent string prefixes. Tries are widely used in string matching, prefix queries, and autocomplete systems.

Basic Trie Implementation

Basic trie operations include insertion, search, deletion, and prefix matching. Each node contains children pointers and a flag indicating if it represents the end of a word.

Standard Trie Implementation

#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
using namespace std;

class TrieNode {
public:
    unordered_map<char, TrieNode*> children;
    bool isEnd;
    int count; // number of strings ending at this node
    
    TrieNode() : isEnd(false), count(0) {}
};

class Trie {
private:
    TrieNode* root;
    
public:
    Trie() {
        root = new TrieNode();
    }
    
    // Insert string
    void insert(const string& word) {
        TrieNode* node = root;
        for (char c : word) {
            if (node->children.find(c) == node->children.end()) {
                node->children[c] = new TrieNode();
            }
            node = node->children[c];
        }
        node->isEnd = true;
        node->count++;
    }
    
    // Search if string exists
    bool search(const string& word) {
        TrieNode* node = findNode(word);
        return node != nullptr && node->isEnd;
    }
    
    // Check if prefix exists
    bool startsWith(const string& prefix) {
        return findNode(prefix) != nullptr;
    }
    
    // Delete string
    bool remove(const string& word) {
        return removeHelper(root, word, 0);
    }
    
    // Get all strings with given prefix
    vector<string> getWordsWithPrefix(const string& prefix) {
        vector<string> result;
        TrieNode* prefixNode = findNode(prefix);
        if (prefixNode != nullptr) {
            dfs(prefixNode, prefix, result);
        }
        return result;
    }
    
    // Get all strings in trie
    vector<string> getAllWords() {
        vector<string> result;
        dfs(root, "", result);
        return result;
    }
    
    // Count strings with given prefix
    int countWordsWithPrefix(const string& prefix) {
        TrieNode* prefixNode = findNode(prefix);
        if (prefixNode == nullptr) return 0;
        return countWordsHelper(prefixNode);
    }
    
private:
    TrieNode* findNode(const string& word) {
        TrieNode* node = root;
        for (char c : word) {
            if (node->children.find(c) == node->children.end()) {
                return nullptr;
            }
            node = node->children[c];
        }
        return node;
    }
    
    bool removeHelper(TrieNode* node, const string& word, int index) {
        if (index == word.length()) {
            if (!node->isEnd) return false;
            node->isEnd = false;
            node->count--;
            return node->children.empty() && node->count == 0;
        }
        
        char c = word[index];
        if (node->children.find(c) == node->children.end()) {
            return false;
        }
        
        TrieNode* child = node->children[c];
        bool shouldDeleteChild = removeHelper(child, word, index + 1);
        
        if (shouldDeleteChild) {
            delete child;
            node->children.erase(c);
            return !node->isEnd && node->children.empty() && node->count == 0;
        }
        
        return false;
    }
    
    void dfs(TrieNode* node, const string& current, vector<string>& result) {
        if (node->isEnd) {
            for (int i = 0; i < node->count; i++) {
                result.push_back(current);
            }
        }
        
        for (auto& [c, child] : node->children) {
            dfs(child, current + c, result);
        }
    }
    
    int countWordsHelper(TrieNode* node) {
        int count = node->count;
        for (auto& [c, child] : node->children) {
            count += countWordsHelper(child);
        }
        return count;
    }
};

int main() {
    Trie trie;
    
    // Insert words
    vector<string> words = {"apple", "app", "apricot", "banana", "band", "bandana"};
    
    cout << "Inserting words: ";
    for (const string& word : words) {
        cout << word << " ";
        trie.insert(word);
    }
    cout << endl;
    
    // Search tests
    vector<string> searchWords = {"app", "apple", "application", "ban"};
    cout << "\nSearch results:" << endl;
    for (const string& word : searchWords) {
        bool found = trie.search(word);
        cout << word << ": " << (found ? "Found" : "Not found") << endl;
    }
    
    // Prefix tests
    vector<string> prefixes = {"ap", "ban", "car"};
    cout << "\nPrefix results:" << endl;
    for (const string& prefix : prefixes) {
        bool hasPrefix = trie.startsWith(prefix);
        cout << prefix << ": " << (hasPrefix ? "Has prefix" : "No prefix") << endl;
        
        if (hasPrefix) {
            vector<string> wordsWithPrefix = trie.getWordsWithPrefix(prefix);
            cout << "  Words: ";
            for (const string& word : wordsWithPrefix) {
                cout << word << " ";
            }
            cout << endl;
        }
    }
    
    return 0;
}

This trie implementation uses hash maps for children nodes, supports multiple occurrences of the same word, and provides comprehensive operations including prefix queries and word enumeration.

Array-Based Trie (More Efficient)

For competitive programming, array-based trie implementation is often more efficient in terms of both time and space, especially when the alphabet size is small.

Array-Based Trie Implementation

#include <iostream>
#include <vector>
#include <string>
#include <cstring>
using namespace std;

class ArrayTrie {
private:
    static const int MAXN = 100005; // maximum number of nodes
    static const int ALPHABET_SIZE = 26; // alphabet size
    
    int trie[MAXN][ALPHABET_SIZE];
    bool isEnd[MAXN];
    int count[MAXN];
    int nodeCount;
    
public:
    ArrayTrie() {
        nodeCount = 1; // root node
        memset(trie[0], 0, sizeof(trie[0]));
        isEnd[0] = false;
        count[0] = 0;
    }
    
    void insert(const string& word) {
        int node = 0;
        for (char c : word) {
            int idx = c - 'a';
            if (trie[node][idx] == 0) {
                trie[node][idx] = nodeCount;
                memset(trie[nodeCount], 0, sizeof(trie[nodeCount]));
                isEnd[nodeCount] = false;
                count[nodeCount] = 0;
                nodeCount++;
            }
            node = trie[node][idx];
        }
        isEnd[node] = true;
        count[node]++;
    }
    
    bool search(const string& word) {
        int node = findNode(word);
        return node != -1 && isEnd[node];
    }
    
    bool startsWith(const string& prefix) {
        return findNode(prefix) != -1;
    }
    
    int countWordsWithPrefix(const string& prefix) {
        int node = findNode(prefix);
        if (node == -1) return 0;
        return countWordsHelper(node);
    }
    
    // Find longest common prefix
    string longestCommonPrefix(const vector<string>& words) {
        if (words.empty()) return "";
        
        // Insert all words
        for (const string& word : words) {
            insert(word);
        }
        
        string lcp = "";
        int node = 0;
        
        while (true) {
            int childCount = 0;
            int nextNode = -1;
            char nextChar = 'a';
            
            // Count children
            for (int i = 0; i < ALPHABET_SIZE; i++) {
                if (trie[node][i] != 0) {
                    childCount++;
                    nextNode = trie[node][i];
                    nextChar = 'a' + i;
                }
            }
            
            // Stop if multiple children or end of word
            if (childCount != 1 || isEnd[node]) {
                break;
            }
            
            lcp += nextChar;
            node = nextNode;
        }
        
        return lcp;
    }
    
private:
    int findNode(const string& word) {
        int node = 0;
        for (char c : word) {
            int idx = c - 'a';
            if (trie[node][idx] == 0) {
                return -1;
            }
            node = trie[node][idx];
        }
        return node;
    }
    
    int countWordsHelper(int node) {
        int totalCount = count[node];
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            if (trie[node][i] != 0) {
                totalCount += countWordsHelper(trie[node][i]);
            }
        }
        return totalCount;
    }
};

int main() {
    ArrayTrie trie;
    
    vector<string> words = {"flower", "flow", "flight", "fly"};
    
    cout << "Words: ";
    for (const string& word : words) {
        cout << word << " ";
        trie.insert(word);
    }
    cout << endl;
    
    // Test searches
    vector<string> testWords = {"flow", "flower", "flowing", "fl"};
    cout << "\nSearch results:" << endl;
    for (const string& word : testWords) {
        bool found = trie.search(word);
        cout << word << ": " << (found ? "Found" : "Not found") << endl;
    }
    
    // Test prefix counting
    cout << "\nPrefix counts:" << endl;
    vector<string> prefixes = {"fl", "flo", "fly"};
    for (const string& prefix : prefixes) {
        int count = trie.countWordsWithPrefix(prefix);
        cout << prefix << ": " << count << " words" << endl;
    }
    
    // Find longest common prefix
    ArrayTrie lcpTrie;
    string lcp = lcpTrie.longestCommonPrefix(words);
    cout << "\nLongest common prefix: " << lcp << endl;
    
    return 0;
}

Array-based trie uses static arrays for children, providing better cache locality and faster access. It's particularly efficient for lowercase English letters with fixed alphabet size.

Key Takeaways

  • Trie provides O(m) time complexity for insertion, search, and deletion, where m is the string length
  • Space complexity is O(ALPHABET_SIZE Γ— N Γ— M) where N is number of strings and M is average length
  • Array-based implementation is more efficient for competitive programming with fixed alphabet
  • Common applications: autocomplete, spell checkers, IP routing, string matching problems