Trie (Prefix Tree) Data Structure
A Trie (prefix tree) is a tree-like data structure used to efficiently store and retrieve strings. Each node represents a character, and paths from root to nodes represent string prefixes. Tries are widely used in string matching, prefix queries, and autocomplete systems.
Basic Trie Implementation
Basic trie operations include insertion, search, deletion, and prefix matching. Each node contains children pointers and a flag indicating if it represents the end of a word.
Standard Trie Implementation
#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
using namespace std;
class TrieNode {
public:
unordered_map<char, TrieNode*> children;
bool isEnd;
int count; // number of strings ending at this node
TrieNode() : isEnd(false), count(0) {}
};
class Trie {
private:
TrieNode* root;
public:
Trie() {
root = new TrieNode();
}
// Insert string
void insert(const string& word) {
TrieNode* node = root;
for (char c : word) {
if (node->children.find(c) == node->children.end()) {
node->children[c] = new TrieNode();
}
node = node->children[c];
}
node->isEnd = true;
node->count++;
}
// Search if string exists
bool search(const string& word) {
TrieNode* node = findNode(word);
return node != nullptr && node->isEnd;
}
// Check if prefix exists
bool startsWith(const string& prefix) {
return findNode(prefix) != nullptr;
}
// Delete string
bool remove(const string& word) {
return removeHelper(root, word, 0);
}
// Get all strings with given prefix
vector<string> getWordsWithPrefix(const string& prefix) {
vector<string> result;
TrieNode* prefixNode = findNode(prefix);
if (prefixNode != nullptr) {
dfs(prefixNode, prefix, result);
}
return result;
}
// Get all strings in trie
vector<string> getAllWords() {
vector<string> result;
dfs(root, "", result);
return result;
}
// Count strings with given prefix
int countWordsWithPrefix(const string& prefix) {
TrieNode* prefixNode = findNode(prefix);
if (prefixNode == nullptr) return 0;
return countWordsHelper(prefixNode);
}
private:
TrieNode* findNode(const string& word) {
TrieNode* node = root;
for (char c : word) {
if (node->children.find(c) == node->children.end()) {
return nullptr;
}
node = node->children[c];
}
return node;
}
bool removeHelper(TrieNode* node, const string& word, int index) {
if (index == word.length()) {
if (!node->isEnd) return false;
node->isEnd = false;
node->count--;
return node->children.empty() && node->count == 0;
}
char c = word[index];
if (node->children.find(c) == node->children.end()) {
return false;
}
TrieNode* child = node->children[c];
bool shouldDeleteChild = removeHelper(child, word, index + 1);
if (shouldDeleteChild) {
delete child;
node->children.erase(c);
return !node->isEnd && node->children.empty() && node->count == 0;
}
return false;
}
void dfs(TrieNode* node, const string& current, vector<string>& result) {
if (node->isEnd) {
for (int i = 0; i < node->count; i++) {
result.push_back(current);
}
}
for (auto& [c, child] : node->children) {
dfs(child, current + c, result);
}
}
int countWordsHelper(TrieNode* node) {
int count = node->count;
for (auto& [c, child] : node->children) {
count += countWordsHelper(child);
}
return count;
}
};
int main() {
Trie trie;
// Insert words
vector<string> words = {"apple", "app", "apricot", "banana", "band", "bandana"};
cout << "Inserting words: ";
for (const string& word : words) {
cout << word << " ";
trie.insert(word);
}
cout << endl;
// Search tests
vector<string> searchWords = {"app", "apple", "application", "ban"};
cout << "\nSearch results:" << endl;
for (const string& word : searchWords) {
bool found = trie.search(word);
cout << word << ": " << (found ? "Found" : "Not found") << endl;
}
// Prefix tests
vector<string> prefixes = {"ap", "ban", "car"};
cout << "\nPrefix results:" << endl;
for (const string& prefix : prefixes) {
bool hasPrefix = trie.startsWith(prefix);
cout << prefix << ": " << (hasPrefix ? "Has prefix" : "No prefix") << endl;
if (hasPrefix) {
vector<string> wordsWithPrefix = trie.getWordsWithPrefix(prefix);
cout << " Words: ";
for (const string& word : wordsWithPrefix) {
cout << word << " ";
}
cout << endl;
}
}
return 0;
}This trie implementation uses hash maps for children nodes, supports multiple occurrences of the same word, and provides comprehensive operations including prefix queries and word enumeration.
Array-Based Trie (More Efficient)
For competitive programming, array-based trie implementation is often more efficient in terms of both time and space, especially when the alphabet size is small.
Array-Based Trie Implementation
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
using namespace std;
class ArrayTrie {
private:
static const int MAXN = 100005; // maximum number of nodes
static const int ALPHABET_SIZE = 26; // alphabet size
int trie[MAXN][ALPHABET_SIZE];
bool isEnd[MAXN];
int count[MAXN];
int nodeCount;
public:
ArrayTrie() {
nodeCount = 1; // root node
memset(trie[0], 0, sizeof(trie[0]));
isEnd[0] = false;
count[0] = 0;
}
void insert(const string& word) {
int node = 0;
for (char c : word) {
int idx = c - 'a';
if (trie[node][idx] == 0) {
trie[node][idx] = nodeCount;
memset(trie[nodeCount], 0, sizeof(trie[nodeCount]));
isEnd[nodeCount] = false;
count[nodeCount] = 0;
nodeCount++;
}
node = trie[node][idx];
}
isEnd[node] = true;
count[node]++;
}
bool search(const string& word) {
int node = findNode(word);
return node != -1 && isEnd[node];
}
bool startsWith(const string& prefix) {
return findNode(prefix) != -1;
}
int countWordsWithPrefix(const string& prefix) {
int node = findNode(prefix);
if (node == -1) return 0;
return countWordsHelper(node);
}
// Find longest common prefix
string longestCommonPrefix(const vector<string>& words) {
if (words.empty()) return "";
// Insert all words
for (const string& word : words) {
insert(word);
}
string lcp = "";
int node = 0;
while (true) {
int childCount = 0;
int nextNode = -1;
char nextChar = 'a';
// Count children
for (int i = 0; i < ALPHABET_SIZE; i++) {
if (trie[node][i] != 0) {
childCount++;
nextNode = trie[node][i];
nextChar = 'a' + i;
}
}
// Stop if multiple children or end of word
if (childCount != 1 || isEnd[node]) {
break;
}
lcp += nextChar;
node = nextNode;
}
return lcp;
}
private:
int findNode(const string& word) {
int node = 0;
for (char c : word) {
int idx = c - 'a';
if (trie[node][idx] == 0) {
return -1;
}
node = trie[node][idx];
}
return node;
}
int countWordsHelper(int node) {
int totalCount = count[node];
for (int i = 0; i < ALPHABET_SIZE; i++) {
if (trie[node][i] != 0) {
totalCount += countWordsHelper(trie[node][i]);
}
}
return totalCount;
}
};
int main() {
ArrayTrie trie;
vector<string> words = {"flower", "flow", "flight", "fly"};
cout << "Words: ";
for (const string& word : words) {
cout << word << " ";
trie.insert(word);
}
cout << endl;
// Test searches
vector<string> testWords = {"flow", "flower", "flowing", "fl"};
cout << "\nSearch results:" << endl;
for (const string& word : testWords) {
bool found = trie.search(word);
cout << word << ": " << (found ? "Found" : "Not found") << endl;
}
// Test prefix counting
cout << "\nPrefix counts:" << endl;
vector<string> prefixes = {"fl", "flo", "fly"};
for (const string& prefix : prefixes) {
int count = trie.countWordsWithPrefix(prefix);
cout << prefix << ": " << count << " words" << endl;
}
// Find longest common prefix
ArrayTrie lcpTrie;
string lcp = lcpTrie.longestCommonPrefix(words);
cout << "\nLongest common prefix: " << lcp << endl;
return 0;
}Array-based trie uses static arrays for children, providing better cache locality and faster access. It's particularly efficient for lowercase English letters with fixed alphabet size.
Key Takeaways
- Trie provides O(m) time complexity for insertion, search, and deletion, where m is the string length
- Space complexity is O(ALPHABET_SIZE Γ N Γ M) where N is number of strings and M is average length
- Array-based implementation is more efficient for competitive programming with fixed alphabet
- Common applications: autocomplete, spell checkers, IP routing, string matching problems