Hash Tables
Hash tables (also called hash maps) are data structures that implement an associative array abstract data type, mapping keys to values. They use a hash function to compute an index into a bucket array where the desired value can be found.
Hash tables provide average O(1) time complexity for search, insertion, and deletion operations, making them extremely efficient for many applications like databases, caches, and symbol tables.
Hash Function
A hash function converts keys into array indices. A good hash function should distribute keys uniformly across the hash table:
Basic Hash Table Implementation
#include <iostream>
#include <vector>
#include <string>
using namespace std;
class HashTable {
private:
static const int TABLE_SIZE = 10;
vector<vector<pair<string, int>>> table;
// Simple hash function
int hashFunction(const string& key) {
int hash = 0;
for (char c : key) {
hash += c;
}
return hash % TABLE_SIZE;
}
public:
HashTable() {
table.resize(TABLE_SIZE);
}
void insert(const string& key, int value) {
int index = hashFunction(key);
// Check if key already exists
for (auto& pair : table[index]) {
if (pair.first == key) {
pair.second = value; // Update existing value
return;
}
}
// Add new key-value pair
table[index].push_back({key, value});
}
bool search(const string& key, int& value) {
int index = hashFunction(key);
for (const auto& pair : table[index]) {
if (pair.first == key) {
value = pair.second;
return true;
}
}
return false;
}
bool remove(const string& key) {
int index = hashFunction(key);
for (auto it = table[index].begin(); it != table[index].end(); ++it) {
if (it->first == key) {
table[index].erase(it);
return true;
}
}
return false;
}
void display() {
cout << "Hash Table Contents:" << endl;
for (int i = 0; i < TABLE_SIZE; i++) {
cout << "Bucket " << i << ": ";
if (table[i].empty()) {
cout << "Empty";
} else {
for (const auto& pair : table[i]) {
cout << "[" << pair.first << ":" << pair.second << "] ";
}
}
cout << endl;
}
}
void showStats() {
int totalElements = 0;
int occupiedBuckets = 0;
int maxChainLength = 0;
for (int i = 0; i < TABLE_SIZE; i++) {
int chainLength = table[i].size();
totalElements += chainLength;
if (chainLength > 0) {
occupiedBuckets++;
maxChainLength = max(maxChainLength, chainLength);
}
}
cout << "\nHash Table Statistics:" << endl;
cout << "Total elements: " << totalElements << endl;
cout << "Occupied buckets: " << occupiedBuckets << "/" << TABLE_SIZE << endl;
cout << "Load factor: " << (double)totalElements / TABLE_SIZE << endl;
cout << "Max chain length: " << maxChainLength << endl;
}
};
int main() {
HashTable ht;
// Insert some key-value pairs
ht.insert("apple", 5);
ht.insert("banana", 3);
ht.insert("orange", 8);
ht.insert("grape", 12);
ht.insert("kiwi", 7);
ht.display();
// Search for values
int value;
if (ht.search("banana", value)) {
cout << "\nFound banana with value: " << value << endl;
}
if (ht.search("mango", value)) {
cout << "Found mango with value: " << value << endl;
} else {
cout << "\nMango not found" << endl;
}
// Remove an element
ht.remove("orange");
cout << "\nAfter removing orange:" << endl;
ht.display();
ht.showStats();
return 0;
}Collision Resolution
Open Addressing (Linear Probing)
In open addressing, when a collision occurs, we probe for the next available slot:
Hash Table with Linear Probing
#include <iostream>
#include <vector>
#include <string>
using namespace std;
class LinearProbingHashTable {
private:
static const int TABLE_SIZE = 10;
static const string DELETED; // Marker for deleted entries
vector<string> keys;
vector<int> values;
vector<bool> occupied;
int hashFunction(const string& key) {
int hash = 0;
for (char c : key) {
hash = (hash * 31 + c) % TABLE_SIZE;
}
return hash;
}
int findSlot(const string& key) {
int index = hashFunction(key);
int originalIndex = index;
while (occupied[index] && keys[index] != key && keys[index] != DELETED) {
index = (index + 1) % TABLE_SIZE;
// Table is full
if (index == originalIndex) {
return -1;
}
}
return index;
}
public:
LinearProbingHashTable() {
keys.resize(TABLE_SIZE);
values.resize(TABLE_SIZE);
occupied.resize(TABLE_SIZE, false);
}
bool insert(const string& key, int value) {
int index = findSlot(key);
if (index == -1) {
cout << "Hash table is full!" << endl;
return false;
}
keys[index] = key;
values[index] = value;
occupied[index] = true;
return true;
}
bool search(const string& key, int& value) {
int index = hashFunction(key);
int originalIndex = index;
while (occupied[index]) {
if (keys[index] == key) {
value = values[index];
return true;
}
index = (index + 1) % TABLE_SIZE;
if (index == originalIndex) {
break;
}
}
return false;
}
bool remove(const string& key) {
int index = hashFunction(key);
int originalIndex = index;
while (occupied[index]) {
if (keys[index] == key) {
keys[index] = DELETED;
occupied[index] = false;
return true;
}
index = (index + 1) % TABLE_SIZE;
if (index == originalIndex) {
break;
}
}
return false;
}
void display() {
cout << "Hash Table (Linear Probing):" << endl;
for (int i = 0; i < TABLE_SIZE; i++) {
cout << "Slot " << i << ": ";
if (occupied[i]) {
cout << "[" << keys[i] << ":" << values[i] << "]";
} else {
cout << "Empty";
}
cout << endl;
}
}
};
const string LinearProbingHashTable::DELETED = "__DELETED__";
int main() {
LinearProbingHashTable ht;
// Insert elements
ht.insert("apple", 10);
ht.insert("banana", 20);
ht.insert("cherry", 30);
ht.insert("date", 40);
ht.insert("elderberry", 50);
ht.display();
// Search
int value;
cout << "\nSearching for 'banana': ";
if (ht.search("banana", value)) {
cout << "Found with value " << value << endl;
} else {
cout << "Not found" << endl;
}
// Remove and display
ht.remove("cherry");
cout << "\nAfter removing 'cherry':" << endl;
ht.display();
return 0;
}STL Hash Containers
C++ STL provides several hash-based containers for different use cases:
Using STL Hash Containers
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <string>
using namespace std;
int main() {
// unordered_map (hash table for key-value pairs)
unordered_map<string, int> fruitPrices;
// Insert elements
fruitPrices["apple"] = 5;
fruitPrices["banana"] = 3;
fruitPrices["orange"] = 4;
fruitPrices.insert({"grape", 8});
cout << "Fruit Prices:" << endl;
for (const auto& pair : fruitPrices) {
cout << pair.first << ": $" << pair.second << endl;
}
// Search and access
cout << "\nPrice of banana: $" << fruitPrices["banana"] << endl;
if (fruitPrices.find("mango") != fruitPrices.end()) {
cout << "Mango found" << endl;
} else {
cout << "Mango not found" << endl;
}
// Count occurrences
cout << "Apple count: " << fruitPrices.count("apple") << endl;
// Remove element
fruitPrices.erase("orange");
cout << "\nAfter removing orange, size: " << fruitPrices.size() << endl;
cout << "\n" << string(40, '-') << "\n" << endl;
// unordered_set (hash table for unique elements)
unordered_set<string> visitedCities;
visitedCities.insert("New York");
visitedCities.insert("London");
visitedCities.insert("Tokyo");
visitedCities.insert("Paris");
visitedCities.insert("London"); // Duplicate - won't be added
cout << "Visited Cities (" << visitedCities.size() << "):" << endl;
for (const string& city : visitedCities) {
cout << "- " << city << endl;
}
// Check if element exists
if (visitedCities.count("Tokyo")) {
cout << "\nTokyo is in the visited cities list" << endl;
}
cout << "\n" << string(40, '-') << "\n" << endl;
// Word frequency counter using unordered_map
string text = "the quick brown fox jumps over the lazy dog the fox is quick";
unordered_map<string, int> wordCount;
// Simple word extraction (space-separated)
string word = "";
for (char c : text) {
if (c == ' ') {
if (!word.empty()) {
wordCount[word]++;
word = "";
}
} else {
word += c;
}
}
if (!word.empty()) {
wordCount[word]++;
}
cout << "Word Frequency:" << endl;
for (const auto& pair : wordCount) {
cout << pair.first << ": " << pair.second << endl;
}
// Hash table performance info
cout << "\nHash Table Info:" << endl;
cout << "Bucket count: " << fruitPrices.bucket_count() << endl;
cout << "Load factor: " << fruitPrices.load_factor() << endl;
cout << "Max load factor: " << fruitPrices.max_load_factor() << endl;
return 0;
}Applications and Use Cases
Common Applications
- Database Indexing: Fast lookup of records by primary key
- Caching: Store frequently accessed data for quick retrieval
- Symbol Tables: Compiler symbol tables for variable and function lookups
- Hash Sets: Checking membership and removing duplicates
- Hash Maps: Associative arrays and dictionaries
Performance Characteristics
- Average Case: O(1) for search, insertion, and deletion
- Worst Case: O(n) when many collisions occur
- Space Complexity: O(n) where n is the number of elements