Hash Tables

Hash tables (also called hash maps) are data structures that implement an associative array abstract data type, mapping keys to values. They use a hash function to compute an index into a bucket array where the desired value can be found.

Hash tables provide average O(1) time complexity for search, insertion, and deletion operations, making them extremely efficient for many applications like databases, caches, and symbol tables.

Hash Function

A hash function converts keys into array indices. A good hash function should distribute keys uniformly across the hash table:

Basic Hash Table Implementation

#include <iostream>
#include <vector>
#include <string>
using namespace std;

class HashTable {
private:
    static const int TABLE_SIZE = 10;
    vector<vector<pair<string, int>>> table;
    
    // Simple hash function
    int hashFunction(const string& key) {
        int hash = 0;
        for (char c : key) {
            hash += c;
        }
        return hash % TABLE_SIZE;
    }

public:
    HashTable() {
        table.resize(TABLE_SIZE);
    }
    
    void insert(const string& key, int value) {
        int index = hashFunction(key);
        
        // Check if key already exists
        for (auto& pair : table[index]) {
            if (pair.first == key) {
                pair.second = value; // Update existing value
                return;
            }
        }
        
        // Add new key-value pair
        table[index].push_back({key, value});
    }
    
    bool search(const string& key, int& value) {
        int index = hashFunction(key);
        
        for (const auto& pair : table[index]) {
            if (pair.first == key) {
                value = pair.second;
                return true;
            }
        }
        
        return false;
    }
    
    bool remove(const string& key) {
        int index = hashFunction(key);
        
        for (auto it = table[index].begin(); it != table[index].end(); ++it) {
            if (it->first == key) {
                table[index].erase(it);
                return true;
            }
        }
        
        return false;
    }
    
    void display() {
        cout << "Hash Table Contents:" << endl;
        for (int i = 0; i < TABLE_SIZE; i++) {
            cout << "Bucket " << i << ": ";
            if (table[i].empty()) {
                cout << "Empty";
            } else {
                for (const auto& pair : table[i]) {
                    cout << "[" << pair.first << ":" << pair.second << "] ";
                }
            }
            cout << endl;
        }
    }
    
    void showStats() {
        int totalElements = 0;
        int occupiedBuckets = 0;
        int maxChainLength = 0;
        
        for (int i = 0; i < TABLE_SIZE; i++) {
            int chainLength = table[i].size();
            totalElements += chainLength;
            
            if (chainLength > 0) {
                occupiedBuckets++;
                maxChainLength = max(maxChainLength, chainLength);
            }
        }
        
        cout << "\nHash Table Statistics:" << endl;
        cout << "Total elements: " << totalElements << endl;
        cout << "Occupied buckets: " << occupiedBuckets << "/" << TABLE_SIZE << endl;
        cout << "Load factor: " << (double)totalElements / TABLE_SIZE << endl;
        cout << "Max chain length: " << maxChainLength << endl;
    }
};

int main() {
    HashTable ht;
    
    // Insert some key-value pairs
    ht.insert("apple", 5);
    ht.insert("banana", 3);
    ht.insert("orange", 8);
    ht.insert("grape", 12);
    ht.insert("kiwi", 7);
    
    ht.display();
    
    // Search for values
    int value;
    if (ht.search("banana", value)) {
        cout << "\nFound banana with value: " << value << endl;
    }
    
    if (ht.search("mango", value)) {
        cout << "Found mango with value: " << value << endl;
    } else {
        cout << "\nMango not found" << endl;
    }
    
    // Remove an element
    ht.remove("orange");
    cout << "\nAfter removing orange:" << endl;
    ht.display();
    
    ht.showStats();
    
    return 0;
}

Collision Resolution

Open Addressing (Linear Probing)

In open addressing, when a collision occurs, we probe for the next available slot:

Hash Table with Linear Probing

#include <iostream>
#include <vector>
#include <string>
using namespace std;

class LinearProbingHashTable {
private:
    static const int TABLE_SIZE = 10;
    static const string DELETED; // Marker for deleted entries
    
    vector<string> keys;
    vector<int> values;
    vector<bool> occupied;
    
    int hashFunction(const string& key) {
        int hash = 0;
        for (char c : key) {
            hash = (hash * 31 + c) % TABLE_SIZE;
        }
        return hash;
    }
    
    int findSlot(const string& key) {
        int index = hashFunction(key);
        int originalIndex = index;
        
        while (occupied[index] && keys[index] != key && keys[index] != DELETED) {
            index = (index + 1) % TABLE_SIZE;
            
            // Table is full
            if (index == originalIndex) {
                return -1;
            }
        }
        
        return index;
    }

public:
    LinearProbingHashTable() {
        keys.resize(TABLE_SIZE);
        values.resize(TABLE_SIZE);
        occupied.resize(TABLE_SIZE, false);
    }
    
    bool insert(const string& key, int value) {
        int index = findSlot(key);
        
        if (index == -1) {
            cout << "Hash table is full!" << endl;
            return false;
        }
        
        keys[index] = key;
        values[index] = value;
        occupied[index] = true;
        return true;
    }
    
    bool search(const string& key, int& value) {
        int index = hashFunction(key);
        int originalIndex = index;
        
        while (occupied[index]) {
            if (keys[index] == key) {
                value = values[index];
                return true;
            }
            
            index = (index + 1) % TABLE_SIZE;
            
            if (index == originalIndex) {
                break;
            }
        }
        
        return false;
    }
    
    bool remove(const string& key) {
        int index = hashFunction(key);
        int originalIndex = index;
        
        while (occupied[index]) {
            if (keys[index] == key) {
                keys[index] = DELETED;
                occupied[index] = false;
                return true;
            }
            
            index = (index + 1) % TABLE_SIZE;
            
            if (index == originalIndex) {
                break;
            }
        }
        
        return false;
    }
    
    void display() {
        cout << "Hash Table (Linear Probing):" << endl;
        for (int i = 0; i < TABLE_SIZE; i++) {
            cout << "Slot " << i << ": ";
            if (occupied[i]) {
                cout << "[" << keys[i] << ":" << values[i] << "]";
            } else {
                cout << "Empty";
            }
            cout << endl;
        }
    }
};

const string LinearProbingHashTable::DELETED = "__DELETED__";

int main() {
    LinearProbingHashTable ht;
    
    // Insert elements
    ht.insert("apple", 10);
    ht.insert("banana", 20);
    ht.insert("cherry", 30);
    ht.insert("date", 40);
    ht.insert("elderberry", 50);
    
    ht.display();
    
    // Search
    int value;
    cout << "\nSearching for 'banana': ";
    if (ht.search("banana", value)) {
        cout << "Found with value " << value << endl;
    } else {
        cout << "Not found" << endl;
    }
    
    // Remove and display
    ht.remove("cherry");
    cout << "\nAfter removing 'cherry':" << endl;
    ht.display();
    
    return 0;
}

STL Hash Containers

C++ STL provides several hash-based containers for different use cases:

Using STL Hash Containers

#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <string>
using namespace std;

int main() {
    // unordered_map (hash table for key-value pairs)
    unordered_map<string, int> fruitPrices;
    
    // Insert elements
    fruitPrices["apple"] = 5;
    fruitPrices["banana"] = 3;
    fruitPrices["orange"] = 4;
    fruitPrices.insert({"grape", 8});
    
    cout << "Fruit Prices:" << endl;
    for (const auto& pair : fruitPrices) {
        cout << pair.first << ": $" << pair.second << endl;
    }
    
    // Search and access
    cout << "\nPrice of banana: $" << fruitPrices["banana"] << endl;
    
    if (fruitPrices.find("mango") != fruitPrices.end()) {
        cout << "Mango found" << endl;
    } else {
        cout << "Mango not found" << endl;
    }
    
    // Count occurrences
    cout << "Apple count: " << fruitPrices.count("apple") << endl;
    
    // Remove element
    fruitPrices.erase("orange");
    cout << "\nAfter removing orange, size: " << fruitPrices.size() << endl;
    
    cout << "\n" << string(40, '-') << "\n" << endl;
    
    // unordered_set (hash table for unique elements)
    unordered_set<string> visitedCities;
    
    visitedCities.insert("New York");
    visitedCities.insert("London");
    visitedCities.insert("Tokyo");
    visitedCities.insert("Paris");
    visitedCities.insert("London"); // Duplicate - won't be added
    
    cout << "Visited Cities (" << visitedCities.size() << "):" << endl;
    for (const string& city : visitedCities) {
        cout << "- " << city << endl;
    }
    
    // Check if element exists
    if (visitedCities.count("Tokyo")) {
        cout << "\nTokyo is in the visited cities list" << endl;
    }
    
    cout << "\n" << string(40, '-') << "\n" << endl;
    
    // Word frequency counter using unordered_map
    string text = "the quick brown fox jumps over the lazy dog the fox is quick";
    unordered_map<string, int> wordCount;
    
    // Simple word extraction (space-separated)
    string word = "";
    for (char c : text) {
        if (c == ' ') {
            if (!word.empty()) {
                wordCount[word]++;
                word = "";
            }
        } else {
            word += c;
        }
    }
    if (!word.empty()) {
        wordCount[word]++;
    }
    
    cout << "Word Frequency:" << endl;
    for (const auto& pair : wordCount) {
        cout << pair.first << ": " << pair.second << endl;
    }
    
    // Hash table performance info
    cout << "\nHash Table Info:" << endl;
    cout << "Bucket count: " << fruitPrices.bucket_count() << endl;
    cout << "Load factor: " << fruitPrices.load_factor() << endl;
    cout << "Max load factor: " << fruitPrices.max_load_factor() << endl;
    
    return 0;
}

Applications and Use Cases

Common Applications

  • Database Indexing: Fast lookup of records by primary key
  • Caching: Store frequently accessed data for quick retrieval
  • Symbol Tables: Compiler symbol tables for variable and function lookups
  • Hash Sets: Checking membership and removing duplicates
  • Hash Maps: Associative arrays and dictionaries

Performance Characteristics

  • Average Case: O(1) for search, insertion, and deletion
  • Worst Case: O(n) when many collisions occur
  • Space Complexity: O(n) where n is the number of elements