哈希表

哈希表(也称为哈希映射)是实现关联数组抽象数据类型的数据结构,将键映射到值。它们使用哈希函数计算桶数组的索引,在该索引处可以找到所需的值。

哈希表为搜索、插入和删除操作提供平均 O(1) 时间复杂度,使其对于数据库、缓存和符号表等许多应用程序极其高效。

哈希函数

哈希函数将键转换为数组索引。一个好的哈希函数应该将键均匀分布在哈希表中:

基本哈希表实现

#include <iostream>
#include <vector>
#include <string>
using namespace std;

class HashTable {
private:
    static const int TABLE_SIZE = 10;
    vector<vector<pair<string, int>>> table;
    
    // Simple hash function
    int hashFunction(const string& key) {
        int hash = 0;
        for (char c : key) {
            hash += c;
        }
        return hash % TABLE_SIZE;
    }

public:
    HashTable() {
        table.resize(TABLE_SIZE);
    }
    
    void insert(const string& key, int value) {
        int index = hashFunction(key);
        
        // Check if key already exists
        for (auto& pair : table[index]) {
            if (pair.first == key) {
                pair.second = value; // Update existing value
                return;
            }
        }
        
        // Add new key-value pair
        table[index].push_back({key, value});
    }
    
    bool search(const string& key, int& value) {
        int index = hashFunction(key);
        
        for (const auto& pair : table[index]) {
            if (pair.first == key) {
                value = pair.second;
                return true;
            }
        }
        
        return false;
    }
    
    bool remove(const string& key) {
        int index = hashFunction(key);
        
        for (auto it = table[index].begin(); it != table[index].end(); ++it) {
            if (it->first == key) {
                table[index].erase(it);
                return true;
            }
        }
        
        return false;
    }
    
    void display() {
        cout << "Hash Table Contents:" << endl;
        for (int i = 0; i < TABLE_SIZE; i++) {
            cout << "Bucket " << i << ": ";
            if (table[i].empty()) {
                cout << "Empty";
            } else {
                for (const auto& pair : table[i]) {
                    cout << "[" << pair.first << ":" << pair.second << "] ";
                }
            }
            cout << endl;
        }
    }
    
    void showStats() {
        int totalElements = 0;
        int occupiedBuckets = 0;
        int maxChainLength = 0;
        
        for (int i = 0; i < TABLE_SIZE; i++) {
            int chainLength = table[i].size();
            totalElements += chainLength;
            
            if (chainLength > 0) {
                occupiedBuckets++;
                maxChainLength = max(maxChainLength, chainLength);
            }
        }
        
        cout << "\nHash Table Statistics:" << endl;
        cout << "Total elements: " << totalElements << endl;
        cout << "Occupied buckets: " << occupiedBuckets << "/" << TABLE_SIZE << endl;
        cout << "Load factor: " << (double)totalElements / TABLE_SIZE << endl;
        cout << "Max chain length: " << maxChainLength << endl;
    }
};

int main() {
    HashTable ht;
    
    // Insert some key-value pairs
    ht.insert("apple", 5);
    ht.insert("banana", 3);
    ht.insert("orange", 8);
    ht.insert("grape", 12);
    ht.insert("kiwi", 7);
    
    ht.display();
    
    // Search for values
    int value;
    if (ht.search("banana", value)) {
        cout << "\nFound banana with value: " << value << endl;
    }
    
    if (ht.search("mango", value)) {
        cout << "Found mango with value: " << value << endl;
    } else {
        cout << "\nMango not found" << endl;
    }
    
    // Remove an element
    ht.remove("orange");
    cout << "\nAfter removing orange:" << endl;
    ht.display();
    
    ht.showStats();
    
    return 0;
}

冲突解决

开放寻址(线性探测)

在开放寻址中,当发生冲突时,我们探测下一个可用的槽位:

使用线性探测的哈希表

#include <iostream>
#include <vector>
#include <string>
using namespace std;

class LinearProbingHashTable {
private:
    static const int TABLE_SIZE = 10;
    static const string DELETED; // Marker for deleted entries
    
    vector<string> keys;
    vector<int> values;
    vector<bool> occupied;
    
    int hashFunction(const string& key) {
        int hash = 0;
        for (char c : key) {
            hash = (hash * 31 + c) % TABLE_SIZE;
        }
        return hash;
    }
    
    int findSlot(const string& key) {
        int index = hashFunction(key);
        int originalIndex = index;
        
        while (occupied[index] && keys[index] != key && keys[index] != DELETED) {
            index = (index + 1) % TABLE_SIZE;
            
            // Table is full
            if (index == originalIndex) {
                return -1;
            }
        }
        
        return index;
    }

public:
    LinearProbingHashTable() {
        keys.resize(TABLE_SIZE);
        values.resize(TABLE_SIZE);
        occupied.resize(TABLE_SIZE, false);
    }
    
    bool insert(const string& key, int value) {
        int index = findSlot(key);
        
        if (index == -1) {
            cout << "Hash table is full!" << endl;
            return false;
        }
        
        keys[index] = key;
        values[index] = value;
        occupied[index] = true;
        return true;
    }
    
    bool search(const string& key, int& value) {
        int index = hashFunction(key);
        int originalIndex = index;
        
        while (occupied[index]) {
            if (keys[index] == key) {
                value = values[index];
                return true;
            }
            
            index = (index + 1) % TABLE_SIZE;
            
            if (index == originalIndex) {
                break;
            }
        }
        
        return false;
    }
    
    bool remove(const string& key) {
        int index = hashFunction(key);
        int originalIndex = index;
        
        while (occupied[index]) {
            if (keys[index] == key) {
                keys[index] = DELETED;
                occupied[index] = false;
                return true;
            }
            
            index = (index + 1) % TABLE_SIZE;
            
            if (index == originalIndex) {
                break;
            }
        }
        
        return false;
    }
    
    void display() {
        cout << "Hash Table (Linear Probing):" << endl;
        for (int i = 0; i < TABLE_SIZE; i++) {
            cout << "Slot " << i << ": ";
            if (occupied[i]) {
                cout << "[" << keys[i] << ":" << values[i] << "]";
            } else {
                cout << "Empty";
            }
            cout << endl;
        }
    }
};

const string LinearProbingHashTable::DELETED = "__DELETED__";

int main() {
    LinearProbingHashTable ht;
    
    // Insert elements
    ht.insert("apple", 10);
    ht.insert("banana", 20);
    ht.insert("cherry", 30);
    ht.insert("date", 40);
    ht.insert("elderberry", 50);
    
    ht.display();
    
    // Search
    int value;
    cout << "\nSearching for 'banana': ";
    if (ht.search("banana", value)) {
        cout << "Found with value " << value << endl;
    } else {
        cout << "Not found" << endl;
    }
    
    // Remove and display
    ht.remove("cherry");
    cout << "\nAfter removing 'cherry':" << endl;
    ht.display();
    
    return 0;
}

STL 哈希容器

C++ STL 为不同用例提供了几个基于哈希的容器:

使用 STL 哈希容器

#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <string>
using namespace std;

int main() {
    // unordered_map (hash table for key-value pairs)
    unordered_map<string, int> fruitPrices;
    
    // Insert elements
    fruitPrices["apple"] = 5;
    fruitPrices["banana"] = 3;
    fruitPrices["orange"] = 4;
    fruitPrices.insert({"grape", 8});
    
    cout << "Fruit Prices:" << endl;
    for (const auto& pair : fruitPrices) {
        cout << pair.first << ": $" << pair.second << endl;
    }
    
    // Search and access
    cout << "\nPrice of banana: $" << fruitPrices["banana"] << endl;
    
    if (fruitPrices.find("mango") != fruitPrices.end()) {
        cout << "Mango found" << endl;
    } else {
        cout << "Mango not found" << endl;
    }
    
    // Count occurrences
    cout << "Apple count: " << fruitPrices.count("apple") << endl;
    
    // Remove element
    fruitPrices.erase("orange");
    cout << "\nAfter removing orange, size: " << fruitPrices.size() << endl;
    
    cout << "\n" << string(40, '-') << "\n" << endl;
    
    // unordered_set (hash table for unique elements)
    unordered_set<string> visitedCities;
    
    visitedCities.insert("New York");
    visitedCities.insert("London");
    visitedCities.insert("Tokyo");
    visitedCities.insert("Paris");
    visitedCities.insert("London"); // Duplicate - won't be added
    
    cout << "Visited Cities (" << visitedCities.size() << "):" << endl;
    for (const string& city : visitedCities) {
        cout << "- " << city << endl;
    }
    
    // Check if element exists
    if (visitedCities.count("Tokyo")) {
        cout << "\nTokyo is in the visited cities list" << endl;
    }
    
    cout << "\n" << string(40, '-') << "\n" << endl;
    
    // Word frequency counter using unordered_map
    string text = "the quick brown fox jumps over the lazy dog the fox is quick";
    unordered_map<string, int> wordCount;
    
    // Simple word extraction (space-separated)
    string word = "";
    for (char c : text) {
        if (c == ' ') {
            if (!word.empty()) {
                wordCount[word]++;
                word = "";
            }
        } else {
            word += c;
        }
    }
    if (!word.empty()) {
        wordCount[word]++;
    }
    
    cout << "Word Frequency:" << endl;
    for (const auto& pair : wordCount) {
        cout << pair.first << ": " << pair.second << endl;
    }
    
    // Hash table performance info
    cout << "\nHash Table Info:" << endl;
    cout << "Bucket count: " << fruitPrices.bucket_count() << endl;
    cout << "Load factor: " << fruitPrices.load_factor() << endl;
    cout << "Max load factor: " << fruitPrices.max_load_factor() << endl;
    
    return 0;
}

应用和用例

常见应用

  • 数据库索引: 通过主键快速查找记录
  • 缓存: 存储经常访问的数据以便快速检索
  • 符号表: 编译器符号表用于变量和函数查找
  • 哈希集合: 检查成员资格和删除重复项
  • 哈希映射: 关联数组和字典

性能特征

  • 平均情况: 搜索、插入和删除的时间复杂度为 O(1)
  • 最坏情况: 当发生许多冲突时为 O(n)
  • 空间复杂度: O(n),其中 n 是元素数量