哈希表
哈希表(也称为哈希映射)是实现关联数组抽象数据类型的数据结构,将键映射到值。它们使用哈希函数计算桶数组的索引,在该索引处可以找到所需的值。
哈希表为搜索、插入和删除操作提供平均 O(1) 时间复杂度,使其对于数据库、缓存和符号表等许多应用程序极其高效。
哈希函数
哈希函数将键转换为数组索引。一个好的哈希函数应该将键均匀分布在哈希表中:
基本哈希表实现
#include <iostream>
#include <vector>
#include <string>
using namespace std;
class HashTable {
private:
static const int TABLE_SIZE = 10;
vector<vector<pair<string, int>>> table;
// Simple hash function
int hashFunction(const string& key) {
int hash = 0;
for (char c : key) {
hash += c;
}
return hash % TABLE_SIZE;
}
public:
HashTable() {
table.resize(TABLE_SIZE);
}
void insert(const string& key, int value) {
int index = hashFunction(key);
// Check if key already exists
for (auto& pair : table[index]) {
if (pair.first == key) {
pair.second = value; // Update existing value
return;
}
}
// Add new key-value pair
table[index].push_back({key, value});
}
bool search(const string& key, int& value) {
int index = hashFunction(key);
for (const auto& pair : table[index]) {
if (pair.first == key) {
value = pair.second;
return true;
}
}
return false;
}
bool remove(const string& key) {
int index = hashFunction(key);
for (auto it = table[index].begin(); it != table[index].end(); ++it) {
if (it->first == key) {
table[index].erase(it);
return true;
}
}
return false;
}
void display() {
cout << "Hash Table Contents:" << endl;
for (int i = 0; i < TABLE_SIZE; i++) {
cout << "Bucket " << i << ": ";
if (table[i].empty()) {
cout << "Empty";
} else {
for (const auto& pair : table[i]) {
cout << "[" << pair.first << ":" << pair.second << "] ";
}
}
cout << endl;
}
}
void showStats() {
int totalElements = 0;
int occupiedBuckets = 0;
int maxChainLength = 0;
for (int i = 0; i < TABLE_SIZE; i++) {
int chainLength = table[i].size();
totalElements += chainLength;
if (chainLength > 0) {
occupiedBuckets++;
maxChainLength = max(maxChainLength, chainLength);
}
}
cout << "\nHash Table Statistics:" << endl;
cout << "Total elements: " << totalElements << endl;
cout << "Occupied buckets: " << occupiedBuckets << "/" << TABLE_SIZE << endl;
cout << "Load factor: " << (double)totalElements / TABLE_SIZE << endl;
cout << "Max chain length: " << maxChainLength << endl;
}
};
int main() {
HashTable ht;
// Insert some key-value pairs
ht.insert("apple", 5);
ht.insert("banana", 3);
ht.insert("orange", 8);
ht.insert("grape", 12);
ht.insert("kiwi", 7);
ht.display();
// Search for values
int value;
if (ht.search("banana", value)) {
cout << "\nFound banana with value: " << value << endl;
}
if (ht.search("mango", value)) {
cout << "Found mango with value: " << value << endl;
} else {
cout << "\nMango not found" << endl;
}
// Remove an element
ht.remove("orange");
cout << "\nAfter removing orange:" << endl;
ht.display();
ht.showStats();
return 0;
}冲突解决
开放寻址(线性探测)
在开放寻址中,当发生冲突时,我们探测下一个可用的槽位:
使用线性探测的哈希表
#include <iostream>
#include <vector>
#include <string>
using namespace std;
class LinearProbingHashTable {
private:
static const int TABLE_SIZE = 10;
static const string DELETED; // Marker for deleted entries
vector<string> keys;
vector<int> values;
vector<bool> occupied;
int hashFunction(const string& key) {
int hash = 0;
for (char c : key) {
hash = (hash * 31 + c) % TABLE_SIZE;
}
return hash;
}
int findSlot(const string& key) {
int index = hashFunction(key);
int originalIndex = index;
while (occupied[index] && keys[index] != key && keys[index] != DELETED) {
index = (index + 1) % TABLE_SIZE;
// Table is full
if (index == originalIndex) {
return -1;
}
}
return index;
}
public:
LinearProbingHashTable() {
keys.resize(TABLE_SIZE);
values.resize(TABLE_SIZE);
occupied.resize(TABLE_SIZE, false);
}
bool insert(const string& key, int value) {
int index = findSlot(key);
if (index == -1) {
cout << "Hash table is full!" << endl;
return false;
}
keys[index] = key;
values[index] = value;
occupied[index] = true;
return true;
}
bool search(const string& key, int& value) {
int index = hashFunction(key);
int originalIndex = index;
while (occupied[index]) {
if (keys[index] == key) {
value = values[index];
return true;
}
index = (index + 1) % TABLE_SIZE;
if (index == originalIndex) {
break;
}
}
return false;
}
bool remove(const string& key) {
int index = hashFunction(key);
int originalIndex = index;
while (occupied[index]) {
if (keys[index] == key) {
keys[index] = DELETED;
occupied[index] = false;
return true;
}
index = (index + 1) % TABLE_SIZE;
if (index == originalIndex) {
break;
}
}
return false;
}
void display() {
cout << "Hash Table (Linear Probing):" << endl;
for (int i = 0; i < TABLE_SIZE; i++) {
cout << "Slot " << i << ": ";
if (occupied[i]) {
cout << "[" << keys[i] << ":" << values[i] << "]";
} else {
cout << "Empty";
}
cout << endl;
}
}
};
const string LinearProbingHashTable::DELETED = "__DELETED__";
int main() {
LinearProbingHashTable ht;
// Insert elements
ht.insert("apple", 10);
ht.insert("banana", 20);
ht.insert("cherry", 30);
ht.insert("date", 40);
ht.insert("elderberry", 50);
ht.display();
// Search
int value;
cout << "\nSearching for 'banana': ";
if (ht.search("banana", value)) {
cout << "Found with value " << value << endl;
} else {
cout << "Not found" << endl;
}
// Remove and display
ht.remove("cherry");
cout << "\nAfter removing 'cherry':" << endl;
ht.display();
return 0;
}STL 哈希容器
C++ STL 为不同用例提供了几个基于哈希的容器:
使用 STL 哈希容器
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <string>
using namespace std;
int main() {
// unordered_map (hash table for key-value pairs)
unordered_map<string, int> fruitPrices;
// Insert elements
fruitPrices["apple"] = 5;
fruitPrices["banana"] = 3;
fruitPrices["orange"] = 4;
fruitPrices.insert({"grape", 8});
cout << "Fruit Prices:" << endl;
for (const auto& pair : fruitPrices) {
cout << pair.first << ": $" << pair.second << endl;
}
// Search and access
cout << "\nPrice of banana: $" << fruitPrices["banana"] << endl;
if (fruitPrices.find("mango") != fruitPrices.end()) {
cout << "Mango found" << endl;
} else {
cout << "Mango not found" << endl;
}
// Count occurrences
cout << "Apple count: " << fruitPrices.count("apple") << endl;
// Remove element
fruitPrices.erase("orange");
cout << "\nAfter removing orange, size: " << fruitPrices.size() << endl;
cout << "\n" << string(40, '-') << "\n" << endl;
// unordered_set (hash table for unique elements)
unordered_set<string> visitedCities;
visitedCities.insert("New York");
visitedCities.insert("London");
visitedCities.insert("Tokyo");
visitedCities.insert("Paris");
visitedCities.insert("London"); // Duplicate - won't be added
cout << "Visited Cities (" << visitedCities.size() << "):" << endl;
for (const string& city : visitedCities) {
cout << "- " << city << endl;
}
// Check if element exists
if (visitedCities.count("Tokyo")) {
cout << "\nTokyo is in the visited cities list" << endl;
}
cout << "\n" << string(40, '-') << "\n" << endl;
// Word frequency counter using unordered_map
string text = "the quick brown fox jumps over the lazy dog the fox is quick";
unordered_map<string, int> wordCount;
// Simple word extraction (space-separated)
string word = "";
for (char c : text) {
if (c == ' ') {
if (!word.empty()) {
wordCount[word]++;
word = "";
}
} else {
word += c;
}
}
if (!word.empty()) {
wordCount[word]++;
}
cout << "Word Frequency:" << endl;
for (const auto& pair : wordCount) {
cout << pair.first << ": " << pair.second << endl;
}
// Hash table performance info
cout << "\nHash Table Info:" << endl;
cout << "Bucket count: " << fruitPrices.bucket_count() << endl;
cout << "Load factor: " << fruitPrices.load_factor() << endl;
cout << "Max load factor: " << fruitPrices.max_load_factor() << endl;
return 0;
}应用和用例
常见应用
- 数据库索引: 通过主键快速查找记录
- 缓存: 存储经常访问的数据以便快速检索
- 符号表: 编译器符号表用于变量和函数查找
- 哈希集合: 检查成员资格和删除重复项
- 哈希映射: 关联数组和字典
性能特征
- 平均情况: 搜索、插入和删除的时间复杂度为 O(1)
- 最坏情况: 当发生许多冲突时为 O(n)
- 空间复杂度: O(n),其中 n 是元素数量