Union-Find (Disjoint Set)

Union-Find is a data structure that efficiently maintains a collection of disjoint sets and supports union and find operations with near-constant time complexity using path compression and union by rank optimizations.

Basic Union-Find Implementation

The basic Union-Find structure supports find (which set an element belongs to) and union (merge two sets) operations.

Union-Find with Path Compression and Union by Rank

#include <iostream>
#include <vector>
using namespace std;

class UnionFind {
private:
    vector<int> parent, rank, size;
    int components;
    
public:
    UnionFind(int n) : parent(n), rank(n, 0), size(n, 1), components(n) {
        for (int i = 0; i < n; i++) {
            parent[i] = i;
        }
    }
    
    // Find with path compression
    int find(int x) {
        if (parent[x] != x) {
            parent[x] = find(parent[x]); // Path compression
        }
        return parent[x];
    }
    
    // Union by rank
    bool unite(int x, int y) {
        int px = find(x), py = find(y);
        if (px == py) return false; // Already in same set
        
        // Union by rank: attach smaller tree under larger tree
        if (rank[px] < rank[py]) {
            parent[px] = py;
            size[py] += size[px];
        } else if (rank[px] > rank[py]) {
            parent[py] = px;
            size[px] += size[py];
        } else {
            parent[py] = px;
            size[px] += size[py];
            rank[px]++;
        }
        
        components--;
        return true;
    }
    
    // Check if two elements are in same set
    bool connected(int x, int y) {
        return find(x) == find(y);
    }
    
    // Get size of set containing x
    int getSize(int x) {
        return size[find(x)];
    }
    
    // Get number of connected components
    int getComponents() {
        return components;
    }
};

int main() {
    cout << "Testing Union-Find Data Structure:" << endl;
    
    UnionFind uf(6);
    
    cout << "Initial components: " << uf.getComponents() << endl;
    
    // Connect some elements
    uf.unite(0, 1);
    uf.unite(1, 2);
    uf.unite(3, 4);
    
    cout << "After unions: " << uf.getComponents() << " components" << endl;
    
    // Test connectivity
    cout << "0 and 2 connected: " << (uf.connected(0, 2) ? "Yes" : "No") << endl;
    cout << "0 and 3 connected: " << (uf.connected(0, 3) ? "Yes" : "No") << endl;
    
    // Test set sizes
    cout << "Size of set containing 0: " << uf.getSize(0) << endl;
    cout << "Size of set containing 3: " << uf.getSize(3) << endl;
    cout << "Size of set containing 5: " << uf.getSize(5) << endl;
    
    return 0;
}

Union-Find with path compression and union by rank achieves nearly O(1) amortized time complexity for both operations. Path compression flattens the tree during find, while union by rank keeps trees balanced.

Application: Detecting Cycles in Undirected Graph

Union-Find can efficiently detect cycles in undirected graphs by checking if two vertices of an edge are already connected.

Cycle Detection in Undirected Graph

#include <iostream>
#include <vector>
using namespace std;

class CycleDetector {
private:
    UnionFind uf;
    
public:
    CycleDetector(int n) : uf(n) {}
    
    bool hasCycle(vector<pair<int, int>>& edges) {
        for (auto& edge : edges) {
            int u = edge.first, v = edge.second;
            
            // If u and v are already connected, adding this edge creates a cycle
            if (uf.connected(u, v)) {
                return true;
            }
            
            // Connect u and v
            uf.unite(u, v);
        }
        return false;
    }
};

// Application: Kruskal's MST Algorithm
class KruskalMST {
private:
    struct Edge {
        int u, v, weight;
        bool operator<(const Edge& other) const {
            return weight < other.weight;
        }
    };
    
    vector<Edge> edges;
    int n;
    
public:
    KruskalMST(int vertices) : n(vertices) {}
    
    void addEdge(int u, int v, int w) {
        edges.push_back({u, v, w});
    }
    
    pair<int, vector<Edge>> findMST() {
        sort(edges.begin(), edges.end());
        UnionFind uf(n);
        
        vector<Edge> mst;
        int totalWeight = 0;
        
        for (const Edge& e : edges) {
            if (!uf.connected(e.u, e.v)) {
                uf.unite(e.u, e.v);
                mst.push_back(e);
                totalWeight += e.weight;
                
                if (mst.size() == n - 1) break;
            }
        }
        
        return {totalWeight, mst};
    }
};

int main() {
    // Test cycle detection
    cout << "Testing Cycle Detection:" << endl;
    
    CycleDetector detector(4);
    vector<pair<int, int>> edges = {{0, 1}, {1, 2}, {2, 3}, {3, 0}};
    
    if (detector.hasCycle(edges)) {
        cout << "Graph has a cycle!" << endl;
    } else {
        cout << "Graph is acyclic!" << endl;
    }
    
    // Test Kruskal's algorithm
    cout << "\nTesting Kruskal's MST:" << endl;
    
    KruskalMST kruskal(4);
    kruskal.addEdge(0, 1, 10);
    kruskal.addEdge(0, 2, 6);
    kruskal.addEdge(0, 3, 5);
    kruskal.addEdge(1, 3, 15);
    kruskal.addEdge(2, 3, 4);
    
    auto [weight, mst] = kruskal.findMST();
    cout << "MST weight: " << weight << endl;
    
    return 0;
}

Union-Find is perfect for cycle detection and MST algorithms. For cycle detection, if two vertices of an edge are already connected, adding the edge creates a cycle.