IntermediateDSA ยท Lesson 5

Hashing and Hash Tables

Understand hash tables, collision resolution, and solve problems using sets and dictionaries efficiently

How Hash Tables Work

A hash table maps keys to values using a hash function. Python's dict and set are hash tables.

Key โ†’ hash(key) โ†’ index โ†’ bucket

Collision resolution:

  • Chaining: Each bucket holds a linked list
  • Open addressing: Probe for next empty slot (Python uses this)

Complexity:

  • Average: O(1) for insert, lookup, delete
  • Worst: O(n) if many collisions

Python's Built-in Hash Tables

# dict โ€” key-value hash map
d = {}
d["key"] = "value"
d.get("missing", "default")  # safe lookup
del d["key"]
"key" in d  # O(1) membership test

# set โ€” hash set (keys only)
s = set()
s.add("item")
"item" in s  # O(1)
s.remove("item")  # raises KeyError if missing
s.discard("missing")  # safe remove

Common Patterns

Two Sum

def two_sum(nums, target):
    """Find indices of two numbers summing to target โ€” O(n)"""
    seen = {}  # value โ†’ index
    for i, num in enumerate(nums):
        complement = target - num
        if complement in seen:
            return [seen[complement], i]
        seen[num] = i
    return []

print(two_sum([2, 7, 11, 15], 9))   # [0, 1]
print(two_sum([3, 2, 4], 6))         # [1, 2]

Group Anagrams

from collections import defaultdict

def group_anagrams(strs):
    """Group strings that are anagrams โ€” O(n * k log k)"""
    groups = defaultdict(list)
    for s in strs:
        key = tuple(sorted(s))  # sorted chars as key
        groups[key].append(s)
    return list(groups.values())

print(group_anagrams(["eat","tea","tan","ate","nat","bat"]))
# [['eat','tea','ate'], ['tan','nat'], ['bat']]

Longest Consecutive Sequence

def longest_consecutive(nums):
    """O(n) using set"""
    num_set = set(nums)
    max_len = 0

    for num in num_set:
        if num - 1 not in num_set:  # start of a sequence
            curr = num
            length = 1
            while curr + 1 in num_set:
                curr += 1
                length += 1
            max_len = max(max_len, length)
    return max_len

print(longest_consecutive([100,4,200,1,3,2]))  # 4 (1,2,3,4)
print(longest_consecutive([0,3,7,2,5,8,4,6,0,1]))  # 9

Implement Hash Map from Scratch

class HashMap:
    def __init__(self, capacity=16):
        self.capacity = capacity
        self.size = 0
        self.buckets = [[] for _ in range(capacity)]
        self.load_factor = 0.75

    def _hash(self, key):
        return hash(key) % self.capacity

    def put(self, key, value):
        idx = self._hash(key)
        bucket = self.buckets[idx]
        for i, (k, v) in enumerate(bucket):
            if k == key:
                bucket[i] = (key, value)
                return
        bucket.append((key, value))
        self.size += 1
        if self.size / self.capacity > self.load_factor:
            self._resize()

    def get(self, key, default=None):
        idx = self._hash(key)
        for k, v in self.buckets[idx]:
            if k == key:
                return v
        return default

    def remove(self, key):
        idx = self._hash(key)
        bucket = self.buckets[idx]
        for i, (k, v) in enumerate(bucket):
            if k == key:
                bucket.pop(i)
                self.size -= 1
                return

    def _resize(self):
        old_buckets = self.buckets
        self.capacity *= 2
        self.buckets = [[] for _ in range(self.capacity)]
        self.size = 0
        for bucket in old_buckets:
            for key, value in bucket:
                self.put(key, value)

hm = HashMap()
hm.put("name", "Alice")
hm.put("age", 30)
print(hm.get("name"))   # Alice
print(hm.get("city", "Unknown"))  # Unknown

Exercises

Exercise 1: Subarray Sum Equals K

Count subarrays summing to k using prefix sum + hash map.

Solution:

def subarray_sum(nums, k):
    count = 0
    prefix = 0
    freq = {0: 1}
    for num in nums:
        prefix += num
        count += freq.get(prefix - k, 0)
        freq[prefix] = freq.get(prefix, 0) + 1
    return count

print(subarray_sum([1, 1, 1], 2))   # 2
print(subarray_sum([1, 2, 3], 3))    # 2

Exercise 2: Top K Frequent Elements

Find k most frequent elements in O(n) using bucket sort.

Solution:

from collections import Counter

def top_k_frequent(nums, k):
    count = Counter(nums)
    # Bucket sort: index = frequency
    buckets = [[] for _ in range(len(nums) + 1)]
    for num, freq in count.items():
        buckets[freq].append(num)

    result = []
    for freq in range(len(buckets) - 1, 0, -1):
        result.extend(buckets[freq])
        if len(result) >= k:
            return result[:k]
    return result

print(top_k_frequent([1,1,1,2,2,3], 2))  # [1, 2]