How Hash Tables Work
A hash table maps keys to values using a hash function. Python's dict and set are hash tables.
Key โ hash(key) โ index โ bucket
Collision resolution:
- Chaining: Each bucket holds a linked list
- Open addressing: Probe for next empty slot (Python uses this)
Complexity:
- Average: O(1) for insert, lookup, delete
- Worst: O(n) if many collisions
Python's Built-in Hash Tables
# dict โ key-value hash map
d = {}
d["key"] = "value"
d.get("missing", "default") # safe lookup
del d["key"]
"key" in d # O(1) membership test
# set โ hash set (keys only)
s = set()
s.add("item")
"item" in s # O(1)
s.remove("item") # raises KeyError if missing
s.discard("missing") # safe remove
Common Patterns
Two Sum
def two_sum(nums, target):
"""Find indices of two numbers summing to target โ O(n)"""
seen = {} # value โ index
for i, num in enumerate(nums):
complement = target - num
if complement in seen:
return [seen[complement], i]
seen[num] = i
return []
print(two_sum([2, 7, 11, 15], 9)) # [0, 1]
print(two_sum([3, 2, 4], 6)) # [1, 2]
Group Anagrams
from collections import defaultdict
def group_anagrams(strs):
"""Group strings that are anagrams โ O(n * k log k)"""
groups = defaultdict(list)
for s in strs:
key = tuple(sorted(s)) # sorted chars as key
groups[key].append(s)
return list(groups.values())
print(group_anagrams(["eat","tea","tan","ate","nat","bat"]))
# [['eat','tea','ate'], ['tan','nat'], ['bat']]
Longest Consecutive Sequence
def longest_consecutive(nums):
"""O(n) using set"""
num_set = set(nums)
max_len = 0
for num in num_set:
if num - 1 not in num_set: # start of a sequence
curr = num
length = 1
while curr + 1 in num_set:
curr += 1
length += 1
max_len = max(max_len, length)
return max_len
print(longest_consecutive([100,4,200,1,3,2])) # 4 (1,2,3,4)
print(longest_consecutive([0,3,7,2,5,8,4,6,0,1])) # 9
Implement Hash Map from Scratch
class HashMap:
def __init__(self, capacity=16):
self.capacity = capacity
self.size = 0
self.buckets = [[] for _ in range(capacity)]
self.load_factor = 0.75
def _hash(self, key):
return hash(key) % self.capacity
def put(self, key, value):
idx = self._hash(key)
bucket = self.buckets[idx]
for i, (k, v) in enumerate(bucket):
if k == key:
bucket[i] = (key, value)
return
bucket.append((key, value))
self.size += 1
if self.size / self.capacity > self.load_factor:
self._resize()
def get(self, key, default=None):
idx = self._hash(key)
for k, v in self.buckets[idx]:
if k == key:
return v
return default
def remove(self, key):
idx = self._hash(key)
bucket = self.buckets[idx]
for i, (k, v) in enumerate(bucket):
if k == key:
bucket.pop(i)
self.size -= 1
return
def _resize(self):
old_buckets = self.buckets
self.capacity *= 2
self.buckets = [[] for _ in range(self.capacity)]
self.size = 0
for bucket in old_buckets:
for key, value in bucket:
self.put(key, value)
hm = HashMap()
hm.put("name", "Alice")
hm.put("age", 30)
print(hm.get("name")) # Alice
print(hm.get("city", "Unknown")) # Unknown
Exercises
Exercise 1: Subarray Sum Equals K
Count subarrays summing to k using prefix sum + hash map.
Solution:
def subarray_sum(nums, k):
count = 0
prefix = 0
freq = {0: 1}
for num in nums:
prefix += num
count += freq.get(prefix - k, 0)
freq[prefix] = freq.get(prefix, 0) + 1
return count
print(subarray_sum([1, 1, 1], 2)) # 2
print(subarray_sum([1, 2, 3], 3)) # 2
Exercise 2: Top K Frequent Elements
Find k most frequent elements in O(n) using bucket sort.
Solution:
from collections import Counter
def top_k_frequent(nums, k):
count = Counter(nums)
# Bucket sort: index = frequency
buckets = [[] for _ in range(len(nums) + 1)]
for num, freq in count.items():
buckets[freq].append(num)
result = []
for freq in range(len(buckets) - 1, 0, -1):
result.extend(buckets[freq])
if len(result) >= k:
return result[:k]
return result
print(top_k_frequent([1,1,1,2,2,3], 2)) # [1, 2]