HashMap工作原理

环境
介绍
签名
创建HashMap
新增与修改
删除
查找
HashIterator
Null键
实现Cloneable接口
实现Serializable接口
参考

环境

C:\Users\chwl>java -version
java version "1.7.0_51"
Java(TM) SE Runtime Environment (build 1.7.0_51-b13)
Java HotSpot(TM) 64-Bit Server VM (build 24.51-b03, mixed mode)

介绍

HashMap底层使用hash表实现，解决冲突的方法是链地址法，简单理解起来就是采用数组加链表的数据结构来存储数据。

哈希表以及存储节点的定义。

// The hash table: an array of buckets, each slot holding the head Entry of a singly linked chain.
// Declared transient; it starts out as the shared EMPTY_TABLE placeholder.
transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE; 

// Storage node definition: one key/value mapping that is also a node of a singly linked list.
static class Entry<K,V> implements Map.Entry<K,V> {
    // The key is fixed for the lifetime of the entry.
    final K key;
    // The value can be replaced in place via setValue().
    V value;
    // Next entry in the same bucket chain, or null at the end of the chain.
    Entry<K,V> next;
    // Hash stored with the entry so lookups can compare ints before calling equals().
    int hash;

    // Creates an entry with hash h, key k and value v, linked in front of n.
    Entry(int h, K k, V v, Entry<K,V> n) {
        value = v;
        next = n;
        key = k;
        hash = h;
    }

    public final K getKey() {
        return key;
    }

    public final V getValue() {
        return value;
    }

    // Replaces the value and returns the previous one.
    public final V setValue(V newValue) {
        V oldValue = value;
        value = newValue;
        return oldValue;
    }

    // Two entries are equal when both their keys and their values are equal;
    // null keys and null values are compared safely.
    public final boolean equals(Object o) {
        if (!(o instanceof Map.Entry))
            return false;
        Map.Entry e = (Map.Entry)o;
        Object k1 = getKey();
        Object k2 = e.getKey();
        if (k1 == k2 || (k1 != null && k1.equals(k2))) {
            Object v1 = getValue();
            Object v2 = e.getValue();
            if (v1 == v2 || (v1 != null && v1.equals(v2)))
                return true;
        }
        return false;
    }

    // XOR of the key's and the value's hash codes (Objects.hashCode tolerates null).
    public final int hashCode() {
        return Objects.hashCode(getKey()) ^ Objects.hashCode(getValue());
    }

    // Renders the entry as "key=value".
    public final String toString() {
        return getKey() + "=" + getValue();
    }

    // Hook invoked when put() overwrites the value of an existing entry.
    // Intentionally empty here.
    void recordAccess(HashMap<K,V> m) {
    }

    // Hook invoked by removeEntryForKey() after the entry has been unlinked.
    // Intentionally empty here.
    void recordRemoval(HashMap<K,V> m) {
    }
}

下面开始分析HashMap的源代码，从数据的增删改查等方面进行分析。

签名

public class HashMap<K,V>
    extends AbstractMap<K,V>
    implements Map<K,V>, Cloneable, Serializable

创建HashMap

1.构造函数

设置HashMap的初始容量initialCapacity和负载因子loadFactor；

无参构造函数使用默认的初始容量16，负载因子0.75；

当entry数大于capacity*loadFactor时，HashMap考虑进行resize。

/**
 * Creates an empty map with the given initial capacity and load factor.
 * No bucket array is allocated here.
 *
 * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY are clamped
 * @param loadFactor      load factor; must be positive and not NaN
 * @throws IllegalArgumentException if initialCapacity is negative, or loadFactor is
 *                                  non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);

    this.loadFactor = loadFactor;
    // Until the table is allocated, threshold just carries the requested capacity;
    // put() later passes it to inflateTable().
    threshold = initialCapacity;
    // Initialization hook; its body is not shown in this article.
    init();
}

  
/**
 * Creates an empty map with the given initial capacity and the default load factor.
 *
 * @param initialCapacity requested capacity, validated by the two-argument constructor
 */
public HashMap(int initialCapacity) {
    this(initialCapacity, DEFAULT_LOAD_FACTOR);
}

   
/**
 * Creates an empty map with the default initial capacity and the default load factor.
 */
public HashMap() {
    this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
}

新增与修改

HashMap的新增与修改操作，通过put方法实现。

1.put方法源码

/**
 * Associates value with key, replacing any existing mapping for that key.
 *
 * @return the value previously mapped to key, or null if a new entry was added
 */
public V put(K key, V value) {
    // The table is still the empty placeholder: allocate it now
    if (table == EMPTY_TABLE) {
        inflateTable(threshold);
    }
    // The null key is handled separately
    if (key == null)
        return putForNullKey(value);
    int hash = hash(key);
    // Locate the bucket for this hash
    int i = indexFor(hash, table.length);
    for (Entry<K,V> e = table[i]; e != null; e = e.next) {
        Object k;
    // The key already exists: overwrite its value.
    // The cheap hash comparison runs first, then identity, then equals().
    if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {
        V oldValue = e.value;
        e.value = value;
        // Record the access; Entry.recordAccess() is overridden in LinkedHashMap.Entry and can be used to implement LRU ordering
        e.recordAccess(this);
        return oldValue;
    }
    }
    
    // No existing mapping was found: this is a structural modification
    modCount++;
    addEntry(hash, key, value, i);
    return null;
}

/**
 * Adds a new entry to the given bucket, doubling the table first when the map has
 * reached its threshold and the target bucket is already occupied.
 */
void addEntry(int hash, K key, V value, int bucketIndex) {        
    // Decide whether the table array has to grow
    if ((size >= threshold) && (null != table[bucketIndex])) {
        resize(2 * table.length);
        // The hash is recomputed and the index re-derived, because the table length
        // has changed and resize() may also have changed hashSeed.
        hash = (null != key) ? hash(key) : 0;
        bucketIndex = indexFor(hash, table.length);
    }        
    // Add the element to the bucket by inserting at the head of the chain
    createEntry(hash, key, value, bucketIndex);
}

/**
 * Links a new entry into the given bucket and increments size. Never resizes.
 */
void createEntry(int hash, K key, V value, int bucketIndex) {        
    // Head insertion: the new entry becomes the bucket head and points at the old head
    Entry<K,V> e = table[bucketIndex];
    table[bucketIndex] = new Entry<>(hash, key, value, e);
    size++;
}

2.分析

使用put方法为HashMap新增键值对。

table为一个Entry<K,V>数组，Entry<k,v>为一个链表元素，也就是说table数组中存储的是链表，可把它看成bucket（桶）；

hash值经indexFor映射到同一索引的Entry存储在同一个bucket（桶）中（hash值相同的键必然落在同一个桶中，反之则不一定）；

源码采用函数indexFor来索引桶的位置，hash(key)和table.length-1作与运算；由于table.length总是2的幂，这样可确保所得结果处于0~table.length-1之间，不会越界。

/**
 * Maps hash h to a bucket index by masking it with length-1.
 * When length is a power of two the result always lies in [0, length-1].
 */
static int indexFor(int h, int length) {
    // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2";
    return h & (length-1);
}

索引到数组的位置后，接着就是将元素存入相应的桶中，遍历桶中元素，将键值与入参键（key）比较，若相同，则替换掉键对应的值（value），此处不仅要比较hash值，还要equals比较。若桶中无相同键，调用addEntry新增元素。

addEntry并不是简单地往桶中添加元素，如上文中提到，当entry数达到阈值（capacity*loadFactor）且目标桶非空时，会先进行resize。

void addEntry(int hash, K key, V value, int bucketIndex) {
    // Resize only when size has reached the threshold AND the target bucket is
    // already occupied (non-null); an empty target bucket never triggers a resize.
    if ((size >= threshold) && (null != table[bucketIndex])) {
        resize(2 * table.length);
        // Recompute the hash and bucket index against the enlarged table
        hash = (null != key) ? hash(key) : 0;
        bucketIndex = indexFor(hash, table.length);
    }

    createEntry(hash, key, value, bucketIndex);
}

resize根据initHashSeedAsNeeded(newCapacity)的返回值决定是否对key进行rehash。

/**
 * Moves all entries into a new bucket array of the given capacity.
 */
void resize(int newCapacity) {
    Entry[] oldTable = table;
    int oldCapacity = oldTable.length;
    // Already at the maximum size: the table cannot grow, so raise the threshold
    // to Integer.MAX_VALUE and leave the table untouched.
    if (oldCapacity == MAXIMUM_CAPACITY) {
        threshold = Integer.MAX_VALUE;
        return;
    }

    Entry[] newTable = new Entry[newCapacity];
    // The boolean returned by initHashSeedAsNeeded tells transfer whether to re-hash the keys
    transfer(newTable, initHashSeedAsNeeded(newCapacity));
    table = newTable;
    // New threshold is capacity * loadFactor, capped at MAXIMUM_CAPACITY + 1
    threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
}

// Updates hashSeed when alternative hashing has to be switched on or off for the
// given capacity. Returns true if hashSeed was changed, i.e. stored hashes are stale.
final boolean initHashSeedAsNeeded(int capacity) {
    // A non-zero seed means alternative hashing is currently in use
    boolean currentAltHashing = hashSeed != 0;
    // Alternative hashing is wanted once the VM has booted and the capacity
    // reaches the configured threshold
    boolean useAltHashing = sun.misc.VM.isBooted() &&
            (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);
    // XOR: true only when the current and the wanted state differ
    boolean switching = currentAltHashing ^ useAltHashing;
    if (switching) {
        hashSeed = useAltHashing
            ? sun.misc.Hashing.randomHashSeed(this)
            : 0;
    }
    return switching;
}

/**
 * Relinks every entry of the current table into newTable.
 *
 * @param rehash when true, each entry's stored hash is recomputed from its key first
 */
void transfer(Entry[] newTable, boolean rehash) {
    int newCapacity = newTable.length;
    for (Entry<K,V> e : table) {
        while(null != e) {
            // Remember the successor before e.next is overwritten below
            Entry<K,V> next = e.next;
            if (rehash) {
                e.hash = null == e.key ? 0 : hash(e.key);
            }
            int i = indexFor(e.hash, newCapacity);
            // Head insertion into the new bucket: entries from one old chain that land
            // in the same new bucket end up in reverse order.
            e.next = newTable[i];
            newTable[i] = e;
            e = next;
        }
    }
}

删除

1.remove方法源码

/**
 * Removes the mapping for key, if present.
 *
 * @return the removed value, or null if there was no mapping for key
 */
public V remove(Object key) {
    Entry<K,V> e = removeEntryForKey(key);
    return (e == null ? null : e.value);
}

/**
 * Unlinks and returns the entry for key, or returns null if the map has no such entry.
 */
final Entry<K,V> removeEntryForKey(Object key) {
    if (size == 0) {
        return null;
    }
    // A null key uses hash 0
    int hash = (key == null) ? 0 : hash(key);
    int i = indexFor(hash, table.length);
    // prev trails e by one node; both start at the bucket head
    Entry<K,V> prev = table[i];
    Entry<K,V> e = prev;

    while (e != null) {
        Entry<K,V> next = e.next;
        Object k;
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k)))) {
            modCount++;
            size--;
            // prev == e only while e is still the bucket head
            if (prev == e)
                table[i] = next;
            else
                prev.next = next;
            e.recordRemoval(this);
            return e;
        }
        prev = e;
        e = next;
    }

    // The loop ended without a match, so e is null here
    return e;
}

2.分析

remove方法与put方法原理大致相同。

通过key索引数组位置，遍历相应位置的桶，找到相同元素，执行链表删除。

查找

1.get方法源码

/**
 * Returns the value mapped to key, or null if there is no entry for it.
 * Null is also returned when the key is mapped to a null value.
 */
public V get(Object key) {
    // The null key has its own lookup path
    if (key == null)
        return getForNullKey();
    Entry<K,V> entry = getEntry(key);

    return null == entry ? null : entry.getValue();
}

/**
 * Returns the entry for key, or null if the map has no mapping for it.
 */
final Entry<K,V> getEntry(Object key) {
    if (size == 0) {
        return null;
    }

    // A null key uses hash 0
    int hash = (key == null) ? 0 : hash(key);
    // Walk the chain of the bucket selected by the hash
    for (Entry<K,V> e = table[indexFor(hash, table.length)];
         e != null;
         e = e.next) {
        Object k;
        // Compare the stored hash first, then identity, then equals()
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k))))
            return e;
    }
    return null;
}

2.分析

get方法调用getEntry,getEntry的实现与put和removeEntryForKey类似。

HashIterator

HashIterator是fail-fast（快速失败）的：若在使用它遍历集合的过程中，集合被迭代器自身remove方法以外的途径进行了结构性修改（增减元素，无论来自其他线程还是当前线程），则再次调用迭代器方法时会抛出ConcurrentModificationException异常。

// Base iterator over the entries of the enclosing map. Subclasses implement next()
// on top of nextEntry(). Fail-fast: modification is detected by comparing modCount.
private abstract class HashIterator<E> implements Iterator<E> {
    Entry<K,V> next;        // next entry to return
    int expectedModCount;   // For fail-fast
    int index;              // current slot
    Entry<K,V> current;     // current entry

    HashIterator() {
        // Snapshot the modification count at creation time
        expectedModCount = modCount;
        if (size > 0) { // advance to first entry
            Entry[] t = table;
            // Skip empty buckets; index ends up one past the bucket that supplied next
            while (index < t.length && (next = t[index++]) == null)
                ;
        }
    }

    public final boolean hasNext() {
        return next != null;
    }

    final Entry<K,V> nextEntry() {
        // Fail-fast check: compares the map's modCount with the count this iterator
        // expects (not the map's size)
        if (modCount != expectedModCount)
            throw new ConcurrentModificationException();
        Entry<K,V> e = next;
        if (e == null)
            throw new NoSuchElementException();

        // End of the current chain: move on to the next non-empty bucket
        if ((next = e.next) == null) {
            Entry[] t = table;
            while (index < t.length && (next = t[index++]) == null)
                ;
        }
        current = e;
        return e;
    }

    public void remove() {
        // Nothing has been returned yet, or the last returned entry was already removed
        if (current == null)
            throw new IllegalStateException();
        if (modCount != expectedModCount)
            throw new ConcurrentModificationException();
        Object k = current.key;
        current = null;
        HashMap.this.removeEntryForKey(k);
        // Resynchronize so that this iterator's own removal is not reported as a
        // concurrent modification
        expectedModCount = modCount;
    }
}

HashMap提供三种集合视图（值、键、键值对），它们的迭代器均继承自HashIterator。

// Iterator that yields the value of each entry.
private final class ValueIterator extends HashIterator<V> {
    public V next() {
        return nextEntry().value;
    }
}

// Iterator that yields the key of each entry.
private final class KeyIterator extends HashIterator<K> {
    public K next() {
        return nextEntry().getKey();
    }
}

// Iterator that yields the entries themselves.
private final class EntryIterator extends HashIterator<Map.Entry<K,V>> {
    public Map.Entry<K,V> next() {
        return nextEntry();
    }
}

`Null`键

HashMap允许使用Null作为键，相应的增删改查方法也有对Null键的特殊处理；

分析代码我们不难看出，Null键索引的是table[0]

/**
 * put() for the null key, which is always stored in bucket 0 with hash 0.
 *
 * @return the value previously mapped to the null key, or null if a new entry was added
 */
private V putForNullKey(V value) {
    // Only bucket 0 can contain the null key
    for (Entry<K,V> e = table[0]; e != null; e = e.next) {
        if (e.key == null) {
            V oldValue = e.value;
            e.value = value;
            e.recordAccess(this);
            return oldValue;
        }
    }
    // No null-key entry yet: add one with hash 0 in bucket 0
    modCount++;
    addEntry(0, null, value, 0);
    return null;
}

/**
 * get() for the null key: scans bucket 0 for an entry whose key is null.
 *
 * @return the value mapped to the null key, or null if there is none
 */
private V getForNullKey() {
    if (size == 0) {
        return null;
    }
    for (Entry<K,V> e = table[0]; e != null; e = e.next) {
        if (e.key == null)
            return e.value;
    }
    return null;
}

removeEntryForKey也对Null键做了处理

// A null key gets hash 0, and indexFor(0, length) is 0, so the null key maps to bucket 0.
int hash = (key == null) ? 0 : hash(key);
int i = indexFor(hash, table.length);

实现Cloneable接口

重写Object类中的clone方法，实现了浅拷贝。

1.clone方法源码

/**
 * Returns a shallow copy of this <tt>HashMap</tt> instance: the keys and
 * values themselves are not cloned.
 *
 * @return a shallow copy of this map
 */
public Object clone() {
    HashMap<K,V> result = null;
    try {
        result = (HashMap<K,V>)super.clone();
    } catch (CloneNotSupportedException e) {
        // assert false;
    }
    // super.clone() copied the reference to this map's table. Give the clone its own
    // table, sized from the current entry count and capped by MAXIMUM_CAPACITY and by
    // this map's table length.
    if (result.table != EMPTY_TABLE) {
        result.inflateTable(Math.min(
            (int) Math.min(
                size * Math.min(1 / loadFactor, 4.0f),
                // we have limits...
                HashMap.MAXIMUM_CAPACITY),
           table.length));
    }
    // Reset the per-instance state that super.clone() copied from this map
    result.entrySet = null;
    result.modCount = 0;
    result.size = 0;
    result.init();
    // Re-insert the mappings: new Entry nodes are created, but the key and value
    // references are shared with this map, which is why the copy is shallow
    result.putAllForCreate(this);

    return result;
}

// Copies every mapping of m into this map through putForCreate().
private void putAllForCreate(Map<? extends K, ? extends V> m) {
    for (Map.Entry<? extends K, ? extends V> e : m.entrySet())
        putForCreate(e.getKey(), e.getValue());
}

// Variant of put() used while a map is being built: it never resizes the table and
// does not touch modCount. The table must already be allocated.
private void putForCreate(K key, V value) {
    int hash = null == key ? 0 : hash(key);
    int i = indexFor(hash, table.length);

    /**
     * Look for preexisting entry for key.  This will never happen for
     * clone or deserialize.  It will only happen for construction if the
     * input Map is a sorted map whose ordering is inconsistent w/ equals.
     */
    for (Entry<K,V> e = table[i]; e != null; e = e.next) {
        Object k;
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k)))) {
            e.value = value;
            return;
        }
    }

    // Not found: link a new entry at the head of bucket i
    createEntry(hash, key, value, i);
}

2.示例证明浅拷贝

package edu.zju.chwl.map;

import java.util.HashMap;
import org.junit.Test;

/**
 * Demonstrates that {@link HashMap#clone()} makes a shallow copy: the clone holds
 * the same value objects as the original map.
 */
public class TestHashMap {

    /**
     * Clones a map and then mutates the stored value through the original reference.
     * Both maps print the changed name, because they share the same Student instance.
     */
    @Test
    public void test() {
        HashMap<String, Student> hashMap = new HashMap<String, Student>();
        Student stu = new Student("tom", 26);
        hashMap.put("tom", stu);
        // clone() returns Object, so this cast is unchecked. It is safe because the clone
        // has exactly the same type parameters as hashMap: the values are Student, not String.
        @SuppressWarnings("unchecked")
        HashMap<String, Student> cloneHashMap = (HashMap<String, Student>) hashMap.clone();
        stu.setName("tony");
        System.out.println("hashMap:" + hashMap);
        System.out.println("cloneHashMap:" + cloneHashMap);
    }

    /**
     * Simple mutable value type used as the map value.
     * Static because it needs no reference to the enclosing test instance.
     */
    static class Student {
        private String name;
        private final int age;

        Student(String name, int age) {
            this.name = name;
            this.age = age;
        }

        public void setName(String name) {
            this.name = name;
        }

        @Override
        public String toString() {
            return "name:" + name + ",age:" + age;
        }

    }
}

打印结果：

hashMap:{tom=name:tony,age:26}

cloneHashMap:{tom=name:tony,age:26}

证明clone方法并未对键、值进行深拷贝。

实现Serializable接口

保存Entry的table数组为transient的，也就是说在进行序列化时，并不会包含该成员，这是为什么呢？

// transient: default serialization skips this field. writeObject() writes the keys and
// values one by one, and readObject() rebuilds the table from them.
transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE;

我们可以试想下面的场景：

我们在机器A上算出对象A的哈希值与索引，然后把它插入到HashMap中，然后把该HashMap序列化后，在机器B上重新算对象的哈希值与索引，这与机器A上算出的可能是不一样的（取决于hashCode方法的实现），所以我们在机器B上get对象A时，会得到错误的结果。

所以说，当序列化一个HashMap对象时，保存Entry的table是不需要序列化进来的，因为它在另一台机器上是错误的。

HashMap实现了自己的序列化逻辑，自定义了writeObject方法与readObject方法。

// Serialization: writes the non-transient fields, then the bucket count, the number of
// mappings, and each key and value in turn. The table array itself is never written.
private void writeObject(java.io.ObjectOutputStream s)
    throws IOException
{
    // Write out the threshold, loadfactor, and any hidden stuff
    s.defaultWriteObject();

    // Write out number of buckets
    if (table==EMPTY_TABLE) {
        // No table allocated yet: write the capacity derived from threshold
        s.writeInt(roundUpToPowerOf2(threshold));
    } else {
       s.writeInt(table.length);
    }

    // Write out size (number of Mappings)
    s.writeInt(size);

    // Write out keys and values (alternating)
    if (size > 0) {
        for(Map.Entry<K,V> e : entrySet0()) {
            s.writeObject(e.getKey());
            s.writeObject(e.getValue());
        }
    }
}

// Deserialization: restores the non-transient fields, validates them, then rebuilds the
// table by re-inserting every key/value pair, so hashes and bucket indexes are recomputed.
private void readObject(java.io.ObjectInputStream s)
     throws IOException, ClassNotFoundException
{
    // Read in the threshold (ignored), loadfactor, and any hidden stuff
    s.defaultReadObject();
    // Reject a stream that carries an invalid load factor
    if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
        throw new InvalidObjectException("Illegal load factor: " +
                                           loadFactor);
    }

    // set other fields that need values
    table = (Entry<K,V>[]) EMPTY_TABLE;

    // Read in number of buckets
    s.readInt(); // ignored.

    // Read number of mappings
    int mappings = s.readInt();
    if (mappings < 0)
        throw new InvalidObjectException("Illegal mappings count: " +
                                           mappings);

    // capacity chosen by number of mappings and desired load (if >= 0.25)
    int capacity = (int) Math.min(
                mappings * Math.min(1 / loadFactor, 4.0f),
                // we have limits...
                HashMap.MAXIMUM_CAPACITY);

    // allocate the bucket array;
    if (mappings > 0) {
        inflateTable(capacity);
    } else {
        // Nothing to insert: leave the table empty and keep the capacity in threshold
        threshold = capacity;
    }

    init();  // Give subclass a chance to do its thing.

    // Read the keys and values, and put the mappings in the HashMap
    for (int i = 0; i < mappings; i++) {
        K key = (K) s.readObject();
        V value = (V) s.readObject();
        putForCreate(key, value);
    }
}

// Inserts a mapping while the map is being rebuilt. The hash is computed here from the
// key; the table is not resized and modCount is not touched.
private void putForCreate(K key, V value) {
    int hash = null == key ? 0 : hash(key);
    int i = indexFor(hash, table.length);

    // If the key is already present, just overwrite its value
    for (Entry<K,V> e = table[i]; e != null; e = e.next) {
        Object k;
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k)))) {
            e.value = value;
            return;
        }
    }

    createEntry(hash, key, value, i);
}

// Links a new entry at the head of the given bucket and increments size.
void createEntry(int hash, K key, V value, int bucketIndex) {
    Entry<K,V> e = table[bucketIndex];
    table[bucketIndex] = new Entry<>(hash, key, value, e);
    size++;
}

HashMap在序列化时，对key和value分别单独序列化，反序列化时再对key进行hash，构造数据结构。

参考

Java HashMap 源码解析

HashMap的工作原理