diff --git a/second/week_01/79/ArrayList.md b/second/week_01/79/ArrayList.md new file mode 100644 index 0000000000000000000000000000000000000000..0f61d71231177e7011a3b3d08b1c23d18e26de2e --- /dev/null +++ b/second/week_01/79/ArrayList.md @@ -0,0 +1,647 @@ +### [Java 8] ArrayList + +``` java +public class ArrayList extends AbstractList + implements List, RandomAccess, Cloneable, java.io.Serializable +{ + private static final long serialVersionUID = 8683452581122892189L; + + /** + * Default initial capacity. + */ + // 无参构造时默认容量为10 + private static final int DEFAULT_CAPACITY = 10; + + /** + * Shared empty array instance used for empty instances. + */ + // 所有容量为0的ArrayList共享该底层数组 + private static final Object[] EMPTY_ELEMENTDATA = {}; + + /** + * Shared empty array instance used for default sized empty instances. We + * distinguish this from EMPTY_ELEMENTDATA to know how much to inflate when + * first element is added. + */ + // 所有默认容量但元素数为0的ArrayList共享该底层数组 + private static final Object[] DEFAULTCAPACITY_EMPTY_ELEMENTDATA = {}; + + /** + * The array buffer into which the elements of the ArrayList are stored. + * The capacity of the ArrayList is the length of this array buffer. Any + * empty ArrayList with elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA + * will be expanded to DEFAULT_CAPACITY when the first element is added. + */ + // 底层数组 + transient Object[] elementData; // non-private to simplify nested class access + + /** + * The size of the ArrayList (the number of elements it contains). + * + * @serial + */ + // 元素数量 + private int size; + + /** + * Constructs an empty list with the specified initial capacity. 
+ * + * @param initialCapacity the initial capacity of the list + * @throws IllegalArgumentException if the specified initial capacity + * is negative + */ + public ArrayList(int initialCapacity) { + if (initialCapacity > 0) { + this.elementData = new Object[initialCapacity]; + } else if (initialCapacity == 0) { + // 如果初始容量为0,底层数组直接用共享的 EMPTY_ELEMENTDATA占位, 避免额外的内存浪费 + this.elementData = EMPTY_ELEMENTDATA; + } else { + // 初始容量不能小于0,否则抛 IllegalArgumentException + throw new IllegalArgumentException("Illegal Capacity: "+ + initialCapacity); + } + } + + /** + * Constructs an empty list with an initial capacity of ten. + */ + public ArrayList() { + // 不指定初始容量,则初始容量为默认容量,但底层数组直接用共享的 DEFAULTCAPACITY_EMPTY_ELEMENTDATA占位,延迟初始化数组,在添加元素时再初始化 + this.elementData = DEFAULTCAPACITY_EMPTY_ELEMENTDATA; + } + + /** + * Constructs a list containing the elements of the specified + * collection, in the order they are returned by the collection's + * iterator. + * + * @param c the collection whose elements are to be placed into this list + * @throws NullPointerException if the specified collection is null + */ + public ArrayList(Collection c) { + elementData = c.toArray(); + if ((size = elementData.length) != 0) { + // c.toArray might (incorrectly) not return Object[] (see 6260652) + if (elementData.getClass() != Object[].class) + // Collection 接口的 toArray 方法实现中可能初始化数组并非使用 Object[] 类型,这种情况下如果将该数组任何索引更新为其他类型的元素就会抛 ArrayStoreException,这可能和底层数组的 Object[] 类型定义语义不符,因此要将数组元素复制到 用 Object[] 类型初始化的数组上 + elementData = Arrays.copyOf(elementData, size, Object[].class); + } else { + // replace with empty array. + // 传入空集合,则相当于初始容量设为0 + this.elementData = EMPTY_ELEMENTDATA; + } + } + + /** + * Trims the capacity of this ArrayList instance to be the + * list's current size. An application can use this operation to minimize + * the storage of an ArrayList instance. 
+ */ + // 容量缩减为当前元素数量 + public void trimToSize() { + // 底层数组可能变更,增加结构修改计数 + modCount++; + if (size < elementData.length) { + // 如果元素数量小于容量,需要进行缩容。如果元素数量为0,直接将底层数组更换为 EMPTY_ELEMENTDATA占位,否则用新的当前元素数量大小的数组复制元素 + elementData = (size == 0) + ? EMPTY_ELEMENTDATA + : Arrays.copyOf(elementData, size); + } + } + + /** + * Increases the capacity of this ArrayList instance, if + * necessary, to ensure that it can hold at least the number of elements + * specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity + */ + // 扩容容量使容量不小于给定的容量 + public void ensureCapacity(int minCapacity) { + // 由于默认容量且未添加元素的情况下底层数组使用 DEFAULTCAPACITY_EMPTY_ELEMENTDATA 占位,其他情况下都可以直接由获取底层数组长度得到容量,因此这里要判断一下是前者,如果是则需要根据给定容量是否超过默认容量来决定是否扩容 + int minExpand = (elementData != DEFAULTCAPACITY_EMPTY_ELEMENTDATA) + // any size if not default element table + ? 0 + // larger than default for default empty table. It's already + // supposed to be at default size. + : DEFAULT_CAPACITY; + + if (minCapacity > minExpand) { + ensureExplicitCapacity(minCapacity); + } + } + + private static int calculateCapacity(Object[] elementData, int minCapacity) { + // 由于默认容量且未添加元素的情况下底层数组使用 DEFAULTCAPACITY_EMPTY_ELEMENTDATA 占位,如果是这种情况需要返回给定容量和默认容量的最大值 + if (elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA) { + return Math.max(DEFAULT_CAPACITY, minCapacity); + } + return minCapacity; + } + + private void ensureCapacityInternal(int minCapacity) { + ensureExplicitCapacity(calculateCapacity(elementData, minCapacity)); + } + + private void ensureExplicitCapacity(int minCapacity) { + // 底层数组可能扩容,增加结构修改计数 + modCount++; + + // overflow-conscious code + if (minCapacity - elementData.length > 0) + // 给定容量超过当前容量,需要扩容 + grow(minCapacity); + } + + /** + * The maximum size of array to allocate. + * Some VMs reserve some header words in an array. 
+ * Attempts to allocate larger arrays may result in + * OutOfMemoryError: Requested array size exceeds VM limit + */ + // 保守的最大数组大小,不同 JVM 实现数组元素上限是不同的,用不超过这个保守值的大小创建数组,可以避免在各 JVM 实现上因请求的数组大小超过 VM 限制而抛出 OutOfMemoryError 异常 + private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8; + + /** + * Increases the capacity to ensure that it can hold at least the + * number of elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity + */ + private void grow(int minCapacity) { + // overflow-conscious code + int oldCapacity = elementData.length; + // 容量增大50% + int newCapacity = oldCapacity + (oldCapacity >> 1); + // 如果增大50%后的容量仍小于给定容量,则直接使用给定容量作为新容量(这里用减法结果与0比较而不是直接比大小,是为了兼容 int 溢出的情况) + if (newCapacity - minCapacity < 0) + newCapacity = minCapacity; + // 如果新容量超过保守的最大数组大小,则交给 hugeCapacity 根据给定容量决定新容量:MAX_ARRAY_SIZE 或 Integer 最大值 + if (newCapacity - MAX_ARRAY_SIZE > 0) + newCapacity = hugeCapacity(minCapacity); + // minCapacity is usually close to size, so this is a win: + // 用新容量创建底层数组并复制元素完成扩容 + elementData = Arrays.copyOf(elementData, newCapacity); + } + + private static int hugeCapacity(int minCapacity) { + if (minCapacity < 0) // overflow + throw new OutOfMemoryError(); + // 如果给定容量超过保守的最大数组大小,则尝试用 Integer 最大值作为新容量,可能在一些JVM实现上运行时抛出 OutOfMemoryError 异常;否则使用保守的最大数组大小 + return (minCapacity > MAX_ARRAY_SIZE) ? + Integer.MAX_VALUE : + MAX_ARRAY_SIZE; + } + + /** + * Returns the number of elements in this list. + * + * @return the number of elements in this list + */ + public int size() { + return size; + } + + /** + * Returns true if this list contains no elements. + * + * @return true if this list contains no elements + */ + public boolean isEmpty() { + return size == 0; + } + + /** + * Returns true if this list contains the specified element. + * More formally, returns true if and only if this list contains + * at least one element e such that + * (o==null ? e==null : o.equals(e)). 
+ * + * @param o element whose presence in this list is to be tested + * @return true if this list contains the specified element + */ + public boolean contains(Object o) { + // 从前遍历查找 + return indexOf(o) >= 0; + } + + /** + * Returns the index of the first occurrence of the specified element + * in this list, or -1 if this list does not contain the element. + * More formally, returns the lowest index i such that + * (o==null ? get(i)==null : o.equals(get(i))), + * or -1 if there is no such index. + */ + public int indexOf(Object o) { + // 元素值是可能为 null 的,搜索元素是 null 就直接用 null 搜 + if (o == null) { + for (int i = 0; i < size; i++) + if (elementData[i]==null) + return i; + } else { + // 搜索元素不是 null 则调用 equals 方法搜 + for (int i = 0; i < size; i++) + if (o.equals(elementData[i])) + return i; + } + return -1; + } + + /** + * Returns the index of the last occurrence of the specified element + * in this list, or -1 if this list does not contain the element. + * More formally, returns the highest index i such that + * (o==null ? get(i)==null : o.equals(get(i))), + * or -1 if there is no such index. + */ + public int lastIndexOf(Object o) { + // 元素值是可能为 null 的,搜索元素是 null 就直接用 null 搜 + if (o == null) { + for (int i = size-1; i >= 0; i--) + if (elementData[i]==null) + return i; + } else { + // 搜索元素不是 null 则调用 equals 方法搜 + for (int i = size-1; i >= 0; i--) + if (o.equals(elementData[i])) + return i; + } + return -1; + } + + /** + * Returns a shallow copy of this ArrayList instance. (The + * elements themselves are not copied.) 
+ * + * @return a clone of this ArrayList instance + */ + public Object clone() { + try { + ArrayList v = (ArrayList) super.clone(); + v.elementData = Arrays.copyOf(elementData, size); + v.modCount = 0; + return v; + } catch (CloneNotSupportedException e) { + // this shouldn't happen, since we are Cloneable + throw new InternalError(e); + } + } + + /** + * Returns an array containing all of the elements in this list + * in proper sequence (from first to last element). + * + *

The returned array will be "safe" in that no references to it are + * maintained by this list. (In other words, this method must allocate + * a new array). The caller is thus free to modify the returned array. + * + *

This method acts as bridge between array-based and collection-based + * APIs. + * + * @return an array containing all of the elements in this list in + * proper sequence + */ + public Object[] toArray() { + // 拷贝底层数组再返回,防止数组共享带来副作用风险 + return Arrays.copyOf(elementData, size); + } + + /** + * Returns an array containing all of the elements in this list in proper + * sequence (from first to last element); the runtime type of the returned + * array is that of the specified array. If the list fits in the + * specified array, it is returned therein. Otherwise, a new array is + * allocated with the runtime type of the specified array and the size of + * this list. + * + *

If the list fits in the specified array with room to spare + * (i.e., the array has more elements than the list), the element in + * the array immediately following the end of the collection is set to + * null. (This is useful in determining the length of the + * list only if the caller knows that the list does not contain + * any null elements.) + * + * @param a the array into which the elements of the list are to + * be stored, if it is big enough; otherwise, a new array of the + * same runtime type is allocated for this purpose. + * @return an array containing the elements of the list + * @throws ArrayStoreException if the runtime type of the specified array + * is not a supertype of the runtime type of every element in + * this list + * @throws NullPointerException if the specified array is null + */ + @SuppressWarnings("unchecked") + public T[] toArray(T[] a) { + if (a.length < size) + // Make a new array of a's runtime type, but my contents: + // 如果给定数组大小小于当前 ArrayList 元素数量,拷贝底层数组到给定数组同类型的新数组 + return (T[]) Arrays.copyOf(elementData, size, a.getClass()); + // 给定数组大小不小于当前 ArrayList 元素数量,直接将底层数组元素向给定数组上拷贝 + System.arraycopy(elementData, 0, a, 0, size); + if (a.length > size) + // 如果给定数组大小大于当前 ArrayList 元素数量,将最后一个有效元素后的位置置 null,作为结束标记 + a[size] = null; + return a; + } + + // Positional Access Operations + + @SuppressWarnings("unchecked") + // 索引访问底层数组并转换为泛型类型的辅助方法 + E elementData(int index) { + return (E) elementData[index]; + } + + /** + * Returns the element at the specified position in this list. + * + * @param index index of the element to return + * @return the element at the specified position in this list + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public E get(int index) { + // 索引越界校验 + rangeCheck(index); + + return elementData(index); + } + + /** + * Replaces the element at the specified position in this list with + * the specified element. 
+ * + * @param index index of the element to replace + * @param element element to be stored at the specified position + * @return the element previously at the specified position + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public E set(int index, E element) { + // 索引越界校验 + rangeCheck(index); + + E oldValue = elementData(index); + // 更新索引位置元素 + elementData[index] = element; + // 返回该索引位置之前的元素 + return oldValue; + } + + /** + * Appends the specified element to the end of this list. + * + * @param e element to be appended to this list + * @return true (as specified by {@link Collection#add}) + */ + public boolean add(E e) { + // 添加新元素,需要增加结构修改计数并检查是否需要扩容 + ensureCapacityInternal(size + 1); // Increments modCount!! + elementData[size++] = e; + return true; + } + + /** + * Inserts the specified element at the specified position in this + * list. Shifts the element currently at that position (if any) and + * any subsequent elements to the right (adds one to their indices). + * + * @param index index at which the specified element is to be inserted + * @param element element to be inserted + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public void add(int index, E element) { + // 索引越界校验 + rangeCheckForAdd(index); + + // 添加新元素,需要增加结构修改计数并检查是否需要扩容 + ensureCapacityInternal(size + 1); // Increments modCount!! + // 通过数组复制的方式将底层数组 index 及之后索引位置的元素向后移一个位置 + System.arraycopy(elementData, index, elementData, index + 1, + size - index); + elementData[index] = element; + size++; + } + + /** + * Removes the element at the specified position in this list. + * Shifts any subsequent elements to the left (subtracts one from their + * indices). 
+ * + * @param index the index of the element to be removed + * @return the element that was removed from the list + * @throws IndexOutOfBoundsException {@inheritDoc} + */ + public E remove(int index) { + // 索引越界校验 + rangeCheck(index); + + // 移除元素,需要增加结构修改计数 + modCount++; + // 先拿到移除元素的引用 + E oldValue = elementData(index); + + // 计算移动的元素数量 + int numMoved = size - index - 1; + if (numMoved > 0) + // 只要移除的不是最后一个元素,就通过数组复制的方式将 index 索引位置之后的元素向前移一个位置 + System.arraycopy(elementData, index+1, elementData, index, + numMoved); + // 移除对最后一个元素的引用,尽快 GC + elementData[--size] = null; // clear to let GC do its work + + // 返回移除的元素 + return oldValue; + } + + /** + * Removes the first occurrence of the specified element from this list, + * if it is present. If the list does not contain the element, it is + * unchanged. More formally, removes the element with the lowest index + * i such that + * (o==null ? get(i)==null : o.equals(get(i))) + * (if such an element exists). Returns true if this list + * contained the specified element (or equivalently, if this list + * changed as a result of the call). + * + * @param o element to be removed from this list, if present + * @return true if this list contained the specified element + */ + // 遍历查找 + 移除 + public boolean remove(Object o) { + if (o == null) { + for (int index = 0; index < size; index++) + if (elementData[index] == null) { + fastRemove(index); + return true; + } + } else { + for (int index = 0; index < size; index++) + if (o.equals(elementData[index])) { + fastRemove(index); + return true; + } + } + return false; + } + + /* + * Private remove method that skips bounds checking and does not + * return the value removed. 
+ */ + // 相比 remove 方法没有索引越界校验也不返回移除的元素 + private void fastRemove(int index) { + modCount++; + int numMoved = size - index - 1; + if (numMoved > 0) + System.arraycopy(elementData, index+1, elementData, index, + numMoved); + elementData[--size] = null; // clear to let GC do its work + } + + /** + * Removes all of the elements from this list. The list will + * be empty after this call returns. + */ + public void clear() { + // 元素清空操作,需要增加结构修改计数 + modCount++; + + // clear to let GC do its work + // 遍历移除引用,尽快 GC + for (int i = 0; i < size; i++) + elementData[i] = null; + + // 元素数量清0 + size = 0; + } + + /** + * Appends all of the elements in the specified collection to the end of + * this list, in the order that they are returned by the + * specified collection's Iterator. The behavior of this operation is + * undefined if the specified collection is modified while the operation + * is in progress. (This implies that the behavior of this call is + * undefined if the specified collection is this list, and this + * list is nonempty.) + * + * @param c collection containing elements to be added to this list + * @return true if this list changed as a result of the call + * @throws NullPointerException if the specified collection is null + */ + public boolean addAll(Collection c) { + Object[] a = c.toArray(); + int numNew = a.length; + // 批量添加新元素,需要增加结构修改计数并检查是否需要扩容 + ensureCapacityInternal(size + numNew); // Increments modCount + // 将新元素从底层数组最后一个有效元素位置后开始拷贝 + System.arraycopy(a, 0, elementData, size, numNew); + // 更新元素数量 + size += numNew; + // 返回是否有新元素添加 + return numNew != 0; + } + + /** + * Inserts all of the elements in the specified collection into this + * list, starting at the specified position. Shifts the element + * currently at that position (if any) and any subsequent elements to + * the right (increases their indices). The new elements will appear + * in the list in the order that they are returned by the + * specified collection's iterator. 
+ * + * @param index index at which to insert the first element from the + * specified collection + * @param c collection containing elements to be added to this list + * @return true if this list changed as a result of the call + * @throws IndexOutOfBoundsException {@inheritDoc} + * @throws NullPointerException if the specified collection is null + */ + public boolean addAll(int index, Collection c) { + // 索引越界校验 + rangeCheckForAdd(index); + + Object[] a = c.toArray(); + int numNew = a.length; + // 批量添加新元素,需要增加结构修改计数并检查是否需要扩容 + ensureCapacityInternal(size + numNew); // Increments modCount + + // 计算需要移动的元素数量 + int numMoved = size - index; + if (numMoved > 0) + // 通过数组复制的方式将底层数组 index 及之后索引位置的元素向后移新加元素数量个位置 + System.arraycopy(elementData, index, elementData, index + numNew, + numMoved); + + // 将新元素从底层数组 index 索引位置开始拷贝 + System.arraycopy(a, 0, elementData, index, numNew); + // 更新元素数量 + size += numNew; + // 返回是否有新元素添加 + return numNew != 0; + } + + /** + * Removes from this list all of the elements whose index is between + * {@code fromIndex}, inclusive, and {@code toIndex}, exclusive. + * Shifts any succeeding elements to the left (reduces their index). + * This call shortens the list by {@code (toIndex - fromIndex)} elements. + * (If {@code toIndex==fromIndex}, this operation has no effect.) 
+ * + * @throws IndexOutOfBoundsException if {@code fromIndex} or + * {@code toIndex} is out of range + * ({@code fromIndex < 0 || + * fromIndex >= size() || + * toIndex > size() || + * toIndex < fromIndex}) + */ + protected void removeRange(int fromIndex, int toIndex) { + // 批量移除元素,需要增加结构修改计数 + modCount++; + + // 计算需要移动的元素数量 + int numMoved = size - toIndex; + // 就通过数组复制的方式将 index 索引位置之后的元素向前移动移除元素数量个位置 + System.arraycopy(elementData, toIndex, elementData, fromIndex, + numMoved); + + // clear to let GC do its work + // 遍历无效元素的索引移除无效元素的引用,尽快 GC + int newSize = size - (toIndex-fromIndex); + for (int i = newSize; i < size; i++) { + elementData[i] = null; + } + // 更新元素数量 + size = newSize; + } + + /** + * Checks if the given index is in range. If not, throws an appropriate + * runtime exception. This method does *not* check if the index is + * negative: It is always used immediately prior to an array access, + * which throws an ArrayIndexOutOfBoundsException if index is negative. + */ + private void rangeCheck(int index) { + // 用于访问底层数组前索引越界校验,只需校验不访问底层数组无效元素索引位置即可,如果是负数会由紧接着的底层数组访问抛出异常 + if (index >= size) + throw new IndexOutOfBoundsException(outOfBoundsMsg(index)); + } + + /** + * A version of rangeCheck used by add and addAll. + */ + private void rangeCheckForAdd(int index) { + // 用于 add 和 addAll 方法的索引越界校验,由于还有可能的扩容操作之后才访问底层数组,因此不仅需要校验不访问底层数组无效元素索引位置,也需要校验不是负数 + if (index > size || index < 0) + throw new IndexOutOfBoundsException(outOfBoundsMsg(index)); + } + + /** + * Constructs an IndexOutOfBoundsException detail message. + * Of the many possible refactorings of the error handling code, + * this "outlining" performs best with both server and client VMs. 
+ */ + // 拼接索引越界异常信息 + private String outOfBoundsMsg(int index) { + return "Index: "+index+", Size: "+size; + } +``` diff --git a/second/week_01/79/HashMap.md b/second/week_01/79/HashMap.md new file mode 100644 index 0000000000000000000000000000000000000000..8c8504261e895a08c36a35cfdaca8f3cadada6cd --- /dev/null +++ b/second/week_01/79/HashMap.md @@ -0,0 +1,485 @@ +### [Java 8] HashMap + +`HashMap` 是哈希表的基本实现,不是线程安全的。`HashMap` 底层主要存储是一个数组 `table`,数组中每个元素称为一个桶。将 `key` 通过哈希函数 `key.hashCode()` 得到哈希值 `hash`,再将 `hash` 按照桶的个数(即数组长度)取模得到该 `key` 所映射的桶(即数组的索引)。 + +由于取模会把取值范围很大的 `hash` 压缩到有限个桶上,从 `key` 到桶的映射过程可能会碰撞,即不同的 `key` 可能会映射到同一个桶,因此桶内需要能存多个键值对。桶默认使用链表存储多个键值对。 + +如果碰撞过多会严重影响 `HashMap` 的性能,本来算个 `hash` 再取个模再比较个 `key` 三部曲就完事的工作,在碰撞时第三步要遍历链表挨个比较 `key` 才能找到要查找的 `key`,这样 *O(1)* 的时间复杂度退化为 *O(K)*,其中K为桶内键值对数。 + +因此为了减小碰撞带来的性能退化,有两种策略分别针对不同的场景: +1. 假设哈希函数分布还是不错的,但因为桶数量太少了,广泛分布的 `hash` 被压缩到少量的桶中不碰撞才怪。既然这样,就扩容桶的数量,原先映射到同一个桶的 `key` 就可能分散到不同的桶。比如桶的数量为4,`hash` 为3和7的 `key` 都会映射到索引为3的桶,但把桶扩容到8后,两者就分别映射到索引为3和7的桶;当然等键值对数增长到桶的数量再扩容有点晚了,肯定已经发生一些碰撞了,试想多牛逼的哈希函数配上多么契合的场景才能保证一个桶只落一个键值对呢。因此需要一个阈值,键值对数超过阈值就扩容。 + +2. 
假设哈希函数实现得比较烂,一堆不同的 `key` 通过哈希函数计算后都是差不多的 `hash`,桶再多又有毛用,全TMD映射到少量的桶中,桶里链表又特别长。既然这样,那就提高键值对多的桶内查找 `key` 的效率,用红黑树替换链表,将 *O(K)* 补救到 *O(logK)* + + +#### 常量及实例变量 +``` java +// 默认桶的数量,必须是2的整数次幂 +static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16 + +// 桶的最大数量 +static final int MAXIMUM_CAPACITY = 1 << 30; + +// 默认负载因子 +static final float DEFAULT_LOAD_FACTOR = 0.75f; + +// 桶内链表转化为红黑树的键值对数量阈值 +static final int TREEIFY_THRESHOLD = 8; + +// 桶内红黑树转化为链表的键值对数量阈值 +static final int UNTREEIFY_THRESHOLD = 6; + +// 当桶的数量没有达到这个阈值时,桶内链表不会转化为红黑树 +static final int MIN_TREEIFY_CAPACITY = 64; + +// 桶数组 +transient Node[] table; + +// 键值对总数 +transient int size; + +// HashMap修改次数(确切地说是结构变更次数,不包括修改已存在key的value),其实相当于当前集合快照版本,用于迭代器遍历时检查集合是否被修改 +transient int modCount; + +// 桶数组扩容阈值 +int threshold; + +// 负载因子 +final float loadFactor; +``` +负载因子 `loadFactor` 是桶数组相对扩容阈值,是一个相对于桶数量的比例(可以大于1),因此绝对阈值 `threshold` 就是桶的数量 `table.length` 乘以负载因子 `loadFactor` 。当键值对总数 `size` 超过 `threshold` 时,触发 `resize` 方法进行桶数组 `table` 扩容。因此 `loadFactor` 可以理解为 `HashMap` 时间和空间的权衡 + +`loadFactor` 是 `HashMap` 初始化时可以指定的,如果未指定则默认为 `DEFAULT_LOAD_FACTOR` + +桶的数量即 `table` 的大小也是 `HashMap` 初始化时可以指定的,如果未指定则默认为 `DEFAULT_INITIAL_CAPACITY` + +`table` 也不是无限扩容的,最多支持 `MAXIMUM_CAPACITY` 个桶 + +`table` 的长度必须是2的整数次幂,这是为了取模运算更高效,即hash对2的整数次幂n取模可以用骚操作位运算 `hash & (n - 1)` 。这也是为什么默认值 `DEFAULT_INITIAL_CAPACITY` 及最大值 `MAXIMUM_CAPACITY` 也要求是2的整数次幂 + +`table` 的元素是 `HashMap.Node` 类型,`HashMap.Node` 是默认的链表节点,`HashMap.TreeNode` 是红黑树节点,继承了 `HashMap.Node` 。既然链表有头树有根,`table` 中就只引用一个头/根节点即可。 + +当某个桶内键值对数量超过 `TREEIFY_THRESHOLD` 时将触发 `treeifyBin` 方法将这个桶的链表转化为红黑树,当然这有个大前提,就是当前桶数量不少于 `MIN_TREEIFY_CAPACITY` ,因为桶很少的时候冲突的可能性本来就非常高,这时就因为某个链太长就转为红黑树太鲁莽了,怎么也得先多弄几个桶看看是桶太少还是哈希函数太烂 + +由于 `HashMap.TreeNode` 空间几乎是 `HashMap.Node` 的2倍,因此在性能提升不大的情况下链表没必要转化为红黑树,另外对于良好实现分布均匀的哈希函数,冲突的概率很小,对应于这种情况就应该只有极低的概率链表转化红黑树。在哈希值随机分布的理想情况下,桶内节点数服从参数约为0.5的泊松分布,8个 `key` 落到同一个桶中的概率只有0.00000006,因此将 `TREEIFY_THRESHOLD` 设为8可以满足上面的论断 + +如果某个桶已经转化为红黑树,`resize` 后原先桶里的键值对可能落到不同的桶中,即触发 `split`方法,`split` 
后树节点可能很少了,浪费多一倍的空间没什么必要了,可以转化回链表,即触发 `untreeify` 。如果没有这个操作,多次 `resize` 和 `split` 后可能有很多的桶只有很少的节点,但却使用红黑树结构。`split` 后一个桶内红黑树节点降到多少转化回链表受阈值 `UNTREEIFY_THRESHOLD` 控制 + + +#### 哈希 +``` java +static final int hash(Object key) { + int h; + // HashMap允许key为null,对应的hash为0 + // 由于hash之后要 &(table.length-1)确定桶索引,只有比table.length唯一的1的位低的位保留下来,高位信息都被过滤掉了 + // 对于哈希函数分布不均的情况这里挣扎了下,将hash低16位和高16位做异或,这样高位信息也会反映在低位中,降低某些哈希函数实现只在高位变化从而碰撞的概率,当然这里也只是用开销很小的位操作,因为分布均匀的哈希函数不需要挣扎,而实现比较烂的哈希函数有转化红黑树兜底 + return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); +} +``` + + +#### 构造函数及相关辅助方法 +``` java +public HashMap(int initialCapacity, float loadFactor) { + if (initialCapacity < 0) + throw new IllegalArgumentException("Illegal initial capacity: " + + initialCapacity); + // 桶数量不超过MAXIMUM_CAPACITY + if (initialCapacity > MAXIMUM_CAPACITY) + initialCapacity = MAXIMUM_CAPACITY; + if (loadFactor <= 0 || Float.isNaN(loadFactor)) + throw new IllegalArgumentException("Illegal load factor: " + + loadFactor); + this.loadFactor = loadFactor; + // 调用tableSizeFor得到的是桶数量,这里却赋值给了threshold,这是一个构造时的临时处理,因为table是延迟初始化的,并且没有专门的字段存储桶容量,因此先扔给threshold存着,具体等第一次resize初始化table时再将真正的扩容阈值赋给threshold + this.threshold = tableSizeFor(initialCapacity); +} + +public HashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_LOAD_FACTOR); +} + +public HashMap() { + this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted +} + +public HashMap(Map m) { + this.loadFactor = DEFAULT_LOAD_FACTOR; + putMapEntries(m, false); +} + +// 计算大于等于cap的最小的2的整数幂 +static final int tableSizeFor(int cap) { + // -1是专门针对cap正好就是2的整数幂这种情况 + int n = cap - 1; + // 将n最高1位右边的位都设置为1 + n |= n >>> 1; + n |= n >>> 2; + n |= n >>> 4; + n |= n >>> 8; + n |= n >>> 16; + // 再+1正好就是2的整数幂 + return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? 
MAXIMUM_CAPACITY : n + 1; +} + +final void putMapEntries(Map m, boolean evict) { + // 获取传进来的键值对数量 + int s = m.size(); + if (s > 0) { + // 判断table是否初始化 + if (table == null) { // pre-size + // 用键值对数量除以负载因子倒推桶容量,加上1预防浮点数计算误差,这里还不是真正的桶容量,因为还未向上取最小2的整数幂 + float ft = ((float)s / loadFactor) + 1.0F; + // 桶容量不超过MAXIMUM_CAPACITY + int t = ((ft < (float)MAXIMUM_CAPACITY) ? + (int)ft : MAXIMUM_CAPACITY); + // table没初始化时threshold暂存桶容量,或者threshold为0表示使用默认桶数量,无论哪种情况,这里将通过键值对反推的桶容量取向上最小的2的整数幂赋给threshold,在第一次resize时用来初始化table + if (t > threshold) + threshold = tableSizeFor(t); + } + else if (s > threshold) + // table已经初始化了,但发现键值对数量超过扩容阈值了,那就赶紧先resize,不要等到putVal再resize + resize(); + // 遍历将每个键值对增加到该HashMap中 + for (Map.Entry e : m.entrySet()) { + K key = e.getKey(); + V value = e.getValue(); + putVal(hash(key), key, value, false, evict); + } + } +} +``` + + +#### 桶容量 +``` java +// 这里恰好和构造函数呼应,体现了table延迟初始化前后桶容量是如何保存的 +final int capacity() { + // 1. table若已初始化,当然table.length就是桶容量 + // 2. 若table未初始化,且threshold大于0,对应HashMap前两个构造函数,参数指定了桶容量,暂存在threshold中 + // 3. 若table未初始化,且threshold等于0,对应HashMap第三个构造函数(无参),没有指定桶容量则使用默认桶容量 + return (table != null) ? table.length : + (threshold > 0) ? threshold : + DEFAULT_INITIAL_CAPACITY; +} +``` + + +#### 扩容 +``` java +final Node[] resize() { + Node[] oldTab = table; + int oldCap = (oldTab == null) ? 
0 : oldTab.length; + int oldThr = threshold; + int newCap, newThr = 0; + if (oldCap > 0) { + // oldCap大于0,说明table已经初始化 + if (oldCap >= MAXIMUM_CAPACITY) { + // 桶容量已经达到MAXIMUM_CAPACITY了,再也扩不动了 + // 将threshold设置为Integer.MAX_VALUE,再也不触发resize + threshold = Integer.MAX_VALUE; + return oldTab; + } + // 桶容量扩容一倍 + else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY && + oldCap >= DEFAULT_INITIAL_CAPACITY) + // 只有扩容后容量未达到MAXIMUM_CAPACITY并且扩容前容量不低于DEFAULT_INITIAL_CAPACITY时才将threshold增大一倍 + // 第一个条件类似上面的分支,扩容后如果桶容量达到MAXIMUM_CAPACITY,那么threshold就应该设置为Integer.MAX_VALUE而不是傻傻地增大一倍,这个操作在下面newThr==0的分支中处理 + // 第二个条件是因为当桶容量很小的时候,threshold移位操作带来的小数点上的误差影响非常大,应该由扩容后的桶容量乘以负载因子重新计算,这同样交给下面newThr==0的分支中计算 + newThr = oldThr << 1; // double threshold + } + else if (oldThr > 0) // initial capacity was placed in threshold + // table未初始化但threshold大于0,说明指定容量构造后第一次resize,threshold暂存的就是table初始化的容量,这里正式移交给桶容量变量,threshold本身则由下面newThr==0的分支中计算 + newCap = oldThr; + else { // zero initial threshold signifies using defaults + // table未初始化且threshold等于0,说明无参构造后第一次resize,桶容量使用默认容量,threshold直接由默认容量乘以负载因子计算 + newCap = DEFAULT_INITIAL_CAPACITY; + newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); + } + if (newThr == 0) { + // 计算扩容阈值 + float ft = (float)newCap * loadFactor; + // 如果桶容量达到MAXIMUM_CAPACITY或扩容阈值达到MAXIMUM_CAPACITY,直接将threshold设置为Integer.MAX_VALUE,否则将计算好的阈值赋给threshold + // 之所以还要判断计算的阈值是否达到MAXIMUM_CAPACITY是因为loadFactor是可能大于1的 + newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ? 
+ (int)ft : Integer.MAX_VALUE); + } + threshold = newThr; + @SuppressWarnings({"rawtypes","unchecked"}) + // 用扩容容量构造新table + Node[] newTab = (Node[])new Node[newCap]; + table = newTab; + // 如果table不是第一次初始化,则需要将旧table的键值对迁移到新table中,键值对可能在新table中落到另外一个桶中 + if (oldTab != null) { + for (int j = 0; j < oldCap; ++j) { + Node e; + if ((e = oldTab[j]) != null) { + // 移除旧table中引用在这次循环后尽早回收 + oldTab[j] = null; + // e.next为null说明该桶内只有一个节点 + if (e.next == null) + // 直接将这个节点移到新table中hash对应的索引即可 + newTab[e.hash & (newCap - 1)] = e; + // e.next不为null说明桶内有多个节点,可能是链表也可能是红黑树 + else if (e instanceof TreeNode) + // 该桶是红黑树 + // 很有可能拆成两棵分别迁移到不同的桶 + ((TreeNode)e).split(this, newTab, j, oldCap); + else { // preserve order + // 该桶是链表 + Node loHead = null, loTail = null; + Node hiHead = null, hiTail = null; + Node next; + do { + next = e.next; + // 由于oldCap是2的整数幂,只有唯一的1,e.hash&oldCap得到e.hash相应这一位的信息。如果结果为0,则说明 e.hash % newCap < oldCap,则扩容前后该节点落到相同索引的桶(低索引半区);但如果结果为1,则说明 oldCap <= e.hash % newCap < newCap,扩容后该节点将落在新扩展的高索引半区并且与扩容前桶索引(低索引半区)相差oldCap + if ((e.hash & oldCap) == 0) { + // 通过低索引链表尾节点判断低索引桶是否有节点 + if (loTail == null) + // 低索引桶第一个节点,设置低索引链表头节点 + loHead = e; + else + // 追加到低索引链表尾节点 + loTail.next = e; + loTail = e; + } + else { + // 通过高索引链表尾节点判断高索引桶是否有节点 + if (hiTail == null) + // 高索引桶第一个节点,设置高索引链表头节点 + hiHead = e; + else + // 追加到高索引链表尾节点 + hiTail.next = e; + hiTail = e; + } + } while ((e = next) != null); + if (loTail != null) { + loTail.next = null; + // 设置低索引桶 + newTab[j] = loHead; + } + if (hiTail != null) { + hiTail.next = null; + // 设置高索引桶 + newTab[j + oldCap] = hiHead; + } + } + } + } + } + return newTab; +} +``` + +#### 键值对总数 +``` java +public int size() { + return size; +} + +public boolean isEmpty() { + return size == 0; +} +``` + + +#### 增/改键值对 +``` java +public V put(K key, V value) { + return putVal(hash(key), key, value, false, true); +} + +final V putVal(int hash, K key, V value, boolean onlyIfAbsent, + boolean evict) { + Node[] tab; Node p; int n, i; + // 判断table是否初始化 
+ if ((tab = table) == null || (n = tab.length) == 0) + // 触发resize初始化table + n = (tab = resize()).length; + // 判断所属桶是否有节点 + if ((p = tab[i = (n - 1) & hash]) == null) + // 没有节点则直接创建链表头节点 + tab[i] = newNode(hash, key, value, null); + else { + // 该桶已经有节点 + Node e; K k; + // 判断头/根节点是否是要找的key + if (p.hash == hash && + ((k = p.key) == key || (key != null && key.equals(k)))) + e = p; + // 判断是否是红黑树根节点 + else if (p instanceof TreeNode) + e = ((TreeNode)p).putTreeVal(this, tab, hash, key, value); + else { + // 链表有多个节点且头节点不是要找的key,则向下遍历链表 + for (int binCount = 0; ; ++binCount) { + if ((e = p.next) == null) { + // 遍历了一圈没有找到key,则说明需要新增一个键值对 + p.next = newNode(hash, key, value, null); + // 判断该桶内链表节点数量是否达到转化红黑树阈值TREEIFY_THRESHOLD,这里用TREEIFY_THRESHOLD - 1是因为头节点已经在循环前遍历过了 + if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st + // 达到转化红黑树的阈值,转化红黑树 + treeifyBin(tab, hash); + break; + } + // 判断该节点是否是要找的key + if (e.hash == hash && + ((k = e.key) == key || (key != null && key.equals(k)))) + break; + p = e; + } + } + // 如果找到了key,则说明是一个值更新操作 + if (e != null) { // existing mapping for key + V oldValue = e.value; + if (!onlyIfAbsent || oldValue == null) + e.value = value; + afterNodeAccess(e); + return oldValue; + } + } + ++modCount; + // 递增键值对总数size,判断结果是否超过扩容阈值threshold + if (++size > threshold) + // 超过threshold,调用resize扩容 + resize(); + afterNodeInsertion(evict); + return null; +} + +public void putAll(Map m) { + putMapEntries(m, true); +} +``` + + +#### 转化红黑树 +``` java +final void treeifyBin(Node[] tab, int hash) { + int n, index; Node e; + // 如果桶容量小于MIN_TREEIFY_CAPACITY,说明桶数量还太少,则扩容而不是转化红黑树 + if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) + resize(); + else if ((e = tab[index = (n - 1) & hash]) != null) { + TreeNode hd = null, tl = null; + do { + TreeNode p = replacementTreeNode(e, null); + if (tl == null) + hd = p; + else { + p.prev = tl; + tl.next = p; + } + tl = p; + } while ((e = e.next) != null); + if ((tab[index] = hd) != null) + hd.treeify(tab); + } +} +``` + + 
+#### 查找 +``` java +public V get(Object key) { + Node e; + return (e = getNode(hash(key), key)) == null ? null : e.value; +} + +final Node getNode(int hash, Object key) { + Node[] tab; Node first, e; int n; K k; + // 判断table是否初始化以及hash对应的桶是否有节点 + if ((tab = table) != null && (n = tab.length) > 0 && + (first = tab[(n - 1) & hash]) != null) { + // 判断头/根节点是否是要找的key + if (first.hash == hash && // always check first node + ((k = first.key) == key || (key != null && key.equals(k)))) + return first; + // 如果头/根节点不是要找的key,且桶内还有其他节点 + if ((e = first.next) != null) { + // 判断是否是红黑树,如是则在红黑树内查找key + if (first instanceof TreeNode) + return ((TreeNode)first).getTreeNode(hash, key); + // 否则遍历链表 + do { + if (e.hash == hash && + ((k = e.key) == key || (key != null && key.equals(k)))) + return e; + } while ((e = e.next) != null); + } + } + // 没找到key + return null; +} +``` + + +#### 删除 +``` java +public V remove(Object key) { + Node e; + return (e = removeNode(hash(key), key, null, false, true)) == null ? + null : e.value; +} + +final Node removeNode(int hash, Object key, Object value, + boolean matchValue, boolean movable) { + Node[] tab; Node p; int n, index; + // 与getNode类似,查找key的节点 + if ((tab = table) != null && (n = tab.length) > 0 && + (p = tab[index = (n - 1) & hash]) != null) { + Node node = null, e; K k; V v; + if (p.hash == hash && + ((k = p.key) == key || (key != null && key.equals(k)))) + node = p; + else if ((e = p.next) != null) { + if (p instanceof TreeNode) + node = ((TreeNode)p).getTreeNode(hash, key); + else { + do { + if (e.hash == hash && + ((k = e.key) == key || + (key != null && key.equals(k)))) { + node = e; + break; + } + p = e; + } while ((e = e.next) != null); + } + } + // 判断节点是否找到且满足值匹配条件(如果开启值匹配选项) + if (node != null && (!matchValue || (v = node.value) == value || (value != null && value.equals(v)))) { + // 判断是否是红黑树节点 + if (node instanceof TreeNode) + // 移除红黑树节点 + ((TreeNode)node).removeTreeNode(this, tab, movable); + // 判断是否是链表头节点 + else if (node == p) + // 
无论是下一个节点还是null,都直接赋给桶 + tab[index] = node.next; + else + // 前驱节点的next直接指向后继节点,即从链表中删除当前节点 + p.next = node.next; + ++modCount; + // 递减键值对总数 + --size; + afterNodeRemoval(node); + return node; + } + } + // 没找到key + return null; +} + +public void clear() { + Node[] tab; + modCount++; + if ((tab = table) != null && size > 0) { + size = 0; + // 清除所有桶对头/根节点的引用 + for (int i = 0; i < tab.length; ++i) + tab[i] = null; + } +} +```