Arraylist

底层数据结构

1	transient Object[] elementData; //元素数据底层数组实现

默认构造函数

public MyArrayList(){
        //数组默认大小
        elementData = new Object[10];
        size = 0;
    }

简单实现增删改查方法

// add方法
public void add(T t){
    elementData[size++] = t;
}

// get方法
public Object get(int index){

    if(index > size){
        throw new RuntimeException("数组没有"+index+"这个元素");
    }

    return elementData[index];
}

// remove
public void remove(int moveNum){
    // 获取删除元素后的元素数量
    int lastes = size - moveNum - 1;

    // 核心将删除索引后的元素赋值到前面
    if(moveNum > 0){
        // 删除索引的后一位开始复制数组到原数组的删除索引，复制大小为lastes
        System.arraycopy(elementData,moveNum+1,elementData,moveNum,lastes);
    }
}

// set
public String set(int index,String newValue){
    // 判断索引是否存在
   if(index >= size){
       throw new RuntimeException("索引越界");
   }
   String oldValue = (String)elementData[index];
   elementData[index] = newValue;
   return oldValue;
}

扩容核心

private int newCapacity(int minCapacity) {
    int oldCapacity = this.elementData.length;
    // 新容量为老容量的1.5倍
    int newCapacity = oldCapacity + (oldCapacity >> 1);
    if (newCapacity - minCapacity <= 0) {
        if (this.elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA) {
            return Math.max(10, minCapacity);
        } else if (minCapacity < 0) {
            throw new OutOfMemoryError();
        } else {
            return minCapacity;
        }
    } else {
    	// 新容量大于最大值时的处理
        return newCapacity - 2147483639 <= 0 ? newCapacity : hugeCapacity(minCapacity);
    }
}

elementData 用transient 修饰，序列化后数据会丢失吗

private void writeObject(java.io.ObjectOutputStream s)
        throws java.io.IOException {
        // Write out element count, and any hidden stuff
        int expectedModCount = modCount;
        s.defaultWriteObject();

        // Write out size as capacity for behavioral compatibility with clone()
        s.writeInt(size);

        // Write out all elements in the proper order.
        for (int i=0; i<size; i++) {
            s.writeObject(elementData[i]);
        }

        if (modCount != expectedModCount) {
            throw new ConcurrentModificationException();
        }
    }

不对elementData序列化，对elementData中的元素进行循环，取出来单独进行序列化。

为什么不直接对数组序列化

一般情况下数组是没有存满的，直接序列化会造成空间浪费

与Vector的比较

与Arraylist相似，除了添加了synchronized保证线成安全，扩容自定义

// 在add方法上添加了synchronizated
public synchronized boolean add(E e) {
    ++this.modCount;
    this.add(e, this.elementData, this.elementCount);
    return true;
}
// 扩容 用户可在初始化阶段指定capacityIncrement的大小
// 没指定就扩容原来的2倍
int newCapacity = oldCapacity + (this.capacityIncrement > 0 ? this.capacityIncrement : oldCapacity);

LinkedList

底层数据结构

// 被transient修饰的成员变量不参与序列化过程。
transient int size;
transient LinkedList.Node<E> first; // 记录第一个节点
transient LinkedList.Node<E> last;  // 记录最后一个节点
// 构造函数
private static class Node<E> {
    E item;
    // 底层为双向链表
    LinkedList.Node<E> next;
    LinkedList.Node<E> prev;

    Node(LinkedList.Node<E> prev, E element, LinkedList.Node<E> next) {
        this.item = element;
        this.next = next;
        this.prev = prev;
    }
}

简单实现增删改查方法

//add
public void add(E e){
    Node l = last; // 保存最后节点
    MyLinkedList.Node<E> newNode = new MyLinkedList.Node(l, e, null);
    this.last = newNode;
    if(l == null) // 集合中没有数据
        first = newNode;
    else
        l.next = newNode;
    size++;
}
//set
public E set(int index, E element) {
    // 判断index的合理性
    if(0 >= index || index >= size){
        throw new RuntimeException("不存元素");
    }
    MyLinkedList.Node<E> x = this.get(index); // 获取当前索引节点
    E oldVal = x.item;
    x.item = element;
    return oldVal;
}
// get
public MyLinkedList.Node<E> get(int index){
    // 判断index的合理性
    if(0 >= index || index >= size){
        throw new RuntimeException("不存元素");
    }

    MyLinkedList.Node x;
    int i;
    if(index < size >> 1){ //判断索引在前半部分还是后半部分
        x = this.first;
        for(i = 0; i < index; i++){
            x = x.next;
        }
    }else{
        x = this.last;
        for(i = size; i > index; i--){
            x = x.prev;
        }
    }
    return x;
}
// remove
public E remove(int index){
    // 判断index的合理性
    if(0 >= index || index >= size){
        throw new RuntimeException("不存元素");
    }
    // 通过get方法得到该节点
    Node<E> x = get(index);
    // 保存该节点的信息
    E element = x.item;
    MyLinkedList.Node<E> next = x.next;
    MyLinkedList.Node<E> prev = x.prev;

    if(prev == null){
        this.first = next;
    }else{
    	// 让当前节点的前一个节点的下一个节点指向当前节点的下一个节点
    	// 1 <-> 2 <-> 3 删除2
    	// 1 -> 3
        prev.next = next;
        x.prev = null;
    }

    if(next == null){
        this.last = prev;
    }else{
    	// 让当前节点的后一个节点的前一个节点指向当前节点的前一个节点
    	// 1 <-> 2 <-> 3 删除2
    	// 1 <- 3
        next.prev = prev;
        x.next = null;
    }

    x.item = null;
    --this.size;
    return element;
}

HashMap

底层数据结构

//底层是 数组和链表和红黑树 结合在一起使用也就是 链表散列。
static class Node<K,V> implements Map.Entry<K,V> {
        final int hash;
        final K key;
        V value;
        Node<K,V> next;

        Node(int hash, K key, V value, Node<K,V> next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }
        // get set equals方法

KEY的hash过程

//   首先取 key值进行 hashcode 算出hashcode赋值给 h ， 在用自己的底16位和自己的高16位进行异或
static final int hash(Object key) {
        int h;
       // 扰动机制，table的长度一般都小于2^16，所有让高16位参与运算会让hash值更散
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
// 在 put 元素时进行了 与操作
n = (tab = resize()).length; // hash表长度
if ((p = tab[i = (n - 1) & hash]) == null)

这里可以体现为什么HashMap的长度建议为2的幂次

当数组长度为 2 的幂次是 length - 1就刚好所有的二进制全为 1

不是2的幂次，则算出的length - 1就会存在一些位置为 0，而0进行&操作，就一定为0，增加了hash冲突概率

jdk如何保证刚好是2的幂次
1
2
3
4
5
6
7
8
9
static final int tableSizeFor(int cap) {
        int n = cap - 1;
        n |= n >>> 1;
        n |= n >>> 2;
        n |= n >>> 4;
        n |= n >>> 8;
        n |= n >>> 16;
        return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
    }
n 如果是正整数，那么最高位肯定是 1

n或上无符号右移1位，则不管n的第二位是0，1都会变成1，现在就最高位和次位都为1

n或上无符号右移2位,就是将第一步前两个1与第3，4个或变成1，现在就是前4位为1

。。。。。。。

为什么要cap - 1；

因为如果n 刚好是2的整数幂，则会将n变成2^(n + 1);

扩容过程

// 成员变量含义
	static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16 默认容量
    static final int MAXIMUM_CAPACITY = 1 << 30; // 最大容量
    static final float DEFAULT_LOAD_FACTOR = 0.75f; // 默认负载因子
    static final int TREEIFY_THRESHOLD = 8; // 树化的门限
    static final int UNTREEIFY_THRESHOLD = 6; // 解除树化的门限
// 树化时table的最小容量，当hashmap中不超过64个元素，即使hash冲突超过树化门限时也不会树化，而是进行扩容
    static final int MIN_TREEIFY_CAPACITY = 64; 
// 关键构造函数
public HashMap(int initialCapacity, float loadFactor) {
        if (initialCapacity < 0)
            throw new IllegalArgumentException("Illegal initial capacity: " +
                                               initialCapacity);
        if (initialCapacity > MAXIMUM_CAPACITY)
            initialCapacity = MAXIMUM_CAPACITY;
        if (loadFactor <= 0 || Float.isNaN(loadFactor))
            throw new IllegalArgumentException("Illegal load factor: " +
                                               loadFactor);
        this.loadFactor = loadFactor; // 默认或者指定的 负载因子
        this.threshold = tableSizeFor(initialCapacity); // 保证容量一定为 2 的幂次
}

负载因子越大则散列表的装填程度越高也就是能容纳更多的元素，元素多了，链表大了，所以此时索引效率就会降低。

负载因子越小则链表中的数据量就越稀疏，此时会对空间造成浪费，但是此时索引效率高。

扩容大小

final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        Node<K,V>[] tab; Node<K,V> p; int n, i;
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length;
        if ((p = tab[i = (n - 1) & hash]) == null)
            tab[i] = newNode(hash, key, value, null);
        else {
            Node<K,V> e; K k;
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                e = p;
            else if (p instanceof TreeNode)
                // 如果当前节点是 树 节点就按照树节点插入数据
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {
                // 否则就为 链表节点 ，按照尾插法插入数据
                //JDK1.8的链表插入元素为什么改为了尾插法，
                //则是为了避免出现逆序且链表死循环的问题（JDK1.7的HashMap扩容导致死循环）
                // 。。。省略实现
        }
        ++modCount; // 修改次数
        if (++size > threshold) // 先 put 元素在 扩容
            resize();
        afterNodeInsertion(evict);
        return null;
    }
// ---------------------------扩容实现核心---------------
final Node<K,V>[] resize() {
        Node<K,V>[] oldTab = table;
    	// 记录扩容前的数组长度 和 容量
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        int oldThr = threshold;
        int newCap, newThr = 0;
        if (oldCap > 0) {
            // 超过最大值无能为力
            if (oldCap >= MAXIMUM_CAPACITY) {
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)
                newThr = oldThr << 1; // 扩容为原来的两倍
        }
        // 。。。。。。
    	// 跟新容量和长度
        threshold = newThr; 
        @SuppressWarnings({"rawtypes","unchecked"})
        Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
        table = newTab;
    
        if (oldTab != null) {
            // 遍历老数组 
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    // 当前索引只有一个元素 ， 直接重新计算Hash下标索引
                    oldTab[j] = null;
                    if (e.next == null)
                        newTab[e.hash & (newCap - 1)] = e;
                    
                    else if (e instanceof TreeNode)
                        // 树结构处理
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    else { 
                        // 链表结构处理
                        // 循环链表中的元素，计算是新索引还是老索引
                    }
                }
            }
        }
        return newTab;
    }
 // -------------------------------红黑树扩容情况--------------------
     final void split(HashMap<K,V> map, Node<K,V>[] tab, int index, int bit) {
            TreeNode<K,V> b = this; // 调用方法的原始节点
            // 扩容就两位置 原索引 原索引+oldCap
            TreeNode<K,V> loHead = null, loTail = null;
            TreeNode<K,V> hiHead = null, hiTail = null;
            int lc = 0, hc = 0;
            // 遍历整棵树
            for (TreeNode<K,V> e = b, next; e != null; e = next) {
               // 拿到所有节点 重新计算hash
                //.....
                if (hiHead != null) {
                    if (hc <= UNTREEIFY_THRESHOLD) // 节点个数 <= 6改成链表
                        tab[index + bit] = hiHead.untreeify(map);
                    else {
                    	tab[index + bit] = hiHead;
                    if (loHead != null)
                        // 重新构建红黑树
                        hiHead.treeify(tab);
                }
            }
            }
          
        }

HashTable和HashMap区别

继承的父类不同 Hashtable继承自Dictionary类，而HashMap继承自AbstractMap类。
线程安全性不同

Hashtable 中的方法是Synchronize的，而HashMap中的方法在缺省情况下是非Synchronize的。

key和value是否允许null值

#################### hashMap #####################
       static final int hash(Object key) { //自定义的hash算法
           int h;
           return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
       }
#################### hashtable #####################
       int hash = key.hashCode();  //直接用object提供的hashcode方法,所以key不能为null
       int index = (hash & 0x7FFFFFFF) % tab.length;
	if (value == null) { // 在put元素是也判断了value
           throw new NullPointerException();
       }

内部实现使用的数组初始化和扩容方式不同

Hashtable扩容时，将容量变为原来的2倍加1，而HashMap扩容时，将容量变为原来的2倍。 Hashtable和HashMap它们两个内部实现方式的数组的初始大小和扩容的方式。HashTable中hash数组默认大小是11，增加的方式是 old*2+1。

HashSet

底层数据结构

// 底层数据为hashMap
private transient HashMap<E, Object> map;
// HashSet的所有元素的Value值
private static final Object PRESENT = new Object();
// 去重的原因就是 判断插入元素返回的值是否为null
public boolean add(E e) {
    // map在 put相同元素时产生Hash冲突，返回被覆盖的元素值
        return map.put(e, PRESENT)==null;
}

ConcurrentHashMap

核心构造函数

public ConcurrentHashMap(int initialCapacity,
                             float loadFactor, 
                         int concurrencyLevel) { // 并发级别
        if (!(loadFactor > 0.0f) || initialCapacity < 0 || concurrencyLevel <= 0)
            throw new IllegalArgumentException();
        if (initialCapacity < concurrencyLevel)   // Use at least as many bins
            initialCapacity = concurrencyLevel;   // as estimated threads
        long size = (long)(1.0 + (long)initialCapacity / loadFactor);
        int cap = (size >= (long)MAXIMUM_CAPACITY) ?
            MAXIMUM_CAPACITY : tableSizeFor((int)size);
        this.sizeCtl = cap;
    }

concurrencyLevel：并行级别、并发数、Segment 数，怎么翻译不重要，理解它。默认是 16，也就是说 ConcurrentHashMap 有 16 个 Segments，所以理论上，这个时候，最多可以同时支持 16 个线程并发写，只要它们的操作分别分布在不同的 Segment 上。这个值可以在初始化的时候设置为其他值，但是一旦初始化以后，它是不可以扩容的。

currenthashmap是如何保证并发的

JDK8中彻底放弃了Segment转而采用的是Node，其设计思想也不再是JDK1.7中的分段锁思想。

Node：保存key，value及key的hash值的数据结构。其中value和next都用volatile修饰，保证并发的可见性。

Java8 ConcurrentHashMap结构基本上和Java8的HashMap一样，不过保证线程安全性。

// 在高版本的JDK中，抛弃了桶设计，大量使用了 CAS + Volatile + Synchronized来实现并发安全
final V putVal(K key, V value, boolean onlyIfAbsent) {
        if (key == null || value == null) throw new NullPointerException();
        int hash = spread(key.hashCode());
        int binCount = 0;
        for (Node<K,V>[] tab = table;;) {
            Node<K,V> f; int n, i, fh; K fk; V fv;
            if (tab == null || (n = tab.length) == 0)
                tab = initTable();
            else if ((f = tabAt(tab, i = (n - 1) & hash)) == null) {
                if (casTabAt(tab, i, null, new Node<K,V>(hash, key, value)))
                    break;                   // no lock when adding to empty bin
            }
            else if ((fh = f.hash) == MOVED)
                tab = helpTransfer(tab, f);
            else if (onlyIfAbsent // check first node without acquiring lock
                     && fh == hash
                     && ((fk = f.key) == key || (fk != null && key.equals(fk)))
                     && (fv = f.val) != null)
                return fv;
            else {
                // 如果元素 CAS 插入失败 则采用 Synchronized 锁
                V oldVal = null;
                synchronized (f) {
                    // 。。。。。。
                }
                if (binCount != 0) {
                    if (binCount >= TREEIFY_THRESHOLD)
                        treeifyBin(tab, i);
                    if (oldVal != null)
                        return oldVal;
                    break;
                }
            }
        }
        addCount(1L, binCount); // 容量加一也用到了CAS
        return null;
    }

区别总结

特性/集合类	HashMap	Hashtable	ConcurrentHashMap
线程安全	否	是，基于方法锁	是，基于分段锁
继承关系	AbstractMap	Dictionary	AbstractMap，ConcurrentMap
允许null值	K-V都允许	K-V都不允许	K-V都不允许
默认初始容量	16	11	16
默认加载因子	0.75	0.75	0.75
扩容后容量	原来的两倍	原来的两倍加1	原来的两倍
是否支持fail-fast	支持	不支持	fail-safe

ConcurrentSkipListMap

ConcurrentSkipListMap是线程安全的有序的哈希表，适用于高并发的场景。
ConcurrentSkipListMap 和TreeMap，它们虽然都是有序的哈希表。也有不同点

第一，它们的线程安全机制不同，TreeMap是非线程安全的，而ConcurrentSkipListMap是线程安全的。

第二，ConcurrentSkipListMap是通过跳表实现的，而TreeMap是通过红黑树实现的。

// 源码的注释     
 * Here's the sequence of events for a deletion of node n with
    * predecessor b and successor f, initially:
    *
    *        +------+       +------+      +------+
    *   ...  |   b  |------>|   n  |----->|   f  | ...
    *        +------+       +------+      +------+
    *
    * 1. CAS n's value field from non-null to null.
    *    Traversals encountering a node with null value ignore it.
    *    However, ongoing insertions and deletions might still modify
    *    n's next pointer.
    *
    * 2. CAS n's next pointer to point to a new marker node.
    *    From this point on, no other nodes can be appended to n.
    *    which avoids deletion errors in CAS-based linked lists.
    *
    *        +------+       +------+      +------+       +------+
    *   ...  |   b  |------>|   n  |----->|marker|------>|   f  | ...
    *        +------+       +------+      +------+       +------+
    *
    * 3. CAS b's next pointer over both n and its marker.
    *    From this point on, no new traversals will encounter n,
    *    and it can eventually be GCed.
    *        +------+                                    +------+
    *   ...  |   b  |----------------------------------->|   f  | ...
    *        +------+                                    +------+

插入和删除操作都是利用CAS进行，如果过程中发生竞争，当前线程将重试操作，直到成功为止。

ConcurrentSkipListMap内部采用了SkipList数据结构实现，使用三个内部类来构建这样的结构：

Node、Index、HeadIndex

* Head nodes          Index nodes
* +-+    right        +-+                      +-+
* |2|---------------->| |--------------------->| |->null
* +-+                 +-+                      +-+
*  | down              |                        |
*  v                   v                        v
* +-+            +-+  +-+       +-+            +-+       +-+
* |1|----------->| |->| |------>| |----------->| |------>| |->null
* +-+            +-+  +-+       +-+            +-+       +-+
*  v              |    |         |              |         |
* Nodes  next     v    v         v              v         v
* +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+
* | |->|A|->|B|->|C|->|D|->|E|->|F|->|G|->|H|->|I|->|J|->|K|->null
* +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+  +-+

index nodes 和 head nodes 基于Node节点的索引，提供向下和向右的索引
head nodes 比 Index nodes 多了一个level来表示层级

LinkedHashMap实现LRU

介绍LinkedHashMap维护的两种顺序，插入顺序和访问顺序

 LinkedHashMap<String, String> linkedHashMap = new LinkedHashMap<>(16,0.75f,true);;
        linkedHashMap.put("f", "1");
        linkedHashMap.put("s", "2");
        linkedHashMap.put("t", "3");

        System.out.println("插入顺序：");
        for (Map.Entry<String, String> item : linkedHashMap.entrySet()) {
            System.out.println(item.getKey() + ": =>" + item.getValue());
        }

        System.out.println("访问顺序：");
        for (Map.Entry<String, String> item : linkedHashMap.entrySet()) {
            System.out.println(item.getKey() + ": =>" + item.getValue());
        }
}

插入顺序：
f: =>1
s: =>2
t: =>3
访问顺序：
f: =>1
s: =>2
t: =>3

LinkedHashMap<String, String> linkedHashMap = new LinkedHashMap<>(16,0.75f,true);
       linkedHashMap.put("f", "1");
       linkedHashMap.put("s", "2");
       linkedHashMap.put("t", "3");

       System.out.println("插入顺序：");
       for (Map.Entry<String, String> item : linkedHashMap.entrySet()) {
           System.out.println(item.getKey() + ": =>" + item.getValue());
       }

       linkedHashMap.get("s");
       System.out.println("访问顺序：");
       for (Map.Entry<String, String> item : linkedHashMap.entrySet()) {
           System.out.println(item.getKey() + ": =>" + item.getValue());
       }

插入顺序：
f: =>1
s: =>2
t: =>3
访问顺序：
f: =>1
t: =>3
s: =>2

实现代码

public class LRULinkedHashMap<K,V> extends LinkedHashMap<K,V> {

    private int cap; // 缓存容量

    LRULinkedHashMap(int cap){
        super(16,0.75f,true);
        this.cap = cap;
    }

    @Override // 集合容量大于缓存，就删除顶端元素
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > cap;
    }
}