java容器类的基本类型包括List、Set和Map。它们可以自动调节自身的尺寸,因此和数组不同,在编程时可以将任意数量的对象放置到容器中,并且不需要担心容器应该设置为多大。本文源码的JDK版本为"1.8.0_131"
一、List
有两种类型的list:ArrayList和LinkedList。它们之间区别如下:
1、ArrayList是基于数组实现的,LinkedList是基于链表实现的。
2、对于随机访问get和set,ArrayList有速度优势,LinkedList需要遍历访问。
3、对于新增和删除操作add和remove,LinkedList有优势,因为ArrayList有可能要大量移动数据。
具体来说,ArrayList的主要实现源码为:
1 private static final int DEFAULT_CAPACITY = 10; //Default initial capacity 2 transient Object[] elementData; //The array buffer into which the elements of the ArrayList are stored. 3 private int size; //The size of the ArrayList (the number of elements it contains).
//如果ArrayList初始化时不指定大小,那么它的默认大小为10
主要实现方法有:
1 //Returns the number of elements in this list. 2 public int size() { 3 return size; 4 } 5 6 public boolean isEmpty() { 7 return size == 0; 8 } 9 10 //get 根据数组下标直接访问 11 public E get(int index) { 12 rangeCheck(index); 13 return elementData(index); 14 } 15 E elementData(int index) { 16 return (E) elementData[index]; 17 } 18 19 //set 根据数组下表直接访问并修改 20 public E set(int index, E element) { 21 rangeCheck(index); 22 23 E oldValue = elementData(index); 24 elementData[index] = element; 25 return oldValue; 26 } 27 28 //add 会先检查需不需要扩容,若需要,先扩容,在插入到数组最后一位 29 public boolean add(E e) { 30 ensureCapacityInternal(size + 1); // Increments modCount!! 31 elementData[size++] = e; 32 return true; 33 } 34 //ensureCapacityInternal这个函数最后会传到grow函数里面去 35 //显然,在ArrayList中,每次扩容的幅度是原来大小的一半 36 private void grow(int minCapacity) { 37 // overflow-conscious code 38 int oldCapacity = elementData.length; 39 int newCapacity = oldCapacity + (oldCapacity >> 1); 40 if (newCapacity - minCapacity < 0) 41 newCapacity = minCapacity; 42 if (newCapacity - MAX_ARRAY_SIZE > 0) 43 newCapacity = hugeCapacity(minCapacity); 44 // minCapacity is usually close to size, so this is a win: 45 elementData = Arrays.copyOf(elementData, newCapacity); 46 } 47 48 //remove的操作 49 //如果ArrayList想要移除指定位置的数据,则需要将这个数据之后的所有数据往前移一位,在删除掉最后一个数据 50 public E remove(int index) { 51 rangeCheck(index); 52 53 modCount++; 54 E oldValue = elementData(index); 55 56 int numMoved = size - index - 1; 57 if (numMoved > 0) 58 System.arraycopy(elementData, index+1, elementData, index, 59 numMoved); 60 elementData[--size] = null; // clear to let GC do its work 61 62 return oldValue; 63 }
LinkedList的主要实现源码为:
1 transient int size = 0; 2 3 /** 4 * Pointer to first node. 5 * Invariant: (first == null && last == null) || 6 * (first.prev == null && first.item != null) 7 */ 8 transient Node<E> first; 9 10 /** 11 * Pointer to last node. 12 * Invariant: (first == null && last == null) || 13 * (last.next == null && last.item != null) 14 */ 15 transient Node<E> last; 16 17 // Constructs an empty list. 18 public LinkedList() { 19 } 21 22 /* Constructs a list containing the elements of the specified 23 * collection, in the order they are returned by the collection's 24 * iterator.28 */ 29 public LinkedList(Collection<? extends E> c) { 30 this(); 31 addAll(c); 32 }
其中LinkedList中的每个结点的数据结构为:
1 private static class Node<E> { 2 E item; 3 Node<E> next; 4 Node<E> prev; 5 6 Node(Node<E> prev, E element, Node<E> next) { 7 this.item = element; 8 this.next = next; 9 this.prev = prev; 10 } 11 }
主要实现方法有:
1 //判断是否包含某个数据,需要从头到位的遍历,效率不如基于数组的ArrayList 2 public boolean contains(Object o) { 3 return indexOf(o) != -1; 4 } 5 public int indexOf(Object o) { 6 int index = 0; 7 if (o == null) { 8 for (Node<E> x = first; x != null; x = x.next) { 9 if (x.item == null) 10 return index; 11 index++; 12 } 13 } else { 14 for (Node<E> x = first; x != null; x = x.next) { 15 if (o.equals(x.item)) 16 return index; 17 index++; 18 } 19 } 20 return -1; 21 } 22 23 //若要在LinkedList中add一个数据,则只需在链表末尾添加即可,不用考虑扩容操作,因此效率可能会较高 24 public boolean add(E e) { 25 linkLast(e); 26 return true; 27 } 28 void linkLast(E e) { 29 final Node<E> l = last; 30 final Node<E> newNode = new Node<>(l, e, null); 31 last = newNode; 32 if (l == null) 33 first = newNode; 34 else 35 l.next = newNode; 36 size++; 37 modCount++; 38 }
LinkedList中无论是get()方法还是set()方法,都是基于遍历的,因此效率较低:
1 public E get(int index) { 2 checkElementIndex(index); 3 return node(index).item; 4 } 5 6 public E set(int index, E element) { 7 checkElementIndex(index); 8 Node<E> x = node(index); 9 E oldVal = x.item; 10 x.item = element; 11 return oldVal; 12 } 13 14 Node<E> node(int index) { 15 // assert isElementIndex(index); 16 17 if (index < (size >> 1)) { 18 Node<E> x = first; 19 for (int i = 0; i < index; i++) 20 x = x.next; 21 return x; 22 } else { 23 Node<E> x = last; 24 for (int i = size - 1; i > index; i--) 25 x = x.prev; 26 return x; 27 } 28 }
总结,当进行添加删除操作时,由于不需要扩容和整体移动的操作,因此LinkedList效率比ArrayList高。而在进行查询和修改操作时,由于数组可以直接利用下标进行定位,因此ArrayList效率比LinkedList高。
二、Map
Map可以将一个对象映射到另一个对象上。注意,Map本身只是一个抽象的接口,因此不能直接实例化一个Map对象。hashmap本身是一个用拉链法实现的哈希表,这个表的结构如下图所示:
HashMap的主要实现代码为:
1 //哈希表中的存储数组 2 transient Node<K,V>[] table; 3 4 //哈希表中的存储节点 5 static class Node<K,V> implements Map.Entry<K,V> { 6 final int hash; 7 final K key; 8 V value; 9 Node<K,V> next; 10 11 Node(int hash, K key, V value, Node<K,V> next) { 12 this.hash = hash; 13 this.key = key; 14 this.value = value; 15 this.next = next; 16 } 17 18 public final K getKey() { return key; } 19 public final V getValue() { return value; } 20 public final String toString() { return key + "=" + value; } 21 22 public final int hashCode() { 23 return Objects.hashCode(key) ^ Objects.hashCode(value); 24 } 25 26 public final V setValue(V newValue) { 27 V oldValue = value; 28 value = newValue; 29 return oldValue; 30 } 31 32 public final boolean equals(Object o) { 33 if (o == this) 34 return true; 35 if (o instanceof Map.Entry) { 36 Map.Entry<?,?> e = (Map.Entry<?,?>)o; 37 if (Objects.equals(key, e.getKey()) && 38 Objects.equals(value, e.getValue())) 39 return true; 40 } 41 return false; 42 } 43 }
HashMap的主要实现的方法有:
1 //HashMap中get()方法的过程就是先根据key的hash值寻找数据存储在数组的哪个位置,一个一个遍历直至找到数据或者返回null 2 public V get(Object key) { 3 Node<K,V> e; 4 return (e = getNode(hash(key), key)) == null ? null : e.value; 5 } 6 7 final Node<K,V> getNode(int hash, Object key) { 8 Node<K,V>[] tab; Node<K,V> first, e; int n; K k; 9 if ((tab = table) != null && (n = tab.length) > 0 && 10 (first = tab[(n - 1) & hash]) != null) { 11 if (first.hash == hash && // always check first node 12 ((k = first.key) == key || (key != null && key.equals(k)))) 13 return first; 14 if ((e = first.next) != null) { 15 if (first instanceof TreeNode) 16 return ((TreeNode<K,V>)first).getTreeNode(hash, key); 17 do { 18 if (e.hash == hash && 19 ((k = e.key) == key || (key != null && key.equals(k)))) 20 return e; 21 } while ((e = e.next) != null); 22 } 23 } 24 return null; 25 } 26 27 //remove的过程 28 public V remove(Object key) { 29 Node<K,V> e; 30 return (e = removeNode(hash(key), key, null, false, true)) == null ? 31 null : e.value; 32 } 33 34 final Node<K,V> removeNode(int hash, Object key, Object value, 35 boolean matchValue, boolean movable) { 36 Node<K,V>[] tab; Node<K,V> p; int n, index; 37 if ((tab = table) != null && (n = tab.length) > 0 && 38 (p = tab[index = (n - 1) & hash]) != null) { 39 Node<K,V> node = null, e; K k; V v; 40 if (p.hash == hash && 41 ((k = p.key) == key || (key != null && key.equals(k)))) 42 node = p; 43 else if ((e = p.next) != null) { 44 if (p instanceof TreeNode) 45 node = ((TreeNode<K,V>)p).getTreeNode(hash, key); 46 else { 47 do { 48 if (e.hash == hash && 49 ((k = e.key) == key || 50 (key != null && key.equals(k)))) { 51 node = e; 52 break; 53 } 54 p = e; 55 } while ((e = e.next) != null); 56 } 57 } 58 if (node != null && (!matchValue || (v = node.value) == value || 59 (value != null && value.equals(v)))) { 60 if (node instanceof TreeNode) 61 ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable); 62 else if (node == p) 63 tab[index] = node.next; 64 else 65 p.next = node.next; 66 ++modCount; 67 --size; 68 afterNodeRemoval(node); 69 return node; 70 } 71 } 72 return null; 73 }
HashMap的put()方法:
1 //HashMap的put()方法不能插入重复的元素,若重复,则替换 2 public V put(K key, V value) { 3 return putVal(hash(key), key, value, false, true); 4 } 5 6 final V putVal(int hash, K key, V value, boolean onlyIfAbsent, 7 boolean evict) { 8 Node<K,V>[] tab; Node<K,V> p; int n, i; 9 if ((tab = table) == null || (n = tab.length) == 0) 10 n = (tab = resize()).length; 11 if ((p = tab[i = (n - 1) & hash]) == null) 12 tab[i] = newNode(hash, key, value, null); 13 else { 14 Node<K,V> e; K k; 15 if (p.hash == hash && 16 ((k = p.key) == key || (key != null && key.equals(k)))) 17 e = p; 18 else if (p instanceof TreeNode) 19 e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value); 20 else { 21 for (int binCount = 0; ; ++binCount) { 22 if ((e = p.next) == null) { 23 p.next = newNode(hash, key, value, null); 24 if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st 25 treeifyBin(tab, hash); 26 break; 27 } 28 if (e.hash == hash && 29 ((k = e.key) == key || (key != null && key.equals(k)))) 30 break; 31 p = e; 32 } 33 } 34 if (e != null) { // existing mapping for key 35 V oldValue = e.value; 36 if (!onlyIfAbsent || oldValue == null) 37 e.value = value; 38 afterNodeAccess(e); 39 return oldValue; 40 } 41 } 42 ++modCount; 43 if (++size > threshold) 44 resize(); 45 afterNodeInsertion(evict); 46 return null; 47 }
三、Set
Set类不保存重复的元素,如果你试图将相同对象的多个实例添加到Set中,那么它就会组织这种重复现象。注意,Set本身只是一个抽象的接口,因此不能直接实例化一个Set对象。另外,hashset对查找方面进行了优化操作。
HashSet的主要实现源码有:
//可以看到HashSet的底层是由HashMap实现的
1 private transient HashMap<E,Object> map; 2 3 public HashSet() { 4 map = new HashMap<>(); 5 } 6 7 public HashSet(int initialCapacity) { 8 map = new HashMap<>(initialCapacity); 9 }
HashSet的主要方法有:
1 public Iterator<E> iterator() { 2 return map.keySet().iterator(); 3 } 4 5 public int size() { 6 return map.size(); 7 } 8 9 public boolean isEmpty() { 10 return map.isEmpty(); 11 } 12 13 public boolean contains(Object o) { 14 return map.containsKey(o); 15 } 16 17 public boolean add(E e) { 18 return map.put(e, PRESENT)==null; 19 } 20 21 /** 22 * Dummy value to associate with an Object in the backing Map 23 * private static final Object PRESENT = new Object(); 24 */
24
25 25 public boolean remove(Object o) { 26 return map.remove(o)==PRESENT; 27 }
由于HashSet的底层是基于HashMap的,所以它的查询效率很高。另外如果给add()方法传入的是已经存在的重复值,那么在底层给HashMap也是传入重复的键值对,那么就会将原来相同的键值对覆盖,在上层看来就和没有插入一样的。