Java基础之String分析

String的源码分析

从一段代码说起

/**
 * Demonstrates how {@code ==} behaves for a string literal, a compile-time
 * concatenation of literals, and a string created with {@code new}.
 */
public class StringDemo {
    public static void main(String[] args) {
        String literal = "abc";
        // A concatenation of literals is a constant expression: the compiler
        // folds it to "abc", so it resolves to the same pooled instance.
        String folded = "a" + "b" + "c";
        boolean sameAsFolded = (literal == folded);
        System.out.println(sameAsFolded); // true

        // new String(...) always produces a distinct object.
        String fresh = new String("abc");
        boolean sameAsFresh = (literal == fresh);
        System.out.println(sameAsFresh); // false
    }
}

下面我们从源码看String的具体实现（本文是jdk1.8）。

String的初始化

String类的属性包含一个用final修饰的char数组value，用来存放字符串内容（数组引用一经赋值不可更改，且类内部不对外提供修改数组内容的入口，因此String对外表现为不可变）；以及一个int型的变量hash，用来缓存计算后的哈希值。

public final class String
    implements java.io.Serializable, Comparable<String>, CharSequence {
    /** The value is used for character storage. */
    private final char value[];
    /** Cache the hash code for the string */
    private int hash; // Default to 0
    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = -6849794470754667710L;

String构造函数

public String()

public String(String original)

public String(char value[])

public String(char value[], int offset, int count)

public String(int[] codePoints, int offset, int count)

public String(byte bytes[], int offset, int length, Charset charset)

public String(byte bytes[], int offset, int length, String charsetName)

public String(byte bytes[], int offset, int length, Charset charset)

public String(byte bytes[], String charsetName)

public String(byte bytes[], Charset charset)

public String(byte bytes[], int offset, int length)

public String(byte bytes[])

public String(StringBuffer buffer)

public String(StringBuilder builder)

String(char[] value, boolean share)

    public String() {
        this.value = "".value;
    }
    public String(String original) {
        this.value = original.value;
        this.hash = original.hash;
    }
    public String(char value[]) {
        this.value = Arrays.copyOf(value, value.length);
    }
    public String(char value[], int offset, int count) {
        if (offset < 0) {
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count <= 0) {
            if (count < 0) {
                throw new StringIndexOutOfBoundsException(count);
            }
            if (offset <= value.length) {
                this.value = "".value;
                return;
            }
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > value.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);
        }
        this.value = Arrays.copyOfRange(value, offset, offset+count);
    }
    /**
     * Creates a string from {@code count} Unicode code points of
     * {@code codePoints}, starting at {@code offset}. The conversion runs in
     * two passes: the first computes how many chars are needed, the second
     * allocates the array and fills it.
     *
     * @param codePoints source code points
     * @param offset     index of the first code point to convert
     * @param count      number of code points to convert
     * @throws StringIndexOutOfBoundsException if {@code offset} or
     *         {@code count} is negative, or the range runs past the end of
     *         {@code codePoints}
     * @throws IllegalArgumentException if an element in the range is not a
     *         valid code point
     */
    public String(int[] codePoints, int offset, int count) {
        if (offset < 0) {
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count <= 0) {
            if (count < 0) {
                throw new StringIndexOutOfBoundsException(count);
            }
            if (offset <= codePoints.length) {
                this.value = "".value;
                return;
            }
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > codePoints.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);
        }
        final int end = offset + count;
        // Pass 1: Compute precise size of char[]
        int n = count;
        for (int i = offset; i < end; i++) {
            int c = codePoints[i];
            // A BMP code point lies in U+0000..U+FFFF (at most 65535, i.e.
            // 2^16 - 1), so it fits in one 16-bit Java char and pass 2 stores
            // it with a plain (char) cast. Any other valid code point needs
            // one extra char slot, hence n++. Anything else is rejected.
            if (Character.isBmpCodePoint(c))
                continue;
            else if (Character.isValidCodePoint(c))
                n++;
            else throw new IllegalArgumentException(Integer.toString(c));
        }
        // Pass 2: Allocate and fill in char[]
        final char[] v = new char[n];
        for (int i = offset, j = 0; i < end; i++, j++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c))
                v[j] = (char)c;
            else
                // Non-BMP: written via toSurrogates; the extra j++ accounts
                // for the second char slot reserved in pass 1.
                Character.toSurrogates(c, v, j++);
        }
        this.value = v;
    }
public String(byte bytes[], int offset, int length, Charset charset) {
        if (charset == null)
            throw new NullPointerException("charset");
        checkBounds(bytes, offset, length);
        this.value =  StringCoding.decode(charset, bytes, offset, length);
}
public String(byte bytes[], int offset, int length, String charsetName)
            throws UnsupportedEncodingException {
        if (charsetName == null)
            throw new NullPointerException("charsetName");
        checkBounds(bytes, offset, length);
        this.value = StringCoding.decode(charsetName, bytes, offset, length);
    }
public String(byte bytes[], int offset, int length, Charset charset) {
        if (charset == null)
            throw new NullPointerException("charset");
        checkBounds(bytes, offset, length);
        this.value =  StringCoding.decode(charset, bytes, offset, length);
    }
/**
 * Decodes the whole byte array using the charset with the given name.
 * Delegates to the (bytes, offset, length, charsetName) constructor with
 * offset 0 and length {@code bytes.length}.
 */
public String(byte bytes[], String charsetName)
            throws UnsupportedEncodingException {
        this(bytes, 0, bytes.length, charsetName);
    }
/**
 * Decodes the whole byte array using the given charset. Delegates to the
 * (bytes, offset, length, charset) constructor with offset 0 and length
 * {@code bytes.length}.
 */
public String(byte bytes[], Charset charset) {
        this(bytes, 0, bytes.length, charset);
    }
public String(byte bytes[], int offset, int length) {
        checkBounds(bytes, offset, length);
        this.value = StringCoding.decode(bytes, offset, length);
    }
/**
 * Decodes the whole byte array. Delegates to the (bytes, offset, length)
 * constructor with offset 0 and length {@code bytes.length}.
 */
public String(byte bytes[]) {
        this(bytes, 0, bytes.length);
    }
public String(StringBuffer buffer) {
        synchronized(buffer) {
            this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
        }
    }
public String(StringBuilder builder) {
        this.value = Arrays.copyOf(builder.getValue(), builder.length());
    }
/**
 * Package-private constructor that stores the given array directly,
 * without copying it. The {@code share} argument is not read by the body;
 * it only distinguishes this overload from {@code String(char[])}.
 */
String(char[] value, boolean share) {
        // assert share : "unshared not supported";
        this.value = value;
    }
/**
 * Validates that the range {@code [offset, offset + length)} lies inside
 * {@code bytes}.
 *
 * @throws StringIndexOutOfBoundsException if {@code length} or
 *         {@code offset} is negative, or the range runs past the end of
 *         the array
 */
private static void checkBounds(byte[] bytes, int offset, int length) {
    if (length < 0) {
        throw new StringIndexOutOfBoundsException(length);
    }
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    // Subtraction form so that offset + length cannot overflow the check.
    final int lastLegalOffset = bytes.length - length;
    if (offset > lastLegalOffset) {
        throw new StringIndexOutOfBoundsException(offset + length);
    }
}

String常用方法

equals方法

/**
 * Compares this string with another object for content equality.
 *
 * @param anObject the object to compare with
 * @return {@code true} if {@code anObject} is a String holding exactly
 *         the same character sequence
 */
public boolean equals(Object anObject) {
    // Same reference: trivially equal.
    if (this == anObject) {
        return true;
    }
    // Anything that is not a String (including null) is never equal.
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] mine = value;
    final char[] theirs = ((String) anObject).value;
    // Different lengths cannot be equal.
    if (mine.length != theirs.length) {
        return false;
    }
    // Compare character by character, from the first index to the last.
    for (int idx = 0; idx < mine.length; idx++) {
        if (mine[idx] != theirs[idx]) {
            return false;
        }
    }
    return true;
}

equals的判断规则：

内存地址相同，则为真。

如果对象类型不是String类型，则为假。否则继续判断。

如果对象长度不相等，则为假。否则继续判断。

从前往后（下标从0开始递增），逐个比较String类中char数组value的字符是否相等，有不相等则为假。如果一直相等直到最后一个字符，则返回真。

compareTo 方法

/**
 * Compares two strings lexicographically.
 *
 * @param anotherString the string to compare against
 * @return the difference of the first pair of differing characters, or,
 *         when one string is a prefix of the other, the difference of the
 *         lengths (0 when both are identical)
 */
public int compareTo(String anotherString) {
    final char[] self = value;
    final char[] other = anotherString.value;
    final int selfLen = self.length;
    final int otherLen = other.length;
    // Only the overlapping prefix can be compared character by character.
    final int shared = Math.min(selfLen, otherLen);
    for (int pos = 0; pos < shared; pos++) {
        final int diff = self[pos] - other[pos];
        if (diff != 0) {
            return diff;
        }
    }
    // Common prefix is identical: the shorter string sorts first.
    return selfLen - otherLen;
}

先从0开始判断字符大小，如果两个对象能比较字符的地方比较完了还相等，就直接返回自身长度减被比较对象长度，如果两个字符串长度相等，则返回的是0，巧妙地判断了三种情况。

startsWith方法

boolean startsWith(String prefix)

boolean startsWith(String prefix, int toffset)

boolean endsWith(String suffix)

/**
 * Tests whether this string begins with {@code prefix}.
 */
public boolean startsWith(String prefix) {
    final int fromStart = 0;
    return startsWith(prefix, fromStart);
}
/**
 * Tests whether the characters of this string starting at
 * {@code toffset} match {@code prefix}.
 *
 * @param prefix  the expected character sequence
 * @param toffset index in this string where the comparison starts
 * @return {@code false} if {@code toffset} is negative or the prefix does
 *         not fit, otherwise whether every character matches
 */
public boolean startsWith(String prefix, int toffset) {
    final char[] text = value;
    final char[] wanted = prefix.value;
    final int wantedLen = wanted.length;
    // Subtraction form: toffset + wantedLen could overflow when toffset is
    // close to Integer.MAX_VALUE.
    if (toffset < 0 || toffset > text.length - wantedLen) {
        return false;
    }
    for (int i = 0; i < wantedLen; i++) {
        if (text[toffset + i] != wanted[i]) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether this string ends with {@code suffix} by checking for a
 * prefix match at the position where the suffix would have to begin.
 */
public boolean endsWith(String suffix) {
    final int tailStart = value.length - suffix.value.length;
    return startsWith(suffix, tailStart);
}

hashCode方法

/**
 * Returns the hash of this string, computed as
 * {@code s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]} in int arithmetic.
 * The result is stored in {@code hash}; a stored value of 0 means
 * "not computed yet" (or that the hash really is 0).
 */
public int hashCode() {
    final int cached = hash;
    // Reuse the stored value; an empty string keeps hash 0 without a loop.
    if (cached != 0 || value.length == 0) {
        return cached;
    }
    int computed = 0;
    for (char ch : value) {
        computed = 31 * computed + ch;
    }
    hash = computed;
    return computed;
}

substring方法

String substring(int beginIndex)

String substring(int beginIndex, int endIndex)

/**
 * Returns the part of this string from {@code beginIndex} to the end.
 *
 * @return this same object when {@code beginIndex} is 0, otherwise a new
 *         string
 * @throws StringIndexOutOfBoundsException if {@code beginIndex} is
 *         negative or greater than the length
 */
public String substring(int beginIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    final int remaining = value.length - beginIndex;
    if (remaining < 0) {
        throw new StringIndexOutOfBoundsException(remaining);
    }
    if (beginIndex == 0) {
        return this;
    }
    return new String(value, beginIndex, remaining);
}
/**
 * Returns the part of this string in {@code [beginIndex, endIndex)}.
 *
 * @return this same object when the range covers the whole string,
 *         otherwise a new string
 * @throws StringIndexOutOfBoundsException if {@code beginIndex} is
 *         negative, {@code endIndex} exceeds the length, or
 *         {@code beginIndex} is greater than {@code endIndex}
 */
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    if (endIndex > value.length) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    final int span = endIndex - beginIndex;
    if (span < 0) {
        throw new StringIndexOutOfBoundsException(span);
    }
    final boolean wholeString = (beginIndex == 0) && (endIndex == value.length);
    if (wholeString) {
        return this;
    }
    return new String(value, beginIndex, span);
}

concat方法

/**
 * Appends {@code str} to this string.
 *
 * @return this same object when {@code str} is empty, otherwise a new
 *         string holding both character sequences
 */
public String concat(String str) {
    final int appended = str.length();
    if (appended == 0) {
        return this;
    }
    final int ownLen = value.length;
    // Copy our characters into an array sized for both parts...
    final char[] joined = Arrays.copyOf(value, ownLen + appended);
    // ...then let str write its characters right behind them.
    str.getChars(joined, ownLen);
    // The array is freshly allocated, so it is handed over without a copy.
    return new String(joined, true);
}
/**
 * Copies all characters of this string into {@code dst}, starting at
 * index {@code dstBegin}.
 */
void getChars(char dst[], int dstBegin) {
    final char[] src = value;
    final int count = src.length;
    System.arraycopy(src, 0, dst, dstBegin, count);
}

replace方法

/**
 * Replaces every occurrence of {@code oldChar} with {@code newChar}.
 *
 * @return this same object when nothing would change (both chars are
 *         equal, or {@code oldChar} does not occur), otherwise a new
 *         string
 */
public String replace(char oldChar, char newChar) {
    if (oldChar == newChar) {
        return this;
    }
    final char[] src = value;
    final int len = src.length;
    // Locate the first occurrence; everything before it is kept as is.
    int first = 0;
    while (first < len && src[first] != oldChar) {
        first++;
    }
    if (first == len) {
        return this;
    }
    final char[] out = new char[len];
    System.arraycopy(src, 0, out, 0, first);
    // From the first hit to the end, substitute as we copy.
    for (int k = first; k < len; k++) {
        final char c = src[k];
        out[k] = (c == oldChar) ? newChar : c;
    }
    return new String(out, true);
}

contains方法

/**
 * Tests whether this string contains the character sequence {@code s}.
 */
public boolean contains(CharSequence s) {
    final int position = indexOf(s.toString());
    return position >= 0;
}
/**
 * Returns the index of the first occurrence of {@code str}, searching
 * from the beginning, or -1 if there is none.
 */
public int indexOf(String str) {
    final int fromStart = 0;
    return indexOf(str, fromStart);
}
/**
 * Returns the index of the first occurrence of {@code str} at or after
 * {@code fromIndex}, or -1 if there is none. Delegates to the
 * array-based search over both backing arrays.
 */
public int indexOf(String str, int fromIndex) {
    final char[] haystack = value;
    final char[] needle = str.value;
    return indexOf(haystack, 0, haystack.length,
            needle, 0, needle.length, fromIndex);
}
/**
 * Searches a slice of {@code source} for a slice of {@code target}.
 *
 * @param source       characters being searched
 * @param sourceOffset start of the searched slice in {@code source}
 * @param sourceCount  length of the searched slice
 * @param target       characters being looked for
 * @param targetOffset start of the pattern slice in {@code target}
 * @param targetCount  length of the pattern slice
 * @param fromIndex    index, relative to {@code sourceOffset}, to start at
 * @return the match position relative to {@code sourceOffset}, or -1
 */
static int indexOf(char[] source, int sourceOffset, int sourceCount,
           char[] target, int targetOffset, int targetCount,
           int fromIndex) {
       // Starting at or past the end: only an empty target can "match".
       if (fromIndex >= sourceCount) {
           return (targetCount == 0 ? sourceCount : -1);
       }
       // A negative start is treated as 0.
       if (fromIndex < 0) {
           fromIndex = 0;
       }
       // An empty target matches immediately at the start position.
       if (targetCount == 0) {
           return fromIndex;
       }
       char first = target[targetOffset];
       // Last index in source at which the whole target still fits.
       int max = sourceOffset + (sourceCount - targetCount);
       for (int i = sourceOffset + fromIndex; i <= max; i++) {
           /* Look for first character. */
           if (source[i] != first) {
               while (++i <= max && source[i] != first);
           }
           /* Found first character, now look at the rest of v2 */
           if (i <= max) {
               int j = i + 1;
               int end = j + targetCount - 1;
               // Empty-bodied loop: advances j and k while characters match.
               for (int k = targetOffset + 1; j < end && source[j]
                       == target[k]; j++, k++);
               if (j == end) {
                   /* Found whole string. */
                   return i - sourceOffset;
               }
           }
       }
       return -1;
   }

split方法

/**
 * Splits this string around matches of {@code regex}, using limit 0.
 */
public String[] split(String regex) {
    final int noLimit = 0;
    return split(regex, noLimit);
}
 /**
  * Splits this string around matches of {@code regex}.
  *
  * When the regex is effectively a single literal character, the string
  * is split with plain indexOf/substring calls; otherwise the work is
  * delegated to {@code Pattern.compile(regex).split(this, limit)}.
  *
  * @param regex the delimiting regular expression
  * @param limit a positive value caps the number of pieces; with 0,
  *              trailing empty pieces are dropped from the result
  * @return the pieces, or a one-element array holding this string when
  *         the fast path finds no delimiter
  */
 public String[] split(String regex, int limit) {
        /* fastpath if the regex is a
         (1)one-char String and this character is not one of the
            RegEx's meta characters ".$|()[{^?*+\\", or
         (2)two-char String and the first char is the backslash and
            the second is not the ascii digit or ascii letter.
         */
        char ch = 0;
        // Each ((ch-lo)|(hi-ch)) < 0 test is true when ch lies outside
        // lo..hi: the OR is negative if either difference is negative.
        // The last clause excludes surrogate chars from the fast path.
        if (((regex.value.length == 1 &&
             ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
             (regex.length() == 2 &&
              regex.charAt(0) == '\\' &&
              (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
              ((ch-'a')|('z'-ch)) < 0 &&
              ((ch-'A')|('Z'-ch)) < 0)) &&
            (ch < Character.MIN_HIGH_SURROGATE ||
             ch > Character.MAX_LOW_SURROGATE))
        {
            int off = 0;
            int next = 0;
            boolean limited = limit > 0;
            ArrayList<String> list = new ArrayList<>();
            while ((next = indexOf(ch, off)) != -1) {
                if (!limited || list.size() < limit - 1) {
                    list.add(substring(off, next));
                    off = next + 1;
                } else {    // last one
                    //assert (list.size() == limit - 1);
                    list.add(substring(off, value.length));
                    off = value.length;
                    break;
                }
            }
            // If no match was found, return this
            if (off == 0)
                return new String[]{this};
            // Add remaining segment
            if (!limited || list.size() < limit)
                list.add(substring(off, value.length));
            // Construct result
            int resultSize = list.size();
            // limit == 0: strip trailing empty strings from the result.
            if (limit == 0) {
                while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
                    resultSize--;
                }
            }
            String[] result = new String[resultSize];
            return list.subList(0, resultSize).toArray(result);
        }
        return Pattern.compile(regex).split(this, limit);
    }

trim方法

/**
 * Strips leading and trailing characters whose code is less than or equal
 * to {@code ' '} (the space and everything below it, not only spaces).
 *
 * @return this same object when nothing is stripped, otherwise the
 *         trimmed substring
 */
public String trim() {
    final char[] chars = value;
    int start = 0;
    int end = chars.length;
    // Skip leading characters <= ' '.
    while (start < end && chars[start] <= ' ') {
        start++;
    }
    // Skip trailing characters <= ' '.
    while (start < end && chars[end - 1] <= ' ') {
        end--;
    }
    if (start == 0 && end == chars.length) {
        return this;
    }
    return substring(start, end);
}

toCharArray方法

/**
 * Returns a newly allocated array holding a copy of this string's
 * characters.
 */
public char[] toCharArray() {
    // Arrays.copyOf is avoided here because of class initialization
    // order issues, so the copy is done with System.arraycopy.
    final int count = value.length;
    final char[] copy = new char[count];
    System.arraycopy(value, 0, copy, 0, count);
    return copy;
}

valueOf方法

/**
 * Returns {@code "null"} for a null argument, otherwise the result of
 * {@code obj.toString()}.
 */
public static String valueOf(Object obj) {
    if (obj == null) {
        return "null";
    }
    return obj.toString();
}
/**
 * Returns a string built from the given character array via
 * {@code new String(data)}.
 */
public static String valueOf(char data[]) {
    final String text = new String(data);
    return text;
}
/**
 * Returns a string built from {@code count} characters of {@code data},
 * starting at {@code offset}.
 */
public static String valueOf(char data[], int offset, int count) {
    final String text = new String(data, offset, count);
    return text;
}
/**
 * Returns a one-character string. The freshly allocated single-element
 * array is handed to the non-copying constructor.
 */
public static String valueOf(char c) {
    final char[] single = new char[] {c};
    return new String(single, true);
}
/**
 * Returns the decimal text of {@code i}, as produced by
 * {@code Integer.toString}.
 */
public static String valueOf(int i) {
    final String text = Integer.toString(i);
    return text;
}
/**
 * Returns the decimal text of {@code l}, as produced by
 * {@code Long.toString}.
 */
public static String valueOf(long l) {
    final String text = Long.toString(l);
    return text;
}
/**
 * Returns the text of {@code f}, as produced by {@code Float.toString}.
 */
public static String valueOf(float f) {
    final String text = Float.toString(f);
    return text;
}
/**
 * Returns the text of {@code d}, as produced by {@code Double.toString}.
 */
public static String valueOf(double d) {
    final String text = Double.toString(d);
    return text;
}

intern方法

public native String intern();

intern方法是Native调用，它的作用是在字符串常量池中通过equals方法寻找等值的对象（JDK 1.7起字符串常量池位于堆中，而不是方法区/永久代）。如果没有找到，则把当前字符串对象的引用记录到常量池中并返回该引用（JDK 1.6及之前是在常量池中复制一份字符串再返回）；否则直接返回常量池中已存在String对象的引用。

/**
 * Demonstrates how {@code ==} behaves for a string literal, a compile-time
 * concatenation, a string created with {@code new}, and the result of
 * {@code intern()}.
 */
public class StringDemo {
    public static void main(String[] args) {
        String literal = "abc";
        // Constant expression: folded by the compiler to the pooled "abc".
        String folded = "a" + "b" + "c";
        boolean sameAsFolded = (literal == folded);
        System.out.println(sameAsFolded);

        // new String(...) always produces a distinct object.
        String fresh = new String("abc");
        boolean sameAsFresh = (literal == fresh);
        System.out.println(sameAsFresh);

        // intern() returns the pooled instance, i.e. the literal itself.
        String pooled = new String("abc").intern();
        boolean sameAsPooled = (literal == pooled);
        System.out.println(sameAsPooled); // true
    }
}

其他方法

/**
 * Returns the number of chars in this string, i.e. the length of the
 * backing array.
 */
public int length() {
    final int count = value.length;
    return count;
}
/**
 * Returns this very object; no new string is created.
 */
public String toString() {
    return this;
}
/**
 * Tests whether this string holds no characters.
 */
public boolean isEmpty() {
    final boolean noChars = (value.length == 0);
    return noChars;
}
/**
 * Returns the char at the given position.
 *
 * @param index zero-based position
 * @throws StringIndexOutOfBoundsException if {@code index} is negative
 *         or not less than the length
 */
public char charAt(int index) {
    final boolean inRange = (index >= 0) && (index < value.length);
    if (inRange) {
        return value[index];
    }
    throw new StringIndexOutOfBoundsException(index);
}

String对象的三种比较方式：

==引用比较：直接对比两个引用是否指向同一个对象（即内存地址是否相同），而不是比较对象的内容。

equals字符串值比较：比较两个引用所指对象字面值是否相等。

hashCode字符串数值化比较：将字符串数值化。两个引用的hashCode相同，不保证它们指向同一个对象，也不保证字面值一定相同（可能发生哈希冲突）；反过来，equals相等的两个字符串，hashCode一定相同。