PHP源码之explode分析-白红宇

PHP源码之explode分析

阅读量：7020 次

发布时间：2019-06-28

本文共 6889 字，大约阅读时间需要 22 分钟。

最近一直在想有关字符串操作的效率问题：像截取子串这样的操作，都避免不了重新分配空间的消耗。于是顺带看了explode这个函数的源码，把自己的理解和分析拿出来分享一下^_^。

当我们需要将一个字符串根据某个字符或子串分割成数组的时候，explode用得很happy，但是你知道～explode是怎么工作的么～～

首先可以肯定的是，explode也是会分配空间的，毫无疑问。

1 //文件1：ext/standard/string.c  2 //先来看下explode的源代码  3 PHP_FUNCTION(explode)  4 {
     5     char *str, *delim;  6     int str_len = 0, delim_len = 0;  7     long limit = LONG_MAX; /* No limit */  8     zval zdelim, zstr;  9     10     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &delim, &delim_len, &str, &str_len, &limit) == FAILURE) {
    11         return; 12     } 13     14     if (delim_len == 0) {
    15         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 16         RETURN_FALSE; 17     } 18         19         //这里会开辟一个数组，用来存放分割后的数据 20     array_init(return_value); 21         22        //因为这个，我们用explode('|', '');成为了合法的 23     if (str_len == 0) {
    24           if (limit >= 0) {
    25             add_next_index_stringl(return_value, "", sizeof("") - 1, 1); 26         } 27         return; 28     } 29         30         //下面这两个是将原字串和分割符都构建成_zval_struct 结构， 31        //ZVAL_STRINGL会分配空间哦～～源代码随后贴出 32     ZVAL_STRINGL(&zstr, str, str_len, 0);   33     ZVAL_STRINGL(&zdelim, delim, delim_len, 0); 34        //limit值是explode中允许传递的explode的第三个参数，它允许正负 35     if (limit > 1) {
    36         php_explode(&zdelim, &zstr, return_value, limit); 37     } else if (limit < 0) {
    38         php_explode_negative_limit(&zdelim, &zstr, return_value, limit); 39     } else {
    40         add_index_stringl(return_value, 0, str, str_len, 1); 41     } 42 }

1 //ZVAL_STRINGL的源代码：   2 //文件2：zend/zend_API.c     3 #define ZVAL_STRINGL(z, s, l, duplicate) {    \  4         const char *__s=(s); int __l=l;        \  5         Z_STRLEN_P(z) = __l;                \  6         Z_STRVAL_P(z) = (duplicate?estrndup(__s, __l):(char*)__s);\  7         Z_TYPE_P(z) = IS_STRING;            \  8     }  9 .... 10 //estrndup才是主菜： 11 //文件3：zend/zend_alloc.h 12 #define estrndup(s, length)    _estrndup((s), (length) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC) 13 .... 14 //_estrndup的实现： zend/zend_alloc.c 15 ZEND_API char *_estrndup(const char *s, uint length ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC) 16 {
    17     char *p; 18     p = (char *) _emalloc(length+1 ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC); 19     if (UNEXPECTED(p == NULL)) {
    20         return p; 21     } 22     memcpy(p, s, length);   //分配空间 23     p[length] = 0; 24     return p; 25 } 26 //另外在substr和strrchr strstr中用到的ZVAL_STRING也是使用了上诉的实现

下面根据explode的第三个参数limit来分析调用：explode函数末尾的 if / else if / else 三个分支，分别对应limit的三种取值情况。

注： limit在缺省的时候（没有传递），他的默认值是LONG_MAX，也就是属于分支1的情况

1、limit > 1 :

调用php_explode方法，该方法也可以在ext/standard/string.c中找到，并且紧挨着出现在explode实现的上方（所以查找本函数调用的、来自本文件的方法很方便，几乎无一例外都在该函数的正上方^_^），

1 PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, long limit)  2 {
     3     char *p1, *p2, *endp;  4 //先得到的是源字串的末尾位置的指针  5     endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);  6 //记录开始位置  7     p1 = Z_STRVAL_P(str);  8 //下面这个是获得分割符在str中的位置，可以看到在strrpos和strpos中也用到了这个方法去定位  9     p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp); 10 11     if (p2 == NULL) {
    12 //因为这个，所以当我们调用explode('|', 'abc');是合法的，出来的的就是array(0 => 'abc') 13         add_next_index_stringl(return_value, p1, Z_STRLEN_P(str), 1); 14     } else {
    15 //依次循环获得下一个分隔符的位置，直到结束 16         do {
    17 //将得到的子字串（上个位置到这个位置中间的一段，第一次的时候上个位置就是开始 18             add_next_index_stringl(return_value, p1, p2 - p1, 1); 19 //定位到分隔符位置p2+分隔符的长度的位置 20 //比如,分隔符='|', 原字串= ’ab|c', p2 = 2,  则p1=2+1=3 21             p1 = p2 + Z_STRLEN_P(delim); 22         } while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL && 23                  --limit > 1); 24 //将最后的一个分隔符后面的字串放到结果数组中 25 //explode('|', 'avc|sdf');   => array(0 => 'avc', 1= > 'sdf') 26         if (p1 <= endp) 27             add_next_index_stringl(return_value, p1, endp-p1, 1); 28     } 29 }

2、limit < 0 :

调用php_explode_negative_limit方法

1 PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, long limit)  2 {
     3 #define EXPLODE_ALLOC_STEP 64  4     char *p1, *p2, *endp;  5      6     endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);  7  8     p1 = Z_STRVAL_P(str);  9     p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp); 10 11     if (p2 == NULL) {
    12 //它这里竟然没有处理，那explode('|', 'abc', -1) 就成非法的了，获得不了任何值 13         /* 14         do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0 15         by doing nothing we return empty array 16         */ 17     } else {
    18         int allocated = EXPLODE_ALLOC_STEP, found = 0; 19         long i, to_return; 20         char **positions = emalloc(allocated * sizeof(char *)); 21 //注意这里的positions的声明，这个数组是用来保存所有子字串的读取位置 22         positions[found++] = p1;   //当然起始位置还是需要保存 23 //下面两个循环，第一个是循环所有在字符串中出现的分隔符位置，并保存下一个子字串读取位置起来 24         do {
    25             if (found >= allocated) {
    26                 allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */ 27                 positions = erealloc(positions, allocated*sizeof(char *)); 28             } 29             positions[found++] = p1 = p2 + Z_STRLEN_P(delim); 30         } while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL); 31 //这个就是从数组中开始获得返回的结果将从哪个子字串开始读        32         to_return = limit + found; 33         /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */ 34         for (i = 0;i < to_return;i++) { /* this checks also for to_return > 0 */ 35             add_next_index_stringl(return_value, positions[i], 36                     (positions[i+1] - Z_STRLEN_P(delim)) - positions[i], 37                     1 38                 ); 39         } 40         efree(positions);//很重要，释放内存 41     } 42 #undef EXPLODE_ALLOC_STEP 43 }

3、limit = 1 or limit = 0 :

当第一和第二个条件都不满足的时候，就进入了这个分支。这个分支很简单，就是将源字串整个放到输出数组中，explode('|', 'avc|sd', 1) or explode('|', 'avc|sd', 0) 都将返回array(0 => 'avc|sd');

1 //add_index_stringl源代码  2 //文件4：zend/zend_API.c  3 ZEND_API int add_next_index_stringl(zval *arg, const char *str, uint length, int duplicate) /* {
    {
    { */  4 {
     5     zval *tmp;  6  7     MAKE_STD_ZVAL(tmp);  8     ZVAL_STRINGL(tmp, str, length, duplicate);  9 10     return zend_hash_next_index_insert(Z_ARRVAL_P(arg), &tmp, sizeof(zval *), NULL); 11 } 12 13 //zend_hash_next_index_insert 14 //zend/zend_hash.h 15 #define zend_hash_next_index_insert(ht, pData, nDataSize, pDest) \ 16         _zend_hash_index_update_or_next_insert(ht, 0, pData, nDataSize, pDest, HASH_NEXT_INSERT ZEND_FILE_LINE_CC) 17 //zend/zend_hash.c 18 ///太长了～～～～不贴了

可见（不包含分配空间这些），

当limit>1的时候，循环产生的元素个数最多为 min(N, M+1)，效率是O（min(N, M+1)）【N为limit值，M为分割符出现次数；limit缺省为LONG_MAX时即为O（M）】，

当limit<0的时候，效率是O（M）【M为分割符出现次数；第一个循环执行M次，第二个循环最多执行 M+1-|N| 次，N为limit值】，

当limit=1 or limit=0 的时候，效率是O（1）

转载于:https://www.cnblogs.com/xiaoyaoxia/archive/2011/08/04/2127856.html

你可能感兴趣的文章