GroupBlog

一直听说memcpy有内存重叠的问题，想看看为什么会有这个问题？

glibc 版本 2.15

#include <string.h>
#include <memcopy.h>
#include <pagecopy.h>
 
#undef memcpy
 
/* Copy LEN bytes from SRCPP to DSTPP, always walking forward (lowest
   address first), and return DSTPP.

   Short copies (LEN < OP_T_THRES) are handled entirely by the final
   BYTE_COPY_FWD.  Longer copies first byte-copy until DSTP is a multiple
   of OPSIZ, then hand the bulk to PAGE_COPY_FWD_MAYBE and WORD_COPY_FWD,
   and finish with the same trailing byte copy.

   BYTE_COPY_FWD and WORD_COPY_FWD advance DSTP/SRCP in place, and
   WORD_COPY_FWD stores the leftover byte count back into LEN, so the
   order of the statements below matters.

   Because data is always moved front to back, a destination that overlaps
   the source at a higher address has not-yet-read source bytes overwritten
   by earlier stores; this routine makes no attempt to detect that.

   NOTE(review): OP_T_THRES, OPSIZ and PAGE_COPY_FWD_MAYBE are defined in
   <memcopy.h>/<pagecopy.h>, which are not visible here.  */
void *
memcpy (dstpp, srcpp, len)
     void *dstpp;
     const void *srcpp;
     size_t len;
{
  /* Keep the addresses as integers so the copy macros can do arithmetic
     and modulo on them directly.  */
  unsigned long int dstp = (long int) dstpp;
  unsigned long int srcp = (long int) srcpp;
 
  /* Copy from the beginning to the end.  */
 
  /* If there are not too few bytes to copy, use word copy.  */
  if (len >= OP_T_THRES)
    {
      /* Copy just a few bytes to make DSTP aligned.
	 (-dstp) % OPSIZ is the distance from DSTP up to the next multiple
	 of OPSIZ (0 when already aligned), assuming OPSIZ is a power of
	 two -- TODO confirm.  LEN is adjusted first because BYTE_COPY_FWD
	 advances DSTP, which would change the value of that expression.  */
      len -= (-dstp) % OPSIZ;
      BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
 
      /* Copy whole pages from SRCP to DSTP by virtual address manipulation,
	 as much as possible.
	 NOTE(review): presumably updates DSTP, SRCP and LEN like the other
	 *_FWD macros; its definition is not shown here.  */
 
      PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
 
      /* Copy from SRCP to DSTP taking advantage of the known alignment of
	 DSTP.  Number of bytes remaining is put in the third argument,
	 i.e. in LEN.  This number may vary from machine to machine.  */
 
      WORD_COPY_FWD (dstp, srcp, len, len);
 
      /* Fall out and copy the tail.  */
    }
 
  /* There are just a few bytes to copy.  Use byte memory operations.
     This is either the whole copy (short LEN) or the remainder left over
     by WORD_COPY_FWD.  */
  BYTE_COPY_FWD (dstp, srcp, len);
 
  return dstpp;
}
libc_hidden_builtin_def (memcpy)

我们一步一步来进行分析:

函数参数 void * memcpy (dstpp, srcpp, len) void *dstpp; const void *srcpp; size_t len;
这是 K&R（旧式）C 的参数声明写法，含义相当于 void *memcpy (void *dstpp, const void *srcpp, size_t len)，可以暂时忽略。void *dstpp 表示目的地址，const void *srcpp 表示源地址，size_t len 表示要拷贝的字节数。

地址被转化为 unsigned long int 进行保存

unsigned long int dstp = (long int) dstpp;
unsigned long int srcp = (long int) srcpp;

拷贝数量如果小于 OP_T_THRES 则去执行 BYTE_COPY_FWD (dstp, srcp, len);
其中OP_T_THRES是根据系统的不同来进行设定的，在我的电脑上它的值为16

/* Copy exactly NBYTES bytes from the address held in SRC_BP to the address
   held in DST_BP, one byte at a time, lowest address first.

   DST_BP and SRC_BP must be integer lvalues holding addresses; both are
   advanced by NBYTES as a side effect.  NBYTES is evaluated once.  Each
   byte is loaded before the corresponding store is made.  */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)                                 \
  do                                                                          \
    {                                                                         \
      size_t __remaining;                                                     \
      for (__remaining = (nbytes); __remaining != 0; --__remaining)           \
        {                                                                     \
          const byte __cur = *(byte *) src_bp;                                \
          src_bp += 1;                                                        \
          *(byte *) dst_bp = __cur;                                           \
          dst_bp += 1;                                                        \
        }                                                                     \
    } while (0)

我们可以很清楚的看到它是一个字节一个字节进行拷贝

那么如果要拷贝的字节数比较多（不小于 OP_T_THRES）呢？这时就要先处理内存对齐：
```
 len -= (-dstp) % OPSIZ;
BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
```

这里用到了小技巧 len -= (-dstp)%OPSIZ;
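下面来解释一下这个式子：
dstp 是无符号整数，-dstp 按无符号运算相当于 2^N - dstp（N 为 unsigned long 的位数）。OPSIZ 是 2 的幂，能整除 2^N，所以 (-dstp) % OPSIZ 等于 (OPSIZ - dstp % OPSIZ) % OPSIZ，也就是 dstp 距离下一个 OPSIZ 对齐边界还差的字节数（已经对齐时为 0）。
例如 OPSIZ = 8、dstp = 0x1003 时，(-dstp) % 8 = 5，先逐字节拷贝 5 个字节后 dstp 变为 0x1008，正好对齐。
注意要先执行 len -= (-dstp) % OPSIZ，因为 BYTE_COPY_FWD 会修改 dstp，之后再计算这个式子结果就变了。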
其中前 (-dstp) % OPSIZ 个字节逐字节拷贝以完成对齐，剩下的部分先尝试按虚拟内存页整页拷贝：
PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
但是整页拷贝的限制条件很多，只有少数特殊平台（如 Mach）才会真正启用；在其他平台上这个宏什么都不做。

我们还有 WORD_COPY_FWD (dstp, srcp, len, len);拷贝

/* Copy as many whole OPSIZ-sized words as fit in NBYTES from the address
   in SRC_BP to the address in DST_BP, lowest address first.

   The word count (NBYTES / OPSIZ) is handed to _wordcopy_fwd_aligned when
   SRC_BP is a multiple of OPSIZ, and to _wordcopy_fwd_dest_aligned
   otherwise.  Afterwards DST_BP and SRC_BP are advanced past the copied
   words and the leftover byte count (NBYTES % OPSIZ) is stored in
   NBYTES_LEFT.

   NBYTES_LEFT is assigned last on purpose: callers may pass the same
   variable for NBYTES_LEFT and NBYTES.  */
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)                    \
  do                                                                          \
    {                                                                         \
      if (src_bp % OPSIZ != 0)                                                \
        _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);        \
      else                                                                    \
        _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);             \
      dst_bp += (nbytes) & -OPSIZ;                                            \
      src_bp += (nbytes) & -OPSIZ;                                            \
      (nbytes_left) = (nbytes) % OPSIZ;                                       \
    } while (0)

这里采用按字拷贝（word copy）的方式，每次拷贝一个字（OPSIZ 个字节），这是 memcpy 优化的关键；前提是目的地址已经位于对齐边界上，源地址是否对齐则决定调用 _wordcopy_fwd_aligned 还是 _wordcopy_fwd_dest_aligned。

总结一下:

先用unsigned long int 保存地址
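然后判断需要拷贝的大小，如果小于 OP_T_THRES，直接逐字节拷贝。
如果大于等于 OP_T_THRES，先逐字节拷贝 (-dstp) % OPSIZ 个字节，使目的地址对齐；对齐之后先尝试按页拷贝，再按对齐边界一个字（word）一个字地拷贝，最后剩下不足一个字的尾部仍然逐字节拷贝。
回到开头的问题：整个过程始终是从低地址向高地址拷贝，所以当目的区域与源区域重叠、且目的地址在源地址之后时，尚未读取的源数据会被先写入的数据覆盖——这就是 memcpy 的内存重叠问题，此时应改用 memmove。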