memcpy还是memmove?

2008年7月18日 05:09

二话不说,先举例子:

例子1:

 

#include <stdio.h>
#include <string.h>

/*
 * Example 1: copy 5 bytes from the start of the buffer to offset 2 with
 * memcpy().  The source range [0,5) and the destination range [2,7)
 * overlap ON PURPOSE: ISO C leaves memcpy() on overlapping objects
 * undefined, so the second line printed depends on the C library in use.
 * The header is spelled <string.h> (lower case) so that the program also
 * builds on case-sensitive file systems.
 */
int main(void)
{
    char a[10] = "Hello!" ;

    printf("%s\r\n",a);

    /* Deliberately overlapping copy -- this is the behavior under study. */
    memcpy(&a[2], a, 5);

    printf("%s\r\n",a);

    return 0;
}

 

例子2:

 

#include <stdio.h>
#include <string.h>

/*
 * Example 2: the same overlapping copy as example 1, but done with
 * memmove(), which is specified to work when source and destination
 * overlap.  The header is spelled <string.h> (lower case) so that the
 * program also builds on case-sensitive file systems.
 */
int main(void)
{
    char a[10] = "Hello!" ;

    printf("%s\r\n",a);

    /* Source [0,5) and destination [2,7) overlap; memmove handles this. */
    memmove(&a[2], a, 5);
    printf("%s\r\n",a);

    return 0;
}

 

使用gcc 编译

$ gcc -v
Using built-in specs.
Target: i686-pc-cygwin
Configured with: ./configure
Thread model: single
gcc version 4.3.0 (GCC)
 

例子1的结果是:

 

$ ./a.exe
Hello!
HeHeHeH

例子2的结果是:

 

$ ./a.exe
Hello!
HeHello


从这两个简单的例子可以看出,memcpy和memmove的基本区别就是:源地址和目的地址是否可以重叠。memmove保证在内存重叠时也能得到正确的结果;而C标准规定memcpy的源和目的区域重叠时行为未定义,实际结果取决于库函数的实现。

为了明白这个问题:

我们看看memcpy的源码:

 

 

/* Copy memory to memory until the specified number of bytes
   has been copied.  Overlap is NOT handled correctly.
   Copyright (C) 1991, 1997 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */


#include <string.h>
#include <memcopy.h>
#include <pagecopy.h>

#undef memcpy

/* Copy LEN bytes from SRCPP to DSTPP, walking from the lowest address to
   the highest, and return DSTPP.  There is no check for overlapping
   regions.  The function is written as an old-style (K&R) definition.
   NOTE(review): OP_T_THRES, OPSIZ and the *_COPY_FWD macros are not
   defined here (presumably in <memcopy.h> / <pagecopy.h>); they appear to
   update their dstp/srcp/len arguments in place -- confirm against those
   headers.  */
void *
memcpy (dstpp, srcpp, len)
     void *dstpp;
     const void *srcpp;
     size_t len;
{
  /* Work on the addresses as integers so the copy macros can advance them.  */
  unsigned long int dstp = (long int) dstpp;
  unsigned long int srcp = (long int) srcpp;

  /* Copy from the beginning to the end.  */

  /* If there are not too few bytes to copy, use word copy.  */
  if (len >= OP_T_THRES)
    {
      /* Copy just a few bytes to make DSTP aligned.  (-dstp) % OPSIZ is the
         distance from DSTP up to the next multiple of OPSIZ.  */
      len -= (-dstp) % OPSIZ;
      BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);

      /* Copy whole pages from SRCP to DSTP by virtual address manipulation,
         as much as possible.  */


      PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);

      /* Copy from SRCP to DSTP taking advantage of the known alignment of
         DSTP.  Number of bytes remaining is put in the third argument,
         i.e. in LEN.  This number may vary from machine to machine.  */


      WORD_COPY_FWD (dstp, srcp, len, len);

      /* Fall out and copy the tail.  */
    }

  /* There are just a few bytes to copy.  Use byte memory operations.  */
  BYTE_COPY_FWD (dstp, srcp, len);

  /* The caller gets back the original, unadvanced destination pointer.  */
  return dstpp;
}
 

 

 

我们来看看memmove的源码:

 

 

/* Copy memory to memory until the specified number of bytes
   has been copied.  Overlap is handled correctly.
   Copyright (C) 1991, 1995, 1996, 1997 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */


#include <string.h>
#include <memcopy.h>
#include <pagecopy.h>

/* All this is so that bcopy.c can #include
   this file after defining some things.  */

/* Default configuration, used when the including file has not already
   defined these names: a1/a2 are the names of the first and second
   pointer parameters (dest and src here), a1const/a2const are the
   qualifiers applied to them, RETURN(s) is how the function hands back
   its result and rettype is its return type.  */
#ifndef a1
#define a1       dest   /* First arg is DEST.  */
#define a1const
#define a2       src    /* Second arg is SRC.  */
#define a2const  const
#undef memmove
#endif
#if     !defined(RETURN) || !defined(rettype)
#define RETURN(s)        return (s)      /* Return DEST.  */
#define rettype    void *
#endif


/* Copy LEN bytes from SRC to DEST and hand DEST back through RETURN.
   Unlike the plain forward copy, the direction is chosen from the
   relative position of the two regions: forward when that cannot
   overwrite source bytes that are still to be read, backward otherwise.
   a1, a2, a1const, a2const, rettype and RETURN are macros; with the
   defaults they expand to dest, src, nothing, const, void * and return.
   NOTE(review): OP_T_THRES, OPSIZ and the *_COPY_* macros are not defined
   here (presumably in <memcopy.h> / <pagecopy.h>); they appear to update
   their dstp/srcp/len arguments in place -- confirm against those
   headers.  */
rettype
memmove (a1, a2, len)
     a1const void *a1;
     a2const void *a2;
     size_t len;
{
  /* Work on the addresses as integers so the copy macros can advance them.  */
  unsigned long int dstp = (long int) dest;
  unsigned long int srcp = (long int) src;

  /* This test makes the forward copying code be used whenever possible.
     Reduces the working set.  */

  /* Because the subtraction is unsigned, DSTP below SRCP wraps around to a
     huge value, so the test is true both when DEST lies before SRC and when
     DEST starts at or beyond SRC + LEN.  It is false only when DEST lies
     inside [SRC, SRC + LEN).  */
  if (dstp - srcp >= len)       /* *Unsigned* compare!  */
    {
      /* Copy from the beginning to the end.  */

      /* If there are not too few bytes to copy, use word copy.  */
      if (len >= OP_T_THRES)
        {
          /* Copy just a few bytes to make DSTP aligned.  */
          len -= (-dstp) % OPSIZ;
          BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);

          /* Copy whole pages from SRCP to DSTP by virtual address
             manipulation, as much as possible.  */


          PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);

          /* Copy from SRCP to DSTP taking advantage of the known
             alignment of DSTP.  Number of bytes remaining is put
             in the third argument, i.e. in LEN.  This number may
             vary from machine to machine.  */


          WORD_COPY_FWD (dstp, srcp, len, len);

          /* Fall out and copy the tail.  */
        }

      /* There are just a few bytes to copy.  Use byte memory operations.  */
      BYTE_COPY_FWD (dstp, srcp, len);
    }
  else
    {
      /* Copy from the end to the beginning.  Both cursors are first moved
         to one past the last byte of their region.  */
      srcp += len;
      dstp += len;

      /* If there are not too few bytes to copy, use word copy.  */
      if (len >= OP_T_THRES)
        {
          /* Copy just a few bytes to make DSTP aligned.  dstp % OPSIZ is the
             distance from DSTP down to the previous multiple of OPSIZ.  */
          len -= dstp % OPSIZ;
          BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);

          /* Copy from SRCP to DSTP taking advantage of the known
             alignment of DSTP.  Number of bytes remaining is put
             in the third argument, i.e. in LEN.  This number may
             vary from machine to machine.  */


          WORD_COPY_BWD (dstp, srcp, len, len);

          /* Fall out and copy the tail.  */
        }

      /* There are just a few bytes to copy.  Use byte memory operations.  */
      BYTE_COPY_BWD (dstp, srcp, len);
    }

  /* Hand back the original destination pointer, not the advanced cursor.  */
  RETURN (dest);
}
 

一进函数我们“惊喜”的发现了:

 

if (dstp - srcp >= len)

这里对源地址和目的地址进行了判断:如果从前向后拷贝不会覆盖尚未拷贝的源数据(目的地址在源地址之前,或者两段内存根本不重叠),我们可以看到memmove和memcpy所做的处理几乎完全相同;但在else里面我们看到了我们想要的内容。

基本处理思路:从前向后拷贝不是有可能把还没有处理的数据“冲掉”么?那么我们就从最后一个字节开始向前拷贝,这样就不会存在数据被覆盖的问题了吧。。:)

前面提到这个问题和库函数的实现有关: 如果拿VC6.0作为编译器的话,其库的实现memcpy和memmove的结果是一样的。这样处理的原因不明。哪位知道讲解一下或者有代码让我看看都不胜感激!

getchar 的思考v0.2

2008年7月10日 05:03

通常我们学到的库函数getchar()会从标准输入得到一个字符,其返回值为int(read as an `unsigned char', and cast to `int')。返回值为int的原因大致就是为了照顾特殊的EOF吧。

 

如果运行下面的程序:

 

 

#include <stdio.h>

/*
 * Echo program used as the running example: reads characters with
 * getchar() and writes each one back with putchar().
 * NOTE(review): `a` is declared as char although getchar() returns int;
 * that mismatch is the defect the surrounding article analyses, so it is
 * intentionally left in place here.
 */
int main(void)
{
      char a = 0;

      /* The loop ends when the stored char is 0 or compares equal to EOF. */
      while ( (a = getchar()) && (a != EOF)) {
           putchar(a);
      }
      return 0;
}

 

 

==========

 

Hello,World!

 

输出:

 

Hello,World!

 

会发现居然能够输出整个字符串。其实跟踪一下代码即可发现原因。另外,在手册中还有说明:'getchar' is a macro, defined in `stdio.h'. You can use `getchar' to get the next single character from the standard input stream. As a side effect, `getchar' advances the standard input's current position indicator.这个就解释了为什么会把整个终端输入的所有内容都输出。

 

==========

这个程序还有更重要的问题是关于getchar()这个(类)函数的使用(返回值)。

RETURNS

The next character (read as an `unsigned char', and cast to `int')

 

这意味着getchar的返回值会是int。让我们看看代码,来深入了解一下为什么getchar要返回一个int值。

在glibc的源码中getchar()的实现如下:

 

/* Fetch the next byte from stream _fp without taking its lock.  When the
   read pointer has reached the end of the buffered data, __uflow (_fp) is
   called; otherwise the byte at _IO_read_ptr is read as an unsigned char
   and the pointer is advanced.  Each line of the definition must end in a
   backslash so that the three lines form a single macro.
   NOTE(review): _IO_BE looks like a branch-prediction hint wrapper; its
   definition is not shown here -- confirm.  */
#define _IO_getc_unlocked(_fp) \
(_IO_BE ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end, 0) \
? __uflow (_fp) : *(unsigned char *) (_fp)->_IO_read_ptr++)


/* Read one character from _IO_stdin and return it as an int.  The stream
   is locked around the unlocked read, and the value is held in an int so
   that whatever _IO_getc_unlocked yields is passed through unchanged.  */
int
getchar ()
{
int result;
/* Acquire the stream lock before touching the read pointers.  */
_IO_acquire_lock (_IO_stdin);
result = _IO_getc_unlocked (_IO_stdin);
_IO_release_lock (_IO_stdin);
return result;
}

 

C中EOF是一个整型值,和任何unsigned char的取值都不相同。在上面的代码中,_IO_getc_unlocked会返回当前_IO_read_ptr所指向的字符,类型为unsigned char;当缓冲区中的数据读完时就会调用__uflow,在没有更多输入时__uflow就会返回EOF。EOF在glibc中定义为-1,这个值和任何一个unsigned char都不相同。因此在_IO_getc_unlocked返回的时候就要返回一个int值来“照顾”EOF这个特殊角色了。

 

再请回头看:在最开始的测试程序中变量a的类型是char。问题就出在这里了:char这种类型是signed char还是unsigned char是由编译器决定的。既然EOF是一个和unsigned char取值范围(0x00~0xFF)都不相同的值,那么把getchar()的返回值截断存入char之后,EOF就无法和正常数据区分开了:如果char是unsigned char,EOF(-1)会被存成0xFF,提升为int后是255,a != EOF永远成立,循环无法因EOF而结束;如果char是signed char,合法的输入字节0xFF会被存成-1,与EOF相等,循环会提前结束。既要能表示全部unsigned char数值(0x00~0xFF),又要能表示EOF,显然int是比较合适的类型。

 

==========

关于while ( (a = getchar()) && (a != EOF))

 

这里我有意写成这样的原因是想着重说明一下:赋值表达式的值即为赋值以后左操作数的值。因此(除了读到'\0'时也会结束循环之外)上式大致等价于 while ((a = getchar()) != EOF)。还要说一下的是 = 的优先级要比 != 低,因此要给 a = getchar() 加括号,保证先赋值再比较。

 

最后要感谢

 

getchar()的思考

2008年7月03日 07:14

 

    通常我们学到的库函数getchar()会从标准输入得到一个字符,其返回值为int(read as an `unsigned char', and cast to `int').返回值为int的原因大致就是为了照顾特殊的EOF吧。

     但如果运行下面的程序:

 

#include <stdio.h>

/*
 * Echo program used as the running example: reads characters with
 * getchar() and writes each one back with putchar().
 * NOTE(review): `a` is declared as char although getchar() returns int;
 * that mismatch is the defect the surrounding article analyses, so it is
 * intentionally left in place here.
 */
int main(void)
{
        char a = 0;

        /* The loop ends when the stored char is 0 or compares equal to EOF. */
        while ( (a = getchar()) && (a != EOF)) {
                putchar(a);
        }
    return 0;
}

 

Hello,World!
Hello,World!

 

会发现居然能够输出整个字符串。其实跟踪一下代码即可发现:

 

7:        while ( (a = getchar()) && (a != EOF)) {
0040D71C A1 44 4A 42 00       mov         eax,[__iob+4 (00424a44)]
0040D721 83 E8 01             sub         eax,1
0040D724 A3 44 4A 42 00       mov         [__iob+4 (00424a44)],eax
0040D729 83 3D 44 4A 42 00 00 cmp         dword ptr [__iob+4 (00424a44)],0
0040D730 7C 21                jl          main+53h (0040d753)
0040D732 8B 0D 40 4A 42 00    mov         ecx,dword ptr [__iob (00424a40)]
0040D738 0F BE 11             movsx       edx,byte ptr [ecx]
0040D73B 81 E2 FF 00 00 00    and         edx,0FFh
0040D741 89 55 F8             mov         dword ptr [ebp-8],edx
0040D744 A1 40 4A 42 00       mov         eax,[__iob (00424a40)]
0040D749 83 C0 01             add         eax,1
0040D74C A3 40 4A 42 00       mov         [__iob (00424a40)],eax
0040D751 EB 10                jmp         main+63h (0040d763)
0040D753 68 40 4A 42 00       push        offset __iob (00424a40)
0040D758 E8 F3 04 00 00       call        _filbuf (0040dc50)
0040D75D 83 C4 04             add         esp,4
0040D760 89 45 F8             mov         dword ptr [ebp-8],eax
0040D763 8A 4D F8             mov         cl,byte ptr [ebp-8]
0040D766 88 4D FC             mov         byte ptr [ebp-4],cl
0040D769 0F BE 55 FC          movsx       edx,byte ptr [ebp-4]
0040D76D 85 D2                test        edx,edx
0040D76F 74 64                je          main+0D5h (0040d7d5)
0040D771 0F BE 45 FC          movsx       eax,byte ptr [ebp-4]
0040D775 83 F8 FF             cmp         eax,0FFFFFFFFh
0040D778 74 5B                je          main+0D5h (0040d7d5)
 

 

在手册中还有说明:'getchar'  is a macro, defined in `stdio.h'.  You can use `getchar' to get the next single character from the  standard  input  stream.   As  a  side effect,  `getchar'  advances the standard input's current position indicator.这个就解释了为什么会把整个终端输入的所有内容都输出。

别急,这还远远没有完。

这个程序还有的问题是关于getchar()这个函数的使用(返回值)。

RETURNS
The next character (read as an `unsigned char', and cast to `int')

这意味着getchar的返回值会是int,但请注意在程序中变量a的类型是char。问题就出在这里了:char这种类型是signed还是unsigned是由编译器决定的。那么将int类型的值赋给char类型显然会有数据截断(或者回绕),对于0x80及以上(最高位为1)的数据显然就出了问题。

另外还有的问题是:

while ( (a = getchar()) && (a != EOF))

我们的比较条件是a != EOF。当char类型和int类型比较的时候,char会先被提升(integer promotion)为int类型,提升的结果和编译器对char的实现相关,这时会有以下问题:

如果编译器默认把char当作unsigned char,那么对于0x80及以上(最高位为1)的数据,在提升到int类型的时候高位补0,成为0x000000XX(例如EOF被截断存入a后是0xFF,提升后是0x000000FF,即255),永远不等于EOF(-1),这样a != EOF这个条件就成为了永远成立的无用判断了。

嗯,让我们想一想还有什么地方还有错呢?