读一读 fread 的源码 。

Windows 下的 fread 属于 MSVC CRT 中的函数,而 CRT 是介于操作系统和 C 语言之间的一层。由于线程调度、IO 等机制在不同的系统下各不相同,为了衔接语言和操作系统才有了 CRT。Linux 下也有对应的 CRT,比如 glibc。这次看一下 MSVC CRT(挖坑)。

在 visual studio 目录下存在 fread 的源码。
虽然写的是 cpp ,不过老底还是 c。声明也是 c 的声明。=。=

size_t __cdecl fread(

// Unchecked read entry point: hands the request to fread_s together with
// _CRT_UNBOUNDED_BUFFER_SIZE as the destination capacity, so the caller alone
// is responsible for buffer being large enough for
// element_size * element_count bytes.
//
// Returns whatever fread_s returns for the same arguments.
extern "C" size_t __cdecl fread(
    void*  const buffer,
    size_t const element_size,
    size_t const element_count,
    FILE*  const stream
    )
{
    // Assume there is enough space in the destination buffer; no real capacity
    // is known here, so the "unbounded" marker is passed in its place.
#pragma warning(suppress:__WARNING_POTENTIAL_BUFFER_OVERFLOW_HIGH_PRIORITY) // 26015 - fread is unsafe
    size_t const elements_read = fread_s(buffer, _CRT_UNBOUNDED_BUFFER_SIZE, element_size, element_count, stream);

    return elements_read;
}

这里只是简单地调用了 fread_s。这个 Assume 很有意思:它其实是直接认为 buffer 足够大、可以容纳要读入的数据,其实这里面就有溢出问题。

/*
 * Demonstrates that fread() performs no bounds check on the destination:
 * the buffer below is deliberately 1 byte long while 32 bytes are requested,
 * so running this against a file longer than one byte writes past the end of
 * the allocation. The mismatch is the point of the demo -- do not copy it.
 *
 * Returns 0 after the read attempt, 1 if the buffer or the file could not be
 * set up.
 */
int main() {
	/* Non-const: fread() stores into this buffer. INTENTIONALLY undersized. */
	char* buffer = (char *)malloc(1);
	if (buffer == NULL) {
		printf("Sad,I can't allocate the buffer");
		return 1;
	}

	FILE* file = fopen(".\\text.txt", "rb");
	if (file == NULL) {
		/* Stop here rather than handing a null stream to fread(). */
		printf("Sad,I can't open that file");
		free(buffer);
		return 1;
	}

	/* INTENTIONAL overflow: asks for 32 bytes into a 1-byte allocation. */
	size_t const bytes_read = fread(buffer, 1, 32, file);
	if (bytes_read != 0) {
		/* fread() does not NUL-terminate, so bound the print by the count. */
		printf("%.*s", (int)bytes_read, buffer);
	} else {
		printf("Sad,I can't read that file");
	}

	fclose(file);
	/*
	 * buffer is not freed on this path on purpose: after the deliberate
	 * overflow the heap bookkeeping next to it can no longer be trusted, and
	 * the process is about to exit anyway.
	 */
	return 0;
}

经过简单的调试,确实是假定足够大了 233。

// Locking front end for _fread_nolock_s: checks the stream pointer, takes the
// stream lock, delegates the actual read, and releases the lock again.
//
//   buffer        - destination for the data
//   buffer_size   - capacity of buffer in bytes, or _CRT_UNBOUNDED_BUFFER_SIZE
//   element_size  - size in bytes of one element
//   element_count - number of elements requested
//   stream        - stream to read from
//
// Returns 0 when either size argument is zero; otherwise the value produced by
// _fread_nolock_s (a null stream goes through _VALIDATE_RETURN with EINVAL and
// a return value of 0).
extern "C" size_t __cdecl fread_s(
    void*  const buffer,
    size_t const buffer_size,
    size_t const element_size,
    size_t const element_count,
    FILE*  const stream
    )
{
    // An empty request is answered immediately, before stream is even looked at.
    bool const nothing_requested = (element_size == 0) || (element_count == 0);
    if (nothing_requested)
    {
        return 0;
    }

    // Only the stream pointer is checked at this level, because it is needed to
    // take the lock; every other argument is validated by the _nolock function.
    if (stream == nullptr)
    {
        // A caller that supplied a real capacity gets its buffer overwritten
        // with the fill pattern before the failure is reported.
        if (buffer_size != _CRT_UNBOUNDED_BUFFER_SIZE)
        {
            memset(buffer, _BUFFER_FILL_PATTERN, buffer_size);
        }

        _VALIDATE_RETURN(stream != nullptr, EINVAL, 0);
    }

    size_t elements_read = 0;

    // The __finally block releases the lock on every way out of the read.
    _lock_file(stream);
    __try
    {
        elements_read = _fread_nolock_s(buffer, buffer_size, element_size, element_count, stream);
    }
    __finally
    {
        _unlock_file(stream);
    }

    return elements_read;
}

先不管是否有锁,前面先判断 stream 是否为空指针,如果为空则会抛出异常(调试一下就知道了)。这应该和 _VALIDATE_RETURN 有关。不过我很好奇,既然都要退出了,为什么还要给 buffer memset 一波?难道是祖传代码?或者有什么我不懂的东西?

这个 _lock_file 很有意思,我放最后看看

接下来就是重头戏 _fread_nolock_s,基本的实现也都在这里面,之前的函数最多只是套了层壳。

下次再更。。。

// Reads up to element_count elements of element_size bytes each from
// public_stream into buffer.  This function takes no lock itself; fread_s calls
// it between _lock_file and _unlock_file.
//
// Parameters:
//   buffer        - destination; checked for null (EINVAL).
//   buffer_size   - capacity of buffer in bytes, or _CRT_UNBOUNDED_BUFFER_SIZE.
//   element_size  - size of one element in bytes.
//   element_count - number of elements requested.
//   public_stream - stream to read from; wrapped in a __crt_stdio_stream.
//
// Returns the number of complete elements read: element_count on success, or
// (bytes read so far) / element_size when EOF, a read error, or an inconsistent
// stream count stops the loop early.  Returns 0 when element_size or
// element_count is zero.  The validation failures pass 0 to _VALIDATE_RETURN
// as the value to return.
//
// On the invalid-stream, size-overflow and "buffer too small" paths, a buffer
// whose size is not _CRT_UNBOUNDED_BUFFER_SIZE is first overwritten with
// _BUFFER_FILL_PATTERN.
extern "C" size_t __cdecl _fread_nolock_s(
    void*  const buffer,
    size_t const buffer_size,
    size_t const element_size,
    size_t const element_count,
    FILE*  const public_stream
    )
{
    __crt_stdio_stream const stream(public_stream);

    // Nothing was requested.  Returning here also keeps element_size nonzero
    // for the divisions further down.
    if (element_size == 0 || element_count == 0)
        return 0;

    _VALIDATE_RETURN(buffer != nullptr, EINVAL, 0);
    // The second condition rejects requests whose total byte count
    // (element_size * element_count) would not fit in a size_t.
    if (!stream.valid() || element_count > (SIZE_MAX / element_size))
    {
        if (buffer_size != _CRT_UNBOUNDED_BUFFER_SIZE)
            memset(buffer, _BUFFER_FILL_PATTERN, buffer_size);

        // At least one of these two conditions is false at this point.
        // NOTE(review): _VALIDATE_RETURN presumably reports the invalid
        // parameter, sets errno to the given code and returns the last
        // argument -- confirm against its definition.
        _VALIDATE_RETURN(stream.valid(),                             EINVAL, 0);
        _VALIDATE_RETURN(element_count <= (SIZE_MAX / element_size), EINVAL, 0);
    }

    // Figure out how big the buffer is; if the stream doesn't currently have a
    // buffer, we assume that we'll get one with the usual internal buffer size:
    unsigned stream_buffer_size = stream.has_any_buffer()
        ? stream->_bufsiz
        : _INTERNAL_BUFSIZ;

    // The total number of bytes to be read into the buffer:
    size_t const total_bytes = element_size * element_count;

    // Write cursor into the caller's buffer; advanced by every branch below.
    char* data = static_cast<char*>(buffer);

    // Read blocks of data from the stream until we have read the requested
    // number of elements or we fill the buffer.
    size_t remaining_bytes = total_bytes;
    size_t remaining_buffer = buffer_size;
    while (remaining_bytes != 0)
    {
        // If the stream is buffered and has characters, copy them into the
        // result buffer:
        if (stream.has_any_buffer() && stream->_cnt != 0)
        {
            // A negative count is treated as a corrupted stream state: flag
            // the error and report only the elements completed so far.
            if(stream->_cnt < 0)
            {
                _ASSERTE(("Inconsistent Stream Count. Flush between consecutive read and write", stream->_cnt >= 0));
                stream.set_flags(_IOERROR);
                return (total_bytes - remaining_bytes) / element_size;
            }

            // Take the smaller of what is still wanted and what the stream
            // buffer currently holds.
            unsigned const bytes_to_read = remaining_bytes < static_cast<size_t>(stream->_cnt)
                ? static_cast<unsigned>(remaining_bytes)
                : static_cast<unsigned>(stream->_cnt);

            if (bytes_to_read > remaining_buffer)
            {
                if (buffer_size != _CRT_UNBOUNDED_BUFFER_SIZE)
                    memset(buffer, _BUFFER_FILL_PATTERN, buffer_size);

                _VALIDATE_RETURN(("buffer too small", 0), ERANGE, 0)
            }

            memcpy_s(data, remaining_buffer, stream->_ptr, bytes_to_read);

            // Update the stream and local tracking variables to account for the
            // read.  Note that the number of bytes actually read is always equal
            // to the number of bytes that we expected to read, because the data
            // was already buffered in the stream.
            remaining_bytes  -= bytes_to_read;
            stream->_cnt     -= bytes_to_read;
            stream->_ptr     += bytes_to_read;
            data             += bytes_to_read;
            remaining_buffer -= bytes_to_read;
        }
        // There is no data remaining in the stream buffer to be read, and we
        // need to read more data than will fit in the buffer (or we need to read
        // at least enough data to fill the buffer completely):
        else if (remaining_bytes >= stream_buffer_size)
        {
            // We can read at most INT_MAX bytes at a time.  This is a hard limit
            // of the lowio _read() function.
            unsigned const maximum_bytes_to_read = remaining_bytes > INT_MAX
                ? static_cast<unsigned>(INT_MAX)
                : static_cast<unsigned>(remaining_bytes);

            // If the stream has a buffer, we want to read the largest chunk that
            // is a multiple of the buffer size, to keep the stream buffer state
            // consistent.  If the stream is not buffered, we can read the maximum
            // number of bytes that we can:
            unsigned const bytes_to_read = stream_buffer_size != 0
                ? static_cast<unsigned>(maximum_bytes_to_read - maximum_bytes_to_read % stream_buffer_size)
                : maximum_bytes_to_read;

            if (bytes_to_read > remaining_buffer)
            {
                if (buffer_size != _CRT_UNBOUNDED_BUFFER_SIZE)
                    memset(buffer, _BUFFER_FILL_PATTERN, buffer_size);

                _VALIDATE_RETURN(("buffer too small", 0), ERANGE, 0)
            }

            // We are about to read data directly from the underlying file
            // descriptor, bypassing the stream buffer.  We reset the stream
            // buffer state to ensure that future seeks do not incorrectly
            // assume that the buffer contents are valid.
            __acrt_stdio_reset_buffer(stream);

            // Do the read.  Note that if the stream is open in text mode, the
            // bytes_read may not be the same as the bytes_to_read, due to
            // newline translation.
            int const bytes_read = _read_nolock(_fileno(stream.public_stream()), data, bytes_to_read);
            if (bytes_read == 0)
            {
                // We encountered EOF:
                stream.set_flags(_IOEOF);
                return (total_bytes - remaining_bytes) / element_size;
            }
            else if (bytes_read < 0)
            {
                // The _read failed:
                stream.set_flags(_IOERROR);
                return (total_bytes - remaining_bytes) / element_size;
            }

            // Update the iteration state to reflect the read:
            remaining_bytes  -= bytes_read;
            data             += bytes_read;
            remaining_buffer -= bytes_read;
        }
        // Otherwise, the stream does not have a buffer, or the stream buffer
        // is full and there is insufficient space to do a direct read, so use
        // __acrt_stdio_refill_and_read_narrow_nolock:
        else
        {
            // NOTE(review): no stream flag is set in this branch on EOF;
            // presumably the refill helper records EOF/error on the stream
            // itself -- confirm.
            int const c = __acrt_stdio_refill_and_read_narrow_nolock(stream.public_stream());
            if (c == EOF)
                return (total_bytes - remaining_bytes) / element_size;

            // If we have filled the result buffer before we have read the
            // requested number of elements or reached EOF, it is an error:
            if (remaining_buffer == 0)
            {
                if (buffer_size != _CRT_UNBOUNDED_BUFFER_SIZE)
                    memset(buffer, _BUFFER_FILL_PATTERN, buffer_size);

                _VALIDATE_RETURN(("buffer too small", 0), ERANGE, 0)
            }

            // Store the single character that the refill call returned.
            *data++ = static_cast<char>(c);
            --remaining_bytes;
            --remaining_buffer;

            // Re-read the buffer size for the next iteration's branch choice.
            // NOTE(review): presumably the refill may have given the stream a
            // buffer of a different size than first assumed -- confirm.
            stream_buffer_size = stream->_bufsiz;
        }
    }

    return element_count; // Success!
}

这段代码主要需要注意的是,实际的文件流有两个区域:一个是 stream->_cnt 所描述的流缓冲区,一个是 stream.public_stream()。前者是 stream.public_stream() 和 buffer 之间的一层缓冲,后者似乎更接近文件流本身。

由于 stream->_cnt 对应的缓冲区可能已经被读完了,所以有几个不同的分支;而其余的一些比较,则是为了能多读就多读。

lock ? unlock

https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/lock-file?view=msvc-160

这是官网的一个例子,很有趣。不过从 output 的结果来看,和官网给出的并不一样:要么是我的电脑硬件与他的调试机不一样,要么就是 MSVC CRT 在线程上的实现有所变化。

// crt_lock_file.c
// This example creates multiple threads that write to standard output
// concurrently, first with _lock_file, then without.

#include <stdio.h>
#include <process.h>// _beginthread
#include <windows.h>// HANDLE

// Thread body: writes the NUL-terminated string passed in str to stdout 1000
// times. Each repetition is emitted character by character with
// _fputc_nolock while this thread holds the stdout lock taken by _lock_file.
void Task_locked( void* str )
{
    const char* const text = (const char*)str;
    int remaining = 1000;

    while( remaining-- > 0 )
    {
        _lock_file( stdout );

        const char* cursor = text;
        while( *cursor != '\0' )
        {
            // Kept free of side effects in the argument, in case
            // _fputc_nolock evaluates it more than once.
            _fputc_nolock( *cursor, stdout );
            ++cursor;
        }

        _unlock_file( stdout );
    }
}

// Thread body: writes the NUL-terminated string passed in str to stdout 1000
// times, one fputc call per character, without taking any lock around a whole
// repetition.
void Task_unlocked( void* str )
{
    const char* const text = (const char*)str;
    int remaining = 1000;

    while( remaining-- > 0 )
    {
        const char* cursor = text;
        while( *cursor != '\0' )
        {
            fputc( *cursor, stdout );
            ++cursor;
        }
    }
}

int main()
{
    HANDLE h[3];
    h[0] = (HANDLE)_beginthread( &Task_locked, 0, "First\n" );
    h[1] = (HANDLE)_beginthread( &Task_locked, 0, "Second\n" );
    h[2] = (HANDLE)_beginthread( &Task_locked, 0, "Third\n" );

    WaitForMultipleObjects( 3, h, true, INFINITE );

    h[0] = (HANDLE)_beginthread( &Task_unlocked, 0, "First\n" );
    h[1] = (HANDLE)_beginthread( &Task_unlocked, 0, "Second\n" );
    h[2] = (HANDLE)_beginthread( &Task_unlocked, 0, "Third\n" );

    WaitForMultipleObjects( 3, h, true, INFINITE );
}