why

I wrote this to fetch a web page. You can use the code as the basis of a spider (which is what I am doing).

what

It can fetch any kind of web page. It supports HTTPS, HTTP, FTP, Gopher, you name it; it even supports local files. It handles NTLM challenge-response authentication. If it doesn't work for you, then go to codeguru.com and ask them why.

who

I borrowed heavily from here and here.

how

To compile this code, run Microsoft Visual Studio 2005 (v8) and click File, New, Project. Click Win32 Console. Click Next. Select MFC, but not ATL. Then paste this code into the main .cpp file.

where

Download the precompiled binary here: getwebpage.exe

code

// spider4.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "spider4.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif



#include "afxinet.h"

BOOL getURLFile(const LPCTSTR url, const LPCTSTR filename, CString &errorMessage) {
    const int FILEBUFLEN = 1024;
    char httpBuff[FILEBUFLEN];
    TCHAR szCause[255];
    errorMessage = "OK";

    TRY {
        CInternetSession session;
        session.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 1000);
        session.SetOption(INTERNET_OPTION_CONNECT_RETRIES, 3);
        CFile *remoteFile = session.OpenURL(url, 1 ,
            INTERNET_FLAG_TRANSFER_BINARY | INTERNET_FLAG_RELOAD | INTERNET_FLAG_KEEP_CONNECTION);
        CFile localFile(filename, CFile::modeCreate |
            CFile::modeWrite |
            CFile::typeBinary);
        int numBytes;
        while (numBytes = remoteFile->Read(httpBuff, FILEBUFLEN)) {
            localFile.Write(httpBuff, numBytes);
        }
    }
    CATCH_ALL(error) {
        error->GetErrorMessage(szCause,254,NULL);
        errorMessage.Format(_T("%s"),szCause);
        return FALSE;
    }
    END_CATCH_ALL;
    return TRUE;
}




// The one and only application object.
// MFC itself is initialised explicitly by the AfxWinInit call in _tmain.

CWinApp theApp;

// Lets the rest of this file use standard-library names without the std:: prefix.
using namespace std;

int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
    int nRetCode = 0;

    // initialize MFC and print and error on failure
    if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
    {
        // TODO: change error code to suit your needs
        _tprintf(_T("Fatal Error: MFC initialization failed\n"));
        nRetCode = 1;
    }
    else
    {
        if (argc < 2)
        {
            cout << "Params: url file\ncopies data from the url to the file\nurl can be any protocol, http, ftp, even c:.\nfile is optional. it can have blanks but only if it is in quotes.";
            return nRetCode;
        }
        wchar_t *fn;
        if (argc < 3) fn = _T("con:");
        else fn = argv[2];

        CString errorMessage;

        if (! getURLFile(argv[1], fn, errorMessage) )
        {
            cout << errorMessage << endl;
        }
    }

    return nRetCode;
}