why
I wrote this to fetch a web page. You can use the code as the basis of a spider (which is what I am doing).
what
It can fetch any kind of web page. It supports HTTPS, HTTP, FTP, Gopher, you name it. It even supports local files.
It handles NTLM challenge response. If it doesn't work for you, then go to
codeguru.com and ask them why.
who
I borrowed heavily from
here and
here.
how
To compile this code, run Microsoft Visual Studio 2005 (v8) and click File, New, Project. Click Win32 Console.
Click Next. Select MFC, but not ATL. Then paste this code into the main .cpp file.
where
download precompiled binary here
getwebpage.exe
code
// spider4.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "spider4.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
#include "afxinet.h"
// Copies the resource identified by `url` into the local file `filename`.
//
// Parameters:
//   url          - URL handed to CInternetSession::OpenURL.
//   filename     - Path of the local file to create and write in binary mode.
//   errorMessage - Set to "OK" on entry; on failure it is replaced with a
//                  description of the error.
//
// Returns TRUE when the whole resource was copied, FALSE otherwise. MFC
// exceptions raised while transferring are caught here and reported through
// errorMessage rather than propagated to the caller.
BOOL getURLFile(const LPCTSTR url, const LPCTSTR filename, CString &errorMessage) {
    // Owns the file object returned by OpenURL so that it is released on every
    // exit path (normal return, early return, or exception unwinding).
    struct RemoteFileGuard {
        CFile *file;
        explicit RemoteFileGuard(CFile *f) : file(f) {}
        ~RemoteFileGuard() { delete file; }
    };

    const UINT FILEBUFLEN = 1024;
    char httpBuff[FILEBUFLEN];
    TCHAR szCause[255];
    errorMessage = "OK";
    TRY {
        CInternetSession session;
        session.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 1000);
        session.SetOption(INTERNET_OPTION_CONNECT_RETRIES, 3);

        // Declared after `session` so the remote file is destroyed before the
        // session that produced it.
        RemoteFileGuard remote(session.OpenURL(url, 1,
            INTERNET_FLAG_TRANSFER_BINARY | INTERNET_FLAG_RELOAD | INTERNET_FLAG_KEEP_CONNECTION));
        if (remote.file == NULL) {
            // OpenURL reports an unparseable URL by returning NULL instead of throwing.
            errorMessage = _T("Unable to open URL");
            return FALSE;
        }

        CFile localFile(filename, CFile::modeCreate |
                                  CFile::modeWrite |
                                  CFile::typeBinary);

        // Read returns the number of bytes actually transferred; 0 marks the end.
        UINT numBytes = 0;
        while ((numBytes = remote.file->Read(httpBuff, FILEBUFLEN)) > 0) {
            localFile.Write(httpBuff, numBytes);
        }

        // Close explicitly so a failure while closing is reported via CATCH_ALL.
        remote.file->Close();
    }
    CATCH_ALL(error) {
        // GetErrorMessage may fail and leave the buffer untouched, so start
        // from an empty string and fall back to a generic message.
        szCause[0] = _T('\0');
        if (error->GetErrorMessage(szCause, sizeof(szCause) / sizeof(szCause[0]), NULL)
            && szCause[0] != _T('\0')) {
            errorMessage = szCause;
        } else {
            errorMessage = _T("Unknown error");
        }
        return FALSE;
    }
    END_CATCH_ALL;
    return TRUE;
}
// The one and only application object
// (MFC itself is initialized by the AfxWinInit call in _tmain.)
CWinApp theApp;
// Lets _tmain use cout and endl without the std:: prefix.
// NOTE(review): <iostream> is not included directly in this file; presumably
// it comes in through stdafx.h -- confirm.
using namespace std;
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
int nRetCode = 0;
// initialize MFC and print and error on failure
if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
{
// TODO: change error code to suit your needs
_tprintf(_T("Fatal Error: MFC initialization failed\n"));
nRetCode = 1;
}
else
{
if (argc < 2)
{
cout << "Params: url file\ncopies data from the url to the file\nurl can be any protocol, http, ftp, even c:.\nfile is optional. it can have blanks but only if it is in quotes.";
return nRetCode;
}
wchar_t *fn;
if (argc < 3) fn = _T("con:");
else fn = argv[2];
CString errorMessage;
if (! getURLFile(argv[1], fn, errorMessage) )
{
cout << errorMessage << endl;
}
}
return nRetCode;
}