// GETURL.C -- Win32 console app
// Andrew Schulman, February 1996
// andrew@ora.com
// cl geturl.c wsock32.lib
// geturl http://www.ora.com

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <io.h>
#include "winsock.h"

// Write s followed by a newline to stdout, then terminate the process
// with exit status 1. Never returns.
void fail(const char *s)
{
    printf("%s\n", s);
    exit(1);
}

// Prints "<msg> <s>" (or just s when msg is null) as one line on stdout.
// Defined further down in this file.
int output(char *msg, char *s);

// Report "FAIL <s>" through output() and then return 0 from the ENCLOSING
// function. Only use it where 0 is that function's failure value.
// Wrapped in do/while(0) so that `msg("x");` is a single statement and stays
// safe inside an if/else without braces.
#define msg(s)  do { output("FAIL", s); return 0; } while (0)

#define SZWEBINFO_SIZE  20480   // bytes in the receive buffer (see RecvWebFile/get_file)
#define WINSOCK_VERSION 0x0101  // version word handed to WSAStartup (Winsock 1.1)
#define NO_FLAGS 0              // "no flags" argument for send()/recv()
#define HTTP_PORT 80            // TCP port used for http:// URLs
#define FTP_PORT 21             // TCP port used for ftp:// URLs

// Start Winsock, resolve szHostName and open a TCP connection to it on the
// given port.
//
// Returns the connected socket, or INVALID_SOCKET on any failure (after
// printing "FAIL <step>" through output()). Earlier versions returned 0 on
// failure, which callers comparing against INVALID_SOCKET could not detect.
//
// NOTE(review): WSAStartup is called on every invocation and nothing in this
// file calls WSACleanup.
SOCKET ConnectWebServerSocket(char * szHostName, int port)
{
    WSADATA wsaData;
    LPHOSTENT pHostEnt;
    SOCKADDR_IN sockAddr;
    SOCKET nServerSocket;

    if (WSAStartup(WINSOCK_VERSION, &wsaData))
    {
        output("FAIL", "WSAStartup");
        return INVALID_SOCKET;
    }

    pHostEnt = gethostbyname(szHostName);
    if (!pHostEnt)
    {
        output("FAIL", "gethostbyname");
        return INVALID_SOCKET;
    }

    nServerSocket = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (nServerSocket == INVALID_SOCKET)
    {
        output("FAIL", "socket");
        return INVALID_SOCKET;
    }

    // Zero the whole structure first so the padding bytes are defined.
    memset(&sockAddr, 0, sizeof(sockAddr));
    sockAddr.sin_family = AF_INET;
    sockAddr.sin_port = htons((unsigned short)port);
    // Use the first address returned for the host.
    sockAddr.sin_addr = *((LPIN_ADDR)*pHostEnt->h_addr_list);

    if (connect(nServerSocket, (LPSOCKADDR)&sockAddr, sizeof(sockAddr)) != 0)
    {
        // Do not leak the socket handle when the connection attempt fails.
        closesocket(nServerSocket);
        output("FAIL", "connect");
        return INVALID_SOCKET;
    }

    return nServerSocket;
}

// Send the request line "GET <szQuery>\n" on nSocket.
//
// Returns 0 on success and -1 on failure (after printing "FAIL ..." through
// output()). The caller tests the result with `!= 0`, so failure must not be
// reported as 0.
int SendWebQuery(SOCKET nSocket, char * szQuery)
{
    char szWebQuery[1024];
    size_t nQueryLen = strlen(szQuery);

    // "GET " (4) + query + "\n" (1) + terminating NUL (1) must fit.
    if (nQueryLen > sizeof(szWebQuery) - 6)
    {
        output("FAIL", "query too long");
        return -1;
    }

    strcpy(szWebQuery, "GET ");
    strcat(szWebQuery, szQuery);
    strcat(szWebQuery, "\n");

    if (send(nSocket, szWebQuery, (int)strlen(szWebQuery), NO_FLAGS) ==
        SOCKET_ERROR)
    {
        output("FAIL", "send");
        return -1;
    }
    return 0;
}
                                        
// Read the next chunk from nSocket into szWebInfo, which must have room for
// SZWEBINFO_SIZE bytes. The data is not NUL-terminated.
//
// Returns the number of bytes received. Returns 0 both when recv() itself
// returns 0 and when it fails; in the failure case "FAIL recv" is printed.
UINT RecvWebFile(SOCKET nSocket, char *szWebInfo)
{
    int nCharRecv = recv(nSocket, szWebInfo, SZWEBINFO_SIZE, NO_FLAGS);

    if (nCharRecv == SOCKET_ERROR)
    {
        output("FAIL", "recv");
        return 0;
    }
    return (UINT)nCharRecv;
}

// Forward declarations; both functions are defined later in this file.
int get_file(SOCKET sock, char *filename);
int get_url(char *url);

// Print one line on stdout and flush it immediately.
//
// When msg is non-null the line is "<msg> <s>", otherwise it is just s.
// Always returns 0; the function used to fall off its end without returning
// a value despite being declared int.
int output(char *msg, char *s)
{
    if (msg)
    {
        fputs(msg, stdout);
        putchar(' ');
    }
    puts(s);
    fflush(stdout);
    return 0;
}

#ifdef DO_LATER
// Handle the text of one HTML tag (the characters between '<' and '>').
//
// If the tag text starts with 'A' or 'a', print the first double-quoted
// string in it on its own line. buf is modified: the closing quote, if any,
// is overwritten with NUL. Tags without any quote are ignored; earlier
// versions walked past the end of the string in that case.
// sock is currently unused.
void process_tag(SOCKET sock, char *buf)
{
    char *p, *q;

    // Explicit comparison instead of toupper(): no <ctype.h> dependency and
    // no problem with negative char values.
    if (*buf != 'A' && *buf != 'a')
        return;

    p = strchr(buf, '\"');
    if (!p)
        return;                 // no quoted value in this tag
    p++;                        // first character after the opening quote

    q = strchr(p, '\"');
    if (q)
        *q = 0;                 // cut at the closing quote when there is one
    puts(p);
    // later do IMG, FORMS, etc.
}

// Scan recv bytes of HTML starting at szWebInfo and pass the text of every
// complete <...> tag to process_tag().
//
// Tag text longer than the local buffer is truncated rather than overflowing
// it. A '>' that does not close an open tag is ignored. A non-positive recv
// scans nothing.
//
// Open problem from the original author: the scanner state is local, so a tag
// split across two calls is lost. The plan was to build up an array of hrefs
// first and process them afterwards (also needed for breadth-first traversal).
void parse_html(SOCKET sock, int recv, char *szWebInfo)
{
    char buf[4096];
    size_t len = 0;
    int in_tag = 0;
    int i;
    char *p;

    for (i = recv, p = szWebInfo; i > 0; i--, p++)
    {
        if (*p == '<')
        {
            // Start (or restart) collecting tag text.
            in_tag = 1;
            len = 0;
        }
        else if (*p == '>')
        {
            if (in_tag)
            {
                in_tag = 0;
                buf[len] = 0;
                process_tag(sock, buf);
            }
        }
        else if (in_tag && len < sizeof(buf) - 1)
        {
            // Always leave room for the terminating NUL.
            buf[len++] = *p;
        }
    }
}
#endif

// Split a URL into host name and path.
//
//   url      - e.g. "http://www.ora.com/index.html"; it is only read
//   hostname - receives the text up to the first '/' after the scheme;
//              must have room for at least 256 bytes
//   filename - receives the rest starting at that '/', or "/" when the URL
//              has no path; must have room for at least 512 bytes
//
// Returns HTTP_PORT for "http://", FTP_PORT for "ftp://" and 0 on failure.
// A URL containing "//" with any other prefix prints "FAIL protocol". A URL
// without "//" has no recognised scheme: the outputs are still filled in but
// 0 is returned. An empty host name, or parts that do not fit the buffers,
// also fail instead of overflowing the caller's arrays.
int split(char *url, char *hostname, char *filename)
{
    // Capacities of the buffers get_url() passes in. split() has no size
    // parameters, so these must stay in step with that caller.
    enum { SPLIT_HOST_CAP = 256, SPLIT_FILE_CAP = 512 };
    const char *path;
    size_t host_len;
    int protocol = 0;

    if (strstr(url, "//"))
    {
        if (strncmp(url, "http://", 7) == 0)
            { url += 7; protocol = HTTP_PORT; }
        else if (strncmp(url, "ftp://", 6) == 0)
            { url += 6; protocol = FTP_PORT; }
        else
            { output("FAIL", "protocol"); return 0; }
    }

    // The host name runs up to the first '/' or the end of the string.
    host_len = strcspn(url, "/");
    path = url[host_len] ? url + host_len : "/";

    if (host_len == 0)
        { output("FAIL", "hostname"); return 0; }
    if (host_len >= SPLIT_HOST_CAP || strlen(path) >= SPLIT_FILE_CAP)
        { output("FAIL", "url too long"); return 0; }

    // Copy by length so the caller's URL string does not have to be cut.
    memcpy(hostname, url, host_len);
    hostname[host_len] = 0;
    strcpy(filename, path);
    return protocol;
}

// Request filename over the connected socket sock and copy everything that
// comes back to stdout.
//
// Returns 0 (after printing "FAIL SendWebQuery") when SendWebQuery reports a
// non-zero result, otherwise 1 once RecvWebFile has returned 0.
int get_file(SOCKET sock, char *filename)
{
    char chunk[SZWEBINFO_SIZE];
    UINT got;

    if (SendWebQuery(sock, filename) != 0)
    {
        output("FAIL", "SendWebQuery");
        return 0;
    }

    for (;;)
    {
        got = RecvWebFile(sock, chunk);
        if (got == 0)
            break;
        // One item of `got` bytes, as received.
        fwrite(chunk, got, 1, stdout);
    }
    return 1;
}

// Fetch url and write the response to stdout.
//
// The connection is cached in static state: a following call for the same
// host and port reuses the socket, anything else closes it and reconnects.
//
// Returns 1 on success, 0 when the URL cannot be split, the connection cannot
// be made ("FAIL ConnectWebServerSocket" is printed) or get_file() fails.
int get_url(char *url)
{
    static int count = 0;       // successful fetches; kept for a planned MAX_SAVED_URL limit
    static SOCKET prev_sock = INVALID_SOCKET;
    static int prev_protocol = 0;
    static char prev_hostname[256];
    char hostname[256], filename[512];
    int protocol;

    if (! (protocol = split(url, hostname, filename)))
        return 0;

    // Reuse the cached socket only when one is actually open and it goes to
    // the same host and port. prev_hostname starts out empty, so the host
    // name alone is not a reliable test.
    if (prev_sock == INVALID_SOCKET || protocol != prev_protocol ||
        strcmp(hostname, prev_hostname) != 0)
    {
        if (prev_sock != INVALID_SOCKET)
        {
            closesocket(prev_sock);
            // Forget the old connection right away so a failed reconnect
            // cannot leave the cache pointing at a closed socket.
            prev_sock = INVALID_SOCKET;
            prev_hostname[0] = 0;
        }

        prev_sock = ConnectWebServerSocket(hostname, protocol);
        if (prev_sock == INVALID_SOCKET)
        {
            output("FAIL", "ConnectWebServerSocket");
            return 0;
        }
        strcpy(prev_hostname, hostname);
        prev_protocol = protocol;
    }

    if (! get_file(prev_sock, filename))
        return 0;

    count++;
    return 1;
}

// Entry point: geturl <url>
//
// Echoes the URL, then fetches it to stdout with get_url(). Exits with
// status 1 (through fail()) when no URL is given or the fetch fails,
// and with status 0 otherwise.
int main(int argc, char *argv[])
{
    if (argc < 2)
        fail("usage: geturl <http://whatever>");
    output(NULL, argv[1]);
    if (! get_url(argv[1]))
        fail("Couldn't get URL");
    return 0;
}

