flat assembler
Message board for the users of flat assembler.

Index > Windows > Using Winsock to download HTTP pages?

Thread Post new topic Reply to topic

Joined: 24 Mar 2013
Posts: 94
Hi everybody.

I found this code in C++ that will read a webpage's source code:
#include "stdafx.h"
#include <winsock2.h>
#include <windows.h>
#include <iostream>
#include <fstream>
#pragma comment(lib,"ws2_32.lib")

using namespace std;

int main(){
        WSADATA wsaData;

        if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
                cout << "WSAStartup failed.\n";
                return 1;

        SOCKET Socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

        struct hostent *host;
        host = gethostbyname("www.last.fm");

        SOCKADDR_IN SockAddr;
        SockAddr.sin_port = htons(80);
        SockAddr.sin_family = AF_INET;
        SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);

        cout << "Connecting...\n";
        if (connect(Socket, (SOCKADDR*)(&SockAddr), sizeof(SockAddr)) != 0){
                cout << "Could not connect";
                return 1;
        cout << "Connected.\n";

        send(Socket, "GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1\r\nHost: www.last.fm\r\nConnection: close\r\n\r\n", strlen("GET /music/taylor+swift/+albums?order=reach&page=1 HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n"), 0);
        char buffer[10000];

        int nDataLength;
        while ((nDataLength = recv(Socket, buffer, 10000, 0)) > 0){
                int i = 0;
                while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') {
                         cout << buffer[i];                     
                        i += 1;


        return 0;

I have never worked with sockets in assembly before, but I would like to build this project in assembly rather than C++, so this is what I tried in assembly:

format PE GUI 4.0
entry start

include 'C:\Users\michelle school\zach\assembly\Fasm\INCLUDE\win32ax.inc'

; Data section: Winsock state, message-box texts, the target host name,
; the socket address structure, a receive buffer and two candidate
; HTTP request strings.
section '.data' data readable writeable

wsadata WSADATA
 _caption db 'Client application',0
 _igang db 'The client has started very well.',13,10,'It is now going to connect to your own computer',0
 _hostname db 'Wrong hostname',0

; NOTE(review): the name resolved here is www.lastfm.com, while the Host
; header in the requests below says www.last.fm - confirm which is intended.
  hostname db 'www.lastfm.com',0
  hSock dd ?
  saddr sockaddr_in
; Byte size of saddr, computed as the distance from saddr to this point.
  sizesaddr = $-saddr

; 0x3000 reserved bytes that receive the server's reply.
  buffer rb 0x3000
; Request variant 1: each line ends in CR,LF (13,10) and the string is
; NUL-terminated.
; NOTE(review): only one CR,LF follows the last header; an HTTP request
; header block is terminated by an empty line, i.e. a second CR,LF.
  sender        db "GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1",13,10
                  db "Host: www.last.fm",13,10
                  db "Connection: close",13,10,0

; Request variant 2: the \r\n sequences sit inside the quoted string.
; NOTE(review): presumably fasm stores these as literal backslash
; characters rather than CR/LF - verify against the fasm manual.
  sender2 db 'GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1\r\nHost: www.last.fm\r\nConnection: close',13,10
         rb 0x100

; Code section: starts Winsock, resolves `hostname`, connects to port 80,
; sends `sender`, receives once into `buffer` and shows it in a message box.
; NOTE(review): the labels `start` (named by `entry start`), `bad_hostname`
; and `stopp` are referenced but no definition is visible in this listing;
; the trailing lines are presumably their bodies - confirm.
section '.code' code readable executable

       invoke WSAStartup,0101h,wsadata  ; initializes the Winsock library (version word 0101h)

       invoke  ws_gethostbyname,hostname
       ; A zero result in eax is treated as a failed lookup.
       or     eax,eax
       jz     bad_hostname
              ; Overlay the hostent layout on the returned pointer.
              virtual at eax
               .host hostent
              end virtual
       ; Three loads: address-list pointer -> first list entry -> the
       ; 4 address bytes themselves.
       mov    eax,[.host.h_addr_list]
       mov    eax,[eax]
       mov    eax,[eax]
       mov     [saddr.sin_addr],eax
       invoke MessageBox,0,_igang,_caption,0
       ; al=0, ah=80 gives ax=5000h, stored in memory as bytes 00 50,
       ; i.e. port 80 in big-endian (network) byte order.
       mov    al,00
       mov    ah,80          ; port 80
       mov     [saddr.sin_port],ax
       mov     [saddr.sin_family],AF_INET
       invoke  ws_socket, AF_INET, SOCK_STREAM, IPPROTO_TCP
       mov     [hSock], eax
       ; Keep the socket handle in esi for the calls below.
       xchg    eax, esi
       invoke  ws_connect, esi, saddr, sizesaddr
       ; NOTE(review): no matching .endif is visible for either .if below,
       ; and the second test reads eax after a MessageBox call may have
       ; overwritten the connect result - confirm the intended structure.
       .if eax = 0
       invoke MessageBox,0, "connected", _caption,0
       .if eax <> 0
       invoke MessageBox,0, "not connected", _caption,0
       mov     ebx, buffer
       ; NOTE(review): 109 is a hard-coded byte count - confirm it matches
       ; the actual length of `sender`.
       invoke  ws_send,esi,sender,109,0
       ; Single receive of at most 1000 bytes; the return value is not checked.
       invoke  ws_recv, esi, ebx, 1000, 0
       ; NOTE(review): buffer is displayed as a NUL-terminated string; this
       ; relies on a zero byte following the received data - confirm.
       invoke MessageBox,0, buffer, _caption,0
       invoke ws_closesocket,esi
       invoke WSACleanup
       jmp stopp

        ; Reports the failed host lookup (presumably the bad_hostname target).
        invoke MessageBox,0,_hostname,_caption,0
        jmp stopp

        ; Process exit (presumably the stopp target).
        invoke ExitProcess,0

section '.idata' import data readable writeable

  library kernel,'KERNEL32.DLL',\

  import kernel,\

  import winsock,\

  import user,\

The code seems to connect to lastfm.com, so I think that part works, but it seems like either it is not sending the correct GET string to receive the source code, or I am not receiving the result properly.

I tried sending both "sender" and "sender2" because I thought maybe the \r\n was the problem, but that did not fix it.

If someone knows what the problem is, or how to read the source code of a website, that would be helpful.

here is the webpage I am trying to read

Post 25 May 2014, 17:41
View user's profile Send private message Reply with quote

Joined: 25 Jul 2010
Posts: 2913
Location: 0x77760000

Fasm doesn't escape strings that way. In fact, FASM doesn't escape strings. Why? Well, you have control over all the bytes so there's no need to escape strings when you can use $A, $D for CRLF.

Also, some web servers require the GET parameter to have the full URL but not for POST. Fix your CRLFs then it'll work. And Wireshark is your best networking debugger.

Also, windows does have an HTTP API (not recommended for )

There's a one liner API to download files as well: URLDownloadToFile. If you want to check for download progress you'll have to get into the COM stuff.

Possibilities are endless my friend.
Post 25 May 2014, 20:09
View user's profile Send private message Reply with quote

Joined: 24 Mar 2013
Posts: 94
How do I fix the CRLFs? I am sorry, I have never heard of $A, $D. The only thing I have used for CRLF before is this code that someone on the forum gave me:
; Newline string: bytes 13 and 10 followed by a terminating zero byte.
NL db 13, 10, 0

So I tried sending this string, but it still didn't work:

 ; Builds the request in buffer2 by copying the request line and then
 ; appending NL and each header in turn.
 ; NOTE(review): nothing is appended after "Connection: close", so the
 ; request ends without a line break and without the empty line that
 ; terminates an HTTP header block.
 invoke lstrcpyA,buffer2,"GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1"
       invoke lstrcatA,buffer2,NL
       invoke lstrcatA,buffer2,"Host: www.last.fm"
       invoke lstrcatA,buffer2,NL
       invoke lstrcatA,buffer2,"Connection: close"

; 300 reserved bytes that hold the assembled request.
buffer2 rb 300

How should I fix the CRLFs?
Post 25 May 2014, 20:45
View user's profile Send private message Reply with quote

Joined: 24 Mar 2013
Posts: 94
I feel so dumb. Since you pointed it out, I just realized that I can use URLDownloadToFile to download the page as an HTML file and then use ReadFile to get the contents into a string. I would still like to know the answer to the CRLF question if you don't mind, but I will probably go the API route. I have used this API before in an auto-updater that I have in the template for my programs.

thanks typedef!
Post 25 May 2014, 20:55
View user's profile Send private message Reply with quote

Joined: 25 Jul 2010
Posts: 2913
Location: 0x77760000

; Complete HTTP GET request laid out as consecutive bytes.
; CR/LF are the carriage-return and line-feed byte values.
CR equ $0D
LF equ  $0A

; Global label marking the first byte of the request. It is the parent of
; the dot-prefixed local labels below and the base that LEN is measured from.
szRequest:
             .method db 'GET '
             .server   db 'http://www.last.fm' ; required as HTTP 1.1 requires absolute path
             .path     db '/music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1', CR, LF
             .host      db 'Host: www.last.fm', CR,LF
             ;.cookie   db 'Cookie: SOMEKEY%3Dsome_var;', 13, 10
             ; The doubled CR,LF is the empty line that ends the header block.
             .conn     db  'Connection: close', CR, LF, CR, LF, 0
; Byte count from szRequest to here; this includes the terminating zero
; byte, so use LEN - 1 to send only the request text.
LEN  = $ - szRequest

Your code was OK, all you needed to do was remove the \r\n then add another CRLF at the end like I did above.

The double CRLF at the end terminates the HTTP request headers. The server waits for that empty line (the second CRLF), and if it never arrives, after some time it'll issue a timeout error.
Post 27 May 2014, 05:50
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  

< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum

Copyright © 1999-2020, Tomasz Grysztar. Also on YouTube, Twitter.

Website powered by rwasa.