flat assembler
Message board for the users of flat assembler.

Index > Windows > Using Winsock to download HTTP pages?

Author
Thread Post new topic Reply to topic
patchariadog



Joined: 24 Mar 2013
Posts: 94
patchariadog 25 May 2014, 17:41
Hi everybody.

I found this code in c++ that will read a webpages source code
Code:
#include "stdafx.h"
#include <winsock2.h>
#include <windows.h>
#include <iostream>
#include <fstream>
#pragma comment(lib,"ws2_32.lib")

using namespace std;

int main(){
        WSADATA wsaData;

        if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
                cout << "WSAStartup failed.\n";
                system("pause");
                return 1;
        }

        SOCKET Socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

        struct hostent *host;
        host = gethostbyname("www.last.fm");

        SOCKADDR_IN SockAddr;
        SockAddr.sin_port = htons(80);
        SockAddr.sin_family = AF_INET;
        SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);

        cout << "Connecting...\n";
        if (connect(Socket, (SOCKADDR*)(&SockAddr), sizeof(SockAddr)) != 0){
                cout << "Could not connect";
                system("pause");
                return 1;
        }
        cout << "Connected.\n";

        send(Socket, "GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1\r\nHost: www.last.fm\r\nConnection: close\r\n\r\n", strlen("GET /music/taylor+swift/+albums?order=reach&page=1 HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n"), 0);
        char buffer[10000];

        int nDataLength;
        while ((nDataLength = recv(Socket, buffer, 10000, 0)) > 0){
                int i = 0;
                while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') {
                         cout << buffer[i];                     
                        i += 1;
                }
        }

        closesocket(Socket);
        WSACleanup();

        
        system("pause");
        return 0;
}
    


I have never worked with sockets before in assembly, but I would like to build this project in assembly rather then c++ so this is what I tried in assembly

Code:
format PE GUI 4.0
entry start

include 'C:\Users\michelle school\zach\assembly\Fasm\INCLUDE\win32ax.inc'

section '.data' data readable writeable
  IPPROTO_TCP  = 6


wsadata WSADATA
 _caption db 'Client application',0
 _igang db 'The client has started very well.',13,10,'It is now going to connect to your own computer',0
 _hostname db 'Wrong hostname',0

  hostname db 'www.lastfm.com',0
  hSock dd ?
  saddr sockaddr_in
  sizesaddr = $-saddr


  buffer rb 0x3000
  sender        db "GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1",13,10
                  db "Host: www.last.fm",13,10
                  db "Connection: close",13,10,0

  sender2 db 'GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1\r\nHost: www.last.fm\r\nConnection: close',13,10
         rb 0x100

section '.code' code readable executable
start:

       invoke WSAStartup,0101h,wsadata  ; initialiserer winsock-bibliotek

       invoke  ws_gethostbyname,hostname
       or     eax,eax
       jz     bad_hostname
              virtual at eax
               .host hostent
              end virtual
       mov    eax,[.host.h_addr_list]
       mov    eax,[eax]
       mov    eax,[eax]
       mov     [saddr.sin_addr],eax
       invoke MessageBox,0,_igang,_caption,0
       mov    al,00
       mov    ah,80          ; port 80
       mov     [saddr.sin_port],ax
       mov     [saddr.sin_family],AF_INET
       invoke  ws_socket, AF_INET, SOCK_STREAM, IPPROTO_TCP
       mov     [hSock], eax
       xchg    eax, esi
       invoke  ws_connect, esi, saddr, sizesaddr
       .if eax = 0
       invoke MessageBox,0, "connected", _caption,0
       .endif
       .if eax <> 0
       invoke MessageBox,0, "not connected", _caption,0
       .endif
       mov     ebx, buffer
       invoke  ws_send,esi,sender,109,0
       invoke  ws_recv, esi, ebx, 1000, 0
       invoke MessageBox,0, buffer, _caption,0
       .connectSucceeded:
       invoke ws_closesocket,esi
       invoke WSACleanup
       jmp stopp

bad_hostname:
        invoke MessageBox,0,_hostname,_caption,0
        jmp stopp

stopp:
        invoke ExitProcess,0



section '.idata' import data readable writeable

  library kernel,'KERNEL32.DLL',\
          winsock,'WSOCK32.DLL',\
          user,'USER32.DLL'

  import kernel,\
         ExitProcess,'ExitProcess'

  import winsock,\
        WSAStartup,'WSAStartup',\
        ws_socket,'socket',\
        ws_connect,'connect',\
        ws_gethostbyname,'gethostbyname',\
        ws_send,'send',\
        ws_recv,'recv',\
        ws_closesocket,'closesocket',\
        WSACleanup,'WSACleanup'

  import user,\
        MessageBox,'MessageBoxA'
    


the code seems like it connects to lastfm.com so I think that part works but it seems like it is not sending the correct get string to receive the source code or maybe I am not receiving the result properly.

I tried sending "sender" and "sender2" because I thought maybe the /r/n was the problem but that did not fix it.

if someone knows what the problem is or how to read the source code of a website that would be helpful.

here is the webpage I am trying to read
http://www.last.fm/music/Taylor+Swift/+albums?order=reach&page=1

thanks!
Post 25 May 2014, 17:41
View user's profile Send private message Reply with quote
typedef



Joined: 25 Jul 2010
Posts: 2909
Location: 0x77760000
typedef 25 May 2014, 20:09
\r\n

Fasm doesn't escape strings that way. In fact, FASM doesn't escape strings. Why? Well, you have control over all the bytes so there's no need to escape strings when you can use $A, $D for CRLF.

Also, some web servers require the GET parameter to have the full URL but not for POST. Fix your CRLFs then it'll work. And Wireshark is your best networking debugger.

Also, windows does have an HTTP API (not recommended for )

There's a one liner API to download files as well: URLDownloadToFile. If you want to check for download progress you'll have to get into the COM stuff.

Possibilities are endless my friend.
Post 25 May 2014, 20:09
View user's profile Send private message Reply with quote
patchariadog



Joined: 24 Mar 2013
Posts: 94
patchariadog 25 May 2014, 20:45
How do I fix the CRLF's. I am sorry I have never heard of $A, $D the only thing I have used for crlf before was someone on the forum gave me this code
Code:
NL db 13, 10, 0
    


so i tried sending this string but it still didn't work

Code:
 invoke lstrcpyA,buffer2,"GET /music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1"
       invoke lstrcatA,buffer2,NL
       invoke lstrcatA,buffer2,"Host: www.last.fm"
       invoke lstrcatA,buffer2,NL
       invoke lstrcatA,buffer2,"Connection: close"

data
buffer2 rb 300
    


how should I fix the CRLF's
Post 25 May 2014, 20:45
View user's profile Send private message Reply with quote
patchariadog



Joined: 24 Mar 2013
Posts: 94
patchariadog 25 May 2014, 20:55
I feel so dumb, I just realized since you pointed it out to me that I can just use the URLDownloadToFile to download it as an html file and then use readfile to get the contents into a string. I would still like to know the answer to the CRLF if you don't mind, but I will probably go the api route. I have used this api before in an auto updater I have for in my template for my programs.

thanks typedef!
Post 25 May 2014, 20:55
View user's profile Send private message Reply with quote
typedef



Joined: 25 Jul 2010
Posts: 2909
Location: 0x77760000
typedef 27 May 2014, 05:50
Code:

CR equ $0D
LF equ  $0A

szRequest:
             .method db 'GET '
             .server   db 'http://www.last.fm' ; required as HTTP 1.1 requires absolute path
             .path     db '/music/Taylor+swift/+albums?order=reach&page=1 HTTP/1.1', CR, LF
             .host      db 'Host: www.last.fm', CR,LF
             ;.cookie   db 'Cookie: SOMEKEY%3Dsome_var;', 13, 10
             .conn     db  'Connection: close', 13,  10,  13,  10, 0
LEN  = $ - szRequest
    


Your code was OK, all you needed to do was remove the \r\n then add another CRLF at the end like I did above.

The double CRLF at the end terminates the HTTP protocol's request headers. The server waits for those two bytes and if it doesn't, after some time it'll issue a timeout error.
Post 27 May 2014, 05:50
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.