flat assembler
Message board for the users of flat assembler.

Index > Main > Point belong line,circle and other shapes

Author
Thread Post new topic Reply to topic
Overclick



Joined: 11 Jul 2020
Posts: 669
Location: Ukraine
Overclick 07 Sep 2022, 04:10
Hey everyone!
Let's find the fastest ways to detect whether a point belongs to a geometric shape.
I'm sharing my progress on this:
Code:
.data
        ; click order,L,T,W,H,labKey,labAbove
        ;
        ; Emits one hit-test record for the mouse scanner. order selects the shape:
        ;   rect          48 bytes
        ;   circle        48 bytes
        ;   line weight   96 bytes, laid out as two 48-byte halves
        ; Every 48-byte unit has the same shape:
        ;   +0   four uninitialised dwords, the scaled slot written by the resize pass
        ;   +16  four float defaults, built from the integer arguments
        ;   +32  two qwords
        ; The first qword is labKey. The second is labKey.labAbove when labAbove
        ; is given and FINISH when it is omitted.
        ; Integer arguments become float literals by appending the f suffix.
        ; Sums are evaluated first through rept 1 num:expr so that the suffix is
        ; attached to the computed value.
        macro   click   order,L,T,W,H,labKey,labAbove
        {
                match =rect,order
                \{
                        dd ?,?,?,?      ;scaled left top right bottom
                        dd L\#f         ;default left
                        dd T\#f         ;default top
                        rept 1 num:W+L \\{dd num\\#f \\}        ;default right  = L+W
                        rept 1 num:H+T \\{dd num\\#f \\}        ;default bottom = T+H
                                dq labKey
                                match any,labAbove \\{dq labKey\\#.\\#labAbove \\}
                                match ,labAbove \\{dq FINISH \\}
                \}
                match =circle,order
                \{
                        dd ?,?,?,?      ;scaled X Y MARKERcircle radius
                        dd L\#f         ;default X, used as the centre by the scanner
                        dd T\#f         ;default Y
                        dd -0f          ;marker: -0.0 at offset 8 identifies a circle
                        rept 1 num:W/2 \\{dd num\\#f \\}        ;radius = W/2, integer division
                                dq labKey
                                match any,labAbove \\{dq labKey\\#.\\#labAbove \\}
                                match ,labAbove \\{dq FINISH \\}
                \}
                match =line weight,order
                \{
                        ;first half: weight weight length MARKERline
                        dd ?,?,?,?      ;scaled weight weight length marker
                        dd weight\#f
                        dd weight\#f
                                ;Square holds the bit pattern of the float W*W+H*H.
                                ;Result is the bit-level square root approximation of it:
                                ;subtract the pattern of 1.0, halve, add the pattern back.
                                rept 1 num:W*W+H*H \\{Square=dword num\\#f \\}
                                Result=((Square-0x3F800000) shr 1)+0x3F800000
                        dd Result       ;approximate segment length
                        dd -0f          ;marker: -0.0 at offset 12 identifies a line
                                dq labKey
                                ;escaping of # now matches the rect and circle branches
                                match any,labAbove \\{dq labKey\\#.\\#labAbove \\}
                                match ,labAbove \\{dq FINISH \\}
                        ;second half: the two end points
                        dd ?,?,?,?      ;scaled X1 Y1 X2 Y2
                        dd L\#f         ;default X1
                        dd T\#f         ;default Y1
                        rept 1 num:W+L \\{dd num\\#f \\}        ;default X2 = L+W
                        rept 1 num:H+T \\{dd num\\#f \\}        ;default Y2 = T+H
                                dq 0,0  ;keeps the second half 48 bytes long
                \}
        }
        ; Table of clickable shapes walked by the resize loop and the mouse scanner.
        ; align 16 plus the 48-byte unit size keeps every 16-byte group aligned
        ; for the movaps accesses used on this table.
        ; labAbove is omitted in all three entries, so each second qword is FINISH.
        align 16
        MouseClicks:
                ; circle: centre (1656,1322), radius 80/2 = 40; the H argument is not used
                click   circle,1656,1322,80,0,SomeKey
                ; line of weight 5 from (1588,1523) to (1588+19,1523+478); occupies 96 bytes
                click   line 5,1588,1523,19,478,SomeAge
                ; rectangle: left 1570, top 1113, right 1570+722, bottom 1113+923
                click   rect,1570,1113,722,923,Dragwindow  
        EndMouseClicks:
.code
        ; Rescale pass over the MouseClicks table.
        ; For every 48-byte unit it loads the four float defaults at offset 16,
        ; multiplies them by xmm10 and stores the product into the scaled slot at
        ; offset 0. A line record is two 48-byte units, so both halves are scaled.
        ; In:    xmm10 = per-lane multiplier (not set in this excerpt; presumably
        ;        the window size coefficient - confirm)
        ; Clobb: r13, r14, r15, xmm15, flags
        ; The loop test is at the bottom, so the body runs at least once.
        ; NOTE(review): no ret or jmp follows the loop in this excerpt, so as posted
        ; execution continues into .wm_mousemove - confirm whether code was omitted.
        .IfWindowResize:
                        mov             r15,MouseClicks         ; r15 = current 48-byte unit
                        mov             r14,48                  ; unit stride in bytes
                        mov             r13,16                  ; offset of the defaults inside a unit
                        @@:
                        movaps  xmm15,xword[r15+r13]               ; load the four defaults
                        mulps   xmm15,xmm10                ;Size Coefficient
                        movaps  xword[r15],xmm15                   ; store into the scaled slot
                        add             r15,r14                 ; next unit
                        cmp             r15,EndMouseClicks
                        jb              @B                      ; unsigned compare: loop until the table end
   ; Mouse hit test: walks the MouseClicks table and jumps to
   ; .mouse_scanner_success (outside this excerpt) on the first record that
   ; contains the mouse position; returns when the table is exhausted.
   ; In:    r9d = two packed signed 16-bit values, first coordinate in the low
   ;        word and second in the high word (presumably the WM_MOUSEMOVE
   ;        lParam with x low and y high - confirm)
   ; Uses:  the scaled slots at offset 0 of each unit, so the rescale pass must
   ;        have filled them beforehand (they are declared uninitialised)
   ; Clobb: r11, r13, r14, r15, xmm0, xmm1, xmm2, xmm4, xmm5, xmm6, xmm13, flags
   ; On success r14 points at the matching record.
   .wm_mousemove:
        pxor    xmm1,xmm1               ; xmm1 = 0, becomes the sign mask below
        xorps   xmm0,xmm0               ; xmm0 = 0.0 in all lanes, used as the zero operand
        movd    xmm2,r9d                ; low dword = packed 16-bit coordinates
        pcmpgtw xmm1,xmm2               ; xmm1 words = FFFF where the coordinate is negative
        punpcklwd xmm2,xmm1             ; sign-extend the two words to dwords: (x, y, 0, 0)
        cvtdq2ps xmm13,xmm2             ; convert to float
        movlhps xmm13,xmm13             ; duplicate the low half: xmm13 = (x, y, x, y)
        mov             r13,48                  ; unit stride in bytes
        mov             r14,MouseClicks         ; r14 = current record
        mov             r15,EndMouseClicks      ; r15 = end of table
        .mouse_scanner:
                movaps  xmm4,xmm13                      ; working copy of (x, y, x, y)
                cmp             dword[r14+12],-0f       ; -0.0 in the 4th scaled dword marks a line
                je              .mouse_scanner_line
                cmp             dword[r14+8],-0f        ; -0.0 in the 3rd scaled dword marks a circle
                je              .mouse_scanner_circle
                .mouse_scanner_rect:
                        ; compare (x, y, x, y) < (left, top, right, bottom) lane by lane
                        cmpltps xmm4,xword[r14]
                        movmskps r11d,xmm4              ; one bit per lane
                        ; 12 = 1100b: x >= left, y >= top, x < right, y < bottom
                        cmp             r11b,12
                        je              .mouse_scanner_success
                        add             r14,r13         ; next record
                        cmp             r14,r15
                        jb              .mouse_scanner
                        ret                             ; table exhausted, no hit
                .mouse_scanner_circle:
                        movq    xmm4,xmm4               ; clear the upper half: (x, y, 0, 0)
                        subps   xmm4,xword[r14]         ; (x-X, y-Y, 0-(-0), 0-radius)
                        mulps   xmm4,xmm4               ; (dx^2, dy^2, 0, radius^2)
                        haddps  xmm4,xmm0               ; (dx^2+dy^2, radius^2, 0, 0)
                        hsubps  xmm4,xmm0               ; low lane = dx^2+dy^2 - radius^2
                        comiss  xmm4,xmm0               ; compare with 0.0
                        jb              .mouse_scanner_success  ; negative: inside the circle
                        add             r14,r13         ; next record
                        cmp             r14,r15
                        jb              .mouse_scanner
                        ret                             ; table exhausted, no hit
                .mouse_scanner_line:
                        ; second half of the record holds the scaled end points
                        subps   xmm4,xword[r14+r13]     ; (x-X1, y-Y1, x-X2, y-Y2)
                        mulps   xmm4,xmm4               ; squared components
                        haddps  xmm4,xmm0               ; (d1^2, d2^2, 0, 0)
                                movaps  xmm5,xword[r14]         ; (weight, weight, length, -0)
                                movhlps xmm6,xmm5               ; xmm6 low lane = length
                                mulps   xmm5,xmm5               ; (weight^2, weight^2, length^2, 0)
                        subps   xmm4,xmm5               ; (d1^2-weight^2, d2^2-weight^2, ...)
                        sqrtps  xmm4,xmm4               ; the two low lanes are the ones used
                        haddps  xmm4,xmm0               ; low lane = sum of the two roots
                        ; hit when the sum is below the stored length
                        ; NOTE: comiss also sets CF for an unordered result, so a NaN
                        ; (square root of a negative lane) takes the jb as well
                        comiss  xmm4,xmm6
                        jb              .mouse_scanner_success
                        add             r14,r13         ; a line record is two units:
                        add             r14,r13         ; advance by 96 bytes
                        cmp             r14,r15
                        jb              .mouse_scanner
                        ret                             ; table exhausted, no hit
    

The slowest algorithm is line detection; for example, it uses sqrtps. Any advice on how to avoid it? I'm not sure the "magic number" solution is any better.
Post 07 Sep 2022, 04:10
View user's profile Send private message Visit poster's website Reply with quote
Furs



Joined: 04 Mar 2016
Posts: 2493
Furs 07 Sep 2022, 11:53
If you're just doing a check, you don't need sqrt at all, just compare the squared results directly. There's no point taking the square root.

I'm sure you can do more math simplifications/factoring with that knowledge.

e.g. let's say you want to check if length of vector is less than some value V. You can do:
Code:
x*x + y*y + z*z < V*V
Post 07 Sep 2022, 11:53
View user's profile Send private message Reply with quote
Overclick



Joined: 11 Jul 2020
Posts: 669
Location: Ukraine
Overclick 07 Sep 2022, 13:27
Quote:

I'm sure you can do more math simplifications/factoring with that knowledge.

Could you show some?
Post 07 Sep 2022, 13:27
View user's profile Send private message Visit poster's website Reply with quote
Furs



Joined: 04 Mar 2016
Posts: 2493
Furs 08 Sep 2022, 13:43
I mean in general stuff like factoring out things to reduce multiplications and so on (e.g. x*y + x*z = x*(y+z)). Nothing specialized, but without square root it's obviously far simpler to simplify things.

BTW in general when you're doing comparisons you can avoid expensive operations like square root or division by doing such transformations. For example if you have a division, just multiply both sides by the denominator and compare that.
Post 08 Sep 2022, 13:43
View user's profile Send private message Reply with quote
Overclick



Joined: 11 Jul 2020
Posts: 669
Location: Ukraine
Overclick 08 Sep 2022, 14:05
Quote:

I mean in general stuff like factoring out things to reduce multiplications and so on (e.g. x*y + x*z = x*(y+z)). Nothing specialized, but without square root it's obviously far simpler to simplify things.

BTW in general when you're doing comparisons you can avoid expensive operations like square root or division by doing such transformations. For example if you have a division, just multiply both sides by the denominator and compare that.

Did you even look at my progress? No, you didn't. Otherwise you would have seen that some parameters are precalculated at the preprocessing stage. I'm not asking for theoretical advice here. This thread is for practice: better examples and comparisons.


Description:
Filesize: 10.13 KB
Viewed: 2932 Time(s)

Untitled.png


Post 08 Sep 2022, 14:05
View user's profile Send private message Visit poster's website Reply with quote
Overclick



Joined: 11 Jul 2020
Posts: 669
Location: Ukraine
Overclick 08 Sep 2022, 15:27
Magic number variant:
Code:
                ; Variant of the line test that replaces sqrtps with a bit-level
                ; square root approximation (subtract the pattern of 1.0, shift
                ; right by one, add the pattern back).
                ; NOTE: as posted, the subtract and add are the floating-point
                ; subps/addps and the shift is arithmetic; the author reports this
                ; version as not working and replaces the three instructions with
                ; psubd / psrld / paddd in the follow-up post.
                .mouse_scanner_line:
                        subps   xmm4,xword[r14+r13]     ; (x-X1, y-Y1, x-X2, y-Y2)
                        mulps   xmm4,xmm4               ; squared components
                        haddps  xmm4,xmm0               ; (d1^2, d2^2, 0, 0)
                        movaps  xmm5,xword[r14]         ; (weight, weight, length, -0)
                        movhlps xmm6,xmm5               ; xmm6 low lane = length
                        mulps   xmm5,xmm5               ; squares of the first half
                        subps   xmm4,xmm5               ; (d1^2-weight^2, d2^2-weight^2, ...)
                                subps   xmm4,xmm3       ; xmm3 = 0,0,1f,1f(lower)
                                psrad   xmm4,1          ; inc/ivec domain latency
                                addps   xmm4,xmm3
                ;       sqrtps  xmm4,xmm4
                        haddps  xmm4,xmm0               ; low lane = sum of the two approximations
                        comiss  xmm4,xmm6               ; compare with the stored length
                        jb              .mouse_scanner_success
                        add             r14,r13         ; a line record is two units:
                        add             r14,r13         ; advance by 96 bytes
                        cmp             r14,r15
                        jb              .mouse_scanner
                        ret         

Not working at all
Post 08 Sep 2022, 15:27
View user's profile Send private message Visit poster's website Reply with quote
Overclick



Joined: 11 Jul 2020
Posts: 669
Location: Ukraine
Overclick 08 Sep 2022, 16:01
Oops, my mistake
Code:
                                ; integer form of the approximation, operating on the raw
                                ; float bit patterns in xmm4 (xmm3 as in the previous listing)
                                psubd   xmm4,xmm3       ; subtract the bit pattern held in xmm3
                                psrld   xmm4,1          ; logical shift right by one
                                paddd   xmm4,xmm3       ; add the bit pattern back
    

But the result is not accurate enough, even when the weight is huge.
Post 08 Sep 2022, 16:01
View user's profile Send private message Visit poster's website Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.