[1]
{AAA
 ^yAAA             ASCII Adjust after Addition          Flags: O D I T S Z A P C^y
                                                             ? - - - ? ? * ? *
 ^yAAA^y

        ^yLogic^y   if ((AL AND 0Fh) > 9) OR (AF = 1)
                   AL  AL + 6                  ; See note
                   AH  AH + 1
                   AF  1;  CF  1
                else
                   AF  0;  CF  0
                endif
                AL  AL AND 0Fh

    Execute AAA following an ADD or ADC operation on two unpacked BCD
    digits that leaves a byte result in the AL register. AAA converts
    the number in the lower 4 bits (nibble) of AL to an unpacked BCD
    number (high-order nibble of AL is zeroed). If the addition
    produced a decimal carry, the AH register is incremented, and the
    carry and auxiliary carry flags are set to 1. If the addition
    produced no decimal carry, the carry and auxiliary carry flags are
    set to 0 and AH is unchanged.

    ^yNote^y
    The 8086/88 processors will not add a carry out of AL into AH if
    an invalid operand is in AL. The newer processors will, yielding
    different results for the same _invalid_ operand (AX = AX + 6 in
    2nd line of description). Execution is the same when valid
    operands are loaded.


    ^yOpcode      Format^y
    37          AAA


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       8       8       3       4       3       3   NP
}AAS:AAD:AAM:ADC:Flags
{AAD
 ^yAAD             ASCII Adjust before Division         Flags: O D I T S Z A P C^y
                                                             ? - - - * * ? * ?
 ^yAAD^y

        ^yLogic^y   AL  AH * 10 + AL
                AH  0

    AAD converts the unpacked two-digit BCD number in AX into binary
    in preparation for a division using DIV or IDIV, which require
    binary rather than BCD numbers.

    The result produced by the subsequent division will be an unpacked
    BCD number.


    ^yOpcode      Format^y
    D5 0A       AAD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            2      60      15      14      19      14      10   NP

}AAA:AAS:AAM:DIV:Flags
{AAM
 ^yAAM             ASCII Adjust after Multiply          Flags: O D I T S Z A P C^y
                                                             ? - - - * * ? * ?
 ^yAAM^y

        ^yLogic^y   AH  AL / 10
                AL  AL MOD 10

    Execute AAM after a MUL operation on two unpacked BCD operands
    that leaves the result in the AX register. Because the result is
    less than 100, it is contained entirely in the AL register. AAM
    unpacks the result in AL by dividing AL by 10, leaving the
    quotient in AH and the remainder in AL.


    ^yOpcode      Format^y
    D4 0A       AAM


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            2      83      19      16      17      15      18   NP
}AAA:AAS:AAD:MUL:Flags
{AAS
 ^yAAS             ASCII Adjust after Subtraction       Flags: O D I T S Z A P C^y
                                                             ? - - - ? ? * ? *
 ^yAAS^y

        ^yLogic^y   if ((AL AND 0Fh) > 9) OR (AF = 1)
                   AL  AL - 6
                   AH  AH - 1
                   AF  1;  CF  1
                else
                   AF  0;  CF  0
                endif
                AL  AL AND 0Fh

    Execute AAS following a SUB or SBB operation on two unpacked BCD
    digits that leaves a byte result in the AL register. AAS converts
    the number in the lower 4 bits (nibble) of AL to an unpacked BCD
    number (high-order nibble of AL is zeroed). If the subtraction
    produced a decimal carry, the AH register is decremented, and the
    carry and auxiliary carry flags are set to 1. If the subtraction
    produced no decimal carry, the carry and auxiliary carry flags are
    set to 0 and AH is unchanged.


    ^yOpcode      Format^y
    3F          AAS


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       8       7       3       4       3       3   NP
}AAA:AAD:AAM:SBB:Flags
{ADC
 ^yADC             Add with Carry                       Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yADC^y destination,source

        ^yLogic^y   destination  destination + source + CF

    ADC sums the operands, adds 1 if the carry flag (CF) is set and
    replaces the destination operand with the result. If CF is
    cleared, ADC does the same operation as the ADD instruction.

        Example:        add     eax,ebx
                        adc     edx,0   ; Add 1 if CF set


    ^yOpcode      Format^y
    10 /r       ADC  r/m8,r8
    11 /r       ADC  r/m16,r16
    11 /r       ADC  r/m32,r32
    12 /r       ADC  r8,r/m8
    13 /r       ADC  r16,r/m16
    13 /r       ADC  r32,r/m32
    14 ib       ADC  AL,imm8
    15 iw       ADC  AX,imm16
    15 id       ADC  EAX,imm32
    80 /2 ib    ADC  r/m8,imm8
    81 /2 iw    ADC  r/m16,imm16
    81 /2 id    ADC  r/m32,imm32
    83 /2 ib    ADC  r/m16,imm8
    83 /2 ib    ADC  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   PU
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   PU
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   PU
    reg, imm  2+i(1,2)   4       4       3       2       1       1   PU
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   PU*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   PU

        * = not pairable if there is a displacement and immediate
}ADD:INC:Flags
{ADD
 ^yADD             Add                                  Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yADD^y destination,source

        ^yLogic^y   destination  destination + source

    ADD replaces the destination operand with the sum of the source
    and destination operands. It sets the carry flag (CF) if there is
    an unsigned overflow (a carry out of the most significant bit).


    ^yOpcode      Format^y
    00 /r       ADD  r/m8,r8
    01 /r       ADD  r/m16,r16
    01 /r       ADD  r/m32,r32
    02 /r       ADD  r8,r/m8
    03 /r       ADD  r16,r/m16
    03 /r       ADD  r32,r/m32
    04 ib       ADD  AL,imm8
    05 iw       ADD  AX,imm16
    05 id       ADD  EAX,imm32
    80 /0 ib    ADD  r/m8,imm8
    81 /0 iw    ADD  r/m16,imm16
    81 /0 id    ADD  r/m32,imm32
    83 /0 ib    ADD  r/m16,imm8
    83 /0 ib    ADD  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   UV
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   UV
    reg, imm  2+i(1,2)   4       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate
}ADC:INC:Flags
{AND
 ^yAND             AND operation                        Flags: O D I T S Z A P C^y
                                                             0 - - - * * ? * 0
 ^yAND^y destination,source

        ^yLogic^y   destination  destination AND source

    AND performs a bit-by-bit logical AND operation on its operands
    and stores the result in destination. AND sets each bit of the
    result to one if both of the corresponding bits of the operands
    are one.

                          ^yAND truth table^y
                        a       b    a AND b
                        0       0       0
                        0       1       0
                        1       0       0
                        1       1       1


    ^yOpcode      Format^y
    20 /r       AND  r/m8,r8
    21 /r       AND  r/m16,r16
    21 /r       AND  r/m32,r32
    22 /r       AND  r8,r/m8
    23 /r       AND  r16,r/m16
    23 /r       AND  r32,r/m32
    24 ib       AND  AL,imm8
    25 iw       AND  AX,imm16
    25 id       AND  EAX,imm32
    80 /4 ib    AND  r/m8,imm8
    81 /4 iw    AND  r/m16,imm16
    81 /4 id    AND  r/m32,imm32
    83 /4 ib    AND  r/m16,imm8
    83 /4 ib    AND  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   UV
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   UV
    reg, imm  2+i(1,2)   4       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate
}TEST:NOT:NEG:OR:XOR:Flags
{ARPL
 ^yARPL            Adjust RPL Field                     Flags: O D I T S Z A P C^y
                                                             - - - - - * - - -
 ^yARPL^y destination,source                              CPU: 286+ ^yp^y

        ^yLogic^y   if destination Privilege Level < source Privilege Level
                   destination Privilege Level  source Privilege Level
                   ZF  1
                else
                   ZF  0
                endif

    The destination operand contains the value of a selector. The
    source operand is a word register (usually contains the CS
    selector value of the caller). If the RPL field ("requested
    privilege level" -- bottom two bits) of the destination operand is
    less than the RPL field of the source operand, the zero flag is
    set to 1 and the RPL field of the destination operand is increased
    to match the second operand. Otherwise, the zero flag is set to 0
    and no change is made to the first operand.

    ARPL is used to guarantee that a selector parameter to a
    subroutine does not request more privilege than the caller is
    allowed. Both destination and source must be valid selectors.

    ARPL appears in operating system software; it is not used in
    application programs.


    ^yOpcode      Format^y
    63 /r       ARPL r/m16,r16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       -       -      10      20       9       7   NP
    mem, reg   2+d(0-2)  -       -      11      21       9       7   NP
}LAR:LSL:VERR:Selectors
{BOUND
 ^yBOUND           Check Array Index against Bounds     Flags: Not altered^y

 ^yBOUND^y destination,source                             CPU: 186+

        ^yLogic^y   if (destination < source[0]) OR (destination > source[n])
                   INT 5
                endif                   ; n = 2 for word, 4 for dword

    BOUND verifies that the signed destination value lies within
    specified limits. Interrupt 5 occurs if the value in the
    destination register is less than the lower bound or greater than
    the upper bound. The upper and lower limit values may each be a
    word or a doubleword.

    The block of memory that specifies the lower and upper limits of
    an array can typically reside just before the array itself. This
    makes the array bounds accessible at a constant offset from the
    beginning of the array. Because the address of the array is
    already present in a register, this avoids extra calculations to
    obtain the effective address of the array bounds.

    BOUND includes two operands. The first specifies the register
    being tested, and the second contains the effective address of the
    two signed limit values. BOUND assumes that the lower limit and
    the upper limit are in adjacent memory locations.


    ^vNote^v
    An INT 05h interrupt handler must be installed before using BOUND.
    On the 80186, the exception saves CS:IP pointing to the
    instruction following BOUND; on 286+, to the BOUND instruction.


    ^yOpcode      Format^y
    62 /r       BOUND r16,m16&16
    62 /r       BOUND r32,m32&32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, mem      4      -      35      13      10       7       8   NP
}CMP
{BSF
 ^yBSF             Bit Scan Forward                     Flags: O D I T S Z A P C^y
                                                             ? - - - ? * ? ? ?
 ^yBSF^y destination,source                               CPU: 386+

        ^yLogic^y   if a set bit is found in source
                   ZF  0
                   destination  bit index of first set bit
                else
                   ZF  1
                   destination  ?
                endif

    BSF scans (starting with bit 0) source for a one-bit. The zero
    flag (ZF) is set to 1 if the bits are all zero; otherwise, ZF is
    cleared to 0 and destination is loaded with the bit index of the
    first set bit.

        Example:        mov     dx,0110b
                        bsf     cx,dx   ; zf = 0, cx = 1
                        bsr     cx,dx   ; zf = 0, cx = 2

    ^yNote^y
    Some sources wrongly state different result flags. Some 386 and
    early 486 CPUs change destination if source = 0.


    ^yOpcode      Format^y
    0F BC       BSF  r16,r/m16
    0F BC       BSF  r32,r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16, r16     3                             10+3n    6-42   6-34  NP
    r32, r32     3                             10+3n    6-42   6-42  NP
    r16, m16  3+d(0,1,2)                       10+3n    7-43   6-35  NP
    r32, m32  3+d(0,1,2,4)                     10+3n    7-43   6-43  NP
}BSR:TEST:SHR
{BSR
 ^yBSR             Bit Scan Reverse                     Flags: O D I T S Z A P C^y
                                                             ? - - - ? * ? ? ?
 ^yBSR^y destination,source                               CPU: 386+

        ^yLogic^y   if a set bit is found in source
                   ZF  0
                   destination  bit index of first set bit
                else
                   ZF  1
                endif

    BSR scans (starting with MSB) source for a one-bit. The zero flag
    (ZF) is set to 1 if the bits are all zero; otherwise, ZF is
    cleared to 0 and destination is loaded with the bit index of the
    first set bit.

        Example:        mov     dx,0110b
                        bsf     cx,dx   ; zf = 0, cx = 1
                        bsr     cx,dx   ; zf = 0, cx = 2

    ^yNote^y
    Some sources wrongly state different result flags.


    ^yOpcode      Format^y
    0F BD       BSR  r16,r/m16
    0F BD       BSR  r32,r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16, r16     3                             10+3n    6-103  7-39  NP
    r32, r32     3                             10+3n    7-104  7-71  NP
    r16, m16  3+d(0,1,2)                       10+3n    6-103  7-40  NP
    r32, m32  3+d(0,1,2,4)                     10+3n    7-104  7-72  NP
}BSF:TEST:SHR
{BSWAP
 ^yBSWAP           Byte Swap                            Flags: Not altered^y

 ^yBSWAP^y destination                                    CPU: 486+

    BSWAP reverses the byte order of a 32-bit register, converting a
    value in little endian format to big endian format, or vice versa.

    If BSWAP is used with a 16-bit operand size, the result in the
    destination register is undefined.

        Example:        mov     eax,87654321h
                        bswap   eax
                        ; eax = 21436587h


    ^yOpcode      Format^y
    0F C8+rd    BSWAP r32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
      r32        2                                       1       1   NP
}XCHG:ROL:ROR
{BT
 ^yBT              Bit Test                             Flags: O D I T S Z A P C^y
                                                             - - - - - - - - *
 ^yBT^y destination,source                                CPU: 386+

        ^yLogic^y   CF  bit from destination at position source

    The source specifies a bit index in the destination operand.
    BT copies that bit to the carry flag.


    ^yOpcode      Format^y
    0F A3       BT  r/m16,r16
    0F A3       BT  r/m32,r32
    0F BA /4 ib BT  r/m16,imm8
    0F BA /4 ib BT  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg      3                              3       3       4   NP
    mem, reg    3+d(0,1,2,4)                    12       8       9   NP
    reg, imm8     3+i(1)                         3       3       4   NP
    mem, imm8   3+d(0,1,2,4)+i(1)                6       3       4   NP
}BTC:BTR:BTS:BSF:TEST:SETcc
{BTC
 ^yBTC             Bit Test and Complement              Flags: O D I T S Z A P C^y
                                                             - - - - - - - - *
 ^yBTC^y destination,source                               CPU: 386+

        ^yLogic^y   CF  bit, bit  NOT(bit)

    The source specifies a bit index in the destination operand.
    BTC copies that bit to the carry flag and then complements
    it in destination.


    ^yOpcode      Format^y
    0F BB       BTC r/m16,r16
    0F BB       BTC r/m32,r32
    0F BA /7 ib BTC r/m16,imm8
    0F BA /7 ib BTC r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     3                               6       6       7   NP
    mem, reg   3+d(0,1,2,4)                     13      13      13   NP
    reg, imm8    3+i(1)                          6       6       7   NP
    mem, imm8  3+d(0,1,2,4)+i(1)                 8       8       8   NP
}BT:BTR:BTS:BSF:TEST:SETcc
{BTR
 ^yBTR             Bit Test and Reset                   Flags: O D I T S Z A P C^y
                                                             - - - - - - - - *
 ^yBTR^y destination,source                               CPU: 386+

        ^yLogic^y   CF  bit, bit  0

    The source specifies a bit index in the destination operand.
    BTR copies that bit to the carry flag and then resets it in
    destination.


    ^yOpcode      Format^y
    0F B3       BTR r/m16,r16
    0F B3       BTR r/m32,r32
    0F BA /6 ib BTR r/m16,imm8
    0F BA /6 ib BTR r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     3                               6       6       7   NP
    mem, reg   3+d(0,1,2,4)                     13      13      13   NP
    reg, imm8    3+i(1)                          6       6       7   NP
    mem, imm8  3+d(0,1,2,4)+i(1)                 8       8       8   NP
}BT:BTC:BTS:BSF:TEST:SETcc
{BTS
 ^yBTS             Bit Test and Set                     Flags: O D I T S Z A P C^y
                                                             - - - - - - - - *
 ^yBTS^y destination,source                               CPU: 386+

        ^yLogic^y   CF  bit, bit  1

    The source specifies a bit index in the destination operand.
    BTS copies that bit to the carry flag and then sets it in
    destination.


    ^yOpcode      Format^y
    0F AB       BTS r/m16,r16
    0F AB       BTS r/m32,r32
    0F BA /5 ib BTS r/m16,imm8
    0F BA /5 ib BTS r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     3                               6       6       7   NP
    mem, reg   3+d(0,1,2,4)                     13      13      13   NP
    reg, imm8    3+i(1)                          6       6       7   NP
    mem, imm8  3+d(0,1,2,4)+i(1)                 8       8       8   NP
}BT:BTC:BTR:BSF:TEST:SETcc
{CALL
 ^yCALL            Call Procedure                       Flags: Not altered (*)^y
                                                     (*) If task switch
 ^yCALL^y destination                                         occurs, all flags
                                                         are affected
        ^yLogic^y
        ; near call (intra-segment)     ; far call (inter-segment)
        PUSH eIP                        PUSH CS
        eIP  destination_offset        CS  destination_segment
                                        PUSH eIP
                                        eIP  destination_offset

    The CALL instruction causes the procedure named in the operand to
    be executed. When the procedure is complete (a return instruction
    is executed within the procedure), execution continues at the
    instruction that follows the CALL.


    ^yNEAR^y calls are those with destinations of type r/m16, r/m32, rel16,
    rel32; changing or saving the segment register value is not
    necessary. The CALL rel16 and CALL rel32 forms add a signed offset
    to the address of the instruction following CALL to determine the
    destination. The rel16 form is used when the instruction's
    operand-size attribute is 16 bits; rel32 is used when the
    operand-size attribute is 32 bits. The result is stored in the
    32-bit EIP register. With rel16, the upper 16 bits of EIP are
    cleared, resulting in an offset whose value does not exceed 16
    bits. CALL r/m16 and CALL r/m32 specify a register or memory
    location from which the absolute segment offset is fetched. The
    offset fetched from r/m is 32 bits for an operand-size attribute
    of 32 bits (r/m32), or 16 bits for an operand-size of 16 (r/m16).
    The offset of the instruction following CALL is pushed onto the
    stack. It will be popped by a near RET instruction within the
    procedure called. The CS register is not changed by these forms of
    CALL.

    The ^yFAR^y calls, CALL ptr16:16 and CALL ptr16:32, use a 4-byte or
    6-byte operand as a long pointer to the procedure called. The CALL
    m16:16 and CALL m16:32 forms fetch the long pointer from the
    memory location specified (indirection). In real-address mode or
    virtual 8086 mode, the long pointer provides 16 bits for the CS
    register and 16 or 32 bits for the EIP register (depending on the
    operand-size attribute). These forms of CALL push both CS and IP
    or EIP on the stack as a return address.


    ^yNote^y
    In protected mode, both long pointer forms of the CALL instruction
    consult the access rights (AR) byte in the descriptor indexed by
    the selector part of the long pointer. Depending on the value of
    the AR byte, the call will perform one of the following types of
    control transfers:

        - a far call to the same protection level
        - an inter-protection-level far call
        - a task switch


    ^yOpcode      Format^y
    9A cd       CALL ptr16:16   ; call far direct
    9A cp       CALL ptr16:32   ; call far direct
    E8 cw       CALL rel16      ; call near relative
    E8 cd       CALL rel32      ; call near relative
    FF /2       CALL r/m16      ; call near indirect
    FF /2       CALL r/m32      ; call near indirect
    FF /3       CALL m16:16     ; call far indirect
    FF /3       CALL m16:32     ; call far indirect


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    near         3      23      14      7+m     7+m      3       1   PV
    reg          2      20      13      7+m     7+m      5       2   NP
    mem16     2+d(0-2)  29+EA   19      11+m    10+m     5       2   NP
    far          5      36      23      13+m    17+m    18       4   NP
    mem32     2+d(0-2)  53+EA   38      16+m    22+m    17       4   NP

                            Protected Mode
    far          5                      26+m    34+m    20     4-13  NP
    mem32     2+d(0-2)                  29+m    38+m    20     5-14  NP
           (Cycles not shown for calls through call and task gates)
}ENTER:LEAVE:RET:PUSH:JMP:Selectors:Descriptors:Task switch
{CBW
 ^yCBW             Convert Byte to Word                 Flags: Not altered^y

 ^yCBW^y

        ^yLogic^y   if AL < 80h
                   AH  0
                else
                   AH  FFh
                endif

    CBW converts the signed byte in the AL register to a signed word
    in AX by extending the most significant bit (the sign bit) of AL
    into all of the bits of AH.

    ^yNote^y
    The 32-bit form of CBW is CWDE.


    ^yOpcode      Format^y
    98          CBW


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       3       3       3   NP
}CWDE:CWD:CDQ:IDIV:MOVSX
{CDQ
 ^yCDQ             Convert Double to Quad               Flags: Not altered^y

 ^yCDQ^y                                                  CPU: 386+

        ^yLogic^y   if EAX < 80000000h
                   EDX  0
                else
                   EDX  FFFFFFFFh
                endif

    CDQ converts the signed doubleword in EAX to a signed 64-bit
    integer in the register pair EDX:EAX by extending the most
    significant bit of EAX (the sign bit) into all the bits of EDX.

    ^yNote^y
    CDQ is the 32-bit form of CWD.


    ^yOpcode      Format^y
    99          CDQ


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1                               2       3       2   NP
}CWD:CBW:CWDE:IDIV:DIV:MOVSX
{CLC
 ^yCLC             Clear Carry Flag                     Flags: O D I T S Z A P C^y
                                                             - - - - - - - - 0
 ^yCLC^y

        ^yLogic^y   CF  0

    CLC resets the carry flag (CF) to zero.


    ^yOpcode      Format^y
    F8          CLC


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       2       2       2   NP
}CMC:STC:SAHF:Flags
{CLD
 ^yCLD             Clear Direction Flag                 Flags: O D I T S Z A P C^y
                                                             - 0 - - - - - - -
 ^yCLD^y

        ^yLogic^y   DF  0                  ; Auto-increment eSI, eDI

    CLD resets the direction flag (DF) to zero. After CLD is executed,
    string operations will increment the index registers that they use
    (SI and/or DI; ESI and/or EDI).


    ^yOpcode      Format^y
    FC          CLD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       2       2       2   NP
}STD:REP:CMPS:INS:LODS:MOVS:OUTS:SCAS:STOS:Flags
{CLI
 ^yCLI             Clear Interrupt-Enable Flag          Flags: O D I T S Z A P C^y
                                                             - - 0 - - - - - -
 ^yCLI^y                                                  ^yIO^y

        ^yLogic^y   IF  0

    External interrupts are ignored at the end of CLI until the
    interrupt flag (IF) is set to one. Non-maskable interrupts are
    recognized no matter what the state of IF. CLI and STI have no
    effect on software interrupts - those generated by an INT
    instruction or by the occurrence of a divide fault and similar
    conditions.

    ^yProtected mode^y
    If the current privilege level (CPL) is at least as privileged as
    the input/output privilege level (IOPL -- bits 12 and 13 of the
    FLAGS register), CLI resets IF to zero. A general protection
    exception is generated if the program does not have the correct
    I/O privilege.


    ^yOpcode      Format^y
    FA          CLI


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       3       3       5       7   NP
}STI:INT:Flags:IOPL:Exceptions
{CLTS
 ^yCLTS            Clear Task Switched Flag             Flags: Not altered^y

 ^yCLTS^y                                                 CPU: 286+ ^vPriv^v

        ^yLogic^y   Task Switched Flag in CR0  0 (80386+)
                Task Switched Flag in MSW  0 (80286)

    CLTS clears the TS (Task Switched) flag in the CR0 register (the
    Machine Status Word of the 80286). This flag is set to 1 by the
    processor every time a task switch occurs. When the TS flag is
    used to manage processor extensions,
        - every execution of an ESC instruction is trapped if the TS
          flag is set
        - execution of a WAIT instruction is trapped if the MP (Math
          Present) and TS flags are both set

    Thus, if a task switch was made after an ESC instruction began,
    the Floating-Point Unit's context may need to be saved before a
    new ESC instruction can be issued. The fault handler saves the
    context and clears the TS flag.

    ^yNote^y:       CLTS is used in systems programming and is a
                ^vprivileged^v instruction, running at privilege level
                zero only.


    ^yOpcode      Format^y
    0F 06       CLTS


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            2                       2       5       7      10   NP
}
{CMC
 ^yCMC             Complement Carry Flag                Flags: O D I T S Z A P C^y
                                                             - - - - - - - - *
 ^yCMC^y

        ^yLogic^y   CF  NOT(CF)

    CMC reverses the state of the carry flag (CF).


    ^yOpcode      Format^y
    F5          CMC


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       2       2       2   NP
}STC:CLC:BT:Flags
{CMOVcc
 ^vCMOVcc^y          Conditional Move                     Flags: Not altered

 ^vCMOVcc^v destination, source                           CPU: PPro+

        ^yLogic^y   if condition
                   destination  source
                endif

    Conditional moves test the flags which have been set by a previous
    instruction. If the given condition is true, the source operand is
    transferred (copied) into the destination operand.

    These instructions can move a 16- or 32-bit value from memory to a
    general-purpose register or from one general-purpose register to
    another. Conditional moves of 8-bit register operands are not
    supported.


        ^yNote^y
        The CMOVcc instructions are new for the Pentium Pro
        processor family; however, they may not be supported
        by all the processors in the family. By checking the
        processor's feature information with CPUID, a program
        can determine if the CPU supports CMOVcc.


    A condition code (cc) is associated with each instruction to
    indicate the condition being tested for. The terms "above" and
    "below" are used for unsigned integers; "less" and "greater" for
    signed integers.
    Note that several mnemonics have the same meaning.


    ^vInstruction         Move if ...                    Flags^v
    CMOVA,  CMOVNBE     Above, Not Below or Equal      CF=0 AND ZF=0
    CMOVAE, CMOVNB,     Above or Equal, Not Below,
      CMOVNC              Not Carry                    CF=0
    CMOVBE, CMOVNA      Below or Equal, Not Above      CF=1 OR ZF=1
    CMOVB,  CMOVC,      Below, Carry,
      CMOVNAE             Not Above or Equal           CF=1
    CMOVE,  CMOVZ       Equal, Zero                    ZF=1
    CMOVNE, CMOVNZ      Not Equal, Not Zero            ZF=0
    CMOVP,  CMOVPE      Parity, Parity Even            PF=1
    CMOVNP, CMOVPO      No Parity, Parity Odd          PF=0

    CMOVG,  CMOVNLE     Greater, Not Less or Equal     SF=OF AND ZF=0
    CMOVGE, CMOVNL      Greater or Equal, Not Less     SF=OF
    CMOVLE, CMOVNG      Less or Equal, Not Greater     SF<>OF OR ZF=1
    CMOVL,  CMOVNGE     Less, Not Greater or Equal     SF<>OF
    CMOVO               Overflow                       OF=1
    CMOVNO              No Overflow                    OF=0
    CMOVS               Sign (negative)                SF=1
    CMOVNS              No Sign (positive)             SF=0


    ^yOpcode      Format^y
    0F 40 /r    CMOVO  r16, r/m16
    0F 40 /r    CMOVO  r32, r/m32
    0F 41 /r    CMOVNO r16, r/m16
    0F 41 /r    CMOVNO r32, r/m32
    0F 42 /r    CMOVB  r16, r/m16
    0F 42 /r    CMOVB  r32, r/m32
    0F 43 /r    CMOVAE r16, r/m16
    0F 43 /r    CMOVAE r32, r/m32
    0F 44 /r    CMOVE  r16, r/m16
    0F 44 /r    CMOVE  r32, r/m32
    0F 45 /r    CMOVNE r16, r/m16
    0F 45 /r    CMOVNE r32, r/m32
    0F 46 /r    CMOVBE r16, r/m16
    0F 46 /r    CMOVBE r32, r/m32
    0F 47 /r    CMOVA  r16, r/m16
    0F 47 /r    CMOVA  r32, r/m32
    0F 48 /r    CMOVS  r16, r/m16
    0F 48 /r    CMOVS  r32, r/m32
    0F 49 /r    CMOVNS r16, r/m16
    0F 49 /r    CMOVNS r32, r/m32
    0F 4A /r    CMOVP  r16, r/m16
    0F 4A /r    CMOVP  r32, r/m32
    0F 4B /r    CMOVNP r16, r/m16
    0F 4B /r    CMOVNP r32, r/m32
    0F 4C /r    CMOVL  r16, r/m16
    0F 4C /r    CMOVL  r32, r/m32
    0F 4D /r    CMOVGE r16, r/m16
    0F 4D /r    CMOVGE r32, r/m32
    0F 4E /r    CMOVLE r16, r/m16
    0F 4E /r    CMOVLE r32, r/m32
    0F 4F /r    CMOVG  r16, r/m16
    0F 4F /r    CMOVG  r32, r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium  PPro
    reg, reg      3      -       -       -       -       -       -        ??
    reg, mem   3+d(0-2)  -       -       -       -       -       -        ??
}MOV:Jcc:SETcc:FCMOVcc:CPUID
{CMP
 ^yCMP             Compare                              Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yCMP^y destination,source

        ^yLogic^y   Flags set according to result of
                (destination - source)

    CMP compares two numbers by subtracting the source from the
    destination and updates the flags. None of the operands are
    changed by CMP.


    ^yOpcode      Format^y
    38 /r       CMP  r/m8,r8
    39 /r       CMP  r/m16,r16
    39 /r       CMP  r/m32,r32
    3A /r       CMP  r8,r/m8
    3B /r       CMP  r16,r/m16
    3B /r       CMP  r32,r/m32
    3C ib       CMP  AL,imm8
    3D iw       CMP  AX,imm16
    3D id       CMP  EAX,imm32
    80 /7 ib    CMP  r/m8,imm8
    81 /7 iw    CMP  r/m16,imm16
    81 /7 id    CMP  r/m32,imm32
    83 /7 ib    CMP  r/m16,imm8
    83 /7 ib    CMP  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  13+EA   10       7       5       2       2   UV
    reg, mem  2+d(0,2)  13+EA   10       6       6       2       2   UV
    reg, imm  2+i(1,2)   4       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  14+EA   10       6       5       2       2   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate
}CMPS:SUB:Flags
{CMPS
 ^yCMPS            Compare Strings                      Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yCMPS^y source_string,destination_string

        ^yLogic^y   CMP [DS:eSI],[ES:eDI]
                if DF = 0
                   eSI  eSI + n        ; n = 1 for byte, 2 for word,
                   eDI  eDI + n        ;     4 for dword (386+)
                else
                   eSI  eSI - n
                   eDI  eDI - n
                endif

    This instruction compares two values by subtracting the
    destination string element pointed to by ES:eDI, from the source
    string element pointed to by DS:eSI, and sets the flags according
    to the results of the comparison. The operands themselves are not
    altered. After the comparison, eSI and eDI are incremented (if the
    direction flag is cleared) or decremented (if the direction flag
    is set), in preparation for comparing the next element of the
    string. If the address-size attribute of CMPS is 16, the SI and DI
    registers will be used as source and destination indices,
    otherwise ESI and EDI.

    The source segment can be changed with a segment override, the
    destination segment cannot.


    ^yNote^y:       This instruction is always translated by the
                assembler into CMPSB, Compare String Byte, CMPSW,
                Compare String Word, or CMPSD, Compare String Dword,
                depending upon whether source refers to a string of
                bytes, words or doublewords. In any case, you must
                explicitly load the eSI and eDI registers with the
                offset of the source and destination strings.

                CMPSB, CMPSW, and CMPSD are synonyms for the byte,
                word, and doubleword CMPS instructions that do not
                require any operands. They are simpler to use but
                provide no type or segment checking.


    If the REPE or REPNE prefix modifies this instruction, the CPU
    compares the value of the destination string element to the value
    of the source string element. It then steps eSI and eDI in the
    direction indicated by DF by the indicated size, until either the
    REPE/REPNE condition is false or eCX counts to zero.


    ^yOpcode      Format^y
    A6          CMPS m8,m8              ; = CMPSB
    A7          CMPS m16,m16            ; = CMPSW
    A7          CMPS m32,m32            ; = CMPSD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    cmpsb         1     30      22       8      10       8       5   NP
    cmpsw         1     30      22       8      10       8       5   NP
    cmpsd         1     -       -        -      10       8       5   NP
    repX cmpsb    2     9+30n   5+22n   5+9n    5+9n    7+7n*   9+4n NP
    repX cmpsw    2     9+30n   5+22n   5+9n    5+9n    7+7n*   9+4n NP
    repX cmpsd    2      -       -       -      5+9n    7+7n*   9+4n NP

    repX = repe, repz, repne, or repnz
       * : 5 if n = 0
}REP:CLD:STD:SCAS:CMP:Flags
{CMPXCHG
 ^yCMPXCHG         Compare and Exchange                 Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yCMPXCHG^y destination,source                           CPU: 486+ (*)

        ^yLogic^y   CMP destination,accumulator
                if ZF = 1
                   destination  source
                else
                   accumulator  destination
                endif

    CMPXCHG compares the accumulator (AL, AX, or EAX) with a
    destination operand. If they are equal, the source operand is
    loaded into destination; otherwise, the destination operand is
    loaded into the accumulator.

    This instruction is used to support semaphores.


    ^vNote^v
    On the A-step of the 486, CMPXCHG was microcoded using the opcodes
    for the discarded A- to B0-step 386 instructions XBTS (A6) and
    IBTS (A7). Because of conflicts with software written for the
    early 386 DX, the opcodes for the 486 were changed to those shown
    below starting with the B step.
    Note that some 386 software won't run on older 386s and some 486
    software will not run on early 486s when using this instruction.


    ^yOpcode      Format^y
    0F A6 /r    CMPXCHG r/m8,r8         ^v; i486 pre-B step only^v
    0F A7 /r    CMPXCHG r/m16,r16       ^v; i486 pre-B step only^v
    0F A7 /r    CMPXCHG r/m32,r32       ^v; i486 pre-B step only^v
    0F B0 /r    CMPXCHG r/m8,r8
    0F B1 /r    CMPXCHG r/m16,r16
    0F B1 /r    CMPXCHG r/m32,r32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg      3                                      6       5   NP
    mem, reg    3+d(0-2)                                7-10     6   NP
}CMP:XCHG:LOCK:CMPXCHG8B:Flags
{CMPXCHG8B
 ^yCMPXCHG8B       Compare and Exchange 8 Bytes         Flags: O D I T S Z A P C^y
                                                             - - - - - * - - -
 ^yCMPXCHG8B^y destination                                CPU: Pentium+

        ^yLogic^y   if EDX:EAX = destination
                   ZF  1
                   destination  ECX:EBX
                else
                   ZF  0
                   EDX:EAX  destination
                endif

    CMPXCHG8B compares the quadword in EDX:EAX with a 64-bit memory
    operand. If they are equal, the quadword in the ECX:EBX register
    pair is loaded into destination and the zero flag is set;
    otherwise, the destination operand is loaded into the EDX:EAX
    register pair and the zero flag is cleared.


    ^yOpcode      Format^y
    0F C7 /1    CMPXCHG8B mem64


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    mem64       3+d(0-2)                                        10   NP
}CMPXCHG
{CPUID
 ^yCPUID           Identify CPU                         Flags: Not altered^y

 ^yCPUID^y                                                CPU: Pentium+ (*)

        ^yLogic^y   case EAX of
                   0 : EAX  Highest input value recognized by CPUID
                       EBX:ECX:EDX  Vendor ID string
                   1 : EAX  CPU stepping ID, model, family, type
                       EBX:ECX  Reserved
                       EDX  Feature flags
                   2 : EAX:EBX:ECX:EDX  Cache information
                otherwise
                   EAX:EBX:ECX:EDX  Undefined
                endcase

    CPUID identifies CPU and revision information for the installed CPU.
    The instruction (currently) has 3 functions, all expecting input in
    EAX and all returning information in the EAX, EBX, ECX, and EDX
    registers.

    i)  Entry:  EAX = 0  Check CPU installed
        Return: EAX = Max. value of EAX before executing CPUID
                      (max. function number supported)
                EBX:EDX:ECX = 12-byte vendor ID string,
                      e.g. "GenuineIntel":
                      EBX = 756e6547h  ; "uneG"
                      EDX = 49656e69h  ; "Ieni"
                      ECX = 6c65746eh  ; "letn"

    ii) Entry:  EAX = 1  Get model-specific information
        Return: EAX bits 0-3   Stepping level  (e.g. 5 on 486DX-2/66)
                    bits 4-7   Model  (e.g. 3 on 486DX-2/66)
                    bits 8-11  Family (4=80486, 5=Pentium, 6=P6)
                    bits 12-31 Reserved (zero)
                EBX bits 0-31  Reserved (zero)
                ECX bits 0-31  Reserved (zero)
                EDX Feature flags (1 means option available):
                    bit        feature
                     0  FPU    On-chip FPU, executes i387 instructions
                     1  VME    Virtual 8086 Mode Enhancements supported:
                               - CR4.VME bit enables V86 mode extensions
                               - CR4.PVI bit enables protected-mode
                                 virtual interrupts
                               - Expansion of the TSS with the software
                                 indirection bitmap
                               - EFLAGS.VIF bit enables the virtual
                                 interrupt flag.
                               - EFLAGS.VIP bit enables the virtual
                                 interrupt pending flag.
                     2  DE     I/O breakpoints possible
                     3  PSE    4 MB paging supported
                     4  TSC    RDTSC instruction supported
                     5  MSR    Has Pentium-compatible Model Specific
                                 Registers
                     6  PAE    Physical Address Extension > 32 bits
                     7  MCE    Machine Check Exception supported
                     8  CX8    CMPXCHG8B instruction supported
                     9  APIC   On-chip Advanced Programmable Interrupt
                               Controller exists, is enabled, and
                               available for use.
                    10         Reserved
                    11         Reserved
                    12  MTRR   Memory Type Range Registers supported
                    13  PGE    PTE Global Flag. Processor supports the
                               CR4.PGE flag enabling the global bit in
                               both PTDEs and PTEs.
                    14  MCA    Machine Check Architecture. Processor
                               supports the MCG_CAP (machine check
                               global capability) MSR.
                    15  CMOV   CMOVcc instructions supported.
                               The FCMOVcc and FCOMI instructions are
                               supported if bit 0 (FPU) is also set.
                    16-31      Reserved (zero)
                    Assume zero if bit is not mentioned.

    iii) Entry: EAX = 2  Get cache information (Pentium Pro+)
        Return: EAX bits 0-7   Number of times the CPUID instruction
                               must be executed with an input value of 2
                               (two) to get a complete description of
                               the processor's caches and TLBs.
                               Pentium Pro will return 1.
                    bits 8-30  Cache information. Refer to Intel doc.
                    bit  31    0 = Register has valid information
                               1 = Reserved
                EBX bits 0-30  Cache information. Refer to Intel doc.
                    bit  31    0 = Register has valid information
                               1 = Reserved
                ECX bits 0-30  Cache information. Refer to Intel doc.
                    bit  31    0 = Register has valid information
                               1 = Reserved
                EDX bits 0-30  Cache information. Refer to Intel doc.
                    bit  31    0 = Register has valid information
                               1 = Reserved


    ^yNote^y
    The CPUID instruction was introduced with the Pentium processor but
    works on later Intel 80486 models, too.
    The ID flag in the EFLAGS register can be used to determine if this
    instruction is supported. If a procedure is able to set or clear
    this flag, the CPUID instruction is supported by the processor
    running the procedure.

    Executing CPUID on an early 80486 produces an invalid opcode
    exception.


    ^yOpcode      Format^y
    0F A2       CPUID


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -             2     -       -       -       -       -       14   NP
}
{CWD
 ^yCWD             Convert Word to Doubleword           Flags: Not altered^y

 ^yCWD^y

        ^yLogic^y  if AX < 8000h
                  DX  0
               else
                  DX  FFFFh
               endif

    CWD converts the signed word in AX to a signed doubleword in the
    DX:AX register pair by extending the most significant bit (the
    sign bit) in AX into all the bits of DX.

    ^yNote^y
    The 32-bit form of CWD is CDQ.


    ^yOpcode      Format^y
    99          CWD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       5       4       2       2       3       2   NP
}CDQ:CBW:CWDE:DIV:MOVSX
{CWDE
 ^yCWDE            Word to Double Extended              Flags: Not altered^y

 ^yCWDE^y                                                 CPU: 386+

        ^yLogic^y   EAX <- sign-extend(AX)

    CWDE converts the signed word in AX to a doubleword in EAX by
    extending the most significant bit (the sign bit) in AX into all
    bits of the high word of EAX.

    ^yNote^y
    CWDE is the 32-bit form of CBW.


    ^yOpcode      Format^y
    98          CWDE


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1                               3       3       3   NP
}CBW:CWD:CDQ:IDIV:MOVSX
{DAA
 ^yDAA             Decimal Adjust after Addition        Flags: O D I T S Z A P C^y
                                                             ? - - - * * * * *
 ^yDAA^y

        ^yLogic^y   if ((AL AND 0Fh) > 9) OR (AF = 1)
                   AL  AL + 6
                   AF  1
                else AF  0
                endif
                if (AL > 9Fh) OR (CF = 1)
                   AL  AL + 60h
                   CF  1
                else CF  0
                endif

    Execute DAA after executing an ADD or ADC instruction on two
    packed BCD operands. The addition leaves a two-digit byte result
    in the AL register. The DAA instruction adjusts AL to contain the
    correct two-digit packed decimal result.


    ^yOpcode      Format^y
    27          DAA


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       4       4       3       4       2       3   NP
}DAS:ADC:Flags
{DAS
 ^yDAS             Decimal Adjust after Subtraction     Flags: O D I T S Z A P C^y
                                                             ? - - - * * * * *
 ^yDAS^y

        ^yLogic^y   if ((AL AND 0Fh) > 9) OR (AF = 1)
                   AL  AL - 6
                   AF  1
                else AF  0
                endif
                if (AL > 9Fh) OR (CF = 1)
                   AL  AL - 60h
                   CF  1
                else CF  0
                endif

    Execute DAS after executing a SUB or SBB instruction on two packed
    BCD operands. The subtraction leaves a two-digit byte result in
    the AL register. The DAS instruction adjusts AL to contain the
    correct two-digit packed decimal result.


    ^yOpcode      Format^y
    2F          DAS


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       4       4       3       4       2       3   NP
}DAA:SBB:Flags
{DEC
 ^yDEC             Decrement by 1                       Flags: O D I T S Z A P C^y
                                                             * - - - * * * * -
 ^yDEC^y destination

        ^yLogic^y   destination <- destination - 1

    DEC subtracts one from the operand. Note that DEC does not change
    the carry flag.


    ^yOpcode      Format^y
    48 + rw     DEC  r16
    48 + rd     DEC  r32
    FE /1       DEC  r/m8
    FF /1       DEC  r/m16
    FF /1       DEC  r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8           2       3       3       2       2       1       1   UV
    r16          1       3       3       2       2       1       1   UV
    r32          1       3       3       2       2       1       1   UV
    mem       2+d(0,2)  23+EA   15       7       6       3       3   UV
}INC:SUB:Flags
{DIV
 ^yDIV             Division, Unsigned                   Flags: O D I T S Z A P C^y
                                                             ? - - - ? ? ? ? ?
 ^yDIV^y source

        ^yLogic^y   AL <- AX / source
                AH <- remainder
             or
                AX <- DX:AX / source
                DX <- remainder
             or
                EAX <- EDX:EAX / source  ; 386+
                EDX <- remainder

    DIV performs an unsigned division. The dividend is implicit; only
    the divisor is given as an operand. The type of the divisor
    determines which registers are used:

        ^ySize   Dividend  Divisor   Quotient  Remainder^y
        byte      AX      r/m8        AL        AH
        word    DX:AX     r/m16       AX        DX
        dword  EDX:EAX    r/m32       EAX       EDX    ; 386+


    ^yNotes^y
    If the divisor is zero, or if the quotient is too large to fit in
    the destination, an INT 0 (Divide by Zero) is generated, and the
    quotient and remainder are undefined.

    When an Interrupt 0 (Divide by Zero) is generated, the saved CS:IP
    value on the 80286+ points to the instruction that failed (the DIV
    instruction).  On the 8086, however, CS:IP points to the
    instruction following the failed DIV instruction.


    ^yOpcode      Format^y
    F6 /6       DIV  r/m8
    F7 /6       DIV  r/m16
    F7 /6       DIV  r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8           2     80-90     29     14      14      16      17   NP
    r16          2    144-162    38     22      22      24      25   NP
    r32          2       -       -       -      38      40      41   NP
    mem8    2+d(0-2)   86-96+EA  35     17      17      16      17   NP
    mem16   2+d(0-2)  150-168+EA 44     25      25      24      25   NP
    mem32   2+d(0-2)     -       -       -      41      40      41   NP
}IDIV:SHR:SHRD:AAD:AAM
{ENTER
 ^yENTER           Make Procedure Stack Frame           Flags: Not altered^y

 ^yENTER^y locals,nesting                                 CPU: 186+

        ^yLogic^y   push eBP
                temp  eSP
                while (nesting > 0)
                   nesting  nesting - 1
                   eBP  eBP - n
                   push [SS:eBP]
                endwhile
                eBP  temp
                eSP  eSP - locals

        Note: If the operand-size attribute of ENTER is 16 bits, BP
              and SP are used and n = 2; if 32 bits, EBP and ESP are
              used and n = 4.


    ENTER creates a stack frame that can be used to implement the
    rules of block-structured, high-level languages. A LEAVE
    instruction at the end of the procedure complements ENTER.

    ENTER has two parameters. The first specifies the number of bytes
    of dynamic storage to be allocated on the stack for the routine
    being entered. The second parameter corresponds to the lexical
    nesting level (0 to 31) of the routine within the high-level
    language source code. This level determines how many sets of stack
    frame pointers the CPU copies into the new stack frame from the
    preceding stack frame. This list of stack frames is often called
    the ^ydisplay^y. Lexical level has ^yno^y relationship to either the
    protection levels or to the I/O privilege level.

    ENTER creates the new display for a procedure. Then it allocates
    the dynamic storage space for that procedure by decrementing eSP by
    the number of bytes specified in the first parameter. This new
    value of eSP serves as the starting point for all PUSH and POP
    operations within that procedure.

    ENTER can be used either nested or non-nested. If the lexical
    level is zero, the non-nested form is used (enter n,0). The main
    procedure operates at the highest logical level, level 1. The
    first procedure it calls operates at the next deeper level, level
    2, etc. A level 2 procedure can access the variables in the main
    program because a program operating at a higher logical level
    (calling a program at a lower level) requires that the called
    procedure have access to the variables of the calling program.

    A procedure calling another procedure at the same level implies
    that they are parallel procedures and should not have access to
    the variables of the calling program. The new stack frame does not
    include the pointer for addressing the calling procedure's stack
    frame. ENTER treats a re-entrant procedure as a procedure calling
    another procedure at the same level.


    ^yNote^y
    Some assemblers support ENTER extensions that force the use of
    SP,BP (ENTERW) or ESP,EBP (ENTERD) regardless of the code
    segment's size attribute (80386+).


    ^yOpcode      Format^y
    C8 iw ib    ENTER imm16,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    imm16, 0      3      -      15      11      10      14      11   NP
    imm16, 1      4      -      25      15      12      17      15   NP
    imm16, imm8   4      -   22+16n    12+4n   15+4n   17+3i  15+2i  NP
                             n = imm8-1;  i = imm8
}LEAVE:PUSH
{ESC
 ^yESC             Escape                               Flags: Not altered^y

 ^yESC^y

    ESC lets you send instructions to an external coprocessor on the
    system bus. On the 80386 and earlier processors, this was usually
    a floating-point chip called a numeric (or math) coprocessor. The
    8086 and 8088 work with an Intel 8087 math coprocessor; the 80286
    and 80386 work with an 80287 or 80387 coprocessor. The 80486 DX
    has a math coprocessor, called the Floating-Point Unit (FPU), on
    the chip.

    ^yNote^y:       In order to synchronize with the 8087 math
                coprocessor, WAIT instructions must precede all ESC
                instructions.  The 80286+ has automatic instruction
                synchronization, hence WAITs are not needed.


    ^yOpcode^y
    The opcode for ESC begins with the 5-bit sequence "11011b". This
    sequence indicates that the rest of the opcode is an instruction
    meant for a coprocessor, as opposed to an instruction meant for
    the CPU.
}HLT:WAIT:LOCK
{HLT
 ^yHLT             Halt the Processor                   Flags: Not altered^y

 ^yHLT^y                                                  ^vPriv^v

    HLT stops the execution of all instructions and places the CPU in
    a HALT state. An enabled interrupt, a non-maskable interrupt, or
    a reset will resume execution.

    If an interrupt (including NMI) is used to resume after HLT, the
    saved CS:eIP value points to the instruction following HLT.


    A HLT would normally be the last instruction in a sequence that
    shuts down the system, e.g. for a checkpoint after a power failure
    is detected.

    In protected mode, HLT is a privileged instruction.


    ^yOpcode      Format^y
    F4          HLT


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       5       4       4   NP
}WAIT:ESC:LOCK
{IDIV
 ^yIDIV            Division, Signed                     Flags: O D I T S Z A P C^y
                                                             ? - - - ? ? ? ? ?
 ^yIDIV^y source

        ^yLogic^y   AL <- AX / source
                AH <- remainder
             or
                AX <- DX:AX / source
                DX <- remainder
             or
                EAX <- EDX:EAX / source  ; 386+
                EDX <- remainder

    IDIV performs a signed division. The dividend, quotient, and
    remainder are implicitly allocated to fixed registers, while only
    the divisor is given as an explicit operand. The type of the
    divisor determines which registers are used. Non-integral
    quotients are truncated toward zero. The remainder has the same
    sign as the dividend.

        ^ySize     Dividend  Divisor   Quotient  Remainder^y
        byte       AX       r/m8        AL        AH
        word      DX:AX     r/m16       AX        DX
        dword    EDX:EAX    r/m32       EAX       EDX   ; 386+


    ^yNote^y
    If the resulting quotient is too large to fit in the destination,
    or if the divisor is zero, an interrupt 0 (divide by zero) is
    generated. If a divide-by-zero occurs, the quotient and remainder
    are undefined.


    ^yOpcode      Format^y
    F6 /7       IDIV r/m8
    F7 /7       IDIV r/m16
    F7 /7       IDIV r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8           2     101-112    44-52   17     19     19      22   NP
    r16          2     165-184    53-61   25     27     27      30   NP
    r32          2       -          -      -     43     43      46   NP
    mem8    2+d(0-2)  107-118+EA  50-58   20     22     20      22   NP
    mem16   2+d(0-2)  171-190+EA  59-67   28     30     28      30   NP
    mem32   2+d(0-2)     -          -      -     46     44      46   NP
}DIV:SAR:SHRD:AAD:IMUL
{IMUL
 ^yIMUL            Multiplication, Signed               Flags: O D I T S Z A P C^y
                                                             * - - - ? ? ? ? *

    IMUL performs a signed multiplication. This instruction has 3
    variations:

    i)  One-operand form        ^yIMUL source^y

        ^yLogic^y   AX      <- AL * source
             or
                DX:AX   <- AX * source
             or
                EDX:EAX <- EAX * source  ; 386+

        The source can be a byte, word, or doubleword located in
        memory or in a general register.


    ii) Two-operand form        ^yIMUL oper1,oper2^y       CPU: 186+/386+

        ^yLogic^y   oper1 <- oper1 * oper2

        The first operand can be in any general register while the
        second operand can be an immediate value (80186+), or a value
        in memory or a general register (80386+). The product replaces
        the first operand.


    iii) Three-operand form     ^yIMUL dest,oper1,oper2^y  CPU: 186+

        ^yLogic^y   dest <- oper1 * oper2

        The second operand (oper1) can be in any general register or a
        value in memory while the third operand (oper2) must be an
        immediate value. The product is stored in destination which
        can be any general register.


    ^yNote^y
    An immediate value used with IMUL is automatically sign-extended
    to match the size of the general register operand.

    IMUL clears the carry and overflow flags (CF and OF) under these
    conditions:
    - form i     : if the upper half of the destination does not contain
                   any significant digits of the result
    - form ii,iii: if the result fits within the size of the destination
    (otherwise CF and OF are set to 1).


    ^yOpcode      Format^y
    F6 /5       IMUL r/m8
    F7 /5       IMUL r/m16
    F7 /5       IMUL r/m32              ; 386+

    0F AF /r    IMUL r16,r/m16          ; 386+
    0F AF /r    IMUL r32,r/m32          ; 386+

    69 /r iw    IMUL r16,imm16          ; 186+
    69 /r iw    IMUL r16,r/m16,imm16    ; 186+
    69 /r id    IMUL r32,imm32          ; 386+
    69 /r id    IMUL r32,r/m32,imm32    ; 386+

    6B /r ib    IMUL r16,imm8           ; 186+
    6B /r ib    IMUL r16,r/m16,imm8     ; 186+
    6B /r ib    IMUL r32,imm8           ; 386+
    6B /r ib    IMUL r32,r/m32,imm8     ; 386+


    ^yLength and timing^y

                        ^yAccumulator multiplies^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8           2      80-98    25-28  13      9-14    13-18   11   NP
    r16          2     128-154   34-37  21      9-22    13-26   11   NP
    r32          2       -        -      -      9-38    13-42   10   NP
    mem8     2+d(0-2)  86-104+EA 32-34  16     12-17    13-18   11   NP
    mem16    2+d(0-2) 134-160+EA 40-43  24     12-25    13-26   11   NP
    mem32    2+d(0-2)    -        -      -     12-41    13-42   10   NP

                        ^y2 and 3 operand multiplies^y
    Operands        Bytes     186   286    386         486      Pentium
    r16, imm       2+i(1,2)    -    21  9-14/9-22  13-18/13-26  10   NP
    r32, imm       2+i(1,2)    -     -     9-38       13-42     10   NP
    r16,r16,imm    2+i(1,2)  22/29  21  9-14/9-22  13-18/13-26  10   NP
    r32,r32,imm    2+i(1,2)    -     -     9-38       13-42     10   NP
    r16,m16,imm    2+d(0-2)  25/32  24 12-17/12-25 13-18/13-26  10   NP
                    +i(1,2)
    r32,m32,imm    2+d(0-2)+i(1,2)   -    12-41       13-42     10   NP
    r16, r16       2+i(1,2)    -     -     9-22    13-18/13-26  10   NP
    r32, r32       2+i(1,2)    -     -     9-38       13-42     10   NP
    r16, m16       2+d(0-2)+i(1,2)   -    12-25    13-18/13-26  10   NP
    r32, m32       2+d(0-2)+i(1,2)   -    12-41       13-42     10   NP

    All forms: dest, src                        cycles for:   byte/word
               or                                             dword
               dest, src1, src2
}MUL:SHL:SHLD:AAM:AAD:IDIV
{IN
 ^yIN              Input from Port                      Flags: Not altered^y

 ^yIN^y accumulator,port                                  ^yIOpm^y

        ^yLogic^y   accumulator <- [port]

    IN reads a byte, word, or a doubleword from a specified port and
    stores it in AL, AX, or EAX. The port number is specified as an
    immediate byte value (for ports 0 thru 255) or by the DX register
    (allowing access to all ports).

    ^yProtected mode^y
    An exception occurs if the current task has insufficient privilege
    for the I/O. IN is not IOPL-sensitive in V86 mode.


    ^yOpcode      Format^y
    E4 ib       IN  AL,imm8
    E5 ib       IN  AX,imm8
    E5 ib       IN  EAX,imm8
    EC          IN  AL,DX
    ED          IN  AX,DX
    ED          IN  EAX,DX


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    al, imm8     2      14      10       5      12      14       7   NP
    ax, imm8     2      14      10       5      12      14       7   NP
    eax, imm8    2       -       -       -      12      14       7   NP
    al, dx       1      12       8       5      13      14       7   NP
    ax, dx       1      12       8       5      13      14       7   NP
    eax, dx      1       -       -       -      13      14       7   NP

                             Protected mode
    acc, imm     2                           6/26/26  9/29/27  4/21/19 NP
    acc, dx      1                           7/27/27  8/28/27  4/21/19 NP

                   Cycles for: CPL <= IOPL / CPL > IOPL / V86
}INS:OUT:IOPL
{INC
 ^yINC             Increment by 1                       Flags: O D I T S Z A P C^y
                                                             * - - - * * * * -
 ^yINC^y destination

        ^yLogic^y   destination <- destination + 1

    INC adds one to the operand. Note that INC does not change the
    carry flag.


    ^yOpcode      Format^y
    40 + rw     INC  r16
    40 + rd     INC  r32
    FE /0       INC  r/m8
    FF /0       INC  r/m16
    FF /0       INC  r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8           2       3       3       2       2       1       1   UV
    r16          1       3       3       2       2       1       1   UV
    r32          1       3       3       2       2       1       1   UV
    mem       2+d(0,2)  23+EA   15       7       6       3       3   UV
}DEC:ADD:Flags
{INS
 ^yINS             Input String From Port               Flags: Not altered^y

 ^yINS^y destination,DX                                   CPU: 186+ ^yIOpm^y

        ^yLogic^y   [ES:eDI] <- [port]
                if DF = 0
                   eDI <- eDI + n       ; n = 1 for byte, 2 for word,
                else                    ;     4 for dword (386+)
                   eDI <- eDI - n
                endif

    The INS instruction reads a value from a port (specified in the DX
    register) and transfers it to memory (at ES:eDI). After the
    transfer, eDI is updated to point to the next string location. The
    port is always addressed through DX.  If the address-size
    attribute is 16 bits, DI is used as destination-index register; if
    32 bits, EDI is used.

    ^yProtected mode^y
    An exception occurs if the current task has insufficient privilege
    for the I/O. INS is not IOPL-sensitive in V86 mode.


    ^yNote^y:       This instruction is always translated by the
                assembler into INSB, Input String Byte, INSW, Input
                String Word, or INSD, Input String Dword, depending
                upon whether destination refers to a string of bytes,
                words or doublewords. In any case, you must
                explicitly load the ES and eDI registers with the
                location of the destination.

                INSB, INSW, and INSD are synonyms for the byte, word,
                and doubleword INS instructions that do not require an
                operand. They are simpler to use but provide no type
                or segment checking.


    These instructions normally use a REP prefix to perform a block
    read of the number of elements (bytes, words, or doublewords)
    specified in eCX.


        ^yExample^y:
        les     di,[pSec]               ; Point ES:DI to storage
        mov     dx,PORT_NUM             ; DX = port number
        mov     cx,512                  ; No. of bytes to read
        cld                             ; String ops forward
        rep     insb                    ; Read the port


    ^yOpcode      Format^y
    6C          INS  m8,DX              ; = INSB (186+)
    6D          INS  m16,DX             ; = INSW (186+)
    6D          INS  m32,DX             ; = INSD (386+)


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    insb         1       -      14       5      15      17      9    NP
    insw         1       -      14       5      15      17      9    NP
    insd         1       -       -       -      15      17      9    NP

                             Protected Mode

                Bytes                           386     486     Pentium
                 1                           9/29/29 10/32/30 6/24/22 NP

                   Cycles for: CPL <= IOPL / CPL > IOPL / V86
}REP:IN:OUT:OUTS:CLD:STD:IOPL
{INT
 ^yINT             Interrupt                            Flags: O D I T S Z A P C^y
                                                             ? ? ? ? ? ? ? ? ?
 ^yINT^y int_number                                       ^yIOv86^y

        ^yLogic^y   ; Real mode
                PUSHF
                TF  0
                IF  0
                CALL FAR [0000:(int_number * 4)]

    The INT n instruction generates via software a call to an
    interrupt handler. The immediate operand, from 0 to 255, gives the
    index number into the interrupt descriptor table (IDT) of the
    interrupt service routine to be called. INT generally behaves like
    a far call except that the flags register is pushed onto the stack
    before the return address. Interrupt procedures return via the
    IRET instruction, which pops the flags and return address from the
    stack.

    In protected mode, the interrupt descriptor table (IDT) consists
    of an array of 8-byte descriptors; the descriptor for the
    interrupt invoked must indicate an interrupt, trap, or task gate.
    In real-address mode, the IDT is an array of 4-byte pointers
    (interrupt vectors).
    In protected and real address modes, the base linear address of
    the IDT is defined by the contents of the IDTR.

    This instruction is IOPL-sensitive in V86 mode.


    INT 3 is a single-byte form (the breakpoint instruction) that is
    useful for debugging.


    ^yNote^y
    The first 32 interrupts are reserved by Intel for system use. Some
    of these interrupts are used for internally generated exceptions.


    ^yOpcode      Format^y
    CC          INT 3
    CD ib       INT imm8
    CE          INTO


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    3             1     72      45      23+m    33      26      13   NP
    imm8          2     71      47      23+m    37      30      16   NP

                             Protected mode

                Bytes   8088    186     286     386     486     Pentium
                  1     -       -     (40-78)+m 59-99   44-71  27-82 NP
}IRET:INTO:CLI:STI:PUSHF:CALL:Descriptors:IOPL:Flags
{INTO
 ^yINTO            Interrupt on Overflow                Flags: O D I T S Z A P C^y
                                                             ? ? ? ? ? ? ? ? ?
 ^yINTO^y                                                 ^yIOv86^y

        ^yLogic^y   ; Real mode
                if OF = 1
                   PUSHF
                   TF  0
                   IF  0
                   CALL FAR [0000:0010h]
                endif

    INTO invokes interrupt 4 if the overflow flag (OF) is set;
    interrupt 4 is reserved for this purpose. If OF is clear,
    execution continues with the next instruction. OF is set by
    several arithmetic, logical, and string instructions.

    This instruction is IOPL-sensitive in V86 mode.


    ^yOpcode      Format^y
    CE          INTO


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
                 1      4/73    4/48    3/24+m  3/35    3/28    4/13 NP

                             Protected mode

                Bytes                   286     386     486     Pentium
                 1                    (40-78)+m 59-99   44-71  27-56 NP

                          Task switch clocks not shown
}INT:IOPL:Flags
{INVD
 ^yINVD            Invalidate Internal Caches           Flags: Not altered^y

 ^yINVD^y                                                 CPU: 486+ ^vPriv^v

    Invalidates (flushes) the processor's internal caches and issues a
    special-function bus cycle that directs external caches to also
    flush themselves. Data held in internal caches is not written back
    to main memory.

    After executing this instruction, the processor does not wait for
    the external caches to complete their flushing operation before
    proceeding with instruction execution. It is the responsibility of
    hardware to respond to the cache flush signal.


    ^yNote^y
    The INVD instruction is a privileged instruction. When the processor
    is running in protected mode, the CPL of a program or procedure must
    be 0 to execute this instruction. This instruction is also
    implementation-dependent; its function may be implemented
    differently on future Intel Architecture processors.

    Use this instruction with care. Data cached internally and not
    written back to main memory will be lost. Unless there is a specific
    requirement or benefit to flushing caches without writing back
    modified cache lines (for example, testing or fault recovery where
    cache coherency with main memory is not a concern), software should
    use the WBINVD instruction.


    ^yOpcode      Format^y
    0F 08       INVD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -             2     -       -       -       -       ??     ??    NP
}WBINVD
{INVLPG
 ^yINVLPG          Invalidate TLB Entry                 Flags: Not altered^y

 ^yINVLPG^y source                                        CPU: 486+ ^vPriv^v

    Invalidates (flushes) the translation lookaside buffer (TLB) entry
    specified with the source operand. The source operand is a memory
    address. The processor determines the page that contains that
    address and flushes the TLB entry for that page.

    The INVLPG instruction is a privileged instruction. When the
    processor is running in protected mode, the CPL of a program or
    procedure must be 0 to execute this instruction. This instruction is
    also implementation-dependent; its function may be implemented
    differently on future Intel Architecture processors.

    The INVLPG instruction normally flushes the TLB entry only for the
    specified page; however, in some cases (MOV to/from CRn) it flushes
    the entire TLB.


    ^yOpcode      Format^y
    0F 01 /7    INVLPG mem


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -             ??    -       -       -       -       ??     ??    NP
}INVD:WBINVD
{IRET
 ^yIRET            Interrupt Return                     Flags: O D I T S Z A P C^y
                                                             * * * * * * * * *
 ^yIRET^y                                                 ^yIOv86^y
 ^yIRETD^y  (CPU: 386+)                                   ^yIOv86^y

        ^yLogic^y   ; Real mode
                POP IP
                POP CS
                POPF

    In real address mode, IRET pops the instruction pointer, CS, and
    the flags register from the stack and resumes the interrupted
    routine.

    In protected mode, the action of IRET depends on the setting of
    the nested task flag (NT) bit in the flags register. When popping
    the new flag image from the stack, the IOPL bits in the flags
    register are changed only when the current privilege level (CPL)
    is zero.
    - If NT = 0, IRET returns from an interrupt procedure without a
      task switch. The code returned to must be equally or less
      privileged than the interrupt routine (as indicated by the RPL
      bits of the CS selector popped from the stack). If the
      destination code is less privileged, IRET also pops the stack
      pointer and SS from the stack.
    - If NT = 1, IRET reverses the operation of a CALL or INT that
      caused a task switch. The updated state of the task executing
      IRET is saved in its task state segment. If the task is
      re-entered later, the code that follows IRET is executed.

    This instruction is IOPL-sensitive in V86 mode.


    ^yNote^y
    Some assemblers support an IRET extension (IRETW) that forces a
    16-bit word-style pop into the FLAGS register regardless of the
    code segment's size attribute (80386+).


    ^yOpcode      Format^y
    CF          IRET
    CF          IRETD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -             1     44      28      17+m    22      15     8-27  NP

                  32-bit return from interrupt (386+)
    -             1                             22      15    10-27  NP

                      Task switch clocks not shown
}INT:CALL:POPF:POP:IOPL:Flags:Selectors
{Jcc
 ^vJcc^y             Jump on condition                    Flags: Not altered^y

 ^vJcc^v location

        ^yLogic^y   if condition
                   eIP  location_offset
                endif

    Conditional jumps (except JCXZ/JECXZ) test the flags which have
    been set by a previous instruction. If the given condition is
    true, a jump is made to the location provided as the operand.

    Instruction coding is most efficient when the target for the
    conditional jump is within -128 to +127 bytes of the next
    instruction's first byte (short jump).
    On the 80386+, the jump can also target -32768 through 32767
    (segment size attribute 16) or -2 to the 31st power through +2 to
    the 31st power - 1 (segment size attribute 32) relative to the
    next instruction's first byte.


    A condition code (cc) is associated with each instruction to
    indicate the condition being tested for. The terms "above" and
    "below" are used for unsigned integers; "less" and "greater" for
    signed integers.
    Note that several mnemonics have the same meaning.


    ^vInstruction     Jump if ...                           Flags^v
    JA, JNBE        Above, Not Below or Equal             CF=0 AND ZF=0
    JAE, JNB, JNC   Above or Equal, Not Below, Not Carry  CF=0
    JBE, JNA        Below or Equal, Not Above             CF=1 OR ZF=1
    JB, JC, JNAE    Below, Carry, Not Above or Equal      CF=1
    JE, JZ          Equal, Zero                           ZF=1
    JNE, JNZ        Not Equal, Not Zero                   ZF=0
    JP, JPE         Parity, Parity Even                   PF=1
    JNP, JPO        No Parity, Parity Odd                 PF=0

    JG, JNLE        Greater, Not Less or Equal            SF=OF AND ZF=0
    JGE, JNL        Greater or Equal, Not Less            SF=OF
    JLE, JNG        Less or Equal, Not Greater            SF<>OF OR ZF=1
    JL, JNGE        Less, Not Greater or Equal            SF<>OF
    JO              Overflow                              OF=1
    JNO             No Overflow                           OF=0
    JS              Sign (negative)                       SF=1
    JNS             No Sign (positive)                    SF=0


    ^yOpcode                              Format^y
    short;  near;      near;            'Near' forms available on 386+
    70 cb;  0F 80 cw;  0F 80 cd         JO  rel8/rel16/rel32
    71 cb;  0F 81 cw;  0F 81 cd         JNO rel8/rel16/rel32
    72 cb;  0F 82 cw;  0F 82 cd         JB  rel8/rel16/rel32
    73 cb;  0F 83 cw;  0F 83 cd         JAE rel8/rel16/rel32
    74 cb;  0F 84 cw;  0F 84 cd         JE  rel8/rel16/rel32
    75 cb;  0F 85 cw;  0F 85 cd         JNE rel8/rel16/rel32
    76 cb;  0F 86 cw;  0F 86 cd         JBE rel8/rel16/rel32
    77 cb;  0F 87 cw;  0F 87 cd         JA  rel8/rel16/rel32
    78 cb;  0F 88 cw;  0F 88 cd         JS  rel8/rel16/rel32
    79 cb;  0F 89 cw;  0F 89 cd         JNS rel8/rel16/rel32
    7A cb;  0F 8A cw;  0F 8A cd         JP  rel8/rel16/rel32
    7B cb;  0F 8B cw;  0F 8B cd         JNP rel8/rel16/rel32
    7C cb;  0F 8C cw;  0F 8C cd         JL  rel8/rel16/rel32
    7D cb;  0F 8D cw;  0F 8D cd         JGE rel8/rel16/rel32
    7E cb;  0F 8E cw;  0F 8E cd         JLE rel8/rel16/rel32
    7F cb;  0F 8F cw;  0F 8F cd         JG  rel8/rel16/rel32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    near8        2      4/16    4/13    3/7+m   3/7+m   1/3     1    PV
    near16       3       -       -       -      3/7+m   1/3     1    PV

                       Cycles for:  no jump/jump
}
{JCXZ
 ^yJCXZ            Jump if Register CX is Zero          Flags: Not altered^y

 ^yJCXZ^y location
 ^yJECXZ^y location  (CPU: 386+)

        ^yLogic^y   if eCX = 0
                   JMP short location
                endif

    JCXZ/JECXZ differ from other conditional jumps because they test
    the contents of the CX or ECX register for zero, not the flags.
    JCXZ is commonly used to bypass a loop if the counter (CX) is
    zero.

    Note that the target of JCXZ must be within -128 to +127 bytes of
    the next instruction.


    ^yOpcode      Format^y
    E3 cb       JCXZ rel8
    E3 cb       JECXZ rel8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    dest          2     6/18    5/16    4/8+m   5/9+m   5/8     5/6  NP
    dest          2      -       -       -      5/9+m   5/8     5/6  NP

                       Cycles for:  no jump/jump
}Jcc:LOOP:REP
{JMP
 ^yJMP             Jump Unconditionally                 Flags: Not altered (*)^y
                                                     (*) If task switch
 ^yJMP^y destination                                      occurs, all flags
                                                     are affected
        ^yLogic^y   if FAR jump
                   CS  destination_segment
                endif
                eIP  destination_offset

    JMP unconditionally transfers control from one code segment
    location to another. These locations can be within the same code
    segment (near jump = intra-segment) or in different code segments
    (far jump = inter-segment).

    JMP's implementation varies depending on whether the address is
    directly specified within the instruction or indirectly through a
    register or memory operand. A direct JMP includes the destination
    address as part of the instruction. An indirect JMP gets the
    destination address through a register or pointer variable. An
    indirect JMP specifies an absolute address in one of the following
    ways:

    - a register modifies the address of the memory pointer to select
      a destination address
    - the program can JMP to a location specified by a general
      register -- the CPU moves this value into the instruction
      pointer and resumes execution
    - the CPU obtains the destination address from a memory operand
      specified in the instruction

    ^yNote^y
    JMP does not save a return address on the stack as CALL does.


    ^yOpcode      Format^y
    E9 cw       JMP  rel16
    E9 cd       JMP  rel32
    EA cd       JMP  ptr16:16
    EA cp       JMP  ptr16:32
    EB cb       JMP  rel8
    FF /4       JMP  r/m16
    FF /4       JMP  r/m32
    FF /5       JMP  m16:16
    FF /5       JMP  m16:32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    short        2      15      13      7+m     7+m      3       1   PV
    near         3      15      13      7+m     7+m      3       1   PV
    far          5      15      13     11+m    12+m     17       3   NP
    r16          2      11      11      7+m     7+m      5       2   NP
    mem16      2+d(0,2) 18+EA   17     11+m    10+m      5       2   NP
    mem32      2+d(4)   24+EA   26     15+m    12+m     13       4   NP

    r32          2       -       -       -      7+m      5       2   NP
    mem32      2+d(0,2)  -       -       -     10+m      5       2   NP
    mem48      2+d(6)    -       -       -     12+m     13       4   NP

             Cycles for jumps through call gates not shown
}CALL:Jcc:Task switch
{LAHF
 ^yLAHF            Load Register AH from Flags          Flags: Not altered^y

 ^yLAHF^y

        ^yLogic^y    AH bits        Flag-reg bits
                7 6 4 2 0         S Z A P C

    LAHF transfers the low byte of the flags register to AH. The flags
    themselves are not changed by this instruction.

    ^yNote^y
    After this instruction is executed, bits 1, 3 and 5 of AH are
    undefined.


    ^yOpcode      Format^y
    9F          LAHF


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -             1      4       2       2       2       3       2   NP
}SAHF:PUSHF:Jcc:SETcc:Flags
{LAR
 ^yLAR             Load Access Rights                   Flags: O D I T S Z A P C^y
                                                             - - - - - * - - -
 ^yLAR^y destination,source                               CPU: 286+ ^yp^y

        ^yLogic^y   dest16 <- (bits 32-47 of descriptor) AND FF00h
             or dest32 <- (bits 32-63 of descriptor) AND 00F0FF00h
                if error
                   ZF <- 0
                else
                   ZF <- 1
                endif

    This instruction reads a masked form of a segment descriptor and
    returns the following bits to the destination register:

                ^yBits    Contents^y
    dest32   |  23      Granularity
             |  22      Default Operation Size
             |  20      Programmer-available
             |  15      Segment Present        |
             |  14-13   DPL                    |
             |  12      Segment                |  dest16
             |  11-9    Type                   |
             |  8       Accessed               |
             |  (Other bits zero)              |


    The descriptor specified by the selector in the second (source)
    operand must be within the descriptor table limits, have a valid
    type field, and be accessible at both CPL (Current Privilege
    Level) and RPL (Requestor's Privilege Level) of that selector
    compared to DPL (Descriptor Privilege Level). If so, ZF is set to
    1 and the segment attributes are loaded into the first
    (destination) operand. If not (due to privilege or GDT or LDT
    limits), ZF is set to zero, and the first operand is unmodified.

    If the 32-bit operand size is specified, the entire 32-bit value
    is loaded into the 32-bit destination. If the 16-bit operand size
    is specified, the lower 16 bits of this value are stored in the
    16-bit destination register.

    The following special-segment and gate-descriptor types are valid
    for LAR: 1,2,3,4,5,6,7,9,B,C,E,F. All application code and data
    segment descriptors are valid.


        Example:        sub     eax,eax
                        mov     ax,cs
                        mov     ebx,eax
                        lar     eax,ebx


    ^yOpcode      Format^y
    0F 02 /r    LAR  r16,r/m16
    0F 02 /r    LAR  r32,r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16, r16     3                      14      15      11       8   NP
    r32, r32     3                       -      15      11       8   NP
    r16, m16     3                      16      16      11       8   NP
    r32, m32     3                       -      16      11       8   NP
}LSL:VERR:ARPL:Descriptors:Selectors
{LDS
 ^yLDS             Load Pointer Using DS                Flags: Not altered^y

 ^yLDS^y destination,source
 ^yLES^y destination,source
 ^yLFS^y destination,source  (CPU: 386+)
 ^yLGS^y destination,source  (CPU: 386+)
 ^yLSS^y destination,source  (CPU: 386+)

        ^yLogic^y   destination    <- offset part of source
                DS|ES|FS|GS|SS <- segment part of source

    The data pointer instructions load a full pointer, consisting of a
    segment value (in protected mode: a segment selector) and an
    offset value, into a segment register and a general register.

    The source operand must be a memory operand. The destination
    operand must be a general register. The destination register is
    loaded with 16 bits if the operand-size attribute is 16, otherwise
    with 32 bits.

        Example:
                dataseg
                scrPtr  dw 0,0b800h
                codeseg
                les     di,[dword ptr scrPtr]
                assume  es:nothing


    ^yProtected mode^y
    A null selector (values 0000-0003) can be loaded into DS,ES,FS, or
    GS without causing a protection exception. (Any subsequent
    reference to a segment whose corresponding segment register is
    loaded with a null selector causes a #GP(0) exception. No memory
    reference to the segment occurs.)


    ^yOpcode      Format^y
    C4 /r       LES  r16,m16:16
    C4 /r       LES  r32,m16:32
    C5 /r       LDS  r16,m16:16
    C5 /r       LDS  r32,m16:32
    0F B2 /r    LSS  r16,m16:16
    0F B2 /r    LSS  r32,m16:32
    0F B4 /r    LFS  r16,m16:16
    0F B4 /r    LFS  r32,m16:32
    0F B5 /r    LGS  r16,m16:16
    0F B5 /r    LGS  r32,m16:32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
  LDS reg,mem  2+d(2)   24+EA   18       7       7       6       4   NP
  LES reg,mem  2+d(2)   24+EA   18       7       7       6       4   NP
  LFS reg,mem  3+d(2,4)                          7       6       4   NP
  LGS reg,mem  3+d(2,4)                          7       6       4   NP
  LSS reg,mem  3+d(2,4)                          7       6       4   NP
}MOV:Selectors:Exceptions
{LEA
 ^yLEA             Load Effective Address               Flags: Not altered^y

 ^yLEA^y destination,source

        ^yLogic^y   destination <- address(source)

    LEA transfers the offset of the source operand, rather than its
    value, to the destination register. The source must be a memory
    reference and the destination must be a general register.


    The operand-size attribute of the instruction is determined by the
    destination register. The address-size attribute is determined by
    the USE attribute of the segment containing the second operand.
    The operand-size and address-size attributes affect the action
    performed by LEA as follows:

        ^yOperand  Address   Action^y
        ^ysize     size      performed^y
        16       16        16-bit effective address is calculated and
                           stored in 16-bit destination
        16       32        32-bit effective address is calculated. The
                           lower 16 bits of the address are stored in
                           16-bit destination
        32       16        16-bit effective address is calculated. The
                           16-bit address is zero-extended and stored
                           in 32-bit destination
        32       32        32-bit effective address is calculated and
                           stored in 32-bit destination


    Example:    lea     di,[string]     ; = mov di,offset DGROUP:string
                mov     al," "
                repne   scasb

    Example:    lea     bx,[cvt_table]
                xlatb

    Example:    p186
                proc    stack_vars
                local   tmp,element:word, array:dword:50 = LOCAL_SIZE
                enter   LOCAL_SIZE,0
                lea     bx,[array]
                mov     si,[element]
                shl     si,2
                mov     ax,[ss: bx + si]
                mov     dx,[ss: bx + si + 2]
                ;...

    Example:    p386n
                mov     ecx,offset bigarray
                lea     eax,[ecx + edx * 8 + 2]

    Example:    mov     bx,BASE_REG             ; bx = 03F8h
                lea     dx,[bx + 3]             ; dx = 03FBh
                in      al,dx

    Example:    p386n
                mov     ebx,eax
                lea     eax,[eax + ebx * 4]     ; Multiply eax by 5


    ^yOpcode      Format^y
    8D /r       LEA  r16,m
    8D /r       LEA  r32,m


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16, mem    2+d(2)  2+EA     6       3       2      1-2      1   UV
    r32, mem    2+d(2)   -       -       -       2      1-2      1   UV
}MOV:LDS:Addressing modes
{LEAVE
 ^yLEAVE           High Level Procedure Exit            Flags: Not altered^y

 ^yLEAVE^y                                                CPU: 186+

        ^yLogic^y   MOV eSP,eBP
                POP eBP

    LEAVE reverses the action of a previous ENTER. By copying the
    frame pointer to the stack pointer, LEAVE releases the stack space
    used by a procedure for its local variables. The old frame pointer
    is popped into BP or EBP, restoring the caller's stack frame. A
    subsequent RET nn instruction removes any arguments pushed onto
    the stack of the exiting procedure.


    ^yNote^y
    Some assemblers support LEAVE extensions that force the use of
    SP,BP (LEAVEW) or ESP,EBP (LEAVED) regardless of the segment's
    size attribute (80386+).


    ^yOpcode      Format^y
    C9          LEAVE


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -             1      -       8       5       4       5       3   NP
}ENTER:RET:CALL
{LES

    ^yLES: See LDS^y
}LDS
{LFS

    ^yLFS: See LDS^y
}LDS
{LGDT
 ^yLGDT            Load Global Descriptor Table Reg.     Flags: Not altered^y
 ^yLIDT            Load Interrupt Descriptor Table Reg.^y

 ^yLGDT^y source                                           CPU: 286+ ^vPriv^v
 ^yLIDT^y source                                           CPU: 286+ ^vPriv^v

        ^yLogic^y   GDTR <- base:limit
            or  IDTR <- base:limit

    The LGDT and LIDT instructions load a limit value and linear base
    address from a six-byte (48-bit) data operand in memory
    (pseudo-descriptor) into the global descriptor table register
    (GDTR) or the interrupt descriptor table register (IDTR),
    respectively. The 16-bit limit is stored at the low word and the
    32-bit base at the high doubleword of the operand.

    If a 16-bit operand-size attribute is used with LGDT or LIDT, the
    register is loaded with a 16-bit limit and a 24-bit base (80286),
    and the high-order 8 bits of the 48-bit operand are not used. If a
    32-bit operand-size attribute is used, the register is loaded with
    a 16-bit limit and a 32-bit base (80386+); the high-order 8 bits
    of the 48-bit operand are used as high-order base address bits.

    The ^vSGDT^v and ^vSIDT^v instructions always store all 48 bits of the
    data operand. With the 80286, the upper 8 bits are undefined after
    SGDT or SIDT is executed. With the 80386+, the upper 8 bits are
    written with the high-order 8 address bits, for both a 16-bit and
    a 32-bit operand-size attribute. If LGDT or LIDT is used with a
    16-bit operand to load the register stored by SGDT or SIDT, the
    upper 8 bits are stored as zeros.


    LIDT tells the hardware where to go in case of interrupts.

    LGDT and LIDT appear in operating system software; they are not
    used in application programs. They are the only instructions that
    directly load a linear address (i.e. not a segment relative
    address) in protected mode.


    ^yOpcode      Format^y
    0F 01 /2    LGDT m16&32
    0F 01 /3    LIDT m16&32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
  LGDT  mem48     5                     11      11      11       6   NP
  LIDT  mem48     5                     12      11      11       6   NP
}
{LGS

    ^yLGS: See LDS^y
}LDS
{LIDT

    ^yLIDT: See LGDT^y
}LGDT
{LLDT
 ^yLLDT            Load Local Descriptor Table Reg.     Flags: Not altered^y

 ^yLLDT^y source                                          CPU: 286+ ^vPriv^v

        ^yLogic^y   LDTR  selector

    LLDT loads the local descriptor table register (LDTR). The word
    operand (memory or register) to LLDT should contain a selector to
    the global descriptor table (GDT). The GDT entry should be a local
    descriptor table. If so, then the LDTR is loaded from the entry.
    The descriptor registers DS,ES,SS,FS,GS, and CS are not affected.
    The LDT field in the task state segment (TSS) does not change.

    The selector operand can be 0; if so, the LDTR is marked invalid.
    All descriptor references (except by the LAR, VERR, VERW, or LSL
    instructions) cause a #GP fault.


    The local descriptor table (LDT) is loaded whenever a task or
    major subsystem gains or regains control of the system.


    LLDT appears in operating system software; it is not used in
    application programs.


    ^yOpcode      Format^y
    0F 00 /2    LLDT r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16           3                     17      20      11       9   NP
    mem16       3+d(0-2)                19      24      11       9   NP
}
{LMSW
 ^yLMSW            Load Machine Status Word             Flags: Not altered^y

 ^yLMSW^y source                                          CPU: 286+ ^vPriv^v

        ^yLogic^y   MSW  source            ; MSW is part of CR0

    LMSW loads the machine status word (MSW -- lower 16 bits of CR0)
    from the source operand. This instruction can be used to switch to
    protected mode by setting the protection enable bit (PE) to one;
    if so, it must be followed by an intrasegment jump to flush the
    instruction queue. LMSW will ^vnot^v switch back to real-address
    mode (PE bit cannot be reset to 0 with an LMSW instruction).

    ^yNote^y
    LMSW should be used only under 286-based systems; programs written
    for the 80386+ should load CR0 with a MOV instruction.

    For compatibility with the 80286, the extension type (ET) bit of
    MSW is not altered by LMSW when executed by 80386+.


    LMSW appears in operating system software; it is not used in
    application programs.


    ^yOpcode      Format^y
    0F 01 /6    LMSW r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16           3                      3      10      13       8   NP
    mem16       3+d(0-2)                 6      13      13       8   NP
}
{LOCK
 ^yLOCK            Lock the Bus                         Flags: Not altered^y

 ^yLOCK^y instruction                                     ^yIOv86^y

    LOCK is used as a prefix to instructions that reference memory
    shared by more than one processor.

    The LOCK prefix causes the LOCK# signal of the CPU to be asserted
    during execution of the instruction that follows it. In a multi-
    processor environment, this signal can be used to ensure that the
    CPU has exclusive use of any shared memory while LOCK# is
    asserted.

    On the 80386 and 80486, the LOCK prefix works ^yonly^y with the
    following instructions (otherwise undefined opcode trap):
        BT,BTS,BTR,BTC                  mem,reg/imm
        ADD,OR,ADC,SBB,AND,SUB,XOR      mem,reg/imm
        NOT,NEG,INC,DEC                 mem
        XCHG                            reg,mem  OR  mem,reg
        CMPXCHG,XADD                    mem,reg        ; 486+

    The XCHG instruction always asserts LOCK# regardless of the
    presence or absence of the LOCK prefix.


    ^yNote^y
    The integrity of the LOCK is not affected by the alignment of the
    memory field. Memory locking is observed for arbitrarily
    misaligned fields.

    Locked access is ^ynot^y assured if another CPU processor is
    executing an instruction concurrently that has one of the
    following characteristics:
    - is not preceded by a LOCK prefix
    - is not one of the instructions listed above
    - specifies a memory operand that does not exactly overlap the
      destination operand. Locking is not guaranteed for partial
      overlap, even if one memory operand is wholly contained within
      another.

    The LOCK prefix is IOPL-sensitive in V86 mode.


    ^yOpcode      Format^y
    F0 ...      LOCK ...                ; Prefix


    ^yLength and timing^y
    -           Bytes   8088    186     286     386     486     Pentium
    ...          1       2       2       0       0       1       1   NP
}WAIT:ESC:HLT:IOPL:Multiprocessing
{LODS
 ^yLODS            Load String                          Flags: Not altered^y

 ^yLODS^y source_string

        ^yLogic^y   accumulator  [DS:eSI]
                if DF = 0
                   eSI  eSI + n        ; n = 1 for byte, 2 for word,
                else                    ;     4 for dword (386+)
                   eSI  eSI - n
                endif

    This instruction loads the AL, AX, or EAX register with the memory
    byte, word, or doubleword pointed to by DS (default) and the
    source-index register (SI or ESI). After the transfer is made,
    eSI is automatically incremented (if the direction flag is
    cleared) or decremented (if the direction flag is set), in
    preparation for loading the next element of the string.

    If the address-size attribute of LODS is 16 bits, SI is used for
    the source-index register; otherwise the address-size
    attribute is 32 bits, and the ESI register will be used.

    The source segment can be changed with a segment override.


    ^yNote^y:       This instruction is always translated by the
                assembler into LODSB, Load String Byte, LODSW, Load
                String Word, or LODSD, Load String Dword, depending
                upon whether source refers to a string of bytes, words
                or doublewords. In either case, you must explicitly
                load the eSI register with the offset of the source
                string.

                LODSB, LODSW, and LODSD are synonyms for the byte,
                word, and doubleword LODS instructions that do not
                require an operand. They are simpler to use but
                provide no type or segment checking.


    LODS can be preceded with a REP prefix, but this will continually
    overwrite the contents of the accumulator; LODS is typically used
    within a LOOP construct.


    ^yOpcode      Format^y
    AC          LODS m8                 ; = LODSB
    AD          LODS m16                ; = LODSW
    AD          LODS m32                ; = LODSD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    lodsb        1      16      10       5       5       5       2   NP
    lodsw        1      16      10       5       5       5       2   NP
    lodsd        1       -       -       -       5       5       2   NP
}LOOP:REP:MOVS:INS:CLD:Flags
{LOOP
 ^yLOOP            Loop on Count                        Flags: Not altered^y

 ^yLOOP^y location                   Loop
 ^yLOOPE^y/^yLOOPZ^y location            Loop While Equal/Zero
 ^yLOOPNE^y/^yLOOPNZ^y location          Loop While Not Equal/Zero

        ^yLogic^y
        ; loop              ; loope                 ; loopne
        eCX  eCX - 1       eCX  eCX - 1           eCX  eCX - 1
        if eCX <> 0         if eCX <> 0 AND ZF=1    if eCX <> 0 AND ZF=0
           JMP location        JMP location            JMP location
        endif               endif                   endif

    The LOOP instructions are conditional jumps that use the count
    value in eCX (LOOPE/LOOPNE also use the zero flag) to determine
    whether a code loop should be continued or terminated.

    All LOOPs automatically decrement eCX and terminate when eCX = 0.
    The operand, a short code label, must be in the range -128 to +127
    bytes from the next instruction.

    If the address-size attribute is 16 bits, the CX register is used
    as the count register; otherwise, ECX is used (80386+).


        Example:        sub     ax,ax
                        jcxz    skip    ; Don't execute 65,536 times
                    again:
                        add     ax,[bx]
                        stosw
                        inc     bx
                        inc     bx
                        loop    again
                        mov     [looped],1
                    skip:

        Example:        mov     cx,127
                        mov     si,0081h
                    blanks:
                        lodsb
                        cmp     al,20h
                        loopz   blanks


    ^yNote^y
    Some assemblers support the following LOOP extensions that force
    the use of CX (loopw forms) or ECX (loopd forms) regardless of the
    code segment's size attribute (80386+):

        loopw,loopwe,loopwne,loopwnz,loopwz
        loopd,loopde,loopdne,loopdnz,loopdz


    ^yOpcode      Format^y
    E0 cb       LOOPNE rel8
    E1 cb       LOOPE rel8
    E2 cb       LOOP rel8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
  LOOP  short     2     5/17    5/15    4/8+m   11+m    6/7     5/6  NP
  LOOPE short     2     6/18    5/16    4/8     11+m    6/9     7/8  NP
  LOOPNE short    2     5/19    5/16    4/8     11+m    6/9     7/8  NP
}JCXZ:DEC:Flags
{LSL
 ^yLSL             Load Segment Limit                   Flags: O D I T S Z A P C^y
                                                             - - - - - * - - -
 ^yLSL^y destination,source                               CPU: 286+ ^yp^y

        ^yLogic^y   if selector(source) visible at CPL
                   ZF  1
                   destination  segment limit of selector(source)
                else
                   ZF  0
                endif

    LSL loads a register with a segment limit. The limit comes from
    the descriptor for the segment specified by the selector in the
    second operand. If the source selector is visible at the current
    privilege level (CPL) weakened by RPL, and the descriptor is a
    type accepted by LSL, the zero flag (ZF) is set to 1. Otherwise,
    ZF is reset to 0 and the destination register is unchanged.

    The segment limit is loaded as a byte granular value. If the
    descriptor uses a page granular (G bit = 1) segment limit, LSL
    translates that value to a byte limit (shifts it left 12 bits and
    fills the low 12 bits with 1s) before loading it into the
    destination register.

    If used with a 16-bit destination register (and an operand-size
    attribute of 32), LSL stores the low-order 16 bits of the 32-bit
    byte granular limit in the destination register.

    Code and data segment descriptors are valid for LSL.


    The value placed in the destination register is the offset of the
    last addressable byte in the segment.


    ^yOpcode      Format^y
    0F 03 /r    LSL  r16,r/m16
    0F 03 /r    LSL  r32,r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16, r16     3                      14      20/25   10       8   NP
    r32, r32     3                       -      20/25   10       8
    r16, m16   3+d(0,2)                 16      21/26   10       8
    r32, m32   3+d(0,2)                  -      21/26   10       8
}LAR:VERR:ARPL:Selectors:Descriptors
{LSS

    ^yLSS: See LDS^y
}LDS
{LTR
 ^yLTR             Load Task Register                   Flags: Not altered^y

 ^yLTR^y source                                           CPU: 286+ ^vPriv^v

        ^yLogic^y   TR  source

    LTR loads the task register from the 16-bit source operand. The
    loaded task state segment (TSS) is marked busy. A task switch does
    not occur.


    LTR appears in operating system software; it is not used in
    application programs.


    ^yOpcode      Format^y
    0F 00 /3    LTR  r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16          3                      17      23      20      10   NP
    mem16      3+d(0,2)                 19      27      20      10
}STR:System address registers:TSS:Task switch
{MOV
 ^yMOV             Move Data                            Flags: Not altered^y

 ^yMOV^y destination,source

        ^yLogic^y   destination  source

    MOV transfers (copies) a byte, word, or (80386+) doubleword from
    the source operand to the destination operand.


                ^yNote^y
                MOV cannot move from memory to memory or from
                a segment register to a segment register.


    A MOV into SS inhibits all interrupts until after the execution of
    the next instruction, which is presumably a MOV into SP/ESP. (Some
    early 8088/8086 processors don't disable interrupts properly after
    a MOV into SS; remedy: CLI/STI.)


    ^yProtected mode^y
    If the destination operand is a segment register (DS,ES,SS,FS,GS)
    the value moved must be a selector. Loading the selector initiates
    automatic loading of the descriptor information associated with
    that selector into the programmer-invisible part of the segment
    register; loading also initiates validation of both the selector
    and the descriptor information.


    ^yOpcode      Format^y
    88 /r       MOV  r/m8,r8
    89 /r       MOV  r/m16,r16
    89 /r       MOV  r/m32,r32
    8A /r       MOV  r8,r/m8
    8B /r       MOV  r16,r/m16
    8B /r       MOV  r32,r/m32
    8C /r       MOV  r/m16,Sreg
    8E /r       MOV  Sreg,r/m16
    A0          MOV  AL,moffs8
    A1          MOV  AX,moffs16
    A1          MOV  EAX,moffs32
    A2          MOV  moffs8,AL
    A3          MOV  moffs16,AX
    A3          MOV  moffs32,EAX
    B0 + rb     MOV  r8,imm8
    B8 + rw     MOV  r16,imm16
    B8 + rd     MOV  r32,imm32
    C6          MOV  m8,imm8
    C7          MOV  m16,imm16
    C7          MOV  m32,imm32


    ^yMOV to/from special registers^y                    CPU: 386+ ^vPriv^v

    In protected mode, MOV to/from a special register is a privileged
    instruction and can be executed only if CPL=0. 32-bit operands are
    always used with these instructions, regardless of the operand-size
    attribute.

    Note that the CR4 register was introduced with the Pentium, and that
    the test registers do not exist on the Pentium or the Pentium Pro.


    ^yOpcode      Format^y
    0F 20 /r    MOV  r32,CR0/CR2/CR3/CR4
    0F 21 /r    MOV  r32,DR0/DR1/DR2/DR3/DR6/DR7
    0F 22 /r    MOV  CR0/CR2/CR3/CR4,r32
    0F 23 /r    MOV  DR0/DR1/DR2/DR3/DR6/DR7,r32
    0F 24 /r    MOV  r32,TR6/TR7                ; 386-486
    0F 26 /r    MOV  TR6/TR7,r32                ; 386-486
    0F 24 /r    MOV  r32,TR3/TR4/TR5            ; 486
    0F 26 /r    MOV  TR3/TR4/TR5,r32            ; 486


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       2       2       2       2       1       1   UV
    mem, reg  2+d(0-2)  13+EA    9       3       2       1       1   UV
    reg, mem  2+d(0-2)  12+EA   12       5       4       1       1   UV
    mem, imm  2+d(0-2)  14+EA   12-13    3       2       1       1   UV*
               +i(1,2)
    reg, imm  2+i(1,2)   4       3-4     2       2       1       1   UV

    acc, mem     3      14       8       5       4       1       1   UV
    mem, acc     3      14       9       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate


                       ^ySegment Register Moves^y

                               Real Mode
    Operands    Bytes   8088    186     286     386     486     Pentium
    seg, r16     2       2       2       2       2       3     2-11   NP
    seg, m16   2+d(0,2) 12+EA    9       5       5       3     3-12   NP
    r16, seg     2       2       2       2       2       3       1    NP
    m16, seg   2+d(0,2) 13+EA   11       3       2       3       1    NP


                       ^yProtected Mode Differences^y
    Operands    Bytes                   286     386     486     Pentium
    seg, r16     2                      17      18       9     2-11*  NP
    seg, m16   2+d(0,2)                 19      19       9     3-12*  NP
                        * = add 8 if new descriptor; add 6 if SS


                    ^yMOV to/from special registers (386+)^y

    Operands    Bytes                           386     486     Pentium
    r32, cr32    3                               6       4       4    NP
    cr32, r32    3                              4/10*   4/16*  12/22* NP

    r32, dr32    3                              14/22*  10      2/12* NP
    dr32, r32    3                              16/22*  11     11/12* NP

    r32, tr32    3                              12      3/4*     -    NP
    tr32, r32    3                              12      4/6*     -    NP

              * = Cycles depend on which special register
}MOVZX:MOVSX:XCHG:XLAT:PUSH:POP:MOVS:STOS:LEA:Selectors
{MOVS
 ^yMOVS            Move String                          Flags: Not altered^y

 ^yMOVS^y destination_string,source_string

        ^yLogic^y   [ES:eDI]  [DS:eSI]
                if DF = 0
                   eSI  eSI + n        ; n = 1 for byte, 2 for word,
                   eDI  eDI + n        ;     4 for dword (386+)
                else
                   eSI  eSI - n
                   eDI  eDI - n
                endif

    MOVS copies the byte, word, or doubleword at DS:eSI to the byte,
    word, or doubleword at ES:eDI. After the transfer, eSI and eDI are
    incremented (if the direction flag is cleared) or decremented (if
    the direction flag is set), in preparation for copying the next
    element of the string. If the address-size attribute of MOVS is
    16 bits, the SI and DI registers will be used as source and
    destination indices; if 32 bits, ESI and EDI will be used.

    The source segment can be changed with a segment override, the
    destination segment cannot.

    ^yNote^y:       This instruction is always translated by the
                assembler into MOVSB, Move String Byte, MOVSW,
                Move String Word, or MOVSD, Move String Dword,
                depending upon whether source refers to a string of
                bytes, words or doublewords. In either case, you must
                explicitly load the eSI and eDI registers with the
                offset of the source and destination strings.

                MOVSB, MOVSW, and MOVSD are synonyms for the byte,
                word, and doubleword MOVS instructions that do not
                require any operands. They are simpler to use but
                provide no type or segment checking.


    If the REP prefix modifies this instruction, the CPU moves the
    value of the source string element to the value of the destination
    string element. It then steps eSI and eDI in the direction
    indicated by DF by the indicated size, until either the REP
    condition is false or eCX counts to zero.


        Example:
                dataseg
                src     db      "A string",0
                dest    db      9 dup (0)
                codeseg
                mov     di,ds
                mov     es,di
                assume  ds:dgroup,es:dgroup
                mov     di,offset dest
                mov     si,offset src
                mov     cx,9
                cld
                rep     movs [dest],[src]       ; rep movsb
                ; ax register unchanged by MOVS


    ^yOpcode      Format^y
    A4          MOVS m8,m8              ; = MOVSB
    A5          MOVS m16,m16            ; = MOVSW
    A5          MOVS m32,m32            ; = MOVSD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    movsb        1      18       9       5       7       7       4   NP
    movsw        1      26       9       5       7       7       4   NP
    movsd        1       -       -       -       7       7       4   NP
    rep movsb    2      9+17n   8+8n    5+4n    7+4n   12+3n*   3+n  NP
    rep movsw    2      9+25n   8+8n    5+4n    7+4n   12+3n*   3+n  NP
    rep movsd    2       -       -       -      7+4n   12+3n*   3+n  NP

                        * = 5 if n=0, 13 if n=1
                 (n = count of bytes, words or dwords)
}REP:CLD:STD:STOS:MOV
{MOVSX
 ^yMOVSX           Move With Sign-Extend                Flags: Not altered^y

 ^yMOVSX^y destination,source                             CPU: 386+

        ^yLogic^y   destination  sign-extend(source)

    MOVSX extends the sign of an 8-bit value to a 16-bit value, or an
    8-bit or 16-bit value to a 32-bit value.


    ^yOpcode      Format^y
    0F BE /r    MOVSX r16,r/m8
    0F BE /r    MOVSX r32,r/m8
    0F BF /r    MOVSX r32,r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg      3                              3       3       3   NP
    reg, mem    3+d(0,1,2,4)                     6       3       3   NP
}MOVZX:CBW:CWD
{MOVZX
 ^yMOVZX           Move With Zero-Extend                Flags: Not altered^y

 ^yMOVZX^y destination,source                             CPU: 386+

        ^yLogic^y   destination  zero-extend(source)

    MOVZX extends an 8-bit value to a 16-bit value, or an 8-bit or
    16-bit value to a 32-bit value by padding the high-order bits
    with zeros.

        Example:        movzx   eax,al


    ^yOpcode      Format^y
    0F B6 /r    MOVZX r16,r/m8
    0F B6 /r    MOVZX r32,r/m8
    0F B7 /r    MOVZX r32,r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg      3                              3       3       3   NP
    reg, mem    3+d(0,1,2,4)                     6       3       3   NP
}MOVSX:CBW:CWD:SHL:ROL
{MUL
 ^yMUL             Multiplication, Unsigned             Flags: O D I T S Z A P C^y
                                                             * - - - ? ? ? ? *
 ^yMUL^y source

        ^yLogic^y   AX  AL * source        ; Source is a byte
             or
                DX:AX  AX * source     ; Source is a word
             or
                EDX:EAX  EAX * source  ; Source is a dword (386+)

    MUL performs unsigned multiplication.

    The carry and overflow flags (CF and OF) are set to 1 if the
    high-order half of the result (AH, DX, or EDX, respectively)
    contains any significant digits; otherwise, CF and OF are reset
    to 0.


    ^yOpcode      Format^y
    F6 /4       MUL  r/m8
    F7 /4       MUL  r/m16
    F7 /4       MUL  r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8           2     70-77    26-28   13      9-14   13-18    11   NP
    r16          2    118-133   35-37   21      9-22   13-26    11   NP
    r32          2       -        -      -      9-38   13-42    10   NP
    mem8    2+d(0-2)  76-83+EA  32-34   16     12-17   13-18    11   NP
    mem16   2+d(0-2) 124-139+EA 41-43   24     12-25   13-26    11   NP
    mem32   2+d(0-2)     -        -      -     12-41   13-42    10   NP
}IMUL:SHL:SHLD:AAM:AAD
{NEG
 ^yNEG             Negate                               Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yNEG^y destination

        ^yLogic^y   destination  -destination      ; Two's complement

    NEG subtracts the destination operand from 0 and returns the
    result in the destination. This effectively produces the two's
    complement of the operand.


    ^yNote^y:
    If the operand is zero, the carry flag is cleared; in all other
    cases, the carry flag is set.

    Attempting to negate a byte containing -128 or a word containing
    -32,768 causes no change to the operand and sets the Overflow
    Flag.

        Example:        test    dx,dx
                        jns     positive
                        neg     dx      ; negate
                        neg     ax      ;   doubleword
                        sbb     dx,0    ;   in dx:ax
                     positive:


    ^yOpcode      Format^y
    F6 /3       NEG  r/m8
    F7 /3       NEG  r/m16
    F7 /3       NEG  r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg          2       3       3       2       2       1       1   NP
    mem       2+d(0-2)  24+EA   13       7       6       3       3   NP
}NOT:TEST:SBB:CBW:CWD:Flags
{NOP
 ^yNOP             No Operation                         Flags: Not altered^y

 ^yNOP^y

    NOP performs no action. NOP is a one-byte instruction that affects
    none of the machine context except the instruction pointer.

    This instruction is used for timing purposes, as a place-holder,
    or to force memory alignment.

    NOP is an alias for XCHG AX,AX.


    ^yOpcode      Format^y
    90          NOP


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       3       3       3       3       1       1   UV
}XCHG
{NOT
 ^yNOT             NOT operation                        Flags: Not altered^y

 ^yNOT^y destination

        ^yLogic^y   destination  NOT(destination)  ; One's complement

    NOT inverts each bit in its operand, i.e. forms the one's
    complement.

                       ^yNOT truth table^y
                        a       NOT a
                        0         1
                        1         0


    ^yOpcode      Format^y
    F6 /2       NOT  r/m8
    F7 /2       NOT  r/m16
    F7 /2       NOT  r/m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg          2       3       3       2       2       1       1   NP
    mem       2+d(0-2)  24+EA   13       7       6       3       3   NP
}AND:NEG:OR:TEST:XOR
{OR
 ^yOR              OR operation                         Flags: O D I T S Z A P C^y
                                                             0 - - - * * ? * 0
 ^yOR^y destination,source

        ^yLogic^y   destination  destination OR source

    OR performs a bit-by-bit logical inclusive OR operation on its
    operands and returns the result to destination. OR sets each bit
    of the result to one if either or both of the corresponding bits
    of the operands are one.


                          ^yOR truth table^y
                        a       b     a OR b
                        0       0       0
                        0       1       1
                        1       0       1
                        1       1       1


    ^yOpcode      Format^y
    08 /r       OR   r/m8,r8
    09 /r       OR   r/m16,r16
    09 /r       OR   r/m32,r32
    0A /r       OR   r8,r/m8
    0B /r       OR   r16,r/m16
    0B /r       OR   r32,r/m32
    0C ib       OR   AL,imm8
    0D iw       OR   AX,imm16
    0D id       OR   EAX,imm32
    80 /1 ib    OR   r/m8,imm8
    81 /1 iw    OR   r/m16,imm16
    81 /1 id    OR   r/m32,imm32
    83 /1 ib    OR   r/m16,imm8
    83 /1 ib    OR   r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   UV
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   UV
    reg, imm  2+i(1,2)   4       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate
}AND:NEG:NOT:TEST:XOR:Flags
{OUT
 ^yOUT             Output to Port                       Flags: Not altered^y

 ^yOUT^y port,accumulator                                 ^yIOpm^y

        ^yLogic^y   [port]  accumulator

    OUT transfers a byte, a word, or a doubleword from the accumulator
    (AL, AX, or EAX) to a specified port. The port number is specified
    as an immediate byte value (for ports 0 thru 255) or by the DX
    register (allowing access to all ports).
    If the instruction contains an 8-bit port ID, that value is zero-
    extended to 16 bits.


    ^yProtected mode^y
    An exception occurs if the current task has insufficient privilege
    for the I/O. OUT is not IOPL-sensitive in V86 mode.


    ^yNote^y
    I/O ports F8h through FFh are reserved by Intel.


    ^yOpcode      Format^y
    E6 ib       OUT  imm8,AL
    E7 ib       OUT  imm8,AX
    E7 ib       OUT  imm8,EAX
    EE          OUT  DX,AL
    EF          OUT  DX,AX
    EF          OUT  DX,EAX


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    imm8, al     2      14       9       3      10      16      12   NP
    imm8, ax     2      14       9       3      10      16      12   NP
    imm8, eax    2       -       -       -      10      16      12   NP
    dx, al       1      12       7       3      11      16      12   NP
    dx, ax       1      12       7       3      11      16      12   NP
    dx, eax      1       -       -       -      11      16      12   NP

                             ^yProtected Mode^y

    Operands    Bytes                           386     486     Pentium
    imm8, acc    2                            4/24/24 11/31/29 9/26/24 NP
    dx, acc      1                            5/25/25 10/30/29 9/26/24 NP

               Cycles for: CPL <= IOPL / CPL > IOPL / V86
}OUTS:IN:IOPL
{OUTS
 ^yOUTS            Output String to Port                Flags: Not altered^y

 ^yOUTS^y DX,source                                       CPU: 186+ ^yIOpm^y

        ^yLogic^y   [port]  [DS:eSI]
                if DF = 0
                   eSI  eSI + n        ; n = 1 for byte, 2 for word,
                else                    ;     4 for dword (386+)
                   eSI  eSI - n
                endif

    The OUTS instruction transfers a byte, word, or dword from memory
    (at DS:eSI) to an output port (specified in the DX register).
    After the transfer, eSI is updated to point to the next string
    location. The port is always addressed through DX. If the
    address-size attribute is 16 bits, SI is used as source-index
    register; if 32 bits, ESI is used.


    ^yProtected mode^y
    An exception occurs if the current task has insufficient privilege
    for the I/O. OUTS is not IOPL-sensitive in V86 mode.


    ^yNote^y:       This instruction is always translated by the
                assembler into OUTSB, Output String Byte, OUTSW,
                Output String Word, or OUTSD, Output String Dword,
                depending upon whether source refers to a string
                of bytes, words or doublewords. In either case, you
                must explicitly load the DS and eSI registers with
                the location of the source.

                OUTSB, OUTSW, and OUTSD are synonyms for the byte, word,
                and doubleword OUTS instructions that do not require an
                operand. They are simpler to use but provide no type
                or segment checking.


    These instructions normally use a REP prefix to indicate a block
    write of the number of elements (bytes, words, or dwords) specified
    in eCX.


        ^yExample^y:
        lds     si,[pSec]               ; Point DS:SI to storage
        mov     dx,PORT_NUM             ; DX = port number
        mov     cx,512                  ; No. of bytes to write
        cld                             ; String ops forward
        rep     outsb                   ; Write to the port


    ^yOpcode      Format^y
    6E          OUTS DX,m8              ; = OUTSB (186+)
    6F          OUTS DX,m16             ; = OUTSW (186+)
    6F          OUTS DX,m32             ; = OUTSD (386+)


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    outsb        1              14       5      14      17      13   NP
    outsw        1              14       5      14      17      13   NP
    outsd        1               -       -      14      17      13   NP

                             ^yProtected Mode^y

                Bytes                           386     486     Pentium
                 1                           8/28/28 10/32/30 10/27/25 NP

                Cycles for: CPL <= IOPL / CPL > IOPL / V86
}REP:OUT:IN:INS:IOPL
{POP
 ^yPOP             Pop Operand from Stack               Flags: Not altered^y

 ^yPOP^y destination

        ^yLogic^y   destination <- [SS:eSP]
                eSP <- eSP + n
                ; n = 2 if operand-size attribute is 16 bits, else 4

    POP replaces the contents of the destination operand with the word
    or doubleword at the current top of stack, addressed by SS:SP
    (stack address-size attribute 16) or SS:ESP (stack address-size
    attribute 32). The stack pointer is then incremented by 2 for an
    operand-size of 16 bits, or by 4 for an operand-size of 32 bits,
    to point to the new top of stack.

    The CS register cannot be used as the destination of a POP
    instruction.

    A POP SS instruction inhibits all interrupts, including NMI, until
    after execution of the next instruction. This allows sequential
    execution of POP SS, POP SP without danger of having an invalid
    stack during an interrupt. However, using the LSS instruction is
    preferred.


    ^yProtected mode^y
    If the destination operand is a segment register (DS,ES,FS,GS, or
    SS), the value popped must be a selector. Loading the selector
    initiates automatic loading of the descriptor information
    associated with that selector into the programmer-invisible part
    of the segment register; loading also initiates validation of both
    the selector and the descriptor information.


    ^yOpcode      Format^y
    07          POP  ES
    0F A1       POP  FS
    0F A9       POP  GS
    17          POP  SS
    1F          POP  DS
    58 + rw     POP  r16
    58 + rd     POP  r32
    8F /0       POP  m16
    8F /0       POP  m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg          1      12      10       5       4       1       1   UV
    mem       2+d(0-2)  25+EA   20       5       5       6       3   NP
    seg          1      12       8       5       7       3       3   NP
    FS/GS        2       -       -       -       7       3       3   NP

                             ^yProtected Mode^y

    Operand     Bytes                   286     386     486     Pentium
    DS/ES        1                      20      21       9     3-12  NP
    SS           1                      20      21       9     8-17  NP
    FS/GS        2                       -      21       9     3-12  NP
}POPA:POPF:PUSH:PUSHA:PUSHF:LEAVE:MOV
{POPA
 ^yPOPA            Pop All General Registers            Flags: Not altered^y

 ^yPOPA^y                                                 CPU: 186+
 ^yPOPAD^y                                                CPU: 386+

        ^yLogic^y   POP eDI
                POP eSI
                POP eBP
                ADD eSP,n
                POP eBX
                POP eDX
                POP eCX
                POP eAX

    POPA pops the eight general registers from the stack. The value
    for the stack pointer is discarded instead of loaded into SP/ESP.
    POPA reverses the action of a previous PUSHA instruction.

    POPAD is the 32-bit form of POPA.


    ^yNote^y
    Some assemblers support a POPA extension (POPAW) that forces 8
    16-bit word-style pops regardless of the code segment's size
    attribute (80386+).


    ^yOpcode      Format^y
    61          POPA
    61          POPAD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    popa         1              51      19      24       9       5   NP
    popad        1               -       -      24       9       5   NP
}POP:POPF:PUSH:PUSHA:PUSHF
{POPF
 ^yPOPF            Pop Flags from Stack                 Flags: O D I T S Z A P C^y
                                                             * * * * * * * * *
 ^yPOPF^y                                                 ^yIOv86^y
 ^yPOPFD^y  (CPU: 386+)                                   ^yIOv86^y

        ^yLogic^y   POP eFLAGS

    POPF/POPFD pops the word or doubleword on the top of the stack and
    stores the value in the flags register. If the operand-size
    attribute of the instruction is 16 bits, then a word is popped and
    the value stored in FLAGS. If the operand-size attribute is 32
    bits, then a doubleword is popped and the value stored in EFLAGS.

    The RF and VM flags (bits 16 and 17 of EFLAGS) ^vcannot^v be changed
    by the POPFD instruction.


    ^yProtected mode^y
    The I/O privilege level (IOPL -- bits 12,13 of FLAGS) is changed
    only when executing at privilege level zero (CPL=0). The interrupt
    flag (IF) is changed only when executing at a level at least as
    privileged as the IOPL (real-address mode is equivalent to CPL=0).
    If a POPF instruction is executed with insufficient privilege, an
    exception does not occur, but the privileged bits are not altered.
    This instruction is IOPL-sensitive in V86 mode.


    ^yNote^y
    Some assemblers support a POPF extension (POPFW) that forces a
    16-bit word-style pop regardless of the code segment's size
    attribute (80386+).


    ^yOpcode      Format^y
    9D          POPF
    9D          POPFD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    popf         1      12       8       5       5       9       6   NP
    popfd        1       -       -       -       5       9       6   NP

                             ^yProtected Mode^y

                Bytes                   286     386     486     Pentium
    popf         1                       5       5       6       4   NP
    popfd        1                       -       5       6       4   NP
}PUSHF:LAHF:SAHF:CLI:STI:IOPL:Flags
{PUSH
 ^yPUSH            Push Operand on Stack                Flags: Not altered^y

 ^yPUSH^y source

        ^yLogic^y   eSP <- eSP - n
                [SS:eSP] <- source
                ; n = 2 if operand-size attribute is 16 bits, else 4

    If the operand-size attribute of the instruction is 16 bits, PUSH
    decrements the stack pointer by 2, otherwise by 4. Then the source
    operand is copied to the new top of stack which is pointed to by
    the stack pointer (SS:SP if stack address-size attribute is 16
    bits, SS:ESP if stack address-size attribute is 32 bits).

    The operand for PUSH must be a general register, a segment
    register, a memory operand, or (80186+) an immediate value. A full
    word/doubleword is pushed onto the stack even if the operand
    refers to a byte-sized value.


                Example:        push    [mem_op]
                                pop     [mem_op_copy]

    ^yPUSH SP^y
    The 8086 and 80186 processors decrement the stack pointer before
    pushing it onto the stack with a PUSH SP instruction. The 80286 and
    later processors push the value of SP/ESP before it's decremented.


    ^yOpcode      Format^y
    06          PUSH ES
    0E          PUSH CS
    0F A0       PUSH FS
    0F A8       PUSH GS
    16          PUSH SS
    1E          PUSH DS
    50+rw       PUSH r16
    50+rd       PUSH r32
    68          PUSH imm16
    68          PUSH imm32
    6A          PUSH imm8
    FF /6       PUSH m16
    FF /6       PUSH m32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg          1      15      10       3       2       1       1   UV
    mem       2+d(0-2)  24+EA   16       5       5       4       2   NP
    seg          1      14       9       3       2       3       1   NP
    imm      1+i(1,2)    -       -       3       2       1       1   NP
    FS/GS        2       -       -       -       2       3       1   NP
}PUSHA:PUSHF:POP:POPA:POPF:ENTER
{PUSHA
 ^yPUSHA           Push All General Registers           Flags: Not altered^y

 ^yPUSHA^y                                                CPU: 186+
 ^yPUSHAD^y                                               CPU: 386+

        ^yLogic^y   PUSH eAX
                PUSH eCX
                PUSH eDX
                PUSH eBX
                PUSH eSP        ; SP/ESP before PUSHA
                PUSH eBP
                PUSH eSI
                PUSH eDI

    PUSHA saves the eight general registers on the stack. Note that
    the value pushed for SP/ESP is the original value. The order of
    the registers pushed is correct for a subsequent POPA.

    PUSHAD is the 32-bit form of PUSHA.


    ^yNote^y
    Some assemblers support a PUSHA extension (PUSHAW) that forces 8
    16-bit word-style pushes regardless of the code segment's size
    attribute (80386+).


    ^yOpcode      Format^y
    60          PUSHA
    60          PUSHAD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    pusha        1              36      17      18      11       5   NP
    pushad       1               -       -      18      11       5   NP
}PUSH:PUSHF:POP:POPA:POPF
{PUSHF
 ^yPUSHF           Push Flags on Stack                  Flags: Not altered^y

 ^yPUSHF^y                                                ^yIOv86^y
 ^yPUSHFD^y  (CPU: 386+)                                  ^yIOv86^y

        ^yLogic^y   PUSH eFLAGS

    PUSHF decrements the stack pointer by 2 and copies the flags
    register to the new top of stack.

    PUSHFD decrements the stack pointer by 4 and copies the 80386+
    EFLAGS register to the new top of stack (addressed by SS:eSP).

    This instruction is IOPL-sensitive in V86 mode so the flags
    copied to top of stack may not reflect the actual state of the
    (e)flags register.

    ^yNote^y
    Some assemblers support a PUSHF extension (PUSHFW) that forces a
    16-bit word-style push regardless of the code segment's size
    attribute (80386+).


    ^yOpcode      Format^y
    9C          PUSHF
    9C          PUSHFD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    pushf        1      14       9       3       4       4       9   NP
    pushfd       1       -       -       -       4       4       9   NP

                             ^yProtected Mode^y

                Bytes                   286     386     486     Pentium
    pushf        1                       3       4       3       3   NP
    pushfd       1                       -       4       3       3   NP
}POPF:PUSH:LAHF:SAHF:SETcc:IOPL:Flags
{RCL
 ^yRCL             Rotate through Carry Left            Flags: O D I T S Z A P C^y
                                                             * - - - - - - - *
 ^yRCL^y destination,count

                     +------------------------------+
                     |  +----+   +---------------+  |
                     +--| CF |<--|  destination  |<-+
                        +----+   +---------------+

    RCL shifts the bits of the destination operand to the left by the
    number of bit positions specified in the count operand. A bit
    shifted out of the left (high-order) end of the destination enters
    the carry flag (CF), and the displaced carry flag rotates around
    to enter the vacated right-most bit position of the destination.
    Another way of looking at this is to consider the carry flag as
    the highest order bit of the word being rotated.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise RCL sets OF to 0 if destination's sign bit
    was not changed by the operation, to 1 if the sign bit was changed.


    ^yOpcode      Format^y
    C0 /2 ib    RCL  r/m8,imm8
    C1 /2 ib    RCL  r/m16,imm8
    C1 /2 ib    RCL  r/m32,imm8
    D0 /2       RCL  r/m8,1
    D1 /2       RCL  r/m16,1
    D1 /2       RCL  r/m32,1
    D2 /2       RCL  r/m8,CL
    D3 /2       RCL  r/m16,CL
    D3 /2       RCL  r/m32,CL


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, 1       2       2       2       2       9       3       1   PU
    mem, 1    2+d(0,2)  23+EA   15       7      10       4       3   PU
    reg, cl      2       8+4n    5+n    5+n      9      8-30    7-24 NP
    mem, cl   2+d(0,2) 28+EA+4n 17+n    8+n     10      9-31    9-26 NP
    reg, imm     3       -       5+n    5+n      9      8-30    8-25 NP
    mem, imm  3+d(0,2)   -      17+n    8+n     10      9-31   10-27 NP
}ROL:SHL:SHLD:RCR:ROR:Jcc:Flags
{RCR
 ^yRCR             Rotate through Carry Right           Flags: O D I T S Z A P C^y
                                                             * - - - - - - - *
 ^yRCR^y destination,count

                     +------------------------------+
                     |  +---------------+   +----+  |
                     +->|  destination  |-->| CF |--+
                        +---------------+   +----+

    RCR shifts the bits of the destination operand to the right by the
    number of bit positions specified in the count operand. A bit
    shifted out of the right (low-order) end of the destination enters
    the carry flag (CF), and the displaced carry flag rotates around
    to enter the vacated left-most bit position of the destination.
    Another way of looking at this is to consider the carry flag as
    the lowest order bit of the word being rotated.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise RCR sets OF to 0 if destination's sign bit
    was not changed by the operation, to 1 if the sign bit was changed.


    ^yOpcode      Format^y
    C0 /3 ib    RCR  r/m8,imm8
    C1 /3 ib    RCR  r/m16,imm8
    C1 /3 ib    RCR  r/m32,imm8
    D0 /3       RCR  r/m8,1
    D1 /3       RCR  r/m16,1
    D1 /3       RCR  r/m32,1
    D2 /3       RCR  r/m8,CL
    D3 /3       RCR  r/m16,CL
    D3 /3       RCR  r/m32,CL


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, 1       2       2       2       2       9       3       1   PU
    mem, 1    2+d(0,2)  23+EA   15       7      10       4       3   PU
    reg, cl      2       8+4n    5+n    5+n      9      8-30    7-24 NP
    mem, cl   2+d(0,2) 28+EA+4n 17+n    8+n     10      9-31    9-26 NP
    reg, imm     3       -       5+n    5+n      9      8-30    8-25 NP
    mem, imm  3+d(0,2)   -      17+n    8+n     10      9-31   10-27 NP
}ROR:SHR:SHRD:RCL:ROL:Jcc:Flags
{RDMSR
 ^yRDMSR           Read from Model-Specific Register    Flags: Not altered^y

 ^yRDMSR^y                                                CPU: Pentium+ ^vr^v

        ^yLogic^y   if (CPL = 0)
                   EDX:EAX  MSR[ECX]
                else
                   #GP(0)
                endif

    Loads the contents of a 64-bit model specific register (MSR)
    specified in the ECX register into registers EDX:EAX. The EDX
    register is loaded with the high-order 32 bits of the MSR and the
    EAX register is loaded with the low-order 32 bits. If less than 64
    bits are implemented in the MSR being read, the values returned to
    EDX:EAX in unimplemented bit locations are undefined.

    This instruction must be executed at privilege level 0 or in
    real-address mode; otherwise, a general protection exception #GP(0)
    will be generated. Specifying a reserved or unimplemented MSR
    address in ECX will also cause a general protection exception.

    The MSRs control functions for testability, execution tracing,
    performance-monitoring and machine check errors. Appendix C,
    Model-Specific Registers (MSRs), in the Pentium Pro Family
    Developer's Manual, Volume 3 lists all the MSRs that can be read
    with this instruction and their addresses.
    The ^yCPUID^y instruction should be used to determine whether MSRs are
    supported (EDX[5]=1) before using this instruction.



    Protected Mode Exceptions
    #GP(0) If the current privilege level is not 0.
    If the value in ECX specifies a reserved or unimplemented MSR address.

    Real Address Mode Exceptions
    #GP If the current privilege level is not 0
    If the value in ECX specifies a reserved or unimplemented MSR address.

    Virtual 8086 Mode Exceptions
    #GP(0) The RDMSR instruction is not recognized in virtual 8086
    mode.


    ^yOpcode      Format^y
    0F 32       RDMSR


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       -       ??
}WRMSR:CPUID
{RDPMC
 ^yRDPMC           Read Performance-Monitoring Counters^y Flags: ^yNot altered^y

 ^yRDPMC^y                                                CPU: PPro+ ^vr^v

        ^yLogic^y   if (ECX < 2) and
                   ((CR4.PCE = 1) or ((CR4.PCE = 0) and (CPL=0)))
                   EDX:EAX  PMC[ECX]
                else
                   #GP(0)
                endif

    Loads the contents of the 40-bit performance-monitoring counter
    specified in the ECX register into registers EDX:EAX. The EDX
    register is loaded with the high-order 8 bits of the counter and the
    EAX register is loaded with the low-order 32 bits. The Pentium Pro
    processor has two performance-monitoring counters (0 and 1), which
    are specified by placing 0000H or 0001H, respectively, in the ECX
    register.

    The RDPMC instruction allows application code running at a privilege
    level of 1, 2, or 3 to read the performance-monitoring counters if
    the PCE flag in the CR4 register is set. This instruction is
    provided to allow performance monitoring by application code without
    incurring the overhead of a call to an operating-system procedure.

    The performance-monitoring counters are event counters that can be
    programmed to count events such as the number of instructions
    decoded, number of interrupts received, or number of cache loads.
    Appendix B, Performance Monitoring Counters, in the Pentium Pro
    Family Developer's Manual, Volume 3 lists all the events that can be
    counted.

    The RDPMC instruction does not serialize instruction execution. That
    is, it does not imply that all the events caused by the preceding
    instructions have been completed or that events caused by subsequent
    instructions have not begun. If an exact event count is desired,
    software must use a serializing instruction (such as the CPUID
    instruction) before and/or after the execution of the RDPMC
    instruction.

    The RDPMC instruction can execute in 16-bit addressing mode or
    virtual 8086 mode; however, the full contents of the ECX register
    are used to determine the counter to access and a full 40-bit result
    is returned (the low-order 32 bits in the EAX register and the
    high-order 8 bits in the EDX register).


    ^yNote^y
    According to Intel's Pentium Pro manual, "this instruction was
    available in the Pentium processor, but was undocumented".


    Protected Mode Exceptions
    #GP(0) If the current privilege level is not 0 and the PCE flag in the
    CR4 register is clear.
    If the value in the ECX register is not 0 or 1.

    Real Address Mode Exceptions
    #GP If the PCE flag in the CR4 register is clear.
    If the value in the ECX register is not 0 or 1.

    Virtual 8086 Mode Exceptions
    #GP(0) If the PCE flag in the CR4 register is clear.
    If the value in the ECX register is not 0 or 1.


    ^yOpcode      Format^y
    0F 33       RDPMC


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       -       -
}RDTSC:CPUID
{RDTSC
 ^yRDTSC           Read Time-Stamp Counter              Flags: Not altered^y

 ^yRDTSC^y                                                CPU: Pentium+ ^vr^v

        ^yLogic^y   if (CR4.TSD = 0) or ((CR4.TSD = 1) and (CPL = 0))
                   EDX:EAX  TimeStampCounter
                else
                   #GP(0)
                endif

    Loads the current value of the processor's time-stamp counter into
    the EDX:EAX registers. The time-stamp counter is contained in a
    64-bit MSR. The high-order 32 bits of the MSR are loaded into the
    EDX register, and the low-order 32 bits are loaded into the EAX
    register. The processor increments the time-stamp counter MSR every
    clock cycle and resets it to 0 whenever the processor is reset.

    The time stamp disable (TSD) flag in register CR4 restricts the use
    of the RDTSC instruction. When the TSD flag is clear, the RDTSC
    instruction can be executed at any privilege level; when the flag is
    set, the instruction can only be executed at privilege level 0. The
    time-stamp counter can also be read with the RDMSR instruction.

    The RDTSC instruction is not a serializing instruction. Thus, it does
    not necessarily wait until all previous instructions have been
    executed before reading the counter. Similarly, subsequent
    instructions may begin execution before the read operation is
    performed.


    Protected Mode Exceptions
    #GP(0) If the TSD flag in register CR4 is set and the CPL is greater
    than 0.

    Real Address Mode Exceptions
    #GP If the TSD flag in register CR4 is set.

    Virtual 8086 Mode Exceptions
    #GP(0) If the TSD flag in register CR4 is set.


    ^yOpcode      Format^y
    0F 31       RDTSC


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       -       ??
}RDMSR:RDPMC:CPUID
{REP
 ^yREP / REPNE     Repeat String Instruction            Flags: O D I T S Z A P C^y
                                                            (See instruction)
 ^yREP^y string_instruction          Repeat
 ^yREPE^y/^yREPZ^y string_instruction    Repeat While Equal/Zero
 ^yREPNE^y/^yREPNZ^y string_instruction  Repeat While Not Equal/Not Zero

        ^yLogic^y   ; MOVS,STOS,LODS,INS,OUTS
                while eCX <> 0
                   execute string instruction once
                   eCX  eCX - 1
                endwhile
                
                ; CMPS,SCAS
                while eCX <> 0
                   execute string instruction once
                   eCX  eCX - 1
                   exit iteration if ZF=0 (REPE)  if ZF=1 (REPNE)
                endwhile

    REP, REPE, and REPNE are prefixes that can be specified before any
    string instruction (CMPS, MOVS, SCAS, STOS, INS, OUTS (or LODS)). The
    prefix causes the string instruction following it to be repeated
    the number of times indicated in the count register or (for REPE
    and REPNE) until the indicated condition in the zero flag is no
    longer met.
    REP combined with MOVS or SCAS permits fast memory-to-memory move
    or scanning operations.

    If the address-size attribute is 16 bits, CX is used for the count
    register; otherwise the address-size attribute is 32 bits and ECX
    is used. If the count value is initially zero, the instruction is
    not executed and no flags are changed.


    REP is used with MOVS, STOS, INS, OUTS (or LODS); the flags are
    not used or modified by these instructions.

    REPE and REPNE are used with CMPS and SCAS which use the zero flag
    (ZF) and determine the OF,SF,ZF,AF,PF,CF flags. (The ZF flag need
    not be initialized before REPcc CMPS/SCAS.)


    The REP prefixes apply only to one instruction at a time. To
    repeat a block of instructions, use a loop construct. Any pending
    interrupts are acknowledged on each iteration of REP.


    ^yNote^y
    Instructions with one of the REP prefixes, combined with a segment
    override and/or another prefix, (e.g. rep movs [byte ss:si],[byte
    es:di] ) execute incorrectly on 8086/8088 CPUs if an interrupt
    occurs before CX reaches 0. Some sources say this bug is present
    on all pre-80386 CPUs.


    ^yOpcode      Format^y
    F2 ...      REPNE ...               ; Prefix
    F3 ...      REPE ...
    F3 ...      REP ...                 ; REP = REPE


    ^yLength and timing^y
    Refer to REPeated instruction.
}CLD:STD:CMPS:INS:LODS:MOVS:OUTS:SCAS:STOS:LOOP:JCXZ
{RET
 ^yRET             Return from Procedure                Flags: Not altered^y

 ^yRET^y optional_pop_value

        ^yLogic^y   POP eIP
                if FAR return (inter-segment)
                    POP CS
                endif
                eSP  eSP + optional_pop_value (if specified)

    RET transfers control to a return address located on the stack.
    The address is usually placed on the stack by a CALL instruction,
    and the return is made to the instruction that follows the CALL.

    For a near (intrasegment) return, the address on the stack is a
    segment offset, which is popped into the instruction pointer
    (eIP). The CS register is unchanged.
    For a far (intersegment) return, the address on the stack is a
    long pointer; the offset is popped first, followed by the segment
    value.

    The optional numeric parameter to RET gives the number of stack
    bytes (regardless of operand size) to be released after the return
    address is popped. These items are typically used as input
    parameters to the procedure called (the Pascal calling
    convention).


                ^yNote^y
                Strictly speaking, RET is not an instruction,
                but a macro. It is always translated by the
                assembler into a RETN or a RETF instruction.


    ^yProtected mode^y
    In real mode, CS and IP are loaded directly. In protected mode, an
    intersegment return causes the processor to check the descriptor
    addressed by the return selector. The Access Rights (AR) byte of
    the descriptor must indicate a code segment of equal or lesser
    privilege than the current privilege level (CPL). Returns to a
    lesser privilege level cause the stack to be reloaded from the
    value saved beyond the parameter block.

    The DS, ES, FS, and GS segment registers can be set to zero by the
    RET instruction during an interlevel transfer. If these registers
    refer to segments that cannot be used by the new privilege level,
    they are set to zero to prevent unauthorized access from the new
    privilege level.


    ^yOpcode      Format^y
    C2 iw       RETN imm16
    C3          RETN
    CA iw       RETF imm16
    CB          RETF


    ^yLength and timing^y
    Variations/
    operands    Bytes   8088    186     286     386     486     Pentium
    retn        1       20      16      11+m    10+m     5       2   NP
    retn imm16  1+d(2)  24      18      11+m    10+m     5       3   NP
    retf        1       34      22      15+m    18+m    13       4   NP
    retf imm16  1+d(2)  33      25      15+m    18+m    14       4   NP

                             ^yProtected Mode^y
    Variations/
    operands    bytes                   286     386     486     Pentium
    retf        1                     25+m/55  32+m/62 18/33  4-13/23 NP
    retf imm16  1+d(2)                25+m/55  32+m/68 17/33  4-13/23 NP

         Cycles for: same privilege level/lower privilege level
}CALL:ENTER:LEAVE:IRET:Selectors:Descriptors
{RETF

    ^yRETF: See RET^y
}RET
{RETN

    ^yRETN: See RET^y
}RET
{ROL
 ^yROL             Rotate Left                          Flags: O D I T S Z A P C^y
                                                             * - - - - - - - *
 ^yROL^y destination,count

                      +----+     +---------------+
                      | CF |<--+-|  destination  |<-+
                      +----+   | +---------------+  |
                               +--------------------+

    ROL shifts the bits of the destination to the left by the number
    of bit positions specified in the count operand. As bits are
    transferred out the left (high-order) end of the destination, they
    re-enter on the right (low-order) end. The carry flag (CF) is
    updated to match the last bit shifted out of the left end.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise ROL sets OF to 0 if destination's sign bit
    was not changed by the operation, to 1 if the sign bit was changed.


    ^yOpcode      Format^y
    C0 /0 ib    ROL  r/m8,imm8
    C1 /0 ib    ROL  r/m16,imm8
    C1 /0 ib    ROL  r/m32,imm8
    D0 /0       ROL  r/m8,1
    D1 /0       ROL  r/m16,1
    D1 /0       ROL  r/m32,1
    D2 /0       ROL  r/m8,CL
    D3 /0       ROL  r/m16,CL
    D3 /0       ROL  r/m32,CL


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, 1       2       2       2       2       3       3       1   PU
    mem, 1    2+d(0,2)  23+EA   15       7       7       4       3   PU
    reg, cl      2       8+4n    5+n    5+n      3       3       4   NP
    mem, cl   2+d(0,2) 28+EA+4n 17+n    8+n      7       4       4   NP
    reg, imm     3       -       5+n    5+n      3       2       1   PU
    mem, imm  3+d(0,2)   -      17+n    8+n      7       4       3   PU*

       * = not pairable if there is a displacement and immediate
}RCL:SHL:SHLD:RCR:ROR:XCHG:BSWAP:Flags
{ROR
 ^yROR             Rotate Right                         Flags: O D I T S Z A P C^y
                                                             * - - - - - - - *
 ^yROR^y destination,count

                      +--------------------+
                      |  +---------------+ |   +----+
                      +->|  destination  |-+-->| CF |
                         +---------------+     +----+

    ROR shifts the bits of the destination to the right by the number
    of bit positions specified in the count operand. As bits are
    transferred out the right (low-order) end of the destination, they
    re-enter on the left (high-order) end. The carry flag (CF) is
    updated to match the last bit shifted out of the right end.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise ROR sets OF to 0 if destination's sign bit
    was not changed by the operation, to 1 if the sign bit was changed.


    ^yOpcode      Format^y
    C0 /1 ib    ROR  r/m8,imm8
    C1 /1 ib    ROR  r/m16,imm8
    C1 /1 ib    ROR  r/m32,imm8
    D0 /1       ROR  r/m8,1
    D1 /1       ROR  r/m16,1
    D1 /1       ROR  r/m32,1
    D2 /1       ROR  r/m8,CL
    D3 /1       ROR  r/m16,CL
    D3 /1       ROR  r/m32,CL


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, 1       2       2       2       2       3       3       1   PU
    mem, 1    2+d(0,2)  23+EA   15       7       7       4       3   PU
    reg, cl      2       8+4n    5+n    5+n      3       3       4   NP
    mem, cl   2+d(0,2) 28+EA+4n 17+n    8+n      7       4       4   NP
    reg, imm     3       -       5+n    5+n      3       2       1   PU
    mem, imm  3+d(0,2)   -      17+n    8+n      7       4       3   PU*

       * = not pairable if there is a displacement and immediate
}RCR:SAR:SHR:SHRD:RCL:ROL:XCHG:BSWAP:Flags
{RSM
 ^yRSM             Resume from System Management Mode  Flags: O D I T S Z A P C^y
                                                            * * * * * * * * *
 ^yRSM^y                                                 CPU: Pentium+  ^vr^v

        ^yLogic^y ReturnFromSSM
                ProcessorState  Restore(SSMDump)

    Returns program control from system management mode (SMM) to the
    application program or operating system procedure that was
    interrupted when the processor received an SMM interrupt. The
    processor's state is restored from the dump created upon entering
    SMM. If the processor detects invalid state information during state
    restoration, it enters the shutdown state. The following invalid
    information can cause a shutdown:

    - Any reserved bit of CR4 is set to 1.
    - Any illegal combination of bits in CR0, such as
      (PG=1 and PE=0) or (NW=1 and CD=0).
    - (Intel Pentium and Intel486 only.) The value stored in the state
      dump base field is not a 32-KByte aligned address.

    The contents of the model-specific registers are not affected by a
    return from SMM.

    See Chapter 9 in the Pentium Pro Family Developer's Manual, Volume 3
    for more information about SMM and the behavior of the RSM
    instruction.


    Protected Mode Exceptions
    #UD If an attempt is made to execute this instruction when the
    processor is not in SMM.

    Real Address Mode Exceptions
    #UD If an attempt is made to execute this instruction when the
    processor is not in SMM.

    Virtual 8086 Mode Exceptions
    #UD If an attempt is made to execute this instruction when the
    processor is not in SMM.


    ^yOpcode      Format^y
    0F AA       RSM


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       -       -
}
{SAHF
 ^ySAHF            Store Register AH into Flags         Flags: O D I T S Z A P C^y
                                                             - - - - * * * * *
 ^ySAHF^y

        ^yLogic^y   Flag-reg bits        AH bits
                  S Z A P C         7 6 4 2 0

    SAHF copies bits 7, 6, 4, 2, and 0 from the AH register into the
    low byte of the flags register, replacing the previous values of
    the Sign, Zero, Auxiliary Carry, Parity, and the Carry flags.

    No other flags are changed by this instruction.


    ^yOpcode      Format^y
    9E          SAHF


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       4       3       2       3       2       2   NP
}LAHF:POPF:SETcc:CLC:STC:CLD:STD:CLI:STI:Flags
{SAL

    ^ySAL:  See SHL^y

    SHL is the same instruction as SAL (shift arithmetic left).
}SHL
{SAR
 ^ySAR             Shift Arithmetic Right               Flags: O D I T S Z A P C^y
                                                             * - - - * * ? * *
 ^ySAR^y destination,count

                            +-------------+     +----+
                Sign bit -->| destination |---->| CF |
                            +-------------+     +----+

    SAR shifts the bits of the destination operand downward (toward
    the least significant bit) by the number of bit positions
    specified in the second operand (count). As bits are transferred
    out of the right (low-order) end of the destination, bits equal to
    the original sign bit are shifted into the left (high-order) end,
    thereby preserving the sign bit. The carry flag (CF) is set equal
    to the last bit shifted out of the right end.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise SAR resets OF to zero.


    SAR divides a signed integer by a power-of-two. Note that SAR
    rounds toward negative infinity (not the same as IDIV):
        mov     al,-1           mov     ax,-1
        sar     al,1            mov     cl,2
        ; al = -1               idiv    cl
                                ; al = 0


    ^yOpcode      Format^y
    C0 /7 ib    SAR  r/m8,imm8
    C1 /7 ib    SAR  r/m16,imm8
    C1 /7 ib    SAR  r/m32,imm8
    D0 /7       SAR  r/m8,1
    D1 /7       SAR  r/m16,1
    D1 /7       SAR  r/m32,1
    D2 /7       SAR  r/m8,CL
    D3 /7       SAR  r/m16,CL
    D3 /7       SAR  r/m32,CL


    ^yLength and timing^y   (Identical for SAR, SHR, SAL, and SHL)
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, 1       2       2       2       2       3       3       1   PU
    mem, 1    2+d(0,2)  23+EA   15       7       7       4       3   PU
    reg, cl      2       8+4n    5+n    5+n      3       3       4   NP
    mem, cl   2+d(0,2) 28+EA+4n 17+n    8+n      7       4       4   NP
    reg, imm     3       -       5+n    5+n      3       2       1   PU
    mem, imm  3+d(0,2)   -      17+n    8+n      7       4       3   PU*

       * = not pairable if there is a displacement and immediate
}SHR:ROR:RCR:SHRD:SHL:SHLD:IDIV:Flags
{SBB
 ^ySBB             Subtract with Borrow                 Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^ySBB^y destination,source

        ^yLogic^y   destination  destination - source - CF

    SBB subtracts the source operand from the destination operand,
    subtracts 1 from that result if the carry flag is set, and stores
    the result in destination.

    When an immediate byte value is subtracted from a (double)word
    operand, the immediate value is first sign-extended to the size of
    the destination operand.


    ^yOpcode      Format^y
    18 /r       SBB  r/m8,r8
    19 /r       SBB  r/m16,r16
    19 /r       SBB  r/m32,r32
    1A /r       SBB  r8,r/m8
    1B /r       SBB  r16,r/m16
    1B /r       SBB  r32,r/m32
    1C ib       SBB  AL,imm8
    1D iw       SBB  AX,imm16
    1D id       SBB  EAX,imm32
    80 /3 ib    SBB  r/m8,imm8
    81 /3 iw    SBB  r/m16,imm16
    81 /3 id    SBB  r/m32,imm32
    83 /3 ib    SBB  r/m16,imm8
    83 /3 ib    SBB  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   PU
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   PU
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   PU
    reg, imm  2+i(1,2)   4       4       3       2       1       1   PU
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   PU*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   PU

       * = not pairable if there is a displacement and immediate
}SUB:DEC:CMP:ADC:Flags
{SCAS
 ^ySCAS            Scan String                          Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^ySCAS^y destination_string

        ^yLogic^y   CMP accumulator,[ES:eDI]
                if DF = 0
                   eDI  eDI + n        ; n = 1 for byte, 2 for word,
                else                    ;     4 for dword (386+)
                   eDI  eDI - n
                endif

    This instruction compares the contents of the AL, AX, or EAX
    register with the memory byte, word, or doubleword pointed to by
    ES and the destination-index register (DI or EDI). After the
    comparison, eDI is automatically incremented (if the direction
    flag is cleared) or decremented (if the direction flag is set), in
    preparation for comparing the next element of the string.

    If the address-size attribute of SCAS is 16 bits, DI is used for
    the destination-index register; otherwise the address-size
    attribute is 32 bits, and the EDI register will be used.

    The destination segment must be addressable from the ES register;
    it cannot be changed with a segment override.


    ^yNote^y:       This instruction is always translated by the
                assembler into SCASB, Scan String Byte, SCASW, Scan
                String Word, or SCASD, Scan String Dword, depending
                upon whether destination refers to a string of bytes,
                words or doublewords. In any case, you must
                explicitly load the eDI register with the offset of
                the destination string.

                SCASB, SCASW, and SCASD are synonyms for the byte,
                word, and doubleword SCAS instructions that do not
                require an operand. They are simpler to use but
                provide no type or segment checking.


    SCAS is usually preceded with a REP prefix for a block scan of CX
    or ECX bytes, words, or doublewords.


        Example:
                dataseg
                aname db "Frederik x"
                a_len = ($ - aname)
                codeseg
                mov     di,ds
                mov     es,di
                assume  es:dgroup
                mov     di,offset aname
                mov     cx,a_len
                mov     al,"x"
                cld                     ; Auto-increment di
                repne   scasb           ; Look for "x"
                jne     not_found       ; Can't use JCXZ
                dec     di              ; Point di to "x"
                mov     [byte di],"X"
                ;...
             not_found:


    ^yOpcode      Format^y
    AE          SCAS m8                 ; = SCASB
    AF          SCAS m16                ; = SCASW
    AF          SCAS m32                ; = SCASD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    scasb        1      19      15       7       7       6       4   NP
    scasw        1      19      15       7       7       6       4   NP
    scasd        1       -       -       -       7       6       4   NP
    repX scasb   2      9+15n   5+15n   5+8n    5+8n    7+5n*   8+4n NP
    repX scasw   2      9+19n   5+15n   5+8n    5+8n    7+5n*   8+4n NP
    repX scasd   2       -       -       -      5+8n    7+5n*   8+4n NP

    repX = repe or repz or repne or repnz

                     * = 5 if n=0
                     (n = count of bytes, words or dwords)
}REP:CMPS:CLD:STD:Jcc:JCXZ:Flags
{SETcc
 ^vSETcc^y           Set Byte on Condition                Flags: Not altered^y

 ^vSETcc^v destination                                    CPU: 386+

        ^yLogic^y   if condition
                   destination  1
                else
                   destination  0
                endif

    The SET instructions set the 8-bit destination to 1 if the
    specified condition is true, otherwise destination is set to 0.

    The conditions for each instruction are given below. The terms
    "above" and "below" are used for unsigned integers, "less" and
    "greater" for signed integers.
    Note that several mnemonics have the same meaning.


    ^vInstruction        SET to 1 if ... else to 0            Flags^v
    SETA, SETNBE       Above, Not Below or Equal            CF=0 AND ZF=0
    SETAE,SETNB,SETNC  Above or Equal, Not Below, No Carry  CF=0
    SETBE, SETNA       Below or Equal, Not Above            CF=1 OR ZF=1
    SETB, SETC,SETNAE  Below, Carry, Not Above or Equal     CF=1
    SETE, SETZ         Equal, Zero                          ZF=1
    SETNE, SETNZ       Not Equal, Not Zero                  ZF=0
    SETP, SETPE        Parity, Parity Even                  PF=1
    SETNP, SETPO       No Parity, Parity Odd                PF=0

    SETG, SETNLE       Greater, Not Less or Equal           SF=OF AND ZF=0
    SETGE, SETNL       Greater or Equal, Not Less           SF=OF
    SETLE, SETNG       Less or Equal, Not Greater           SF<>OF OR ZF=1
    SETL, SETNGE       Less, Not Greater or Equal           SF<>OF
    SETO               Overflow                             OF=1
    SETNO              No Overflow                          OF=0
    SETS               Sign (negative)                      SF=1
    SETNS              No Sign (positive)                   SF=0


    ^yOpcode       Format^y     ; All 386+
    0F 90 cb     SETO  r/m8
    0F 91 cb     SETNO r/m8
    0F 92 cb     SETB  r/m8
    0F 93 cb     SETAE r/m8
    0F 94 cb     SETE  r/m8
    0F 95 cb     SETNE r/m8
    0F 96 cb     SETBE r/m8
    0F 97 cb     SETA  r/m8
    0F 98 cb     SETS  r/m8
    0F 99 cb     SETNS r/m8
    0F 9A cb     SETP  r/m8
    0F 9B cb     SETNP r/m8
    0F 9C cb     SETL  r/m8
    0F 9D cb     SETGE r/m8
    0F 9E cb     SETLE r/m8
    0F 9F cb     SETG  r/m8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r8            3                              4      4/3     1/2  NP
    mem8        3+d(0-2)                         5      3/4     1/2  NP

                        Cycles are for:  true/false
}TEST:BT:Jcc:LAHF:SAHF:PUSHF:CMOVcc:Flags
{SGDT
 ^ySGDT            Store Global Descriptor Table Reg.   Flags: Not altered^y
 ^ySIDT            Store Interrupt Descriptor Table Reg.^y

 ^ySGDT^y destination                                     CPU: 286+ ^yp^y
 ^ySIDT^y destination                                     CPU: 286+ ^yp^y

        ^yLogic^y   destination  GDTR
            or  destination  IDTR

    SGDT/SIDT copies the contents of the descriptor table register to
    the six bytes of memory indicated by the operand. The LIMIT field
    of the register is assigned to the first word at the effective
    address. If the operand-size attribute is 16 bits, the next three
    bytes are assigned the 24-bit BASE field of the register, and the
    fourth byte is written with zero. Otherwise, if the operand-size
    attribute is 32 bits, the next four bytes are assigned the 32-bit
    BASE field of the register. I.e. the 16-bit forms of SGDT/SIDT
    are compatible with the 80286, but only if the value in the upper
    8 bits is not referenced; the '286 stores 1's in these bits, the
    '386+ stores 0's.

    SGDT and SIDT are used in operating system software; they are not
    normally used in application programs.


        Example:        sgdt  [pword ptr di]


    ^yOpcode      Format^y
    0F 01 /0    SGDT m
    0F 01 /1    SIDT m


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
  SGDT mem48      5                     11       9      10       4   NP
  SIDT mem48      5                     12       9      10       4   NP
}LGDT:LIDT:Selectors:Descriptors:System address
{SHL
 ^ySHL             Shift Logical Left                   Flags: O D I T S Z A P C^y
                                                             * - - - * * ? * *
 ^ySHL^y destination,count

                    +----+     +-------------+
                    | CF |<----| destination |<--- 0
                    +----+     +-------------+

    SHL shifts the bits of the destination operand upward (toward the
    most significant bit) by the number of bit positions specified in
    the second operand (count). As bits are transferred out of the
    left (high-order) end of the destination, zero bits are shifted
    into the right (low-order) end. The carry flag (CF) is set equal
    to the last bit shifted out of the left end.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise SHL sets OF to 0 if destination's sign bit
    was not changed by the operation, to 1 if the sign bit was changed.


    SHL multiplies an unsigned integer by a power-of-two.

        Example:        shl   ax, 1     ; Shift left 1 bit
                        rcl   dx, 1     ; Propagate carry

    ^yNote^y
    SHL is the same instruction as SAL (shift arithmetic left).


    ^yOpcode      Format^y
    C0 /4 ib    SHL  r/m8,imm8
    C1 /4 ib    SHL  r/m16,imm8
    C1 /4 ib    SHL  r/m32,imm8
    D0 /4       SHL  r/m8,1
    D1 /4       SHL  r/m16,1
    D1 /4       SHL  r/m32,1
    D2 /4       SHL  r/m8,CL
    D3 /4       SHL  r/m16,CL
    D3 /4       SHL  r/m32,CL


    ^yLength and timing^y
    Same as SAR
}ROL:RCL:SAL:SHLD:SHR:SAR:SHRD:Flags
{SHLD
 ^ySHLD            Double Precision Shift Left          Flags: O D I T S Z A P C^y
                                                             ? - - - * * ? * *
 ^ySHLD^y operand1,operand2,count                         CPU: 386+

        ^yLogic^y   operand1  high (SHL (operand1:operand2),count)

    SHLD concatenates the two 16- or 32-bit operands into a 32-bit or
    64-bit value (operand2 is the low-order half), shifts the value
    left by count bit positions, and returns the high-order half of
    the result to operand1. Operand2 is not changed by SHLD.

    The count operand is provided by either an immediate 8-bit value
    or the contents of the CL register; only the lower 5 bits are
    used, limiting the count value to 31.

    The carry flag (CF) is set to the value of the last bit shifted
    out. The overflow flag (OF) is undefined.


        Example:
                ; Shift 64 bits left by 8
                dataseg
                val1  dd 98765432h
                val2  dd 001EDCBAh
                codeseg
                p386n
                mov   cl, 8             ; CL = shift count
                mov   eax, [val1]       ; Load low dword
                shld  [val2], eax, cl   ; Shift into high dword
                shl   [val1], cl        ; Adjust low dword
                ; val1 = 76543200h
                ; val2 = 1EDCBA98h


    ^yOpcode      Format^y
    0F A4       SHLD r/m16,r16,imm8
    0F A4       SHLD r/m32,r32,imm8
    0F A5       SHLD r/m16,r16,CL
    0F A5       SHLD r/m32,r32,CL


    ^yLength and timing^y
    Operands        Bytes                      386     486     Pentium
    reg, reg, imm    4                          3       2       4   NP
    mem, reg, imm   4+d(0-2)                    7       3       4   NP
    reg, reg, cl     4                          3       3       4   NP
    mem, reg, cl    4+d(0-2)                    7       4       5   NP
}SHL:ROL:RCL:SHR:SHRD:Flags
{SHR
 ^ySHR             Shift Logical Right                  Flags: O D I T S Z A P C^y
                                                             * - - - * * ? * *
 ^ySHR^y destination,count

                         +-------------+     +----+
                    0 -->| destination |---->| CF |
                         +-------------+     +----+

    SHR shifts the bits of the destination operand downward (toward
    the least significant bit) by the number of bit positions
    specified in the second operand (count). As bits are transferred
    out of the right (low-order) end of the destination, zero bits are
    shifted into the left (high-order) end. The carry flag (CF) is set
    equal to the last bit shifted out of the right end.

    The shift is repeated the number of times indicated by the second
    operand, which is either an immediate 8-bit value (^ymax^y. 1 on the
    8086 processor) or the contents of the CL register. To reduce the
    maximum execution time, the 80186+ uses only the lower 5 bits of
    the count, limiting the count value to 31; the 8086 uses all 8
    bits of count.

    If the count operand is not an immediate 1, the overflow flag (OF)
    is undefined; otherwise SHR sets OF equal to the high-order bit of
    the original operand (i.e. 1 if the sign bit was changed).


    SHR divides an unsigned integer by a power-of-two.

        Example:        shr   dx, 1     ; Shift right 1 bit
                        rcr   ax, 1     ; Propagate carry


    ^yOpcode      Format^y
    C0 /5 ib    SHR  r/m8,imm8
    C1 /5 ib    SHR  r/m16,imm8
    C1 /5 ib    SHR  r/m32,imm8
    D0 /5       SHR  r/m8,1
    D1 /5       SHR  r/m16,1
    D1 /5       SHR  r/m32,1
    D2 /5       SHR  r/m8,CL
    D3 /5       SHR  r/m16,CL
    D3 /5       SHR  r/m32,CL


    ^yLength and timing^y
    Same as SAR
}SAR:ROR:RCR:SHRD:SHL:SHLD:Flags
{SHRD
 ^ySHRD            Double Precision Shift Right         Flags: O D I T S Z A P C^y
                                                             ? - - - * * ? * *
 ^ySHRD^y operand1,operand2,count                         CPU: 386+

        ^yLogic^y   operand1  low (SHR (operand2:operand1),count)

    SHRD concatenates the two 16- or 32-bit operands into a 32-bit or
    64-bit value (operand2 is the high-order half), shifts the value
    right by count bit positions, and returns the low-order half of
    the result to operand1. Operand2 is not changed by SHRD.

    The count operand is provided by either an immediate 8-bit value
    or the contents of the CL register; only the lower 5 bits are
    used, limiting the count value to 31.

    The carry flag (CF) is set to the value of the last bit shifted
    out. The overflow flag (OF) is undefined.


        Example:
                ; Shift 64 bits right by 8
                p386n                   ; edx         eax
                ;                       ; 0FEDCBA98h  76543210h
                shrd  eax, edx, 8       ; 0FEDCBA98h  98765432h
                shr   edx, 8            ; 000FEDCBAh  98765432h


    ^yOpcode      Format^y
    0F AC       SHRD r/m16,r16,imm8
    0F AC       SHRD r/m32,r32,imm8
    0F AD       SHRD r/m16,r16,CL
    0F AD       SHRD r/m32,r32,CL


    ^yLength and timing^y
    Operands       Bytes                       386     486     Pentium
    reg, reg, imm    4                          3       2       4   NP
    mem, reg, imm   4+d(0-2)                    7       3       4   NP
    reg, reg, cl     4                          3       3       4   NP
    mem, reg, cl    4+d(0-2)                    7       4       5   NP
}SHR:ROR:RCR:SHL:SHLD:Flags
{SIDT

    ^ySIDT: See SGDT^y
}SGDT
{SLDT
 ^ySLDT            Store Local Descriptor Table         Flags: Not altered^y

 ^ySLDT^y destination                                     CPU: 286+ ^yp^y

        ^yLogic^y   destination  LDTR

    The Local Descriptor Table is pointed to by a selector that is
    kept in the LDTR. SLDT stores the contents of the LDT register in
    the 16-bit destination.

    ^yNote^y
    The operand-size attribute has no effect on the operation of SLDT.

    SLDT appears in operating system software; it is not normally used
    in application programs.


    ^yOpcode      Format^y
    0F 00 /0    SLDT r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16           3                      2       2       2       2   NP
    mem16      3+d(0-2)                  3       2       3       2   NP
}LLDT:Selectors:System address registers
{SMSW
 ^ySMSW            Store Machine Status Word            Flags: Not altered^y

 ^ySMSW^y destination                                     CPU: 286+ ^yp^y

        ^yLogic^y   destination  MSW

    SMSW stores the machine status word (MSW register) in the 16-bit
    destination.


    ^yNote^y
    The MSW register is part of control register zero (CR0).
    80386-based programs should use a MOV instruction to read the
    contents of MSW (e.g. mov eax,CR0).


    ^yOpcode      Format^y
    0F 01 /4    SMSW r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16          3                       2       2       2       4   NP
    mem16      3+d(0-2)                  3       3       3       4   NP
}LMSW:MOV:Control registers
{STC
 ^ySTC             Set Carry Flag                       Flags: O D I T S Z A P C^y
                                                             - - - - - - - - 1
 ^ySTC^y

        ^yLogic^y   CF  1

    STC sets the carry flag (CF).


    ^yOpcode      Format^y
    F9          STC


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       2       2       2   NP
}CLC:CMC:LAHF:SAHF:Jcc:BT:SETcc:Flags
{STD
 ^ySTD             Set Direction Flag                   Flags: O D I T S Z A P C^y
                                                             - 1 - - - - - - -
 ^ySTD^y

        ^yLogic^y   DF  1                  ; Auto-decrement eSI, eDI

    STD sets the direction flag. After STD is executed, string
    operations will decrement the index registers that they use
    (SI and/or DI; ESI and/or EDI).


    ^yOpcode      Format^y
    FD          STD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       2       2       2   NP
}CLD:REP:CMPS:INS:LODS:MOVS:OUTS:SCAS:STOS:Flags
{STI
 ^ySTI             Set Interrupt Enable Flag            Flags: O D I T S Z A P C^y
                                                             - - 1 - - - - - -
 ^ySTI^y                                                  ^yIO^y

        ^yLogic^y   IF  1

    STI sets the interrupt-enable flag (IF). After executing the
    instruction following STI, the CPU responds to external
    interrupts, but only if the next instruction allows the interrupt
    flag to remain enabled.

    If external interrupts are disabled and you code STI,RET (such as
    at the end of a subroutine), the RET is allowed to execute before
    external interrupts are recognized. Also, if external interrupts
    are disabled and you code STI,CLI, then external interrupts are
    not recognized because the CLI instruction clears the interrupt
    flag during its execution.

    Non-maskable interrupts are recognized no matter what the state of
    the IF flag.

    ^yProtected mode^y
    If the current task has insufficient privilege to alter IF, a
    general protection exception is generated.


    ^yOpcode      Format^y
    FB          STI


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       2       2       2       3       5       7   NP
}CLI:POPF:Flags:IOPL:Exceptions
{STOS
 ^ySTOS            Store String                         Flags: Not altered^y

 ^ySTOS^y destination_string

        ^yLogic^y   [ES:eDI]  accumulator
                if DF = 0
                   eDI  eDI + n        ; n = 1 for byte, 2 for word,
                else                    ;     4 for dword (386+)
                   eDI  eDI - n
                endif

    This instruction transfers the contents of the AL, AX, or EAX
    register to the memory byte, word, or doubleword pointed to by
    ES and the destination-index register (DI or EDI). After the
    transfer is made, eDI is automatically incremented (if the
    direction flag is cleared) or decremented (if the direction flag
    is set), in preparation for storing the next element of the
    string.

    If the address-size attribute of STOS is 16 bits, DI is used for
    the destination-index register; otherwise the address size is
    32 bits, and the EDI register will be used.

    The destination segment must be addressable from the ES register;
    it cannot be changed with a segment override.


    ^yNote^y:       This instruction is always translated by the assembler
                into STOSB, Store String Byte, STOSW, Store String
                Word, or STOSD, Store String Dword, depending upon
                whether destination refers to a string of bytes, words
                or doublewords. In any case, you must explicitly
                load the eDI register with the offset of the
                destination string.

                STOSB, STOSW, and STOSD are synonyms for the byte,
                word, and doubleword STOS instructions that do not
                require an operand. They are simpler to use but
                provide no type or segment checking.


    STOS is usually preceded with a REP prefix for a block fill of CX
    or ECX bytes, words, or doublewords.


    ^yOpcode      Format^y
    AA          STOS m8                 ; = STOSB
    AB          STOS m16                ; = STOSW
    AB          STOS m32                ; = STOSD


    ^yLength and timing^y
    Variations  Bytes   8088    186     286     386     486     Pentium
    stosb        1      11      10       3       4       5       3   NP
    stosw        1      15      10       3       4       5       3   NP
    stosd        1       -       -       -       4       5       3   NP
    rep stosb    2      9+10n   6+9n    4+3n    5+5n    7+4n*   3+n  NP
    rep stosw    2      9+14n   6+9n    4+3n    5+5n    7+4n*   3+n  NP
    rep stosd    2       -       -       -      5+5n    7+4n*   3+n  NP

                        * = 5 if n=0, 13 if n=1
                 (n = count of bytes, words or dwords)
}REP:MOVS:LOOP:CLD:STD
{STR
 ^ySTR             Store Task Register                  Flags: Not altered^y

 ^ySTR^y destination                                      CPU: 286+ ^yp^y

        ^yLogic^y   destination  TR

    STR copies the contents of the task register to the 16-bit
    destination. The operand-size attribute has no effect on STR.


    STR appears in operating system software; it is not used in
    application programs.


    ^yOpcode      Format^y
    0F 00 /1    STR  r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    r16          3                       2       2       2       2   NP
    mem16     3+d(0-2)                   3       2       3       2   NP
}LTR:System address registers
{SUB
 ^ySUB             Subtract                             Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^ySUB^y destination,source

        ^yLogic^y   destination  destination - source

    SUB subtracts the source operand from the destination operand and
    stores the result in destination.

    When an immediate byte value is subtracted from a (double)word
    operand, the immediate value is first sign-extended to the size of
    the destination operand.


    ^yOpcode      Format^y
    28 /r       SUB  r/m8,r8
    29 /r       SUB  r/m16,r16
    29 /r       SUB  r/m32,r32
    2A /r       SUB  r8,r/m8
    2B /r       SUB  r16,r/m16
    2B /r       SUB  r32,r/m32
    2C ib       SUB  AL,imm8
    2D iw       SUB  AX,imm16
    2D id       SUB  EAX,imm32
    80 /5 ib    SUB  r/m8,imm8
    81 /5 iw    SUB  r/m16,imm16
    81 /5 id    SUB  r/m32,imm32
    83 /5 ib    SUB  r/m16,imm8
    83 /5 ib    SUB  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   UV
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   UV
    reg, imm  2+i(1,2)   4       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

       * = not pairable if there is a displacement and immediate
}SBB:DEC:CMP:Flags
{TEST
 ^yTEST            Test                                 Flags: O D I T S Z A P C^y
                                                             0 - - - * * ? * 0
 ^yTEST^y destination,source

        ^yLogic^y   (AND destination,source)  ; Set flags only

    TEST performs a logical AND operation on its two operands and
    updates the flags. None of the operands are changed.

        Example:        test  eax, eax
                        jz    ahead     ; Skip if eax = 0


        Example:
        test  al, 00100100b     ; Test for bits 2 and 5
        jz    two_five_off      ; If result is zero, both were off
      one_or_both_set:
        jpe   two_five_set      ; Can use parity flag in a byte result
      two_or_five_set:          ; Bit 2 or bit 5 was set
        ; ...
      two_five_set:             ; Both bits were set
        ; ...
      two_five_off:             ; Both bits were clear


    ^yOpcode      Format^y
    84 /r       TEST r/m8,r8
    85 /r       TEST r/m16,r16
    85 /r       TEST r/m32,r32
    A8 ib       TEST AL,imm8
    A9 iw       TEST AX,imm16
    A9 id       TEST EAX,imm32
    F6 /0 ib    TEST r/m8,imm8
    F7 /0 iw    TEST r/m16,imm16
    F7 /0 id    TEST r/m32,imm32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  13+EA   10       6       5       2       2   UV
    reg, mem  2+d(0,2)  13+EA   10       6       5       2       2   UV
    reg, imm  2+i(1,2)   5       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  11+EA   10       6       5       2       2   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate
}AND:NEG:NOT:OR:XOR:LAHF:Jcc:SETcc:BT:Flags
{UD2
 ^yUD2             Undefined Instruction                Flags: Not altered^y

 ^yUD2^y                                                  CPU: PPro+

        ^yLogic^y   Generate invalid opcode exception

    Generates an invalid opcode. This instruction is provided for
    software testing to explicitly generate an invalid opcode. The
    opcode for this instruction is reserved for this purpose.

    Other than raising the invalid opcode exception, this instruction is
    the same as the NOP instruction.


    Exceptions (All Operating Modes)
    #UD Instruction is guaranteed to raise an invalid opcode exception
    in all operating modes.


    ^yOpcode      Format^y
    0F 0B       UD2


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       -       -
}NOP
{VERR
 ^yVERR            Verify Read                          Flags: O D I T S Z A P C^y
 ^yVERW            Verify Write^y                                - - - - - * - - -

 ^yVERR^y selector                                        CPU: 286+ ^yp^y
 ^yVERW^y selector                                        CPU: 286+ ^yp^y

        ^yLogic^y   if segment accessible
                   ZF <- 1
                else
                   ZF <- 0
                endif

    These instructions verify whether the segment denoted by the
    selector in the word-sized operand can be reached with the current
    privilege level (CPL) and if the segment is readable (VERR) or
    writable (VERW). If the segment is accessible, the zero flag (ZF)
    is set to 1; if not, ZF is reset to 0.

    To set ZF=1, the following conditions must be met:
       - The selector must denote a descriptor within the bounds of
         the table (GDT or LDT); the selector must be "defined".
       - The selector must denote the descriptor of a code or data
         segment (not that of a task state segment, LDT, or a gate).
       - For VERR, the segment must be readable. For VERW, the segment
         must be a writable data segment.
       - If the code segment is readable and conforming, the
         descriptor privilege level (DPL) can be any value for VERR.
         Otherwise, the DPL must be greater than or equal to (have
         less or the same privilege as) both the current privilege
         level (CPL) and the selector's RPL.


    The validation performed is the same as if the segment were loaded
    into DS,ES,FS, or GS, and the indicated access (read or write)
    were performed. Because the zero flag receives the result of the
    validation, the selector's value cannot result in a protection
    exception. Thus, software can anticipate possible segment access
    problems.


    ^yOpcode      Format^y
    0F 00 /4    VERR r/m16
    0F 00 /5    VERW r/m16


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
  VERR r16        3                     14      10      11       7   NP
  VERR mem16   3+d(0,2)                 16      11      11       7   NP
  VERW r16        3                     14      15      11       7   NP
  VERW mem16   3+d(0,2)                 16      16      11       7   NP
}LAR:LSL:ARPL:Selectors:Descriptors
{VERW

    ^yVERW: See VERR^y
}VERR
{WAIT
 ^yWAIT            Wait                                 Flags: Not altered^y

 ^yWAIT^y

    WAIT suspends execution of CPU instructions until the BUSY# pin is
    inactive (high). The BUSY# pin is driven by the 80x87 numeric
    processor extension.

    Coding WAIT after an ESC instruction ensures that any unmasked
    floating-point exceptions the instruction may cause are handled
    before the processor has a chance to modify the instruction's
    results (80486).


    ^yOpcode      Format^y
    9B          WAIT


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1       4       6       3       6      1-3      1   NP
}HLT:ESC:LOCK
{WBINVD
 ^yWBINVD          Write-Back and Invalidate Cache      Flags: Not altered^y

 ^yWBINVD^y                                               CPU: 486+ ^vPriv^v

        ^yLogic^y WriteBack(InternalCaches)
                Flush(InternalCaches)
                SignalWriteBack(ExternalCaches)
                SignalFlush(ExternalCaches)

    Writes back all modified cache lines in the processor's internal
    cache to main memory, invalidates (flushes) the internal caches, and
    issues a special-function bus cycle that directs external caches to
    also write back modified data.

    After executing this instruction, the processor does not wait for
    the external caches to complete their write-back and flushing
    operations before proceeding with instruction execution. It is the
    responsibility of hardware to respond to the cache write-back and
    flush signals.

    The WBINVD instruction is a privileged instruction. When the
    processor is running in protected mode, the CPL of a program or
    procedure must be 0 to execute this instruction. This instruction is
    also a serializing instruction (see Serializing Instructions in
    Chapter 7, Multiple Processor Management, of the Pentium Pro Family
    Developer's Manual, Volume 3).

    In situations where cache coherency with main memory is not a
    concern, software can use the INVD instruction.


    Protected Mode Exceptions
    #GP(0) If the current privilege level is not 0.

    Real Address Mode Exceptions
    None.

    Virtual 8086 Mode Exceptions
    #GP(0) The WBINVD instruction cannot be executed in virtual 8086
    mode.


    ^yNote^y
    The WBINVD instruction is implementation-dependent; its function may
    be implemented differently on future Intel Architecture processors.


    ^yOpcode      Format^y
    0F 09       WBINVD


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       ??      ??
}INVD:INVLPG
{WRMSR
 ^yWRMSR           Write to Model Specific Register     Flags: Not altered^y

 ^yWRMSR^y                                                CPU: Pent+ ^vr^v

        ^yLogic^y   if (CPL = 0)
                   MSR[ECX]  EDX:EAX
                else
                   #GP(0)
                endif

    Writes the contents of registers EDX:EAX into the 64-bit model
    specific register (MSR) specified in the ECX register. The high-
    order 32 bits are copied from EDX and the low-order 32 bits are
    copied from EAX. Always set undefined or reserved bits in an MSR to
    the values previously read.

    This instruction must be executed at privilege level 0 or in
    real-address mode; otherwise, a general protection exception #GP(0)
    will be generated. Specifying a reserved or unimplemented MSR
    address in ECX will also cause a general protection exception.

    When the WRMSR instruction is used to write to an MTRR, the TLBs are
    invalidated, including the global entries (see Translation Lookaside
    Buffers (TLBs) in Chapter 3, Protected-Mode Memory Management, of
    the Pentium Pro Family Developer's Manual, Volume 3).

    The MSRs control functions for testability, execution tracing,
    performance-monitoring and machine check errors. Appendix D in the
    Pentium Pro Family Developer's Manual, Volume 3 lists all the MSRs
    that can be written to with this instruction and their addresses.

    The WRMSR instruction is a serializing instruction (see Serializing
    Instructions in Chapter 7, Multiple Processor Management, of the
    Pentium Pro Family Developer's Manual, Volume 3).


    ^yNote^y
    The CPUID instruction should be used to determine whether MSRs are
    supported (EDX[5]=1) before using this instruction.


    Protected Mode Exceptions
    #GP(0) If the current privilege level is not 0.
    If the value in ECX specifies a reserved or unimplemented MSR address.

    Real Address Mode Exceptions
    #GP If the current privilege level is not 0
    If the value in ECX specifies a reserved or unimplemented MSR address.

    Virtual 8086 Mode Exceptions
    #GP(0) The WRMSR instruction is not recognized in virtual 8086 mode.


    ^yOpcode      Format^y
    0F 30       WRMSR


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -           2       -       -       -       -       -       ??
}RDMSR:CPUID
{XADD
 ^yXADD            Exchange and Add                     Flags: O D I T S Z A P C^y
                                                             * - - - * * * * *
 ^yXADD^y destination,source                              CPU: 486+

        ^yLogic^y   XCHG destination,source
                ADD  destination,source

    XADD loads destination into source, then adds destination and the
    original source, returning the sum in destination.

        Example:        mov   dl, 8
                        mov   al, 3
                        xadd  dl, al
                        ; dl = 11, al = 8


    ^yOpcode      Format^y
    0F C0 /r    XADD r/m8,r8
    0F C1 /r    XADD r/m16,r16
    0F C1 /r    XADD r/m32,r32


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     3                                       3       3   NP
     mem, reg   3+d(0-2)                                 4       4   NP
}XCHG:ADD:Flags
{XCHG
 ^yXCHG            Exchange Registers                   Flags: Not altered^y

 ^yXCHG^y destination,source

        ^yLogic^y   destination <-> source

    XCHG switches the contents of two operands.

    If a memory operand is involved, BUS LOCK is asserted for the
    duration of the exchange, regardless of the presence or absence of
    the LOCK prefix or the value of the IOPL.


    ^yOpcode      Format^y
    86 /r       XCHG r/m8,r8
    86 /r       XCHG r8,r/m8
    87 /r       XCHG r/m16,r16
    87 /r       XCHG r16,r/m16
    87 /r       XCHG r/m32,r32
    87 /r       XCHG r32,r/m32
    90 + rw     XCHG AX,r16
    90 + rw     XCHG r16,AX
    90 + rd     XCHG EAX,r32
    90 + rd     XCHG r32,EAX


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       4       4       3       3       3       3   NP
    reg, mem   2+d(0-2)  25+EA  17       5       5       5       3   NP
    mem, reg   2+d(0-2)  25+EA  17       5       5       5       3   NP
    acc, reg     1       3       3       3       3       3       2   NP
    reg, acc     1       3       3       3       3       3       2   NP

    (acc = AX or EAX only)
}LOCK:MOV:PUSH:ROL:ROR:CMPXCHG:BSWAP
{XLAT
 ^yXLAT            Translate                            Flags: Not altered^y

 ^yXLAT^y translate_table
 ^yXLATB^y

        ^yLogic^y   AL  [eBX + AL]

    XLAT changes the AL register from the table index to the table
    entry. AL should be the unsigned index into a table addressed by
    DS:BX (for an address-size attribute of 16 bits) or DS:EBX (for an
    address-size attribute of 32 bits).

    The operand to XLAT, translate_table, allows for the possibility
    of a segment override. XLAT uses the contents of eBX even if they
    differ from the offset of the operand. The offset of the operand
    must be loaded into eBX before the instruction is executed.

    The no-operand form, XLATB, can be used if translate_table resides
    in the DS segment.


    ^yNote^y
    AL, as a byte-sized index, can address up to 256 elements of
    translate_table.


    Example:
        ; Translate decimal to hex digit
        dataseg
        hex_table db "0123456789ABCDEF"
        codeseg
        mov     bx,offset hex_table     ; Pointer to table into BX
        mov     al,11                   ; Value to be translated to AL
        xlat    [hex_table]             ; Translate the value in AL
        ; al = "B"


    ^yOpcode      Format^y
    D7          XLAT m8
    D7          XLATB


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    -            1      11      11       5       5       4       4   NP
}LEA
{XOR
 ^yXOR             XOR operation                        Flags: O D I T S Z A P C^y
                                                             0 - - - * * ? * 0
 ^yXOR^y destination,source

        ^yLogic^y   destination  destination XOR source

    XOR performs a bit-by-bit "exclusive or" on its two operands, and
    returns the result in the destination operand. XOR sets each bit
    of the result to one if only one of the corresponding bits is set
    to one.

                          ^yXOR truth table^y
                        a       b    a XOR b
                        0       0       0
                        0       1       1
                        1       0       1
                        1       1       0


        Example:        mov     al,110001b
                        xor     al,001101b
                        ;     al = 111100b

        Example:        xor     [mFlag],1       ; Flip 0->1, 1->0


    ^yOpcode      Format^y
    30 /r       XOR  r/m8,r8
    31 /r       XOR  r/m16,r16
    31 /r       XOR  r/m32,r32
    32 /r       XOR  r8,r/m8
    33 /r       XOR  r16,r/m16
    33 /r       XOR  r32,r/m32
    34 ib       XOR  AL,imm8
    35 iw       XOR  AX,imm16
    35 id       XOR  EAX,imm32
    80 /6 ib    XOR  r/m8,imm8
    81 /6 iw    XOR  r/m16,imm16
    81 /6 id    XOR  r/m32,imm32
    83 /6 ib    XOR  r/m16,imm8
    83 /6 ib    XOR  r/m32,imm8


    ^yLength and timing^y
    Operands    Bytes   8088    186     286     386     486     Pentium
    reg, reg     2       3       3       2       2       1       1   UV
    mem, reg  2+d(0,2)  24+EA   10       7       7       3       3   UV
    reg, mem  2+d(0,2)  13+EA   10       7       6       2       2   UV
    reg, imm  2+i(1,2)   4       4       3       2       1       1   UV
    mem, imm  2+d(0,2)  23+EA   16       7       7       3       3   UV*
               +i(1,2)
    acc, imm  1+i(1,2)   4       4       3       2       1       1   UV

        * = not pairable if there is a displacement and immediate
}AND:OR:NOT:NEG:TEST:Flags
{Abbreviations and legends:Abbreviations
 ^yAbbreviations and legends^y

        ^y286+^y            Instruction executes only on an 80286 CPU
                        or later. Requires a p286 or p286n directive
                        to assemble. Appears as 'CPU: 286+' in text
                        entry.

        ^yP^y or ^yp^y          Instruction is normally used only in
                        system software (protected mode).
        ^yP^y               Instruction is privileged in protected
                        mode, including V86 mode. Appears as '^vPriv^v'
                        in text entry.
        ^yr^y               Use of instruction is restricted.

        ^yIO^y              Instruction is IOPL-sensitive in protected
                        mode, including V86 mode (CLI and STI only).
        ^yIOpm^y            Instruction is IOPL-sensitive in protected
                        mode (except in V86 mode).
        ^yIOv86^y           Instruction is IOPL-sensitive in virtual
                        8086 mode (V86) only.

        eBX             BX or EBX, depending on operand-size attribute
                        or address-size attribute.


        acc             AL, AX or EAX unless specified otherwise
        reg             any general register
        r8              any 8-bit register
        r16             any general purpose 16-bit register
        r32             any general purpose 32-bit register
        r/m8            a one-byte operand that is either a byte
                        register or a byte from memory
        r/m16           a word register or memory operand used for
                        instructions whose operand-size attribute is 16 bits
        r/m32           a doubleword register or memory operand used for
                        instructions whose operand-size attribute is 32 bits
        imm             immediate data
        imm8            immediate signed 8-bit value
        imm16           immediate signed 16-bit value
        imm32           immediate signed 32-bit value
        mem             memory address
        mem8            address of 8-bit data item
        mem16           address of 16-bit data item
        mem32           address of 32-bit data item
        mem48           address of 48-bit data item
        rel8            a relative address (distance -128 to +127 bytes)


        ^yFlags^y           *  Changed to reflect the results of
                           instruction
                        0  Always cleared
                        1  Always set
                        -  Unchanged
                        ?  Undefined after operation (flag may or may
                           not have changed)

        BCD             Binary-Coded Decimal
        CPL             Current Privilege Level (protected mode)
        IOPL            Input/Output Privilege Level (protected mode)
        SMM             System Management Mode



        ^yInstruction timings^y
        n       generally refers to a number of repeated counts
        m       in a jump or call,
                286:     bytes in next instruction
                386/486: number of components in instruction
                         (each byte of opcode) + 1 (if immediate data)
                         + 1 (if displacement)

        EA      cycles to calculate the Effective Address
                8088/8086:
                  base = 5,  index = 5,  disp = 6
                  bp+di or bx+si = 7
                  bx+di or bp+si = 8
                  bp+di+disp or bx+si+disp = 11
                  bx+di+disp or bp+si+disp = 12
                  segment override = +2
                286 - 486:
                  base+index+disp = +1    all others, no penalty

        NP      not pairable                      } pairing
        UV      pairable in the U pipe or V pipe  } categories
        PU      pairable in the U pipe only       } for
        PV      pairable in the V pipe only       } Pentium



        ^yInstruction length^y
        The byte count includes the opcode length and length of any required
        displacement or immediate data. If the displacement is optional, it
        is shown as d() with the possible lengths in parentheses. If the
        immediate data is optional, it is shown as i() with the possible
        lengths in parentheses.

}
{Register availability

    The 80386+ registers are a superset of 8086/80186/80286 registers.
    All the previous generations' 16-bit registers are contained
    within the 32-bit architecture.

 ^yRegister availability^y
                          Use in         Use in        Use in
                         REAL mode   PROTECTED mode   V86 mode
                        Load  Store   Load  Store    Load  Store
                        
    General registers   yes    yes    yes    yes     yes    yes
    Segment registers   yes    yes    yes    yes     yes    yes
    Flags register      yes    yes    yes    yes     IOPL   IOPL
    GDT register        yes    yes    CPL=0  yes     no     yes
    LDT register        no     no     CPL=0  yes     no     yes
    IDT register        yes    yes    CPL=0  yes     no     yes
    Task register       no     no     CPL=0  yes     no     yes
    Control registers   yes    yes    CPL=0  CPL=0   no     no
    Debug registers     yes    yes    CPL=0  CPL=0   no     no
    Test registers      yes    yes    CPL=0  CPL=0   no     no

    CPL=0 : The register can be accessed only when the current
            privilege level (CPL) is zero.
    IOPL  : The PUSHF and POPF instructions are made I/O Privilege
            Level sensitive in virtual 8086 mode. CPL=3 in V86 mode.
    Note  : The low-order 16 bits of control register zero (CR0) can
            be read with an SMSW instruction.
            The CLI and STI instructions are IOPL-sensitive in
            protected mode, incl. V86 mode.
            Test registers 80386-80486 only.
}General:Segment:Control:Flags:System address:Privilege:V86
{General registers:General

 ^yGeneral registers^y

    There are 8 general registers (EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI)
    each 32 bits wide. The least significant 16 bits of the registers
    are separately accessible (as AX,CX,DX,BX,SP,BP,SI,DI). 8-bit
    operations can individually access the low byte (bits 0-7) and the
    high byte (bits 8-15) of the general registers AX,CX,DX,BX.


         3               2               1
         1               3               5               7             0
        Ŀ
                                              AH             AL       
    EAX                                               AX              
        Ĵ
                                              CH             CL       
    ECX                                               CX              
        Ĵ
                                              DH             DL       
    EDX                                               DX              
        Ĵ
                                              BH             BL       
    EBX                                               BX              
        Ĵ
                                                                      
    ESP                                               SP              
        Ĵ
                                                                      
    EBP                                               BP              
        Ĵ
                                                                      
    ESI                                               SI              
        Ĵ
                                                                      
    EDI                                               DI              
        
}
{Instruction pointer:Instruction

 ^yInstruction pointer^y

    The Extended Instruction Pointer (EIP) is a 32-bit register. EIP
    contains the offset address of the next sequential instruction to
    be executed. This offset is relative to the start (or base
    address) of the current code segment. The EIP is not directly
    visible to programmers but is controlled explicitly by control-
    transfer instructions, interrupts, and exceptions.

    The low-order 16 bits of EIP are named IP and can be used by the
    processor as a unit when executing 8086/80186/80286 code.
}
{Flags register:Flags

 ^yFlags register^y

    The EFLAGS register controls I/O, maskable interrupts, debugging,
    task switching, and enabling of virtual 8086 execution in a
    protected, multitasking environment; all in addition to providing
    status flags that represent the result of instruction execution.

    The low 16 bits (0-15) of EFLAGS contain the FLAGS register (8086-
    80286).


     3               2               1
     1               3               5               7             0
    Ŀ
                       IVVAVR NIO ODITSZ A P C
    r r r r r r r r r rDIICMFrT PLFFFFFFrFrFrF
    PF

    r = reserved by Intel


    Carry               CF is set if the operation resulted in a carry
                        out of the high-order bit (an addition), or a
                        borrow into the high-order bit (a subtraction).
                        Otherwise, CF is reset to zero. For 8-, 16-,
                        or 32-bit operations, CF is set according to
                        the carry/borrow at bit 7, 15, or 31.

    Parity              PF is set if the low-order eight bits of the
                        operation contain an even number of "1's"
                        (even parity). PF is reset to zero if the
                        low-order eight bits have odd parity. PF is
                        always a function of only the low-order eight
                        bits, regardless of operand size.
                        Often used by communications programs.

    Adjust              AF (sometimes called the auxiliary carry or the
                        BCD carry) simplifies the addition and
                        subtraction of packed BCD quantities. Regardless
                        of the operand size (8, 16, or 32 bits), AF is
                        set if the operation resulted in a borrow into
                        bit 3 (which is a subtraction) or a carry out of
                        bit 3 (which is an addition). BCD uses bits 0
                        thru 3 to represent decimal digits.

    Zero                ZF is set if all bits of the result are zero.
                        Otherwise, it is reset.


    Sign                SF is set if the high-order bit of the result
                        is set. It is reset otherwise. For 8-, 16-, and
                        32-bit operations, SF reflects the state of
                        bit 7, 15, and 31, respectively.


    Trap                Setting TF puts the processor into single-step
                        mode for debugging. The CPU automatically
                        generates an Exception 1 after each
                        instruction, which allows a program to be
                        inspected as it executes each instruction.
                        When TF is reset, exception 1 traps occur only
                        as a function of the breakpoint addresses
                        loaded into the debug registers DR0-DR3.


    Interrupt           Setting IF allows the CPU to recognize
                        external (maskable) interrupt requests.
                        Clearing this bit disables these interrupts.
                        IF has no effect on either non-maskable
                        external interrupts or exceptions.


    Direction           DF defines whether the (E)SI and/or (E)DI
                        registers are to increment or decrement during
                        string operations (LODS, STOS, MOVS, CMPS,
                        SCAS, INS, OUTS). If DF=0, the registers
                        increment; if DF=1, they decrement.


    Overflow            OF is set if the operation resulted in
                        carry/borrow into the sign bit (high-order
                        bit) of the result but did not result in a
                        carry/borrow out of the high-order bit, or
                        vice-versa.
                        Occurs when the size of a computation exceeds
                        the size of the destination.


    IOPL                (80286+)  This two-bit field applies to
                        protected mode. Input/Output Privilege Level
                        (IOPL) shows the highest current privilege
                        level (CPL) value permitted to execute I/O
                        instructions without generating an exception
                        13 fault or consulting the I/O Permission
                        Bitmap. It also shows the highest CPL value
                        that allows change of the IF (INTR Enable
                        Flag) when new values are ^ypopped^y into the
                        EFLAGS register (see POPF).


    Nested task         (80286+)  The CPU uses this flag to control
                        chaining of interrupted and CALLed tasks. A
                        CALL transfers the program execution sequence
                        on a temporary basis to a subroutine or
                        subprogram. On termination of that subroutine,
                        execution is resumed at the instruction
                        following the CALL. NT influences the
                        operation of the IRET instruction.


    Resume              (80386+)  This flag temporarily disables debug
                        exceptions (breaks to normal program flow) so
                        that an instruction can be restarted after a
                        debug exception without immediately causing
                        another one.


    Virtual 8086 mode   (80386+)  The VM bit provides Virtual 8086
                        Mode within the protected mode. If set while
                        the processor is in protected mode, the CPU
                        switches to Virtual 8086 operation. The VM bit
                        can be set ^yonly^y in two ways: in protected
                        mode by the IRET instruction and only if
                        Current Privilege Level is zero, and by task
                        switches at any privilege level.


    Alignment check     (80486+)  Setting AC lets the 80486+ check the
                        alignment of operands when you make references
                        to memory. The processor issues an alignment-
                        check exception when a reference is made to an
                        unaligned operand, such as a word that starts
                        on an odd-numbered address or a doubleword
                        that starts at an address that is not a
                        multiple of four. Alignment-check exceptions
                        are generated only in user mode (privilege
                        level 3).


    ID capability       (Pentium+, later 80486s)  Indicates whether the
                        processor is capable of executing the CPUID
                        instruction.


    VIF                 (Pentium+) Virtual Interrupt Flag, bit 19.
                        Virtual image of the IF flag. Used in
                        conjunction with the VIP flag. (To use VIF and
                        VIP, the CR4.VME bit must be set.)

    VIP                 (Pentium+) Virtual Interrupt Pending, bit 20.
                        Set to indicate that an interrupt is pending;
                        clear when no interrupts are pending. (Software
                        sets and clears this flag. The processor only
                        reads it.) Used in conjunction with the VIF
                        flag.


    Note: The flags register is referred to as 'Status word' on the
          80186 processor.
}
{Segment registers:Segment

 ^ySegment registers^y

    Six 16-bit segment registers hold the segment selector values that
    identify the currently addressable memory segments:

    CS register         Code segment
    SS register         Stack segment
    DS,ES,FS,GS         Data segments (FS,GS available on 80386+)


    Note that a programmer-invisible segment descriptor register is
    associated with each system segment register.
}
{System address registers:System address

 ^ySystem address registers^y (80286+)
  (a.k.a. Segmented memory management registers)

    Four registers locate the data structures that control segmented
    memory management. These registers are defined to reference the
    tables or segments supported by the 80286/80386/80486/Pentium
    ^yprotection^y model.
    The addresses of these tables and segments are stored in special
    System Address and System Segment Registers.

    ^yGDTR - Global Descriptor Table Register^y
           Holds the 32-bit linear base address and the 16-bit limit
           of the Global Descriptor Table.

    ^yLDTR - Local Descriptor Table Register^y
           Holds the 16-bit selector for the Local Descriptor Table.
           Because the LDT is a task-specific segment, it is defined
           by selector values stored in the system segment registers.
           There is a programmer-invisible segment descriptor register
           associated with the LDT.

    ^yIDTR - Interrupt Descriptor Table Register^y
           This register points to a table of entry points for
           interrupt handlers (the IDT). The register holds the 32-bit
           linear base address and the 16-bit limit of the Interrupt
           Descriptor Table.

    ^yTR   - Task Register^y
           This register points to the information needed by the
           processor to define the current task. The register holds
           the 16-bit selector for the Task State Segment descriptor.
           Because the TSS segment is task-specific, it is defined by
           selector values stored in the system segment registers.
}
{Control registers:Control

 ^yControl registers^y


     3               2         1   1         1
     1               3         8   6         1           5 4 3 2 1 0
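    ┌───────────────────────────────────────┬─────────────┬─┬─┬─────┐
    │ Page directory base register (PDBR)   │ Reserved    │P│P│0 0 0│ CR3
    │                                       │             │C│W│     │
    ├───────────────────────────────────────┴─────────────┴─┴─┴─────┤
    │                  Page fault linear address                    │ CR2
    │                                                               │
    ├───────────────────────────────────────────────────────────────┤
    │                          Reserved                             │ CR1
    │                                                               │
    ├─┬─┬─┬───────────────────┬─┬─┬─┬───────────────────┬─┬─┬─┬─┬─┬─┤
    │P│C│N│ Reserved          │A│ │W│ Reserved          │N│E│T│E│M│P│ CR0
    │G│D│W│                   │M│ │P│                   │E│T│S│M│P│E│
    └─┴─┴─┴───────────────────┴─┴─┴─┴───────────────────┴─┴─┴─┴─┴─┴─┘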
     7 6 5 4 3 2 1 0                 7 6 5 4 3 2 1 0
                     7 6 5 4 3 2 1 0                 7 6 5 4 3 2 1 0

    ^yCR0 - Control Register 0^y

    The low 16 bits of CR0 contain the MSW register (80286 machine
    status word).

    ^yPG^y bit 31           PG (paging) indicates whether the 80486 uses
                        page tables to translate linear addresses into
                        physical addresses.         (80386+)
    ^yCD^y bit 30           CD (1 = cache disabled)     (80486+)
    ^yNW^y bit 29           NW (0 = not write-through)  (80486+)
    ^yAM^y bit 18           AM (alignment mask) enables (when set) or
                        disables (when clear) checking the alignment
                        of memory operands.         (80486+)
    ^yWP^y bit 16           WP (write-protect)          (80486+)
                        (1 if read-only pages protected)
    ^yNE^y bit  5           NE (numeric error)          (80486+)
                        (1 if errors should be ignored)
    ^yET^y bit  4           ET (extension type)         (80386+)
                        (1= 80387,0=80287 type)
    ^yTS^y bit  3           TS (1 = task switched)      (80286+)
    ^yEM^y bit  2           EM (emulation)              (80286+)
                        (1= execute exception 7 on FPU codes)
    ^yMP^y bit  1           MP (math present)           (80286+)
                        (1 if FPU handles FPU codes)
    ^yPE^y bit  0           Setting PE (protection enable) causes the
                        processor to begin executing in protected
                        mode. Resetting PE returns to real-address
                        mode.                       (80286+)


    ^yCR1 - Control Register 1^y

    Reserved for future Intel processors.


    ^yCR2 - Control Register 2^y

    Used for handling exceptions generated during paging, or page
    faults, when the PG flag in CR0 is set.


    ^yCR3 - Control Register 3^y

    Used when the PG flag in CR0 is set. CR3 contains the physical
    base address of the page directory table for the current task. The
    page directory must be aligned to a page (4 KB-aligned), so the
    low 12 bits of CR3 are ignored.

    ^yPCD^y bit 4           PCD (page-level cache disable)  (80486+)
    ^yPWT^y bit 3           PWT (page-level write through)  (80486+)


    ^yCR4 - Control Register 4^y  (Pentium and PPro)

    Extended Machine Control
    Bit
     0  VME  Virtual86 mode Virtual Interrupts (1=use VIF flag in VM)
     1  PVI  Protected mode Virtual Interrupts (1=use VIF flag in PM)
     2  TSD  Time Stamp instruction Disable (1=RDTSC only with CPL=0)
     3  DE   Debugging Extension (1=breakpoints also valid for I/O)
     4  PSE  Page Size Extension (1=4 MB paging instead of 4 KB)
     5  PAE  Physical Address Extension                 (PPro)
     6  MCE  Machine Check Enable (1=enabled)
     7  PGE  Page Global Enable                         (PPro)
     8  PCE  Performance counters enable                (PPro)
    9-31 Reserved
}
{Debug registers

 ^yDebug registers^y

    DR0,DR1,DR2,DR3     Specify the four linear breakpoints.
    DR4,DR5             Reserved by Intel
    DR6                 Displays the current state of the breakpoints.
    DR7                 Used to set the breakpoints.
}
{Test registers

 ^yTest registers^y (80386,80486)

    TR3                 Cache test data register    (*)
    TR4                 Cache test status register  (*)
    TR5                 Cache test control register (*)
    TR6                 Command register for TLB accesses
    TR7                 Data register for TLB accesses

    (*)   80486-specific implementation
    TLB = Translation Lookaside Buffer
}
{Privilege and protection:Privilege

 ^yPrivilege and protection^y (protected mode operation)

    Privilege and protection are a means of controlling access to
    code and to data. The security of the system is maintained, as is
    the integrity of the information. Privilege and protection became
    a necessity of programming life when the concept of multiple users
    or multiple uses became a reality.


    ^yPrivilege^y
    The concept of privilege is central to several facets of
    protection. Applied to procedures, privilege is the degree to
    which the procedure ^vcan be trusted^v not to make a mistake that
    might affect other procedures or data. Applied to data, privilege
    is the ^vdegree of protection^v that a data structure should have from
    less trusted procedures.


    ^yPrivilege levels and rules^y
    The 80286+ uses 4 levels of protection to optimize support of
    multitasking. Privilege is implemented by assigning a value from 0
    to 3 to key objects which are recognized by the processor. This
    value is called the ^vprivilege level^v (level 0 is the most
    privileged, level 3 is the least privileged). The key items that
    the processor recognizes are as follows:

        ^yDPL     Descriptor Privilege Level^y
                Descriptors contain a field called the DPL. This is
                the least privilege that a task must have to access
                the descriptor.

        ^yRPL     Requester Privilege Level^y
                The RPL represents the privilege level requested by
                the procedure that originates a selector.

        ^yCPL     Current Privilege Level^y
                Generally, the CPL is equal to the segment DPL of the
                code segment that the processor is currently executing.
                CPL changes when control transfers to segments with
                differing DPLs.


    ^yPrivilege   Typical use^y
      ^ylevel^y
        0       Kernel, operating system             (most privileged)
        1       System services
        2       Custom extensions (OEM software)
        3       Applications                         (least privileged)

    The 80286+ automatically verifies a procedure's right to access
    another segment by comparing the procedure's CPL to one or more
    other privilege levels. This verification occurs at the time a
    descriptor selector is loaded into a segment register.
}Mvltitasking:Descriptors:Selectors
{Privileged instrvctions

 ^yPrivileged instructions^y

    Privileged instructions that affect system data structures can
    only be executed when the CPL is zero. If the processor finds one
    of these instructions when CPL > 0, it signals a general
    protection exception:

        CLTS              Clear Task-Switched Flag
        HLT               Halt the Processor
        LGDT              Load Global Descriptor Table Reg.
        LIDT              Load Interrupt Descriptor Table Reg.
        LLDT              Load Local Descriptor Table Reg.
        LMSW              Load Machine Status Word
        LTR               Load Task Register
        MOV to/from CRn   Move to/from Control Register
        MOV to/from DRn   Move to/from Debug Register
        MOV to/from TRn   Move to/from Test Register
        and
        special instructions related to system management (INVD,
        INVLPG, WBINVD, WRMSR a.o.)
}Privilege:System address:Control registers:Exceptions
{IOPL-sensitive instrvctions:IOPL

 ^yIOPL-sensitive instructions^y

    Instructions that deal with I/O not only need to be restricted;
    they need to be executed by procedures that execute at privilege
    levels other than zero. To allow this, the 80286+ uses two bits of
    the flags register to store the IOPL. The IOPL defines the
    privilege level needed to execute I/O related instructions. The
    following instructions can be executed only if CPL is less than
    or equal to IOPL. They are called IOPL-sensitive because they are
    sensitive to the value stored in IOPL.

                                        Protected  Protected
                                           mode     V86 mode
        IN      Input from Port              +
        INS     Input String from Port       +
        OUT     Output to Port               +
        OUTS    Output String to Port        +
        CLI     Clear Interrupt Flag         +         +
        STI     Set Interrupt Flag           +         +
        PUSHF   Push Flags                             +
        POPF    Pop Flags                              +
        IRET    Interrupt Return                       +
        INT     Interrupt                              +
        INTO    Interrupt on Overflow                  +
        LOCK    Assert Bus Lock                        +
        ^yNote^y  + = IOPL-sensitive, CPL=3 in V86 mode


    To use sensitive instructions, a procedure must execute at a
    privilege level at least as privileged as that stored in IOPL. Any
    attempt by a less privileged procedure to use one of the
    instructions listed produces a general protection ^yexception^y.
    This allows a V86 monitor to trap - and possibly modify the
    results of - IOPL-sensitive instructions in a program executing in
    V86 mode.


    Each ^vtask^v in the system has its own unique copy of the flags
    register. Therefore each task can have a different IOPL. A task
    can change the IOPL ^yonly^y with a POPF instruction. Such changes
    are privileged. No procedure may alter IOPL in the flags register
    unless the procedure is executing at privilege level 0 (CPL=0).
    Any attempt by a less privileged procedure to alter IOPL does not
    result in an exception, but IOPL remains unchanged.
}Privilege:Flags:Exceptions:POPF
{Selectors

    Selectors and descriptors are those items that provide the
    hardware with the software's expected image of what is located
    where.


 ^ySelectors^y

    A selector is a 16-bit pointer that, when loaded into a register
    or used with certain instructions, selects certain descriptors. In
    a logical address, the selector portion identifies an individual
    descriptor by first specifying the descriptor table and then
    indexing to the descriptor within that table.


    General selector format:
     15                                   3  2  1  0
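    ┌──────────────────────────────────────┬──┬─────┐
    │                 Index                │TI│ RPL │
    └──────────────────────────────────────┴──┴─────┘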

    ^yIndex^y       Selects one of up to 8192 descriptors in a descriptor
                table. The 80386+ multiplies this index value by eight
                (the length of a descriptor) and then adds the result
                to the base address of the descriptor table. This
                accesses the correct entry in the table.

    ^yTI^y          The Table Indicator bit specifies the descriptor
                table to which the selector refers: a zero points to
                the GDT (Global Descriptor Table), and a one indicates
                the current LDT (Local Descriptor Table).

    ^yRPL^y         Requested Privilege Level - used by the system
                protection mechanism.
}Descriptors
{Descriptors

 ^ySegment descriptors^y

    Descriptors are those objects to which the segment selectors
    point. They are 8-byte quantities that contain attributes about a
    given linear address space (that is, about a segment). These
    attributes include the segment 32-bit base linear address, the
    segment's 20-bit length and granularity, the protection level,
    read, write or execute privileges, the default size of the
    operands (16- or 32-bit), and the type of segment.

    All descriptor attribute information is contained in 12 bits of
    the segment descriptor. Segments on the 80386+ have three attribute
    fields in common: the P (Present) bit, the DPL (Descriptor
    Privilege Level) bits, and the S (Segment Descriptor) bit.

    Segment descriptors are stored in either a Global Descriptor Table
    (GDT) or Local Descriptor Table (LDT). The 80386+ locates the GDT
    and the current LDT in memory by means of the GDTR and LDTR
    registers.

    A segment descriptor provides the 80386+ with the data it needs to
    map a logical address into a linear address. These descriptors are
    ^vnot^v created by programs, but by compilers, linkers, loaders, or
    the operating system.

    

    General segment descriptor format:

     6               5       5       4       4     4 3             3
     3               5       1       7       3     0 9             2
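    ┌───────────────┬─┬─┬─┬─┬───────┬─┬───┬─┬───────┬───────────────┐
    │ Base 31..24   │G│D│0│A│Seg lim│P│DPL│S│ Type  │ Base 23..16   │
    │               │ │ │ │V│ 19..16│ │   │ │      A│               │
    ├───────────────┴─┴─┴─┴─┴───────┼─┴───┴─┴───────┴───────────────┤
    │ Base Address 15..0            │ Segment Limit 15..0           │
    └───────────────────────────────┴───────────────────────────────┘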
     3               2               1
     1               3               5               7             0

    Note: In descriptors used for application code and data segments,
            bit 40 = Accessed field
          Bits 40-47 = Access Right Byte

    ^yBASE^y        Defines the location of the segment within the
                4 gigabyte linear address space. The 3 fragments of
                the base address are concatenated to form a single
                32-bit value.

    ^yLIMIT^y       Defines the size of the segment. The 2 parts of
                the field are linked to form a 20-bit result.

    ^yG^yranularity When 0, LIMIT is interpreted as units of 1 byte.
                When 1, LIMIT is interpreted as units of 4 KB.

    ^yD^yefault operation size
                When D=0, operands and effective addresses default to
                16 bits in length; when D=1, 32 bits.

    ^yAV^yailable for programmer use

    segment ^yP^yresent
                If this bit holds a 0 value, the descriptor is not
                valid for use in address translation. The 80386+
                signals an exception when a selector for the
                descriptor is loaded into a segment register.


    ^yDPL^y - Descriptor Privilege Level defines the privilege level
                of the segment. It controls access to the segment
                using the protection mechanism.

    ^yS^yegment     0 = system segment, 1 = code or data segment.

    ^yType^y
                System segments:
                    1 = available 286 TSS
                    2 = local descriptor table (LDT)
                    3 = busy 286 TSS
                    4 = 286 call gate
                    5 = 286/386 task gate
                    6 = 286 interrupt gate
                    7 = 286 trap gate
                    9 = available 80386/486 TSS
                    B = busy      80386/486 TSS
                    C = 80386/486 call gate
                    E = 80386/486 interrupt gate
                    F = 80386/486 trap gate
                    0,8,A,D = reserved

                Application code and data segments:
                    Specifies the kind of access that can be made to
                    the segment.

                               Descriptor
                   ^y T E W A ^y      type    Description
                    0 0 0 0   0   Data    Read-only
                    0 0 0 1   1   Data    -, accessed
                    0 0 1 0   2   Data    Read/Write
                    0 0 1 1   3   Data    -, accessed
                    0 1 0 0   4   Data    Read-only, expand-down
                    0 1 0 1   5   Data    -, accessed
                    0 1 1 0   6   Data    Read/Write, expand-down
                    0 1 1 1   7   Data    -, accessed
                   ^y T C R A ^y
                    1 0 0 0   8   Code    Execute-only
                    1 0 0 1   9   Code    -, accessed
                    1 0 1 0   A   Code    Execute/Read
                    1 0 1 1   B   Code    -, accessed
                    1 1 0 0   C   Code    Execute-only, conforming
                    1 1 0 1   D   Code    -, accessed
                    1 1 1 0   E   Code    Execute/Read-only, conforming
                    1 1 1 1   F   Code    -, accessed


    

    ^yNote^y
    For compatibility, the 80386+ supports all the 80286 segment
    descriptors. The only difference between the two formats is that
    the values of the type fields and the limit and base address
    fields have been expanded for the 80386+.

    The 80286 system segment descriptors contain a 24-bit address and
    a 16-bit limit. The 80386+ system descriptors, on the other
    hand, have a 32-bit base address, a 20-bit limit, and a
    granularity bit. To tell the difference, if the upper word of the
    descriptor is zero, then it is an 80286-type descriptor.

    

    ^yDescriptor tables^y

    Descriptor tables define all the segments used in the protected
    mode system. The 3 types of tables are:

        - Global Descriptor Table     (GDT)
        - Local Descriptor Table      (LDT)
        - Interrupt Descriptor Table  (IDT)

    Descriptor tables are variable-length memory arrays, with 8-byte
    entries that contain descriptors. In the 80386+, they range in
    size from 8 bytes to 64K, and each table holds up to 8192 8-byte
    descriptors. The upper 13 bits of a selector are used as an index
    into the descriptor table.


    ^yGDT^y
    Every protected mode 80386 system contains a Global Descriptor
    Table. The GDT holds descriptors that are available to all the
    tasks in a system. Except for descriptors that control interrupts
    or exceptions, the GDT can contain any other kind of segment
    descriptor.
    Generally, the GDT contains 3 types of descriptors: code and data
    segments used by the operating system, descriptors for the Local
    Descriptor Tables in a system, and task state segments (TSS). The
    first slot of the GDT is not used; it corresponds to the null
    selector which defines a null pointer value.


    ^yLDT^y
    Operating systems are generally designed so that each task has a
    separate Local Descriptor Table. LDTs provide a way for isolating
    a given task's code and data segments from the rest of the
    operating system. The ^yG^yDT contains descriptors for segments that
    are common to all tasks. The LDT is associated with a given task
    and may contain only code, data, stack, task gate, and call gate
    descriptors.
    A segment cannot be accessed by a task if its segment descriptor
    does not exist either in the current LDT or the GDT. This both
    isolates and protects that task's segments, while still allowing
    global ^ydata^y to be shared among tasks.
    Unlike the 48-bit GDTR and IDTR, the LDTR contains only a 16-bit
    selector which refers to an LDT descriptor in the GDT.


    ^yIDT^y
    The Interrupt Descriptor Table contains the descriptors that point
    to the location of up to 256 interrupt service routines. The IDT
    can only contain trap gates, task gates, and interrupt gates. The
    IDT should be at least 256 bytes in size so it can hold
    descriptors for the 32 Intel-reserved interrupts. Every interrupt
    used by the system ^ymust^y have an entry in the IDT.
}Selectors:TSS:System address:LSL:SGDT:LAR
{V86 - virtval 8086 environment:V86

 ^yV86 - virtual 8086 environment^y

    On the 80386+, 8086 applications and operating systems can run in
    protected mode as part of a virtual task (V86) that takes advantage
    of the hardware support of multitasking offered by protected mode.

    The V86 task forms a "virtual machine" that consists of the 80386
    hardware and systems software. The software controls the V86
    external interfacing, the interrupts and I/O. The hardware provides
    the TSS (task state segment) containing the virtual registers (a
    virtual memory space that is the task's first megabyte of the
    linear address space) and executes the instructions that deal with
    the registers and address space.

    
    ^yVirtual 8086 mode addressing^y
    The 80386 executes V86 mode when the VM (virtual mode) flag is
    set in the EFLAGS register. The processor tests the VM flag when
    decoding instructions to determine which instructions are
    sensitive to IOPL, and when loading segment registers to determine
    if it is to use 8086-style address formation.

    All 80386 registers are accessible to V86, including the segment
    registers FS and GS, but subject to protected mode limitations.
    V86 code also may use non-privileged 80386 instructions, including
    LSS, LFS, LGS, bit scan, double-shift, byte set on condition,
    long-displacement conditional jumps, move with sign/zero
    extension, and a generalized multiply.


    
    ^yAddress formation^y
    When in V86 mode, the 80386 does not interpret 8086 selectors by
    referring to descriptors; it forms linear addresses as if it were
    an 8086. The selector is shifted left by 4 bits to form the 20-bit
    base address. The effective address is extended with 4 high-order
    zeros and added to the base address. Range is 0-10FFEFh.

    V86 tasks actually generate 32-bit linear addresses. However, if
    the value of the 32-bit address exceeds 65535 (0FFFFh), an
    exception is generated (12: stack exception, or 13: general
    protection exception).


    
    ^yVirtual 86 task^y
    A virtual 8086 task must be represented by an 80386 task state
    segment (TSS), which the 80386 uses to execute the 8086 program
    before it returns to protected mode to execute the 80386 tasks.
    The V86 task consists of two parts:
        - the 8086 program to be executed, and
        - 80386 code that serves as the virtual machine monitor

    The V86 monitor is actually 80386 protected mode code that
    executes at privilege level zero and consists mainly of
    initialization and exception handling procedures. The monitor, as
    with any other 80386 program, uses executable segment descriptors
    that must exist in the GDT or in the task's LDT. The monitor may
    also need data segment descriptors to allow it to examine the
    interrupt vector table or other parts of the 8086 program that are
    in the first megabyte of address space.

    Operating system services can be left as part of the 8086 code or
    emulated in the V86 monitor. Regardless of how V86 is implemented,
    note that different V86 tasks can use different 8086 operating
    systems, which opens choices available to systems programmers.

    Paging is not required for a single V86 task but is useful in
    order to do the following:

        - Redirect or trap references to memory-mapped I/O devices.
        - Share 8086 operating system code or ROM code common to
          several 8086 programs that may be executing simultaneously.
        - Create a virtual address space larger than the physical
          address space.
        - Create multiple V86 tasks, in which each task must map the
          lower megabyte of linear addresses to different physical
          locations.
        - Emulate the megabyte wrap of the 8086.


    The 80386 does not refer to descriptors while executing 8086
    programs, so it does not use the protection mechanisms offered by
    descriptors. A couple of approaches can be used to protect the
    systems software in a V86 task from the 8086 program. The software
    designer may choose to use the U/S bit of the page table entries
    to protect the virtual machine monitor and other systems software
    that is in each V86 task's task space. When the 80386 is in V86
    mode, the current privilege level is 3 (CPL=3), which means that
    the 8086 program has only user privileges. If the virtual machine
    monitor's pages have supervisor privileges, they cannot be
    accessed by the 8086 program.
    Another way to protect the V86 system from a V86 application is to
    reserve the first megabyte and 64K of each task's linear address
    space for the 8086 program. The 8086 tasks cannot generate
    addresses outside that range.


    
    ^ySensitive instructions in V86 mode^y
    Refer to IOPL-sensitive instructions.


    
    ^yVirtual mode input/output^y
    Some 8086 programs are designed to operate on a single-task system
    and use I/O devices directly. These programs are disruptive when
    running in a multitasking environment. Instead of direct control,
    system designers may take other approaches. The method chosen to
    control the I/O depends on whether the I/O ports are memory-mapped
    or I/O-mapped. Some options for control are to

        - selectively trap and then emulate references that a task
          makes to specific I/O ports
        - trap or redirect references to memory-mapped I/O addresses
        - emulate the 8086 operating system as an 80386 program and
          require it to do I/O via software interrupts to the
          operating system (trap all attempts to do I/O directly)

    I/O-mapped input/output in the V86 differs from protected mode in
    one way. The protection mechanism does not consult IOPL when it
    executes the IN,INS,OUT, and OUTS instructions; only the I/O
    Permission Bit Map controls whether the V86 tasks execute these
    I/O instructions. The I/O Permission Bit Map traps I/O
    instructions selectively, depending on the I/O address to which
    they refer. Because each task has its own I/O Permission Bit Map,
    the addresses trapped for one task may be different from those
    trapped for others.

    Each task that executes memory-mapped I/O must have a page (or
    several pages) for the memory-mapped address space. The V86
    monitor can control the memory-mapped I/O by

        - causing a monitor trap, which forces a page fault on the
          memory-mapped page (read-only pages will trap writes and
          not-present pages will trap both reads and writes), or
        - assigning the memory-mapped page to appropriate physical
          addresses

    Intervening for each I/O may be excessive for some kinds of I/O
    devices. In this case, a page fault can intervene on the first I/O
    operation. Then the monitor can make sure that the task has
    exclusive access to the device and can change the page status to
    present and read/write, which allows subsequent I/O to proceed at
    full speed.
}
{Mvltitasking

 ^yMultitasking^y

    Multitasking is a technique that manages a computer system's work
    when that work consists of multiple activities such as editing a
    file, compiling a program, or performing inter-system transfers.
    Individual tasks execute as if they run on dedicated processors
    and share a common memory. It appears that, except for pauses to
    communicate or synchronize with other tasks, each task runs in
    parallel with all other tasks. The 80386 (80286+) contains
    hardware to support multitasking.

    The 80386 uses no special instructions to control multitasking.
    Instead, it interprets ordinary control-transfer instructions
    differently when they refer to the special data structures. The
    registers and data structures that support multitasking are:

        - Task State Segment (TSS)
        - TSS descriptor
        - Task Register (TR)
        - Task Gate descriptor

    

    In addition to the simple task switch, the 80386 offers two other
    task-management features:

      - With each task switch, the processor can also switch to
        another LDT and to another page directory. Thus each task can
        have a different logical-to-linear mapping and a different
        linear-to-physical mapping. Using this feature, tasks can be
        isolated and prevented from interfering with one another.
      - Interrupts and exceptions can cause task switches if needed in
        the system design. The 80386 not only switches to the task
        that handles the interrupt or exception, but it automatically
        switches back to the interrupted task when the interrupt or
        exception has been serviced.

    In reality, the multitasking simulates multiple processors by
    providing each task with a virtual processor. That is, at any one
    instant, the operating system assigns the real processor to any
    one of the virtual processors, which then runs that virtual
    processor's task. To do this, the 80386 uses Task State Segments
    (TSS) and instructions that switch tasks.
}Privilege and protection:Descriptors:TSS:Task switch
{TSS - Task State Segment:TSS

 ^yTSS - Task State Segment^y

    A TSS is a data structure that holds the state of a task's virtual
    processor. The TSS is divided into two parts.

    

    The first class of information is the dynamic set that the
    processor updates with each switch from the task. This set
    includes the following:

      - The selector of the TSS of the previously executing task (this
        is updated only when a return is expected)
      - Instruction pointer     EIP
      - Flags register          EFLAGS
      - General registers       EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI
      - Segment registers       ES,CS,SS,DS,FS,GS

    

    The second class of information in the TSS is a static set that
    the processor reads but does not change. This set includes fields
    that store the following:

      - The stack definitions for level 0, 1, or 2 interrupt handlers
        that are to execute in the task's environment.
      - The page directory base address of the task (CR3/PDBR)
      - The selector of the task's LDT
      - The debug trap bit, T-bit, which causes the 80386 to raise a
        debug exception when a task switch occurs
      - The I/O map base

    

    A TSS may reside anywhere in the linear address space. The single
    caution is when the TSS spans a page boundary and the higher-
    addressed page is not present (exception generated).

    When creating a new task, the operating system creates the TSS and
    initializes it to the values that the task should have when it
    begins execution. The information is updated when any of the
    values change.

                ^yTask State Segment^y
                ^yOffset     [ bits 0..15 ][ bits 16..31 ]^y
                00h         -> Prev. TSS       rr
                04h                    ESP0
                08h              SS0           rr
                0Ch                    ESP1
                10h              SS1           rr
                14h                    ESP2
                18h              SS2           rr
                1Ch                 CR3 (PDBR)
                20h                    EIP
                24h                   EFLAGS
                28h              EAX,ECX,EDX,EBX
                38h              ESP,EBP,ESI,EDI
                48h              ES            rr
                4Ch              CS            rr
                50h              SS            rr
                54h              DS            rr
                58h              FS            rr
                5Ch              GS            rr
                60h              LDT           rr
                64h          T-bit, rr    I/O Bitmap Base
                   ^yNote^y: rr = Reserved, must be zero

    

    ^yTSS descriptor^y
    Like all segments, the TSS is defined by a descriptor. This
    descriptor resides only in the Global Descriptor Table (GDT). An
    attempt to identify a TSS with a selector that has TI=1 (Table
    Indicator flag to indicate the current LDT) generates an exception.
    Also, even if it has access to a TSS descriptor, a procedure does
    not have the right to read or modify the TSS. Reading and changing
    can be done only with another descriptor that redefines the TSS as
    a data segment. An attempt to load a TSS descriptor into any of
    the segment registers causes an exception.

                                             TSS descriptor (80386+)

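     6               5       5       4       4     4 3             3
     3               5       1       7       3     0 9             2
    +---------------+-+-+-+-+-------+-+---+---------+---------------+
    | Base 31..24   |G|0|0|A|Seg lim|P|DPL|  Type   | Base 23..16   |
    |               | | | |V| 19..16| |   |0 1 0 B 1|               |
    +---------------+-+-+-+-+-------+-+---+---------+---------------+
    | Base Address 15..0            | Segment Limit 15..0           |
    +-------------------------------+-------------------------------+
     3               2               1
     1               3               5               7             0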


    Tasks are ^vnot^v re-entrant because both the LDT selector and CR3 for
    the task are stored in the TSS. The B-bit (busy) of the TYPE field
    allows the processor to detect an attempt to switch to a task that
    is already busy. A TYPE code of 09h shows the task is not busy. A
    TYPE code of 0Bh indicates it is busy.

    The BASE, LIMIT, and DPL fields and the G-bit (granularity) and
    P-bit (present) have functions similar to their counterparts in
    data-segment descriptors. The BASE defines the location of the
    segment within the linear address space. The LIMIT defines the
    size of the segment. With the TSS descriptor, the LIMIT must
    contain a value of 103 or higher because the TSS itself requires
    104 bytes. An attempt to switch to a task whose LIMIT is less
    than 103 causes an exception.
}Mvltitasking:Selectors:Descriptors:Task switch
{Task switch

 ^yTask switching^y

    The 80386 schedules and executes tasks based on a priority set by
    the operating system. To do this, the 80386 uses a Task Register
    (TR) in which it keeps a selector and a descriptor for the running
    task's task state segment (TSS). The TR has both a visible and an
    invisible portion. The visible and changeable portion can be read
    and modified by instructions. The invisible portion is maintained
    by the processor to correspond to the changeable portion and
    cannot be read by any instruction.

    Two instructions (STR - Store Task Register, and LTR - Load Task
    Register) read and modify the changeable portion of the TR. Both
    instructions take one operand which is a 16-bit selector.

    The privileged instruction LTR loads the TR with the selector
    operand that must select a TSS descriptor in the GDT (global
    descriptor table). Generally, LTR gives an initial value to the
    TR during system initialization. After that, the contents of TR
    are changed by task switch operations.

    

    A ^vtask gate descriptor^v gives an indirect, protected reference to
    a TSS. The 80386 uses task gates, in addition to TSS descriptors,
    to satisfy 3 needs:

        - Because the busy-bit is stored in the TSS descriptor, each
          task should have only one such descriptor. However, there
          may be several task gates that select the single TSS
          descriptor.
        - With task gates, systems software can limit the right to
          cause task switches to specific tasks.
        - Task gates may also reside in the IDT, so it is possible for
          interrupts and exceptions to cause task switching.


    

    To switch tasks, the operating system issues a JMP or CALL
    instruction whose operand is a selector for the TSS or the task
    gate of the new task.

    1. The 80386 first checks that the current task is allowed to
       switch to the designated task. Data access privilege rules
       apply in the cases of JMP or CALL instructions. The DPL of the
       TSS descriptor or task gate must be numerically greater than
       or equal to the maximum of CPL and the RPL of the gate
       selector (i.e. the target may not be more privileged).

    2. Next, the TSS descriptor is checked to see if it is marked
       present and has a valid limit. A detected error up to this
       point occurs in the context of the ^youtgoing^y task. Errors are
       restartable and can be handled in a way that is transparent
       to applications.

    3. The processor next executes the JMP TSS instruction by first
       storing its current registers in the current TSS. The EIP
       value saved is the address of the instruction after the one
       that caused the task switch.

    4. The processor then loads the TR with the selector specified in
       the JMP instruction. It marks the incoming task's TSS
       descriptor as busy and sets the TS bit (task switched) of the
       MSW register. Because it now has the new TSS, the 80386 loads
       its registers with the values in this new TSS. Execution
       continues at the instruction pointed to by the new task's
       instruction pointer. Any errors detected in this step occur in
       the context of the incoming task.

    

    To an exception handler, it appears as if the first instruction of
    the new task has not yet executed. Exception handlers that field
    task-switch exceptions in the incoming task should be cautious
    about taking action that might load the selector causing the
    exception. Unless the handler first examines the selector and
    fixes any potential problems, such an action may well cause
    another exception.

    Every task switch sets the TS bit in the MSW (low 16 bits of CR0).
    The TS flag is helpful when using a coprocessor such as the
    numeric coprocessor. The TS bit signals that the context of the
    coprocessor ^ymay not^y correspond to the current 80386 task.

    To resume execution of the old task, the operating system issues a
    JMP instruction to the old task's TSS. The process repeats with
    the storing of current registers, loading of new registers, and
    continuing execution.

    The privilege level at which execution restarts in the incoming
    task is not restricted by the privilege level of the outgoing
    task. The tasks are isolated by their separate address spaces and
    TSSs, and privilege access rules are used to prevent improper
    access to a TSS. Thus, no special privilege rules are needed to
    constrain the relations between the CPLs of the individual tasks.
    The new task simply begins executing at the privilege level
    indicated by the RPL of the CS selector value that is loaded from
    the TSS.

    

    JMP, CALL, IRET, interrupts and exceptions are all ordinary
    mechanisms that can be used when a task switch is not required.
    Either the type of descriptor referenced or the NT (nested task)
    bit in the flags register distinguishes between the standard
    mechanism and the variant that causes a task switch.
}Mvltitasking:TSS:Selectors:Descriptors:Privilege:LTR
{Multiprocessing

 ^yMultiprocessing^y

    Multiprocessing is the execution of several programs or program
    segments concurrently with ^ya processor per program^y. Execution and
    I/O may occur in parallel using shared resources such as memory
    and I/O devices.

    The 80386+ supports multiprocessing on the system bus. Processors
    on this bus can have different bus widths.

    

    ^yLocked bus cycles^y
    While the system architecture of multiprocessor systems varies
    greatly, they generally have a need for reliable communication
    with memory. A processor that is updating the Accessed bit of a
    system descriptor, for example, should reject other attempts to
    change the descriptor until the operation is finished.

    It is also necessary to have reliable communication with other
    processors that can serve as bus masters. A problem can arise if a
    bus master reads a semaphore (a byte in memory) between the time
    another bus master reads the byte and the time the state of the
    byte is changed. The processor prevents this problem by supporting
    locked bus cycles, during which requests for control of the bus
    are ignored (see LOCK).

    On an 80386+ system, a LOCKed instruction will always lock the
    area of memory (starting address and length) defined by the
    destination operand. In typical 8086 and 80286 configurations,
    LOCK locks the entire physical address space.


    ^yAutomatic locking^y
    For the following critical memory operations, the processor
    asserts the LOCK# signal automatically:

        - Acknowledging interrupts
        - Executing an XCHG instruction that references memory
        - Setting the busy bit of a TSS descriptor
        - Updating segment descriptors
        - Updating Page Directory and Page Table entries
}Descriptors:LOCK
{Interrvpt and exception ID assignments:Exceptions
  ^yInterrupt and exception ID assignments^y

    ^yIdentifier  Description                          Caused by^y
        0       Divide error                         DIV and IDIV
        1       Debug exceptions                     Any code or data
                                                     reference
        2       Non-maskable interrupt
        3       Breakpoint                           INT 3 instruction
        4       Overflow                             INTO instruction
        5       Bounds check                         BOUND instruction
        6       Invalid opcode                       Reserved opcodes
        7       Device not available                 ESC and WAIT
        8       Double fault                         Any instruction
        9       Reserved by Intel
                (On pre-80486 CPUs,
                "Coprocessor Segment Overrun")
        10      Invalid task state segment (TSS)     JMP, CALL, IRET,
                                                     or an interrupt
        11      Segment not present                  Any instruction that
                                                     modifies segments
        12      Stack exception                      Stack operations
        13      General protection exception         Any code or data
                                                     reference
        14      Page fault                           Any code or data
                                                     reference
        15      Reserved by Intel
        16      Floating-point error                 ESC and WAIT
                (On pre-80486 CPUs,
                "Coprocessor error")
        17      Alignment check                      Any data reference
        18-31   Reserved by Intel
        32-255  Available for user-defined           INT nn instruction
                maskable interrupts
}
{Addressing modes

 ^yAddressing modes^y


    ^v8086+^v

    ^yBase        Index      Displacement^y
    BX or BP    SI or DI   16-bit immediate

    Default segment is DS; SS if BP is base register.



    ^v80386+^v (32-bit addressing only)

    ^yBase                    Index                   Displacement^y
    EAX,ECX,EDX,EBX,        EAX,ECX,EDX,EBX,        32-bit immediate
    ESP,EBP,ESI, or EDI     EBP,ESI, or EDI
                            scaled by 1,2,4, or 8

    Default segment is DS; SS if EBP or ESP is base register.



    Some instructions access memory to fetch or store data; other
    instructions use memory-references to locate the target of a jump
    or procedure call. In either case, the instruction must specify
    the address of the memory location being referenced. The part of
    the instruction that provides the memory location is called the
    address field. The contents of the address field are the ^ystated^y
    ^yaddress^y, and the address of the referenced memory location is
    the ^yeffective address^y.


    ^yOperand-size attribute and address-size attribute^y

    All instructions operate on either 0, 1, 2, or 3 operands. On the
    80386+, operands can be 8, 16, or 32 bits long. When executing
    16-bit code, they are 8 or 16 bits (^yoperand-size attribute^y 16
    bits); when executing 32-bit code, they are 8 or 32 bits
    (operand-size attribute 32 bits). Prefixes can be added to
    instructions to override the default lengths, i.e. use 32-bit
    operands for 16-bit code and vice versa.

    Memory is accessed with either 16- or 32-bit addresses. Each
    instruction that accesses memory has an ^yaddress-size attribute^y of
    16 or 32 bits. A 16-bit address-size attribute implies both the
    use of a 16-bit displacement in the instruction and the
    generation of a 16-bit address offset (a segment-relative
    address) by the effective address calculation. A 32-bit
    address-size attribute implies a 32-bit displacement and the
    generation of a 32-bit address offset. Any instruction that reads
    or writes a 16-bit word or a 32-bit doubleword has an
    operand-size attribute of either 16 or 32 bits.

    Instructions that implicitly use a stack, such as PUSH EAX, also
    have a ^ystack address-size attribute^y of either 16 or 32 bits. To
    form the address of the top of the stack, instructions with a
    stack address-size attribute of 16 bits use the 16-bit SP
    register. Instructions with a stack address-size attribute of
    32 bits use the 32-bit ESP register. The stack address-size
    attribute is shown by the "D-bit" (B-bit) in the SS segment
    descriptor. If D=0, the stack address-size attribute is 16 bits;
    if D=1, the attribute is 32 bits.


    In protected mode, the processor determines the ^ydefault^y operand
    size of the instruction it is executing by examining the "D-bit"
    in the CS segment descriptor. If D=0, all operand lengths and
    effective addresses are assumed to be 16 bits long. If D=1, the
    operands and addresses are 32 bits long.
    In real mode and in virtual 8086 mode, the default operand and
    address size is 16 bits (no descriptors in real mode).

    Regardless of default size, two prefixes (the operand-size prefix
    (66h), and the address-size prefix (67h)) override the D-bit value
    on an individual instruction basis. These prefixes are automatically
    added by assemblers that support the 80386 processor.


        ^yExamples^y

        ideal
        p386n
        ; Instruction   Object code generated if ...
        ;               segment _TEXT use16     segment _TEXT use32
        mov  al,[bx]    ; 8A 07                 ; 67 8A 07
        mov  ax,[bx]    ; 8B 07                 ; 66 67 8B 07
        mov  eax,[bx]   ; 66 8B 07              ; 67 8B 07
        mov  al,[ebx]   ; 67 8A 03              ; 8A 03
        mov  ax,[ebx]   ; 67 8B 03              ; 66 8B 03
        mov  eax,[ebx]  ; 66 67 8B 03           ; 8B 03
        push ax         ; 50                    ; 66 50
        push eax        ; 66 50                 ; 50
        pop  edx        ; 66 5A                 ; 5A
        pop  dx         ; 5A                    ; 66 5A
        ; ...
                        ends                    ends
}LEA:Descriptors
{386 instrvction format

   ^y386 instruction format^y

      Field                   Bytes
   a) Instruction prefix      0 or 1           F3  REP prefix (REPE,REPZ)
                                               F2  REPNE prefix (REPNZ)
                                               F0  LOCK
   b) Address-size prefix     0 or 1           67  Address-size override
   c) Operand-size prefix     0 or 1           66  Operand-size override
   d) Segment override        0 or 1           2E  CS override
                                               36  SS -
                                               3E  DS -
                                               26  ES -
                                               64  FS - (386+)
                                               65  GS - (386+)
   e) Opcode                  1 or 2
   f) ModR/M                  0 or 1                Mod_  Reg/Op_  __R/M__
                                               Bit  7  6  5  4  3  2  1  0
   g) SIB (scale index base)  0 or 1                _SS_  _Index_  _Base__
   h) Displacement            0,1,2,or 4
   i) Immediate               0,1,2,or 4
}Opcode format
{Opcode format

   ^yOpcode format^y

   opcode  hexadecimal digit(s)

   /digit (digit is between 0 and 7)
           Indicates that the ModR/M byte of the instruction uses only the
           ^vr/m^v (register or memory) operand. The reg field contains the
           digit that provides an extension to the instruction's opcode.

   /r      Indicates that the ModR/M byte of the instruction contains both
           a register operand and an ^vr/m^v operand.

   cb, cw, cd, cp
           A 1-byte (cb), 2-byte (cw), 4-byte (cd), or 6-byte (cp) value
           following the opcode that is used to specify a code offset and
           possibly a new value for the code segment register.

   ib, iw, id
           A 1-byte (ib), 2-byte (iw), or 4-byte (id) immediate operand to
           the instruction that follows the opcode, ModR/M bytes, or
           scale-indexing bytes. The opcode determines if the operand is a
           signed value. All words and doublewords are given with the
           low-order byte first.

   +rb, +rw, +rd
           A register code, from 0 through 7, added to the hexadecimal
           byte given at the left of the plus sign to form a single opcode
           byte. The codes are:

           ^yrb          rw          rd
           al = 0      ax = 0      eax = 0
           cl = 1      cx = 1      ecx = 1
           dl = 2      dx = 2      edx = 2
           bl = 3      bx = 3      ebx = 3
           ah = 4      sp = 4      esp = 4
           ch = 5      bp = 5      ebp = 5
           dh = 6      si = 6      esi = 6
           bh = 7      di = 7      edi = 7
}386 instrvction format
{Operand symbols

   ^yInstruction operand symbols^y

   r8           one of the byte registers al,cl,dl,bl,ah,ch,dh, or bh
   r16          one of the word registers ax,cx,dx,bx,sp,bp,si, or di
   r32 (386+)   one of the doubleword registers
                eax,ecx,edx,ebx,esp,ebp,esi, or edi

   rel8         a relative address (distance -128 to +127 bytes)
   rel16        a relative address within the same code segment

   imm8         an immediate signed byte value (-128 to +127)
   imm16        an immediate signed word value (-32,768 to +32,767)
   imm32 (386+) an immediate signed dword value
                (-2,147,483,648 to +2,147,483,647)

   r/m8         a one-byte operand that is either a byte register or
                a byte from memory
   r/m16        a word register or memory operand used for instructions
                whose operand-size attribute is 16 bits
   r/m32 (386+) a doubleword register or memory operand used for
                instructions whose operand-size attribute is 32 bits

   Sreg         a segment register
}Opcode format:386 instrvction format
{Matrix symbols

   ^yMatrix symbols

   b   = byte operation            r8  = byte register
   dir = direct                    r16 = word register (dword if 32-bit)
   ib  = immediate byte            r/m = effective address is second byte
   iw  = immediate word            sb  = immediate byte sign-extended
   ind = indirect                  sho = short
   f   = far, i.e. intersegment    Sr  = segment register
   m   = memory                    w   = word operation

      Ŀ
       modr/m  000   001  010   011   100   101   110   111 
      Ĵ
       ^yImmed^y    ADD   OR   ADC   SBB   AND   SUB   XOR   CMP 
       ^yShift^y    ROL   ROR  RCL   RCR   SHL   SHR   -     SAR 
       ^yGrp 1^y    TEST  -    NOT   NEG   MUL   IMUL  DIV   IDIV
       ^yGrp 2^y    INC   DEC  CALL  CALL  JMP   JMP   PUSH  -   
                           ind  f ind  ind  f ind            
      
}Instrvction set matrix (I):Instrvction set matrix (II)
{Instrvction set matrix (I)

   ^y8086/286/386/486/Pentium instruction set matrix (I)

       Lo 0       1       2       3       4       5       6       7
      Ŀ
   Hi   ADD    ADD    ADD    ADD    ADD    ADD   PUSH    POP  
    0 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   ES     ES   
      Ĵ
        ADC    ADC    ADC    ADC    ADC    ADC   PUSH    POP  
    1 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   SS     SS   
      Ĵ
        AND    AND    AND    AND    AND    AND    SEG    DAA  
    2 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   =ES         
      Ĵ
        XOR    XOR    XOR    XOR    XOR    XOR    SEG    AAA  
    3 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   =SS         
      Ĵ
        INC    INC    INC    INC    INC    INC    INC    INC  
    4   AX     CX     DX     BX     SP     BP     SI     DI   
      Ĵ
       PUSH   PUSH   PUSH   PUSH   PUSH   PUSH   PUSH   PUSH  
    5   AX     CX     DX     BX     SP     BP     SI     DI   
      Ĵ
       PUSHA  POPA   BOUND  ARPL                              
    6                 r,m  r/m,r16                            
      Ĵ
        JO     JNO    JB/   JNB/    JE/   JNE/   JBE/   JNBE/ 
    7                JNAE    JAE    JZ     JNZ    JNA    JA   
      Ĵ
      ^y immed  immed  immed  immed^y  TEST   TEST   XCHG   XCHG  
    8 ^y r/m,ib r/m,iw  ??   r/m,sb^y r/m,r8 r/m,r16r/m,r8 r/m,r16
      Ĵ
        NOP   XCHG   XCHG   XCHG   XCHG   XCHG   XCHG   XCHG  
    9          CX     DX     BX     SP     BP     SI     DI   
      Ĵ
        MOV    MOV    MOV    MOV   MOVS   MOVS   CMPS   CMPS  
    A  AL,m   AX,m   m,AL   m,AX     b      w      b      w   
      Ĵ
        MOV    MOV    MOV    MOV    MOV    MOV    MOV    MOV  
    B  AL,ib  CL,ib  DL,ib  BL,ib  AH,ib  CH,ib  DH,ib  BH,ib 
      Ĵ
      ^y shift  shift ^y  RETN   RETN   LES    LDS    MOV    MOV  
    C ^yr/m8, br/m16,w^y   iw                         m,ib   m,iw 
      Ĵ
      ^y shift  shift  shift  shift ^y  AAM    AAD   SALC   XLAT  
    D ^y r/m8,1r/m16,1r/m8,CLr/m16CL^y                            
      Ĵ
      LOOPNZ/ LOOPZ/ LOOP   JCXZ    IN     IN     OUT    OUT  
    E LOOPNE  LOOPE                AL,ib  AX,ib  ib,AL  ib,AX 
      Ĵ
       LOCK          REPNZ   REP/   HLT    CMC  ^y Grp1   Grp1  ^y
    F                        REPE               ^yr/m8,b r/m16,w^y
      
          0       1       2       3       4       5       6       7
}Instrvction set matrix (II):Matrix symbols
{Instrvction set matrix (II)

   ^y8086/286/386/486/Pentium instruction set matrix (II)

       Lo 8       9       A       B       C       D       E       F
      Ŀ
   Hi    OR     OR     OR     OR     OR     OR   PUSH  ^r Ext.  ^r
    0 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   CS   ^r (286+)^r
      Ĵ
        SBB    SBB    SBB    SBB    SBB    SBB   PUSH    POP  
    1 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   DS     DS   
      Ĵ
        SUB    SUB    SUB    SUB    SUB    SUB    SEG    DAS  
    2 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   =CS         
      Ĵ
        CMP    CMP    CMP    CMP    CMP    CMP    SEG    AAS  
    3 r/m,r8 r/m,r16r8, r/mr16,r/m AL,ib  AX,iw   =DS         
      Ĵ
        DEC    DEC    DEC    DEC    DEC    DEC    DEC    DEC  
    4   AX     CX     DX     BX     SP     BP     SI     DI   
      Ĵ
        POP    POP    POP    POP    POP    POP    POP    POP  
    5   AX     CX     DX     BX     SP     BP     SI     DI   
      Ĵ
       PUSH   IMUL   PUSH   IMUL    INS    INS   OUTS   OUTS  
    6   iw   r,r,iw   ib   r,r,ib    b      w      b      w   
      Ĵ
        JS     JNS    JP/   JNP/    JL/   JNL/   JLE/   JNLE/ 
    7                 JPE    JPO   JNGE    JGE    JNG    JG   
      Ĵ
        MOV    MOV    MOV    MOV    MOV    LEA    MOV    POP  
    8 r/m,r8 r/m,r16r8, r/mr16,r/m r/m,Sr r16,m  Sr,r/m  r/m  
      Ĵ
        CBW    CWD   CALL   WAIT   PUSHF  POPF   SAHF   LAHF  
    9               far dir                                   
      Ĵ
       TEST   TEST   STOS   STOS   LODS   LODS   SCAS   SCAS  
    A  AL,ib  AX,iw    b      w      b      w      b      w   
      Ĵ
        MOV    MOV    MOV    MOV    MOV    MOV    MOV    MOV  
    B  AX,iw  CX,iw  DX,iw  BX,iw  SP,iw  BP,iw  SI,iw  DI,iw 
      Ĵ
       ENTER  LEAVE   RETF   RETF   INT    INT   INTO   IRET  
    C  iw,ib           iw            3    (any)               
      Ĵ
        ESC    ESC    ESC    ESC    ESC    ESC    ESC    ESC  
    D    0      1      2      3      4      5      6      7   
      Ĵ
       CALL    JMP    JMP    JMP    IN     IN     OUT    OUT  
    E   dir    dir  far dirsho dir AL,DX  AX,DX  DX,AL  DX,AX 
      Ĵ
        CLC    STC    CLI    STI    CLD    STD  ^y Grp2   Grp2  ^y
    F                                           ^y r/m8,br/m16,w^y
      
          8       9       A       B       C       D       E       F
}Instrvction set matrix (I):Matrix symbols
{Instrvction set matrix (III) - 80286+ extension (opcode OF xx):Instrvction set matrix (III)

   ^yInstruction set matrix (III) - 80286+ extension
   (Opcode 0F xx - part I)

       Lo 0       1       2       3       4       5       6       7
      Ŀ
   Hi SLDT /0SGDT /0 LAR    LSL          LOADALL CLTS  LOADALL
    0 STR  /1SIDT /1                     (i286,        (i386, 
      LLDT /2LGDT /2                     undoc-         i486, 
      LTR  /3LIDT /3                      umen-        undoc- 
      VERR /4SMSW /4                      ted)          umen- 
      VERW /5                                           ted)  
             LMSW /6                                          
             INVLPG                                           
                  /7                                          
      Ĵ
                                                              
    1                                                         
      Ĵ
        MOV    MOV    MOV    MOV    MOV           MOV         
    2 r32,CRnr32,DRnCRn,r32DRn,r32r32,TRn       TRn,r32       
      Ĵ
              RDTSC  RDMSR  RDPMC                             
    3                                                         
      Ĵ
       CMOVO CMOVNO CMOVB/ CMOVNB/CMOVE/ CMOVNE/CMOVBE/CMOVNBE
    4               CMOVNAECMOVAE CMOVZ  CMOVNZ CMOVNA /CMOVA 
      Ĵ
      <                                                               <
      Ĵ
                                                        EMMS  
    7                                                         
      Ĵ
        JO     JNO    JB/   JNB/    JE/   JNE/   JBE/   JNBE/ 
    8                JNAE    JAE    JZ     JNZ    JNA    JA   
      Ĵ
       SETO   SETNO  SETB/  SETNB/ SETE/  SETNE/ SETBE/SETNBE/
    9               SETNAE  SETAE  SETZ   SETNZ  SETNA  SETA  
      Ĵ
       PUSH    POP   CPUID   BT    SHLD   SHLD  CMPXCHGCMPXCHG
    A   FS     FS            r,r   r,r,ib r,r,CL  (*)    (*)  
      Ĵ
      CMPXCHGCMPXCHG  LSS    BTR    LFS    LGS   MOVZX  MOVZX 
    B r/m,r8 r/m,r16         r,r                 r,r/m8r,r/m16
      Ĵ
       XADD   XADD                                     CMPXCH-
    C  r,r8   r,r16                                    G8B m64
      Ĵ
      <                                                               <
      
          0       1       2       3       4       5       6       7

      (*) A6 = XBTS/CMPXCHG, A7 = IBTS/CMPXCHG (see CMPXCHG article)
}Instrvction set matrix (IV):CMPXCHG
{Instrvction set matrix (IV)  - 80286+ extension (opcode OF xx):Instrvction set matrix (IV)

   ^yInstruction set matrix (IV) - 80286+ extension
   (Opcode 0F xx - part II)

       Lo 8       9       A       B       C       D       E       F
      Ŀ
   Hi  INVD  WBINVD          UD2                              
    0                                                         
      Ĵ
      <                                                               <
      Ĵ
      CMOVS  CMOVNS CMOVP/ CMOVNP/CMOVL/ CMOVNL/CMOVLE/CMOVNLE
    4               CMOVPE CMOVPO CMOVNGECMOVGE CMOVNG /CMOVG 
      Ĵ
      <                                                               <
      Ĵ
        JS     JNS    JP/   JNP/    JL/   JNL/   JLE/   JNLE/ 
    8                 JPE    JPO   JNGE    JGE    JNG    JG   
      Ĵ
       SETS   SETNS  SETP/ SETNP/  SETL/ SETNL/ SETLE/ SETNLE/
    9                SETPE  SETPO SETNGE  SETGE  SETNG  SETG  
      Ĵ
       PUSH    POP    RSM    BTS   SHRD   SHRD          IMUL  
    A   GS     GS            r,r   r,r,ib r,r,CL        r,r   
      Ĵ
                    ^yBitTest^y  BTC    BSF    BSR   MOVSX  MOVSX 
    B                r,ib    r,r    r,r    r,r   r,r/m8r,r/m16
      Ĵ
       BSWAP                                                  
    C   r32                                                   
      Ĵ
      <                                                               <
      
          8       9       A       B       C       D       E       F


      Ŀ
       modr/m  000   001  010   011   100   101   110   111 
      Ĵ
      ^y BitTest^y                         BT    BTS   BTR   BTC 
      
}Instrvction set matrix (III)
{F2XM1
 ^yF2XM1           Compute 2 to the Xth power - 1       Exceptions: P U D I S^y
                                                     (operands not checked)
 ^yF2XM1^y                                                C3 C2 C1 C0: ? ? * ?

        ^yLogic^y   ST  2**ST - 1

    F2XM1 calculates 2 to the power X and subtracts 1 from the result.
    X is taken from ST and must be 0 < X < .5 on the 8087 and 80287,
    or -1.0 < X < 1.0 on the 387+. If X is outside the acceptable range,
    F2XM1 produces an unpredictable result.

    This instrvction provides increased accvracy over 2**X when X is
    near 0 (for example, when calcvlating hyperbolic trignometric
    fvnctions).


    ^yOpcode      Format^y
    D9 F0       F2XM1


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    -           310-630      310-630    211-476  140-279   13-57   NP
}FSCALE:CC
{FABS
 ^yFABS            Absolute value                       Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFABS^y

        ^yLogic^y   ST  abs(ST)

    FABS replaces the contents of ST with its absolute value, i.e.
    forces it to a positive value by clearing its sign bit.


    ^yOpcode      Format^y
    D9 E1       FABS


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    -            10-17        10-17       22        3       1      FX
}FCHS:CC
{FADD
 ^yFADD            Add real                             Exceptions: I D O U P^y
 ^yFADDP           Add real and pop^y                     C3 C2 C1 C0: ? ? * ?

 ^yFADD^y [[destination,] sovrce]
 ^yFADDP^y [destination, ST]

        ^yLogic^y   ; FADD sovrce                   ; FADD and FADDP
                ST  ST + sovrce                ST(1)  ST(1) + ST
                                                pop ST

                ; FADD dest, sovrce             ; FADDP dest, ST
                ST(dest)  ST(dest) + ST(src)   ST(dest)  ST(dest) + ST
                                                pop ST

    FADD/FADDP add the source operand to the destination operand.

    The one-operand form of FADD adds a (single or double real) memory
    operand to ST.
    The two-operand form of FADD adds two register operands (ST must
    be one of these) and stores the sum in the destination register.
    FADDP works like the two-operand FADD but requires ST to be the
    source operand; it also pops the stack.
    The no-operand forms FADD and FADDP equal 'FADDP ST(1), ST'.


    ^yOpcode      Format^y
    D8 /0       FADD m32
    DC /0       FADD m64
    D8 C0 + i   FADD ST,ST(i)
    DC C0 + i   FADD ST(i),ST
    DE C0 + i   FADDP ST(i),ST
    DE C1       FADD
    DE C1       FADDP


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentivm
    fadd         70-100       70-100     23-34     8-20    3/1     FX
    fadd m32     90-120+EA    90-120     24-32     8-20    3/1     FX
    fadd m64     95-125+EA    95-125     29-37     8-20    3/1     FX
    faddp        75-105       75-105     23-31     8-20    3/1     FX
}FSUB:FIADD:FMUL:FSCALE:CC
{FBLD
 ^yFBLD            Load BCD                             Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFBLD^y sovrce

        ^yLogic^y   pvsh sovrce into ST

    FBLD converts a memory operand from BCD (packed decimal) format to
    extended real and pushes it into ST. The sign of the memory operand
    is preserved.


    ^yNote^y
    ST(7) must be empty; otherwise, the CPU receives an invalid
    operation exception.


    ^yOpcode      Format^y
    DF /4       FBLD m80


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    m80        (290-310)+EA  290-310    266-275   70-103   48-58   NP
}FBSTP:FADD:CC:FPU data types
{FBSTP
 ^yFBSTP           Store BCD and pop                    Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFBSTP^y destination

        ^yLogic^y   destination  ST
                pop ST

    FBSTP converts the value in ST into a packed decimal integer (BCD),
    and stores the result at the specified memory destination. It then
    pops the stack.
    If ST contains a non-integral value, FBSTP rounds it according to
    the RC field of the control word.


    ^yOpcode      Format^y
    DF /6       FBSTP m80


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    m80        (520-540)+EA  520-540    512-534  172-176   148-154 NP
}FBLD:FPU registers:FPU data types:CC
{FCHS
 ^yFCHS            Change sign                          Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFCHS^y

        ^yLogic^y   ST  -1 * ST

    FCHS inverts the sign bit of ST. Thus, it converts a positive number
    into a negative number, or vice versa.


    ^yOpcode      Format^y
    D9 E0       FCHS


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    -            10-17        10-17      24-25      6       1      FX
}FABS:CC
{FCLEX
 ^yFCLEX           Clear exceptions, with error check   Exceptions: None^y
 ^yFNCLEX          Clear exceptions, no wait^y            C3 C2 C1 C0: ? ? ? ?

 ^yFCLEX^y
 ^yFNCLEX^y

        ^yLogic^y   statvs word  statvs word AND 7F00h

    These instructions clear the FP exception flags, the exception
    status flag (bit 7 -- the interrupt request flag in the 8087), and
    the busy flag of the FPU status word.
    Before clearing the exception flags, FCLEX checks for unmasked FP
    error conditions (using an FWAIT); FNCLEX does not make this check.

    FCLEX/FNCLEX can be used by an exception handler after it has
    processed an exception. If the exception flags are not cleared, a
    second interrupt request would be issued immediately.


    ^yOpcode      Format^y
    9B DB E2    FCLEX
    DB E2       FNCLEX


    ^yTiming^y
    Variations    8087         287        387      486     Pentivm
    fclex         2-8          2-8        11        7       9      NP
    fnclex        2-8          2-8        11        7       9      NP

                The wait version may take additional cycles
}FLDCW:FINIT:FWAIT:FPU registers:CC
{FCMOVcc
 ^vFCMOVcc^y                 FP Conditional Move          Exceptions: S^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^vFCMOVcc^v destination,sovrce                           FPU: PPro+

        ^yLogic^y   if condition
                   ST  ST(i)
                endif

    Tests the status flags in the EFLAGS register and moves the source
    operand to the destination operand if the given test condition is
    true. The source operand is always in an ST(i) register and the
    destination operand is always ST(0).

    The FCMOVcc instructions are useful for optimizing small IF
    constructions. They also help eliminate branching overhead for IF
    operations and the possibility of branch mispredictions by the
    processor.

        ^yNote^y
        The FCMOVcc instructions are new for the Pentium Pro
        processor family; however, they may not be supported
        by all the processors in the family. By checking the
        processor's feature information with CPUID, a program
        can determine if the CPU supports FCMOVcc. If both the
        CMOV and FPU feature bits are set, the FCMOVcc
        instructions are supported.



    ^vInstrvction          Move if ...                   Flags^v
    FCMOVA, FCMOVNBE     Above, Not Below or Equal     CF=0 AND ZF=0
    FCMOVAE, FCMOVNB,    Above or Equal, Not Below,
      FCMOVNC              Not Carry                   CF=0
    FCMOVB, FCMOVC,      Below, Carry,
      FCMOVNAE             Not Above or Equal          CF=1
    FCMOVBE, FCMOVNA     Below or Equal, Not Above     CF=1 OR ZF=1
    FCMOVE, FCMOVZ       Equal, Zero                   ZF=1
    FCMOVNE, FCMOVNZ     Not Equal, Not Zero           ZF=0
    FCMOVU               Unordered                     PF=1 (C2=1)
    FCMOVNU              Not Unordered                 PF=0 (C2=0)


    ^yOpcode      Format^y
    DA C0 + i   FCMOVB   ST, ST(i)
    DA C8 + i   FCMOVE   ST, ST(i)
    DA D0 + i   FCMOVBE  ST, ST(i)
    DA D8 + i   FCMOVU   ST, ST(i)
    DB C0 + i   FCMOVNB  ST, ST(i)
    DB C8 + i   FCMOVNE  ST, ST(i)
    DB D0 + i   FCMOVNBE ST, ST(i)
    DB D8 + i   FCMOVNU  ST, ST(i)


    ^yTiming^y
    ??
}FCOMI:CMOVcc:FSTSW
{FCOM
 ^yFCOM            Compare real                         Exceptions: I D^y
 ^yFCOMP           Compare real and pop^y                 C3 C2 C1 C0: * * * *
 ^yFCOMPP          Compare real and pop twice^y

 ^yFCOM^y [sovrce]
 ^yFCOMP^y [sovrce]
 ^yFCOMPP^y

        ^yLogic^y
        ; FCOM                 ; FCOMP                ; FCOMPP
        if sovrce operand      if sovrce operand      CMP ST, ST(1)
           CMP ST, sovrce         CMP ST, sovrce      set condition codes
        else                   else                   pop ST
           CMP ST, ST(1)          CMP ST, ST(1)       pop ST
        endif                  endif
        set condition codes    set condition codes
                               pop ST

                ^yC3 C2 C0^y
                0  0  0     ST > source
                0  0  1     ST < source
                1  0  0     ST = source
                1  1  1     ST not comparable to source


    FCOM compares a real number to ST and leaves the result encoded in
    the status word as shown above. If no source operand is specified,
    ST(1) is compared to ST. Otherwise, source is compared to ST.

    FCOMP operates like FCOM, except that it also pops the stack.

    FCOMPP compares ST to ST(1), sets the condition codes as shown
    above, and pops the stack twice. It takes no operands.


    ^yNote^y
    If ST contains a NaN or has an undefined format, or a stack fault
    occurs, the invalid-operand exception is raised and the condition
    bits are set to 'unordered'.
    The sign of zero is ignored, so that -0.0 = +0.0.


    The FCOM instructions perform the same operation as the FUCOM
    instructions. The only difference is how they handle QNaN operands.
    The FCOM instructions raise an invalid-arithmetic-operand exception
    (#IA) when either or both of the operands is a NaN value or is in an
    unsupported format. The FUCOM instructions perform the same
    operation as the FCOM instructions, except that they do not generate
    an invalid-arithmetic-operand exception for QNaNs.


    ^yOpcode      Format^y
    D8 /2       FCOM m32
    DC /2       FCOM m64
    D8 D0 + i   FCOM ST(i)
    D8 D1       FCOM
    D8 /3       FCOMP m32
    DC /3       FCOMP m64
    D8 D8 + i   FCOMP ST(i)
    D8 D9       FCOMP
    DE D9       FCOMPP


    ^yTiming^y
    Variations/
    operand      8087         287        387      486     Pentivm
    fcom reg     40-50        40-50       24        4      4/1     FX
    fcom m32    (60-70)+EA    60-70       26        4      4/1     FX
    fcom m64    (65-75)+EA    65-75       31        4      4/1     FX
    fcomp        42-52        42-52       26        4      4/1     FX
    fcompp       45-55        45-55       26        5      4/1     FX
}FUCOM:FXAM:FSTSW:CC
{FCOMI
 ^yFCOMI           Compare real and set EFLAGS          Exceptions: I D^y
 ^yFCOMIP          Compare real, set EFLAGS, and pop^y    C3 C2 C1 C0: - - * -
                                                     FPU: PPro+
 ^yFCOMI^y destination, sovrce
 ^yFCOMIP^y destination, sovrce

        ^yLogic^y   ; FCOMI dest, src       ; FCOMIP dest, src
                CMP ST, ST(src)         CMP ST, ST(src)
                determine ZF,PF,CF      determine ZF,PF,CF
                                        POP ST

    Compares the contents of register ST(0) and ST(i) and sets the
    status flags ZF, PF, and CF in the EFLAGS register according to the
    results.
    The sign of zero is ignored for comparisons, so that -0.0 = +0.0.

                ^yZF PF CF^y
                0  0  0  ST > ST(i)
                0  0  1  ST < ST(i)
                1  0  0  ST = ST(i)
                1  1  1  Unordered

    The FCOMI/FCOMIP instructions perform the same operation as the
    FUCOMI/FUCOMIP instructions. The only difference is how they handle
    QNaN operands. The FCOMI/FCOMIP instructions set the status flags to
    unordered and generate an invalid-arithmetic-operand exception (#IA)
    when either or both of the operands is a NaN value (SNaN or QNaN) or
    is in an unsupported format.

    The FUCOMI/FUCOMIP instructions perform the same operation as the
    FCOMI/FCOMIP instructions, except that they do not generate an
    invalid-arithmetic-operand exception for QNaNs.


    If the invalid-operation exception is unmasked, the flags are not
    set if the invalid-arithmetic-operand exception is generated.


    ^yOpcode      Format^y
    DB F0 + i   FCOMI ST, ST(i)
    DF F0 + i   FCOMIP ST, ST(i)
    DB E8 + i   FUCOMI ST, ST(i)
    DF E8 + i   FUCOMIP ST, ST(i)


    ^yTiming^y
    ??
}FXAM:FCOM:FUCOM:FUCOMI:FSTSW:FCMOVcc
{FCOS
 ^yFCOS            Cosine                               Exceptions: P U D I S^y
                                                     C3 C2 C1 C0: ? * * ?
 ^yFCOS^y                                                 FPU: 387+

        ^yLogic^y   ST  cos(ST)

    FCOS replaces ST with the value of its cosine. The absolute value of
    ST, expressed in radians, must be below 2**63. If it is not, C2 is
    set and ST remains unchanged.


    ^yNote^y
    The 80486+ checks for interrupts while executing this instruction.
    It will abort FCOS to service an interrupt.


    ^yOpcode      Format^y
    D9 FF       FCOS


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    -              -            -       123-772  257-354   18-124  NP

        Additional cycles reqvired if operand > pi/4 (~3.141/4 = ~.785)
}FSIN:CC
{FDECSTP
 ^yFDECSTP         Decrement FP stack pointer           Exceptions: None^y
                                                     C3 C2 C1 C0: ? ? 0 ?
 ^yFDECSTP^y

        ^yLogic^y   TOP  (TOP - 1) AND 07h

    FDECSTP subtracts one from the three-bit TOP field of the status
    word. In effect, FDECSTP rotates the stack; it is not equivalent to
    pushing the stack. If TOP is 0, this instruction makes it 7.


    ^yOpcode      Format^y
    D9 F6       FDECSTP


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    -             6-12         6-12       22        3       1      NP
}FINCSTP:FPU registers:CC
{FDISI
 ^yFDISI           Disable interrupts                   Exceptions: None^y
 ^yFNDISI          Disable interrupts, no wait^y          C3 C2 C1 C0: [??]

 ^yFDISI^y
 ^yFNDISI^y

        ^yLogic^y   Interrvpt Enable Mask  1

    FDISI disables interrupts in the 8087 only. It sets the Interrupt
    Enable Mask (IEM) in the control word, which prevents the 8087 from
    issuing interrupt requests. If WAIT is decoded with pending
    exceptions, the 8087 produces an interrupt (whether masked or not).


    ^yNote^y
    This instruction disables interrupts in the 8087 only. It is
    recognized but ignored in later FPUs.


    ^yOpcode      Format^y
    9B DB E1    FDISI
    DB E1       FNDISI


    ^yTiming^y
    Variations    8087         287        387      486     Pentivm
    fdisi         2-8           2          2        3       1      NP
    fndisi        2-8           2          2        3       1      NP

                The wait version may take additional cycles
}FENI
{FDIV
 ^yFDIV            Divide real                          Exceptions: I D Z O U P^y
 ^yFDIVP           Divide real and pop^y                  C3 C2 C1 C0: ? ? * ?

 ^yFDIV^y [[destination,] sovrce]
 ^yFDIVP^y [destination, ST]

        ^yLogic^y   ; FDIV sovrce                   ; FDIV and FDIVP
                ST  ST / sovrce                ST(1)  ST(1) / ST
                                                pop ST

                ; FDIV dest, sovrce             ; FDIVP dest, ST
                ST(dest)  ST(dest) / ST(src)   ST(dest)  ST(dest) / ST
                                                pop ST

    FDIV/FDIVP divide the destination operand by the source operand and
    leave the quotient in the destination operand.

    The one-operand form of FDIV divides ST by a (single or double real)
    memory operand and leaves the result in ST.
    The two-operand form of FDIV divides two register operands (one of
    these must be ST) and stores the result in the destination register.
    FDIVP works like the two-operand FDIV but requires ST to be the
    source operand; it also pops the stack.
    The no-operand forms FDIV and FDIVP equal 'FDIVP ST(1), ST'.


    ^yOpcode      Format^y
    D8 /6       FDIV m32
    DC /6       FDIV m64
    D8 F0 + i   FDIV ST,ST(i)
    DC F8 + i   FDIV ST(i),ST
    DE F8 + i   FDIVP ST(i),ST
    DE F9       FDIV
    DE F9       FDIVP


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentivm
    fdiv reg    193-203      193-203     88-91     73      39      FX
    fdiv m32   (215-225)+EA  215-225      89       73      39      FX
    fdiv m64   (220-230)+EA  220-230      94       73      39      FX
    fdivp       197-207      197-207      91       73      39      FX
}FDIVR:FIDIV:FSCALE:FXTRACT:FMUL:CC
{FDIVR
 ^yFDIVR           Divide real reversed                 Exceptions: I D Z O U P^y
 ^yFDIVRP          Divide real reversed and pop^y         C3 C2 C1 C0: ? ? * ?

 ^yFDIVR^y [[destination,] sovrce]
 ^yFDIVRP^y [destination, ST]

        ^yLogic^y   ; FDIVR sovrce                  ; FDIVR and FDIVRP
                ST  sovrce / ST                ST(1)  ST / ST(1)
                                                pop ST

                ; FDIVR dest, sovrce            ; FDIVRP dest, ST
                ST(dest)  ST(src) / ST(dest)   ST(dest)  ST / ST(dest)
                                                pop ST

    FDIVR/FDIVRP divide the ^vsource^v operand by the destination operand
    and return the quotient to the destination operand.

    The one-operand form of FDIVR divides a (single or double real)
    memory operand by ST and leaves the result in ST.
    The two-operand form of FDIVR divides two register operands (one of
    these must be ST) and stores the result in the destination register.
    FDIVRP works like the two-operand FDIVR but requires ST to be the
    source operand; it also pops the stack.
    The no-operand forms FDIVR and FDIVRP equal 'FDIVRP ST(1), ST'.


    ^yOpcode      Format^y
    D8 /7       FDIVR m32
    DC /7       FDIVR m64
    D8 F8 + i   FDIVR ST,ST(i)
    DC F0 + i   FDIVR ST(i),ST
    DE F0 + i   FDIVRP ST(i),ST
    DE F1       FDIVR
    DE F1       FDIVRP


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentivm
    fdivr reg    194-204     194-204     88-91     73      39      FX
    fdivr m32   (216-226)+EA 216-226      89       73      39      FX
    fdivr m64   (221-231)+EA 221-231      94       73      39      FX
    fdivrp       198-208     198-208      91       73      39      FX
}FDIV:FIDIV:FSCALE:FXTRACT:CC
{FENI
 ^yFENI            Enable interrupts                    Exceptions: None^y
 ^yFNENI           Enable interrupts, no wait^y           C3 C2 C1 C0: [??]

 ^yFENI^y
 ^yFNENI^y

        ^yLogic^y   Interrvpt Enable Mask  0

    FENI allows the 8087 only to generate interrupt requests by clearing
    the Interrupt Enable Mask in the control word. This instruction is
    recognized but ignored on later FPUs.

    FNENI is the no-wait form of FENI. It allows the 8087 only to
    generate interrupt requests by clearing the Interrupt Enable Mask in
    the control word. This instruction enables interrupts in the 8087
    only. It is recognized but ignored on later FPUs.


    ^yNote^y
    You should use the no-wait form of the instruction only when there
    is a danger of generating an infinite wait condition.


    ^yOpcode      Format^y
    9B DB E0    FENI
    DB E0       FNENI


    ^yTiming^y
    Variations    8087         287        387      486     Pentivm
    feni          2-8           2          2        3       1      NP
    fneni         2-8           2          2        3       1      NP
}FDISI
{FFREE
 ^yFFREE           Free register                        Exceptions: None^y
                                                     C3 C2 C1 C0: ? ? ? ?
 ^yFFREE^y destination

        ^yLogic^y   TAG(i)  11 binary

    FFREE tags the destination register as empty. It does not affect the
    contents of the destination register, nor does it affect the FP
    stack-top pointer (TOP).


    ^yOpcode      Format^y
    DD C0 + i   FFREE ST(i)


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    reg           9-16         9-16       18        3       1      NP
}FINIT:FPU registers
{FIADD
 ^yFIADD           Integer add                          Exceptions: I D O P^y
                                                     C3 C2 C1 C0: ? ? ? ?
 ^yFIADD^y sovrce

        ^yLogic^y   ST  ST + sovrce

    FIADD adds a (word or short integer) memory operand to ST and leaves
    the result in the stack top.


    ^yOpcode      Format^y
    DE /0       FIADD m16
    DA /0       FIADD m32


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    m16        (102-137)+EA  102-137     71-85    20-35    7/4     NP
    m32        (108-143)+EA  108-143     57-72    19-32    7/4     NP
}FADD:FISUB:FIMUL
{FICOM
 ^yFICOM           Integer compare                      Exceptions: I D^y
 ^yFICOMP          Integer compare and pop^y              C3 C2 C1 C0: * * * *

 ^yFICOM^y sovrce
 ^yFICOMP^y sovrce

                ; FICOM                 ; FICOMP
        ^yLogic^y   CMP ST, source          CMP ST, source
                set condition codes     set condition codes
                                        pop ST

                ^yC3 C2 C0^y
                0  0  0     ST > source
                0  0  1     ST < source
                1  0  0     ST = source
                1  1  1     ST not comparable to source

    FICOM/FICOMP compare the stack top to a word or short-integer memory
    operand and leave the result encoded in the status word as shown
    above. FICOMP also pops the stack.


    ^yOpcode      Format^y
    DE /2       FICOM m16
    DA /2       FICOM m32
    DE /3       FICOMP m16
    DA /3       FICOMP m32


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentivm
    ficom  m16    (72-86)+EA  72-86      71-75    16-20    8/4     NP
    ficom  m32    (78-91)+EA  78-91      56-63    15-17    8/4     NP
    ficomp m16    (74-88)+EA  74-88      71-75    16-20    8/4     NP
    ficomp m32    (80-93)+EA  80-93      56-63    15-17    8/4     NP
}FCOM:FXAM:CC
{FIDIV
 ^yFIDIV           Integer divide                       Exceptions: I D Z O U P^y
 ^yFIDIVR          Integer divide reversed^y              C3 C2 C1 C0: ? ? * ?

 ^yFIDIV^y sovrce
 ^yFIDIVR^y sovrce

                ; FIDIV                 ; FIDIVR
        ^yLogic^y   ST  ST/sovrce          ST  sovrce/ST

    FIDIV divides ST by a (word or short integer) memory operand and
    stores the quotient in ST.

    FIDIVR divides a (word or short integer) memory operand by ST and
    stores the quotient in ST.


    ^yOpcode      Format^y
    DA /6       FIDIV m32
    DE /6       FIDIV m16
    DA /7       FIDIVR m32
    DE /7       FIDIVR m16


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentivm
    fidiv  m16   (224-238)+EA  224-238  136-140   85-89    42      NP
    fidiv  m32   (230-243)+EA  230-243  120-127   84-86    42      NP
    fidivr m16   (225-239)+EA  225-239  135-141   85-89    42      NP
    fidivr m32   (231-245)+EA  231-245  121-128   84-86    42      NP
}FDIV:FIMUL:CC
{FILD
 ^yFILD            Load integer                         Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFILD^y sovrce

        ^yLogic^y   pvsh sovrce into ST

    FILD converts a signed word, short, or long integer to extended real
    format and pushes it onto the FPU stack.


    ^yNote^y
    ST(7) must be empty; otherwise, the CPU receives an invalid
    operation exception.

    The source operand cannot be one of the 80x86 general registers.


    ^yOpcode      Format^y
    DF /0       FILD m16
    DB /0       FILD m32
    DF /5       FILD m64


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    m16         (46-54)+EA    46-54      61-65    13-16    3/1     NP
    m32         (52-60)+EA    52-60      45-52     9-12    3/1     NP
    m64         (60-68)+EA    60-68      56-67    10-18    3/1     NP
}FLD:FBLD:FXCH:FIST:CC
{FIMUL
 ^yFIMUL           Integer multiply                     Exceptions: I D O P^y
                                                     C3 C2 C1 C0: ? ? ? ?
 ^yFIMUL^y sovrce

        ^yLogic^y   ST  ST * sovrce

    FIMUL multiplies ST by a (word or short integer) memory operand and
    returns the result in ST.


    ^yOpcode      Format^y
    DA /1       FIMUL m32
    DE /1       FIMUL m16


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    m16        (124-138)+EA   124-138    76-87    23-27    7/4     NP
    m32        (130-144)+EA   130-144    61-82    22-24    7/4     NP
}FMUL:FSCALE:FIDIV
{FINCSTP
 ^yFINCSTP         Increment FP stack pointer           Exceptions: None^y
                                                     C3 C2 C1 C0: ? ? 0 ?
 ^yFINCSTP^y

        ^yLogic^y   TOP  (TOP + 1) AND 07h

    FINCSTP adds one to the three-bit TOP field of the FPU status word.
    In effect, FINCSTP rotates the stack; it is not equivalent to
    popping a register from the stack. If TOP is 7, this instruction
    makes it 0.


    ^yOpcode      Format^y
    D9 F7       FINCSTP


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    -             6-12         6-12       21        3       1      NP
}FDECSTP:FPU registers:CC
{FINIT
 ^yFINIT           Initialize FPU                       Exceptions: None^y
 ^yFNINIT          Initialize FPU, no wait^y              C3 C2 C1 C0: 0 0 0 0

 ^yFINIT^y
 ^yFNINIT^y

        ^yLogic^y   control word  037Fh
                statvs word  0000h
                tag word  0FFFFh

    FINIT/FNINIT put the FPU in a known state, unaffected by any
    previous activity. In fact, FINIT/FNINIT affect the FPU in the same
    way as a hardware RESET signal with Built-In Self-Test. They do not
    affect the synchronization between the 80x86 and the 80x87.

    Before initializing the FPU, FINIT checks for unmasked FP error
    conditions (using an FWAIT instruction); FNINIT does not make this
    check.

    FINIT/FNINIT do the following:
    - set the FPU control word to 037Fh, i.e. round to nearest, 64-bit
      precision, all exceptions masked
    - clear the status word, i.e. no exception flags set and stack
      register R0 = ST(0)
    - tag all stack registers as empty
    - clear instruction and error data pointers (80486 FPU+ only)


    ^yOpcode      Format^y
    9B DB E3    FINIT
    DB E3       FNINIT


    ^yTiming^y
    Variations    8087         287        387      486     Pentivm
    finit         2-8          2-8        33       17      16      NP
    fninit        2-8          2-8        33       17      12      NP

                The wait version may take additional cycles
}FCLEX:FSAVE:FWAIT:FPU registers
{FIST
 ^yFIST            Store integer                        Exceptions: I P^y
 ^yFISTP           Store integer and pop^y                C3 C2 C1 C0: ? ? * ?

 ^yFIST^y destination
 ^yFISTP^y destination

                ; FIST                  ; FISTP
        ^yLogic^y   destination  ST        destination  ST
                                        pop ST

    FIST converts the value in ST into a signed integer according to the
    RC field of the control word. It then stores the result at
    destination, which can be either a word or short integer in memory.

    FISTP operates like FIST, except that it also pops the stack and
    supports a long integer destination.


    ^yNote^y
    If the value in ST is too large to be represented as an integer, an
    invalid operation exception is raised. The masked response is to
    write the most negative integer to memory.

    The destination operand cannot be one of the 80x86 general registers.


    ^yOpcode      Format^y
    DF /2       FIST m16
    DB /2       FIST m32
    DF /3       FISTP m16
    DB /3       FISTP m32
    DF /7       FISTP m64


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentivm
    fist  m16    (80-90)+EA   80-90      82-95    29-34     6      NP
    fist  m32    (82-92)+EA   82-92      79-93    28-34     6      NP
    fistp m16    (82-92)+EA   82-92      82-95    29-34     6      NP
    fistp m32    (84-94)+EA   84-94      79-93    28-34     6      NP
    fistp m64    (94-105)+EA  94-105     80-97    28-34     6      NP
}FBSTP:FST:FILD:FPU registers:CC
{FISUB
 ^yFISUB           Integer subtract                     Exceptions: I D O P^y
 ^yFISUBR          Integer subtract reversed^y            C3 C2 C1 C0: ? ? ? ?

 ^yFISUB^y sovrce
 ^yFISUBR^y sovrce

                ; FISUB                 ; FISUBR
        ^yLogic^y   ST  ST - sovrce        ST  sovrce - ST

    FISUB subtracts a word or short-integer source in memory from ST
    and returns the difference in ST.

    FISUBR subtracts ST from a word or short-integer source in memory
    and returns the difference in ST.


    ^yOpcode      Format^y
    DA /4       FISUB m32
    DE /4       FISUB m16
    DA /5       FISUBR m32
    DE /5       FISUBR m16


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fisub  m16   (102-137)+EA 102-137     71-85    20-35    7/4     NP
    fisubr m32   (108-143)+EA 108-143     57-82    19-32    7/4     NP
}FSUB:FIADD
{FLD
 ^yFLD             Load real                            Exceptions: I D^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFLD^y sovrce

        ^yLogic^y   pvsh sovrce into ST

    FLD pushes source onto the FPU stack and is the basic instruction
    for moving data into the FPU.
    If the source is a register, the register number used is that before
    the stack-top pointer is decremented. If source is in single or
    double FP format, it is automatically converted to extended real
    format.

        ^yExample^y
        fld   [mem64]   ; load double
        fld   ST(0)     ; duplicate stack top


    ^yNote^y
    ST(7) must be empty; otherwise, the CPU receives an invalid
    operation exception.


    ^yOpcode      Format^y
    D9 /0       FLD m32
    DD /0       FLD m64
    DB /5       FLD m80
    D9 C0 + i   FLD ST(i)


    ^yTiming^y
    Operand       8087         287        387      486     Pentivm
    reg          17-22        17-22       14        4       1      FX
    m32         (38-56)+EA    38-56       20        3       1      FX
    m64         (40-60)+EA    40-60       25        3       1      FX
    m80         (53-65)+EA    53-65       44        6       3      NP
}FLDxx:FILD:FBLD:FXCH:FST:FPU data types:FPU registers:CC
{FLDxx
 ^yFLDZ            Load constant onto stack, +0.0       Exceptions: I^y
 ^yFLD1            Load constant onto stack, +1.0^y       C3 C2 C1 C0: ? ? * ?
 ^yFLDL2E          Load constant onto stack, logarithm base 2 (e)^y
 ^yFLDL2T          Load constant onto stack, logarithm base 2 (10)^y
 ^yFLDLG2          Load constant onto stack, logarithm base 10 (2)^y
 ^yFLDLN2          Load constant onto stack, natural logarithm (2)^y
 ^yFLDPI           Load constant onto stack, pi (3.14159...)^y

        ^yLogic:^y

        ^yFLDZ^y        push +0.0 into ST

        ^yFLD1^y        push +1.0 into ST

        ^yFLDL2E^y      push Log(2)e into ST

        ^yFLDL2T^y      push Log(2)10 into ST

        ^yFLDLG2^y      push Log(10)2 into ST

        ^yFLDLN2^y      push Log(e)2 into ST

        ^yFLDPI^y       push PI into ST

    Each of these instructions pushes a commonly used value (in extended
    real format) onto the FPU stack.


    ^yOpcode      Format^y
    D9 E8       FLD1
    D9 E9       FLDL2T
    D9 EA       FLDL2E
    D9 EB       FLDPI
    D9 EC       FLDLG2
    D9 ED       FLDLN2
    D9 EE       FLDZ


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fldz         11-17        11-17       20        4       2      NP
    fld1         15-21        15-21       24        4       2      NP
    fldl2e       15-21        15-21       40        8      5/3     NP
    fldl2t       16-22        16-22       40        8      5/3     NP
    fldlg2       18-24        18-24       41        8      5/3     NP
    fldln2       17-23        17-23       41        8      5/3     NP
    fldpi        16-22        16-22       40        8      5/3     NP
}FLD:FILD:FBLD:CC
{FLDCW
 ^yFLDCW           Load control word                    Exceptions: None^y
                                                     C3 C2 C1 C0: ? ? ? ?
 ^yFLDCW^y source

        ^yLogic^y   control word  source

    FLDCW replaces the current value of the FPU control word register
    with the value contained in the specified memory word; it is used to
    change the FPU's mode of operation, e.g. rounding control.

    ^yNote^y
    If an exception bit in the status word is set, loading a new control
    word will produce a floating-point error condition. To avoid this
    problem, any pending exceptions should be cleared (for example, by
    using FCLEX) before loading the new control word.


    ^yOpcode      Format^y
    D9 /5       FLDCW m16


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    m16          (7-14)+EA     7-14       19        4       7      NP
}FCLEX:FSTCW:FSTSW:FPU registers
{FLDENV
 ^yFLDENV          Load environment state               Exceptions: None^y
                                                     C3 C2 C1 C0: * * * *
 ^yFLDENV^y source

        ^yLogic^y   FPU environment  source

    FLDENV reloads the FPU environment from source, which must contain a
    new control word, status word, tag word, and error block. Usually,
    this data has been written by a previous FSTENV instruction.


    ^yNote^y
    FLDENV should be executed in the same operating mode as the
    corresponding FSTENV or FNSTENV.
    If the environment image contains an unmasked exception, loading it
    will produce a floating-point error condition.


    ^yOpcode      Format^y
    D9 /4       FLDENV m14byte  ; 16-bit code segment
    D9 /4       FLDENV m28byte  ; 32-bit code segment


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    mem         (35-45)+EA    35-45       71      44/34    37/32-33 NP

                Cycles for real mode/protected mode
}FSTENV:FRSTOR:CC
{FMUL
 ^yFMUL            Multiply real                        Exceptions: I D O U P^y
 ^yFMULP           Multiply real and pop^y                C3 C2 C1 C0: ? ? * ?

 ^yFMUL^y [[destination,] source]
 ^yFMULP^y [destination, ST]

        ^yLogic^y   ; FMUL source                   ; FMUL and FMULP
                ST  ST * source                ST(1)  ST(1) * ST
                                                pop ST

                ; FMUL dest, source             ; FMULP dest, ST
                ST(dest)  ST(dest) * ST(src)   ST(dest)  ST(dest) * ST
                                                pop ST

    FMUL/FMULP multiply the destination operand by the source operand
    and return the product to the destination.

    The one-operand form of FMUL multiplies ST by a (single or double
    real) memory operand.
    The two-operand form of FMUL multiplies two register operands (one
    of these must be ST) and stores the result in the destination
    register.
    FMULP works like the two-operand FMUL but requires ST to be the
    source operand; it also pops the stack.
    The no-operand forms FMUL and FMULP equal 'FMULP ST(1), ST'.


    ^yExample^y
        ; C-callable integer power function
        ; by Nicholas Wilt, 1991 (DDJ 9203)
        ;
        ; double intpow(double x, unsigned int y);
        ; returns x**y

        model   small, C
        public  intpow

        proc    intpow
        arg     x:qword, y:word
                fld1            ; result = 1.0
                mov   cx, [y]
                fld   [x]       ; load x
                jcxz  @@ret     ; if exponent zero, result made
        @@1:    test  cx, 1
                jz    @@2
                fmul  st(1), st ; multiply result by x
        @@2:    fmul  st, st    ; square x
                shr   cx, 1
                jnz   @@1
        @@ret:  fstp  st        ; discard x
                ret             ; return result in st(0)
        endp


    ^yOpcode      Format^y
    D8 /1       FMUL m32
    DC /1       FMUL m64
    D8 C8 + i   FMUL ST, ST(i)
    DC C8 + i   FMUL ST(i), ST
    DE C8 + i   FMULP ST(i), ST
    DE C9       FMUL
    DE C9       FMULP


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fmul reg s    90-105       90-105    29-52     16      3/1     FX
    fmul reg     130-145      130-145    46-57     16      3/1     FX
    fmul m32    (110-125)+EA  110-125    27-35     11      3/1     FX
    fmul m64    (154-168)+EA  154-168    32-57     14      3/1     FX
    fmulp reg s   94-108       94-108    29-52     16      3/1     FX
    fmulp reg    134-148      134-148    29-57     16      3/1     FX

            s = register with 40 trailing zeros in fraction
}FIMUL:FSCALE:FADD:FDIV:CC
{FNOP
 ^yFNOP            No operation                         Exceptions: None^y
                                                     C3 C2 C1 C0: ? ? ? ?
 ^yFNOP^y

        ^yLogic^y   ST  ST

    FNOP performs no operation. It only advances the instruction
    pointer.


    ^yOpcode      Format^y
    D9 D0       FNOP


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            10-16        10-16       12        3       1      NP
}
{FPATAN
 ^yFPATAN          Partial arctangent                   Exceptions: U P^y
                                                     (operands not checked)
 ^yFPATAN^y                                               C3 C2 C1 C0: ? ? * ?

        ^yLogic^y   ST  arctan(ST(1)/ST)
                pop ST

    FPATAN computes the arctangent of ST(1) divided by ST and puts the
    result (in radians) in ST. It then pops the stack. The result has
    the same sign as the operand from ST(1), and a magnitude less than
    PI.

    On the 8087 and 80287, ST and ST(1) must be within certain limits.
    There is no restriction on ST(1) or ST on later FPUs.

    You can also use FPATAN to compute other trigonometric functions.
    For example, arcsin(x) is the arctangent of x divided by
    sqrt(1 - x**2).


    ^yNote^y
    The 80486+ check for interrupts while executing this instruction. It
    will abort FPATAN to service an interrupt.


    ^yOpcode      Format^y
    D9 F3       FPATAN


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -           250-800      250-800    314-487  218-303   17-173
}FPTAN:FPREM:CC
{FPREM
 ^yFPREM           Partial remainder                    Exceptions: I D U^y
 ^yFPREM1          IEEE Partial remainder (387+)^y        C3 C2 C1 C0: * * * *

 ^yFPREM^y
 ^yFPREM1^y

        ^yLogic^y   ST  remainder (ST / ST(1))

    These instructions divide ST by ST(1), and put the remainder in ST.
    The sign of the remainder is the same as the sign of the original
    dividend in ST.
    FPREM is supported for compatibility with the 8087 and 80287 math
    coprocessors. The FPREM1 instruction is the remainder operation
    specified in IEEE Standard 754.


    An important use of FPREM/FPREM1 is to reduce the arguments of
    periodic functions. When the reduction is complete, FPREM/FPREM1
    provides the 3 least-significant bits of the quotient in flags C3,
    C1, and C0  (C0 = bit 2, C3 = bit 1, C1 = bit 0). This is important
    in argument reduction for the tangent function (using a modulus of
    PI/4), because it locates the original angle in the correct one of 8
    sectors of the unit circle.


    ^yNote^y
    FPREM and FPREM1 produce an exact result; the precision (inexact)
    exception does not occur and the rounding control (RC) has no
    effect.


        ^yExample^y
        ; Compute z := x MOD y
        p387
        fld   [y]
        fld   [x]
      @@again:
        fprem1          ; compute IEEE partial remainder
        fstsw ax        ; get condition bits
        test  ah, 0100b ; if C2 is set
        jnz   @@again   ;   not done yet
        fstp  [z]       ; store and pop remainder
        fstp  ST(0)     ; unstack y


    ^yOpcode      Format^y
    D9 F8       FPREM
    D9 F5       FPREM1


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fprem        15-190       15-190     74-155   70-138   16-64   NP
    fprem1         -            -        95-185   72-167   20-70   NP
}FPTAN:FPU registers:CC
{FPTAN
 ^yFPTAN           Partial tangent                      Exceptions: I P^y
                                                     (operands not checked)
 ^yFPTAN^y                                                C3 C2 C1 C0: ? * * ?

        ^yLogic^y   ST  tan(ST)
                push 1.0

    FPTAN replaces the contents of ST with tan(ST) and then pushes 1.0
    onto the FPU stack (leaving the tangent in ST(1) and 1.0 in ST).
    ST, expressed in radians, must be an absolute number that is less
    than 2**63 (on the 8087 and 80287, 0 <= ST <= PI/4).

    FPTAN pushes 1.0 onto the FPU stack to maintain compatibility with
    the 8087 and 80287 math coprocessors, simplifying the calculation of
    other trigonometric functions. For example, you can produce the
    cotangent (the reciprocal of the tangent) by executing FPTAN, then
    FDIVR.


    ^yNote^y
    If the operand is outside the acceptable range, C2 is set and ST
    remains unchanged.
    The 80486 checks for interrupts while executing this instruction. It
    will abort FPTAN to service an interrupt.
    ST(7) must be empty to avoid an invalid-operation exception.


    ^yOpcode      Format^y
    D9 F2       FPTAN


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            30-540       30-540    191-497  200-273   17-173  NP
          Additional cycles required if operand > pi/4 (~3.141/4 = ~.785)
}FPATAN:FPREM:CC
{FRNDINT
 ^yFRNDINT         Round to integer                     Exceptions: I P^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFRNDINT^y

        ^yLogic^y   ST  round(ST)

    FRNDINT rounds the value in ST to an integer according to the RC
    (rounding control) field of the FPU control word.


    ^yOpcode      Format^y
    D9 FC       FRNDINT


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            16-50        16-50      66-80    21-30    9-20    NP
}FPU registers:CC
{FRSTOR
 ^yFRSTOR          Restore saved state                  Exceptions: None^y
                                                     C3 C2 C1 C0: * * * *
 ^yFRSTOR^y source

        ^yLogic^y   FPU state  source

    FRSTOR reloads the FPU state (environment and register stack) from
    the memory area defined by the source operand. This data should have
    been written by a previous FSAVE or FNSAVE instruction.


    ^yNote^y
    FRSTOR should be executed in the same operating mode as the
    corresponding FSAVE or FNSAVE. FRSTOR should be preceded with an
    FWAIT instruction to ensure that the storage operation is complete.
    If the state image contains an unmasked exception, loading it will
    produce an FP error condition.


    ^yOpcode      Format^y
    DD /4       FRSTOR m94b     ; 16-bit code segment
    DD /4       FRSTOR m108b    ; 32-bit code segment


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    frstor  mem  (197-207)+EA 197-207     308    131/120   75-95/70 NP
    frstorw mem    -            -         308    131/120   75-95/70 NP
    frstord mem    -            -         308    131/120   75-95/70 NP

                  Cycles for real mode/protected mode
}FSAVE:FSTENV:FLDENV:FWAIT:FPU registers:CC
{FSAVE
 ^yFSAVE           Save FPU state                       Exceptions: None^y
 ^yFNSAVE          Save FPU state, no wait^y              C3 C2 C1 C0: 0 0 0 0

 ^yFSAVE^y destination
 ^yFNSAVE^y destination

        ^yLogic^y   destination  FPU state

    FSAVE/FNSAVE write the FPU state to destination (a 94- or 108-byte
    memory location), then reinitialize the FPU. FSAVE checks for
    unmasked FP error conditions before writing the state; FNSAVE does
    not make this check.

    The FPU state includes the environment (14 or 28 bytes) and the
    register stack, ST(0) thru ST(7) (80 bytes).
    The environment includes the control word, the status word, the tag
    word, and an error block (see FSTENV).


    FSAVE/FNSAVE are typically used when an operating system needs to
    perform a context switch, an exception handler needs to use the FPU,
    or an application program wants to 'clean' the FPU before a
    subroutine uses it.


    ^yNote^y
    FSAVE/FNSAVE do not store the FPU state until all FPU activity has
    finished. To restore a saved state, use the FRSTOR instruction. If a
    program is to read from the state's memory image following a save
    instruction, it must issue an FWAIT instruction to ensure that the
    storage operation is complete.


    ^yOpcode      Format^y
    9B DD /6    FSAVE m94byte   ; 16-bit code segment
    9B DD /6    FSAVE m108byte  ; 32-bit code segment
    DD /6       FNSAVE m94byte  ; 16-bit code segment
    DD /6       FNSAVE m108byte ; 32-bit code segment


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fsave      (197-207)+EA  197-207    375-376  154/143  127-151/124 NP
    fsavew                              375-376  154/143  127-151/124 NP
    fsaved                              375-376  154/143  127-151/124 NP
    fnsave     (197-207)+EA  197-207    375-376  154/143  127-151/124 NP
    fnsavew                             375-376  154/143  127-151/124 NP
    fnsaved                             375-376  154/143  127-151/124 NP

                Cycles for real mode/protected mode
                The wait version may take additional cycles
}FSTENV:FRSTOR:FWAIT:FINIT:FPU registers:CC
{FSCALE
 ^yFSCALE          Scale by a power of 2                Exceptions: I O U^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFSCALE^y

        ^yLogic^y   ST  ST * 2**ST(1)

    FSCALE interprets the contents of ST(1) as an integer, and uses it
    as an exponent of 2 with which to multiply the contents of ST. Thus,
    FSCALE provides a quick way to multiply or divide by a power of 2.

    The number in ST(1) must be -2**15 <= ST(1) < 2**15. It is truncated
    before the operation is performed. If 0 < ST(1) < 1, the result is
    undefined (and no exception is generated).


    ^yOpcode      Format^y
    D9 FD       FSCALE


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            32-38        32-38      67-86    30-32    20-31   NP
}FXTRACT:F2XM1:FMUL:FDIV:CC
{FSETPM
 ^yFSETPM          Set protected mode                   Exceptions: None^y
                                                     C3 C2 C1 C0: [?]
 ^yFSETPM^y                                               FPU: 80287 only

        ^yLogic^y   set protected mode

    This instruction does nothing on the 80387 and later FPUs; it is an
    80287 instruction only. It is required to signal that the CPU is
    entering protected mode.


    ^yOpcode      Format^y
    9B DB E4    FSETPM


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -              -           2-8        12        3       1      NP
}
{FSIN
 ^yFSIN            Sine                                 Exceptions: P U D I S^y
 ^yFSINCOS         Sine and cosine^y                      C3 C2 C1 C0: ? * * ?
                                                     FPU: 387+
 ^yFSIN^y
 ^yFSINCOS^y

                ; FSIN                  ; FSINCOS
        ^yLogic^y   ST  sin(ST)            temp  ST
                                        ST  sin(temp)
                                        push (cos(temp))

    FSIN replaces the contents of ST with sin(ST).

    FSINCOS computes both sin(ST) and cos(ST). It replaces ST with the
    sine and then pushes the cosine onto the FPU stack (leaving the sine
    in ST(1) and the cosine in ST).

    ST, expressed in radians, must be an absolute number that is less
    than 2**63. If the operand is outside the acceptable range, FSIN/
    FSINCOS set C2 to 1 and no calculation is performed.


    ^yNote^y
    The 80486 checks for interrupts while executing these instructions.
    It will abort FSIN/FSINCOS to service an interrupt.


    ^yOpcode      Format^y
    D9 FE       FSIN
    D9 FB       FSINCOS


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fsin           -            -       122-771  257-354   16-126  NP
    fsincos        -            -       194-809  292-365   17-137  NP
     Additional cycles required if operand > pi/4 (~3.141/4 = ~.785)
}FCOS:CC
{FSQRT
 ^yFSQRT           Square root                          Exceptions: I D P^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFSQRT^y

        ^yLogic^y   ST  square root (ST)

    FSQRT replaces the value in ST with its square root. ST must be
    greater than or equal to -0.0. The square root of -0.0 is -0.0.


    ^yOpcode      Format^y
    D9 FA       FSQRT


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -           180-186      180-186    122-129   83-87    70      NP
}FABS:CC
{FST
 ^yFST             Store real                           Exceptions: I O U P^y
 ^yFSTP            Store real and pop^y                   C3 C2 C1 C0: ? ? * ?

 ^yFST^y destination
 ^yFSTP^y destination

                ; FST dest              ; FSTP dest
        ^yLogic^y   destination  ST        destination  ST
                                        pop ST

    FST copies the value in ST to the destination which can be another
    register or a single or double real memory operand. FSTP copies and
    then pops ST; it also accepts an extended real memory operand.


    ^yNote^y
    If the destination is single or double real, the significand is
    rounded to the width of the destination (according to the RC field
    of the control word) and the exponent is converted to the width and
    bias of the destination format. The instructions also check for the
    over/underflow condition.
    If ST contains zero, infinity, or a NaN, the significand is not
    rounded but chopped on the right to fit the destination. The
    exponent is also chopped on the right. These operations preserve the
    value's identity as infinity or NaN (exponent all ones).
    The invalid operation exception is not raised when the destination
    is a non-empty stack element.


        ^yExamples^y
        fst   [mem64]   ; store data, keep it on the top of the stack
        fstp  ST(0)     ; pop data off the top of stack, no data transfer
        fstp  ST(1)     ; pop ST(1), leaving ST(0) on the top of the stack
        fst   ST(4)     ; copy ST(0) into ST(4), stack unchanged


    ^yOpcode      Format^y
    D9 /2       FST m32
    DD /2       FST m64
    DD D0 + i   FST ST(i)
    D9 /3       FSTP m32
    DD /3       FSTP m64
    DB /7       FSTP m80
    DD D8 + i   FSTP ST(i)


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fst  reg     15-22        15-22       11        3       1      NP
    fst  m32    (84-90)+EA    84-90       44        7       2      NP
    fst  m64    (96-104)+EA   96-104      45        8       2      NP
    fstp reg     17-24        17-24       12        3       1      NP
    fstp m32    (86-92)+EA    86-92       44        7       2      NP
    fstp m64    (98-106)+EA   98-106      45        8       2      NP
    fstp m80    (52-58)+EA    52-58       53        6       3      NP
}FIST:FRNDINT:FXCH:FXTRACT:FPU data types:FPU registers:CC
{FSTCW
 ^yFSTCW           Store control word                   Exceptions: None^y
 ^yFNSTCW          Store control word, no wait^y          C3 C2 C1 C0: ? ? ? ?

 ^yFSTCW^y destination
 ^yFNSTCW^y destination

        ^yLogic^y   destination  control word

    FSTCW stores the FPU control word register in destination (a 16-bit
    memory location). The no-wait form of this instruction is FNSTCW.
    The FSTCW form checks for unmasked FP error conditions before
    storing the control word; FNSTCW does not make this check.


    ^yOpcode      Format^y
    9B D9 /7    FSTCW m16
    D9 /7       FNSTCW m16


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fstcw  m16   12-18        12-18       15        3       2      NP
    fnstcw m16   12-18        12-18       15        3       2      NP

                The wait version may take additional cycles
}FLDCW:FSTSW:FPU registers
{FSTENV
 ^yFSTENV          Store FPU environment                Exceptions: None^y
 ^yFNSTENV         Store FPU environment, no wait^y       C3 C2 C1 C0: ? ? ? ?

 ^yFSTENV^y destination
 ^yFNSTENV^y destination

        ^yLogic^y   destination  FPU environment

    FSTENV/FNSTENV store the FPU environment at destination (a 14- or
    28-byte memory location) and then mask all FP exceptions. FSTENV
    checks for unmasked FP error conditions before writing the state;
    FNSTENV does not make this check.
    After saving the environment, FSTENV/FNSTENV set all the exception
    masks in the FPU control word, preventing FP errors from
    interrupting an exception handler.


    ^yCode^y
    ^ysegment Operation           Req'd size of   Offset into dest. of^y
    ^ysize    mode                destination     CW   SW   Tag word^y
    16-bit  Real mode/V86 mode  14 bytes        0    2    4
    16-bit  Protected mode      14 bytes        0    2    4
    32-bit  Real mode           28 bytes        0    4    8
    32-bit  Protected mode      28 bytes        0    4    8

    Note: For use in FPU error handlers, an error block is included in
          the environment. Refer to Intel's documentation for details.


        ^yExample^y
        pushf
        cli             ; No interrupts
        fstenv [bp-14]  ; Wait until not busy, then store env.
        fwait           ; Wait for instruction to finish
        popf            ; Restore flags
        ; ...


    ^yOpcode      Format^y
    9B D9 /6    FSTENV m14byte  ; 16-bit code segment
    9B D9 /6    FSTENV m28byte  ; 32-bit code segment
    D9 /6       FNSTENV m14byte ; 16-bit code segment
    D9 /6       FNSTENV m28byte ; 32-bit code segment


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fstenv   mem  (40-50)+EA    40-50     103-104   67/56    48-50   NP
    fstenvw  mem                          103-104   67/56    48-50   NP
    fstenvd  mem                          103-104   67/56    48-50   NP
    fnstenv  mem  (40-50)+EA    40-50     103-104   67/56    48-50   NP
    fnstenvw mem                          103-104   67/56    48-50   NP
    fnstenvd mem                          103-104   67/56    48-50   NP

                Cycles for real mode/protected mode
                The wait version may take additional cycles
}FLDENV:FSAVE:FPU registers
{FSTSW
 ^yFSTSW           Store status word                    Exceptions: None^y
 ^yFNSTSW          Store status word, no wait^y           C3 C2 C1 C0: ? ? ? ?

 ^yFSTSW^y destination
 ^yFNSTSW^y destination

        ^yLogic^y   destination  status word

    FSTSW stores the current value of the status word in destination,
    a 16-bit memory location, or -- in the 287+ -- the AX register of
    the 80x86. FSTSW checks for unmasked FP error conditions before
    storing the status word; FNSTSW does not make this check.


    ^yNote^y
    FSTSW/FNSTSW are primarily used for conditional branching after a
    comparison instruction (FPREM, FPREM1, or FXAM). They can also be
    used to invoke exception handlers (by polling the exception bits) in
    environments that do not use interrupts.


        ^yExample^y
        p287
        fstsw ax        ; read FPU status word
        sahf            ; copy condition codes to flags
        jc    @@c0      ; jc ~ C0, jp ~ C2, jz ~ C3


    ^yOpcode      Format^y
    9B DD /7    FSTSW m16
    9B DF E0    FSTSW AX
    DD /7       FNSTSW m16
    DF E0       FNSTSW AX


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fstsw  m16   12-18        12-18       15        3       2      NP
    fstsw  ax      -          10-16       13        3       2      NP
    fnstsw m16   12-18        12-18       15        3       2      NP
    fnstsw ax      -          10-16       13        3       2      NP

                The wait version may take additional cycles
}FPU registers:FXAM:FPREM:FCOMI:FCMOVcc
{FSUB
 ^yFSUB            Subtract real                        Exceptions: I D O U P^y
 ^yFSUBP           Subtract real and pop^y                C3 C2 C1 C0: ? ? * ?

 ^yFSUB^y [[destination,] source]
 ^yFSUBP^y [destination, ST]

        ^yLogic^y   ; FSUB source                   ; FSUB and FSUBP
                ST  ST - source                ST(1)  ST(1) - ST
                                                pop ST

                ; FSUB dest, source             ; FSUBP dest, ST
                ST(dest)  ST(dest) - ST(src)   ST(dest)  ST(dest) - ST
                                                pop ST

    FSUB/FSUBP subtract the source operand from the destination operand
    and leave the difference in the destination operand.

    The one-operand form of FSUB subtracts a (single or double real)
    memory operand from ST.
    The two-operand form of FSUB calculates the difference between two
    register operands (ST must be one of these) and returns the result
    to destination.
    FSUBP works like the two-operand FSUB but requires ST to be the
    source operand; it also pops the stack.
    The no-operand forms FSUB and FSUBP equal 'FSUBP ST(1), ST'.


    ^yOpcode      Format^y
    D8 /4       FSUB m32
    DC /4       FSUB m64
    D8 E0 + i   FSUB ST,ST(i)
    DC E8 + i   FSUB ST(i),ST
    DE E8 + i   FSUBP ST(i),ST
    DE E9       FSUB
    DE E9       FSUBP


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fsub  reg     70-100      70-100     26-37     8-20    3/1     FX
    fsub  m32    (90-120)+EA  90-120     24-32     8-20    3/1     FX
    fsub  m64    (95-125)+EA  95-125     28-36     8-20    3/1     FX
    fsubp reg     75-105      75-105     26-34     8-20    3/1     FX
}FADD:FSUBR:FISUB:CC
{FSUBR
 ^yFSUBR           Subtract real reversed               Exceptions: I D O U P^y
 ^yFSUBRP          Subtract real reversed and pop^y       C3 C2 C1 C0: ? ? * ?

 ^yFSUBR^y [[destination,] source]
 ^yFSUBRP^y [destination, ST]

        ^yLogic^y   ; FSUBR source                  ; FSUBR and FSUBRP
                ST  source - ST                ST(1)  ST - ST(1)
                                                pop ST

                ; FSUBR dest, source            ; FSUBRP dest, ST
                ST(dest)  ST(src) - ST(dest)   ST(dest)  ST - ST(dest)
                                                pop ST

    FSUBR/FSUBRP subtract the ^vdestination^v operand from the source
    operand and return the result to the destination operand.

    The one-operand form of FSUBR replaces ST with the difference
    between a (single or double real) memory operand and ST.
    The two-operand form of FSUBR calculates the difference between two
    register operands (ST must be one of these) and returns the result
    to the destination register.
    FSUBRP works like the two-operand FSUBR but requires ST to be the
    source operand; it also pops the stack.
    The no-operand forms FSUBR and FSUBRP equal 'FSUBRP ST(1), ST'.


    ^yOpcode      Format^y
    D8 /5       FSUBR m32
    DC /5       FSUBR m64
    D8 E8 + i   FSUBR ST,ST(i)
    DC E0 + i   FSUBR ST(i),ST
    DE E0 + i   FSUBRP ST(i),ST
    DE E1       FSUBR
    DE E1       FSUBRP


    ^yTiming^y
    Variations/
    operand       8087         287        387      486     Pentium
    fsubr  reg    70-100      70-100     26-37     8-20    3/1     FX
    fsubr  m32   (90-120)+EA  90-120     24-32     8-20    3/1     FX
    fsubr  m64   (95-125)+EA  95-125     28-36     8-20    3/1     FX
    fsubrp reg    75-105      75-105     26-34     8-20    3/1     FX
}FSUB:FISUB:FADD:CC
{FTST
 ^yFTST            Test for zero                        Exceptions: I D^y
                                                     C3 C2 C1 C0: * * * *
 ^yFTST^y

        ^yLogic^y   CMP ST, 0.0
                set condition codes

    FTST compares the stack top to 0.0. Following the instruction, the
    condition codes reflect the relationship between ST and 0.0.

                ^yC3 C0^y
                0  0  ST > 0.0
                0  1  ST < 0.0
                1  0  ST = +- 0.0
                1  1  ST is not comparable

    ^yNote^y
    If ST contains a NaN or has an undefined format, or a stack fault
    occurs, the invalid-operation exception is raised and the condition
    bits are set to 'unordered'.
    The sign of zero is ignored, so that -0.0 = +0.0.


    ^yOpcode      Format^y
    D9 E4       FTST


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            38-48        38-48       28        4      4/1     FX
}FCOM:FICOM:CC
{FUCOM
 ^yFUCOM           Unordered compare real               Exceptions: I D S^y
 ^yFUCOMP          Unordered compare real and pop^y       C3 C2 C1 C0: * * * *
 ^yFUCOMPP         Unordered compare real and pop twice^y FPU: 387+

 ^yFUCOM^y [source]
 ^yFUCOMP^y [source]
 ^yFUCOMPP^y

        ^yLogic^y
        ; FUCOM                ; FUCOMP              ; FUCOMPP
        if source operand      if source operand     CMP ST, ST(1)
           CMP ST, ST(i)          CMP ST, ST(i)      set condition codes
        else                   else                  pop ST
           CMP ST, ST(1)          CMP ST, ST(1)      pop ST
        endif                  endif
        set condition codes    set condition codes
                               pop ST

                ^yC3 C2 C0^y
                0  0  0     ST > source
                0  0  1     ST < source
                1  0  0     ST = source
                1  1  1     unordered

    FUCOM compares two registers and leaves the result encoded in the
    status word as shown above. If no source is specified, ST(1) is
    compared to ST. Otherwise, the source register is compared to ST.

    FUCOMP does what FUCOM does, and then pops the stack.

    FUCOMPP compares ST and ST(1), leaves the result encoded in the
    status word as shown above, and pops the stack twice.


    The FUCOM instructions perform the same operation as the FCOM
    instructions. The only difference is that the FUCOM instruction
    raises the invalid-arithmetic-operand exception (#IA) only when
    either or both operands is an SNaN or is in an unsupported format;
    QNaNs cause the condition code flags to be set to unordered, but do
    not cause an exception to be generated. The FCOM instruction raises
    an invalid-operation exception when either or both of the operands
    is a NaN value of any kind or is in an unsupported format.


    ^yOpcode      Format^y
    DD E0 + i   FUCOM ST(i)
    DD E1       FUCOM
    DD E8 + i   FUCOMP ST(i)
    DD E9       FUCOMP
    DD EA       FUCOMPP


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fucom          -            -         24        4      4/1     FX
    fucomp         -            -         26        4      4/1     FX
    fucompp        -            -         26        5      4/1     FX
}FCOM:FXAM:CC
{FUCOMI
 ^yFUCOMI          Unordered compare real (set EFLAGS)  Exceptions: I D^y
 ^yFUCOMIP         ..., and pop^y                         C3 C2 C1 C0: - - * -
                                                     FPU: PPro+
 ^yFUCOMI^y destination, source
 ^yFUCOMIP^y destination, source

    Refer to ^yFCOMI^y
}FCOMI
{FWAIT
 ^yFWAIT           Wait                                 Exceptions: None^y
                                                     C3 C2 C1 C0: ? ? ? ?
 ^yFWAIT^y                                                (CPU instruction)

        ^yLogic^y   80x86 wait

    FWAIT pauses the system until any currently executing FPU
    instruction completes. This is required because the FPU on the
    80486 SX and earlier CPU/FPU combinations can execute instructions
    in parallel with the CPU. Therefore, any FPU instruction which reads
    or writes memory could suffer from a data hazard if the main CPU
    accesses that same memory location before the FPU reads or writes
    that location.

    The FWAIT instruction lets you synchronize the operation of the FPU
    by waiting until the completion of the current FPU instruction. This
    resolves the data hazard by, effectively, inserting an explicit
    "stall" into the execution stream.


    ^yNote^y
    FWAIT is the same instruction as the 80x86 WAIT instruction.


    ^yOpcode      Format^y
    9B          FWAIT


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -              4            3          6       1-3     1-3     NP
}
{FXAM
 ^yFXAM            Examine condition flags              Exceptions: None^y
                                                     C3 C2 C1 C0: * * * *
 ^yFXAM^y

        ^yLogic^y   CMP ST, ST
                set condition codes

    FXAM causes the FPU to examine the value currently in ST and to set
    the condition codes in the status word accordingly (see CC table).


    ^yOpcode      Format^y
    D9 E5       FXAM


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            12-23        12-23      30-38      8      21      NP
}CC:FSTSW:FCOM
{FXCH
 ^yFXCH            Exchange register contents           Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? 0 ?
 ^yFXCH^y [destination]

                ; FXCH          ; FXCH dest
        ^yLogic^y   temp  ST(1)    temp  ST(i)
                ST(1)  ST      ST(i)  ST
                ST  temp       ST  temp

    FXCH swaps the contents of the destination and stack-top registers.
    If no destination is specified, ST is exchanged with ST(1).


    ^yNote^y
    Many FPU instructions operate only on ST. FXCH provides a handy way
    to operate on lower stack elements.


        ^yExample^y
        ; compute SQRT of ST(4)
        fxch ST(4)
        fsqrt
        fxch ST(4)


    ^yOpcode      Format^y
    D9 C8 + i   FXCH ST(i)
    D9 C9       FXCH


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            10-15        10-15       18        4      0-1     *

        * FXCH is pairable in the V pipe with all FX pairable instructions
}FLD:FILD:CC
{FXTRACT
 ^yFXTRACT         Extract exponent and significand     Exceptions: I^y
                                                     C3 C2 C1 C0: ? ? * ?
 ^yFXTRACT^y

        ^yLogic^y   temp  ST
                ST  exponent(ST)
                push significand(temp) into ST

    FXTRACT separates out the exponent and significand of the value in
    ST. It puts the exponent into ST and then pushes the significand
    onto the stack. (Both are represented as real numbers.)
    It leaves the exponent in ST(1) and the significand in ST.


    ^yNote^y
    FXTRACT performs a superset of the IEEE-recommended logb(x)
    function.
    If the original operand is zero, FXTRACT leaves negative infinity as
    the exponent in ST(1) and sets ST to zero with the same sign as the
    original operand. The zero-divide exception is raised in this case.
    ST(7) must be empty to avoid an invalid-operation exception.

    Note also that the exponent generated by FXTRACT is the true
    exponent, not the biased exponent used internally by the FPU.


    ^yOpcode      Format^y
    D9 F4       FXTRACT


    ^yTiming^y
    Operand       8087         287        387      486     Pentium
    -            27-55        27-55      70-76    16-20    13      NP
}FSCALE:FBSTP:CC
{FYL2X
 ^yFYL2X           Compute Y * log2(x)                  Exceptions: P^y
                                                     (operands not checked)
 ^yFYL2X^y                                                C3 C2 C1 C0: ? ? * ?

        ^yLogic^y   ST(1)  ST(1) * Log2(ST)
                pop ST

    FYL2X computes the base-2 logarithm of ST, multiplies the logarithm
    by ST(1), and puts the result in ST(1). Then it pops the stack. The
    operand in ST cannot be negative.


    ^yNote^y
    If the operand in ST is negative, the invalid-operation exception is
    raised. The 80486+ checks for interrupts while executing this
    instruction. It will abort FYL2X to service interrupts.

    If the operand is very close to 1, use FYL2XP1 instead of FYL2X.


    ^yExample^y
    Many math applications require natural logarithms (log base e) or
    common logarithms (log base 10). These are easy to calculate by
    using the following equation to change the base of a logarithm to a
    new base n:
                log(n)X = log(n)2 * log(2)X

    In this case, n is e or 10. The following programs assume that X is
    on the top of the stack; they replace X with its natural logarithm.
    The program that produces natural logs is:

                fldln2  ; push log base e of 2
                fxch    ; swap ST and ST(1)
                fyl2x   ; pop ST, replace it with natural log

    The program that produces common logs is:

                fldlg2  ; push log base 10 of 2
                fxch    ; swap ST and ST(1)
                fyl2x   ; pop ST, replace it with common log


    ^yOpcode      Format^y
    D9 F1       FYL2X


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fyl2x       900-1100     900-1100   120-538  196-329   22-111  NP
}FYL2XP1:CC
{FYL2XP1
 ^yFYL2XP1         Compute Y * log2(x+1)                Exceptions: P^y
                                                     (operands not checked)
 ^yFYL2XP1^y                                              C3 C2 C1 C0: ? ? * ?

        ^yLogic^y   ST(1)  ST(1) * Log2(ST + 1.0)
                pop ST

    FYL2XP1 computes the base-2 logarithm of (ST + 1.0), multiplies the
    logarithm by ST(1), and puts the result in ST(1). Then it pops ST.

    The operand in ST cannot be negative. The absolute value of the
    operand in ST must be greater than zero and less than SQRT(2)/2.


    ^yNote^y
    If the operand in ST is outside the acceptable range, the result of
    FYL2XP1 is undefined.
    The 80486+ checks for interrupts while executing this instruction.
    It will abort FYL2XP1 to service interrupts.
    FYL2XP1 is more accurate than FYL2X when computing the logarithm of
    numbers very close to 1.


    ^yOpcode      Format^y
    D9 F9       FYL2XP1


    ^yTiming^y
    Variations    8087         287        387      486     Pentium
    fyl2xp1     700-1000     700-1000   257-547  171-326   22-103  NP
}FYL2X:CC
{Abbreviations-FPU

        ^yAbbreviations and legends^y

        ST   ST(0)
        reg  floating point data register
        mem  memory address
        m16  memory address of 16-bit item
        m32  memory address of 32-bit item
        m64  memory address of 64-bit item
        m80  memory address of 80-bit item
        **   to the power of, e.g. 2**X = 2 to the Xth power


        ^yExceptions^y
        S   invalid operand due to stack overflow/underflow (387+)
        I   invalid operand due to other cause
        D   denormal operand
        Z   zero-divide
        O   overflow
        U   underflow
        P   inexact result (precision)


        ^yCondition codes^y
        *   changed to reflect the results of instruction
        ?   undefined after operation (may or may not have changed)
        0   always cleared


        ^yFPU instruction timings^y
        EA  cycles to calculate the Effective Address
        FX  pairs with FXCH  (Pentium)
        NP  not pairable     (Pentium)

        Timings with a hyphen indicate a range of possible timings.
        Timings with a slash (unless otherwise noted) are latency
          and throughput.
        Latency is the time between instructions dependent on the result.
        Throughput is the pipeline throughput between non-conflicting
          instructions.


        ^yFPU instruction length^y
        All FPU instructions that do not access memory are two bytes
        in length, except FWAIT which is one byte; 'wait' versions
        are one byte longer as an FWAIT is inserted before the
        instruction.

        FPU instructions that access memory are four bytes for 16-bit
        addressing and six bytes for 32-bit addressing.
}
{FPU data types

    The 80x87 FPU supports seven data types: 3 floating point types,
    1 packed decimal type, and 3 integer types.

    Type        ^yDescription^y
    ^ySingle^y      32-bit single-precision FP format
                sign     bit  31
                exponent bits 30..23   8-bit excess-127 format  (bias=07Fh)
                signi-
                 ficand  bits 22..0    24-bit one's complement format
                                       (24th bit implied)
                digits   7 to 8
                range    approx. 1.18*(10**-38) to 3.40*(10**+38)
                (this data type is sometimes called 'short real')

    ^yDouble^y      64-bit double-precision FP format
                sign     bit  63
                exponent bits 62..52   11-bit excess-1023  (bias=3FFh)
                signi-
                 ficand  bits 51..0    53-bit one's complement format
                                       (53rd bit implied)
                digits   15 to 16
                range    approx. 2.23*(10**-308) to 1.79*(10**+308)
                (this data type is sometimes called 'long real')

    ^yExtended^y    80-bit extended-precision FP format
                sign     bit  79
                exponent bits 78..64   15-bit excess-16383  (bias=3FFFh)
                signi-
                 ficand  bits 63..0    64-bit one's complement format
                                       (no implied bit)
                digits   19 to 20
                range    approx. 3.37*(10**-4932) to 1.18*(10**+4932)
                (this data type is sometimes called 'temporary real')


    ^yPacked^y      80-bit signed packed decimal integer
    ^ydecimal^y     sign           bit  79
                ignored by FPU bits 78..72
                18 BCD digits  bits 71..0
                range          -999,999,999,999,999,999 to
                               +999,999,999,999,999,999

    ^yWord^y        16-bit two's complement integer
    ^yinteger^y     range -32,768 to 32,767

    ^yShort^y       32-bit two's complement integer
    ^yinteger^y     range -2,147,483,648 to +2,147,483,647

    ^yLong^y        64-bit two's complement integer
    ^yinteger^y     range approx. -9.22*(10**18) to +9.22*(10**18)


        ^yNote^y
        With the exception of the 80-bit extended-precision format, all
        these data types exist in memory only. When they are loaded into
        FPU data registers they are automatically converted to extended-
        precision format and operated on in that format. When stored
        from an FPU data register into memory they are automatically
        converted to the format of the destination.


        ^yNote^y
        For the single-real and double-real formats, only the fraction
        part of the significand is encoded. The integer is assumed to be
        one for all numbers except zero and denormalized finite numbers.
        For the extended-real format, the integer is contained in bit
        63, and the most-significant fraction bit is bit 62. Here, the
        integer is explicitly set to one for normalized numbers,
        infinities, and NaNs, and to zero for zero and denormalized
        numbers.


        ^yData type correspondence^y
        FPU                     TASM    C++          Turbo Pascal   Bytes
        single-precision FP     dword   float        single           4
        double-precision FP     qword   double       double           8
        extended-precision FP   tbyte   long double  extended        10
        packed decimal          tbyte   N/A          N/A             10
        word integer            word    int          integer          2
        short integer           dword   long int     longint          4
        long integer            qword   ?            comp             8

}FPU registers
{FPU registers

        ^yFPU registers^y
        The FPU has 13 registers: 8 data registers, a control register,
        a status register, a tag register, an instruction pointer, and
        a data pointer.

            79       64         Data registers                0    1 0
        R7  Ŀ  Ŀ
        R6  Ĵ  Ĵ
        R5  Ĵ  Ĵ
        R4  Ĵ  Ĵ
        R3  Ĵ  Ĵ
        R2  Ĵ  Ĵ
        R1  Ĵ  Ĵ
        R0  Ĵ  Ĵ
              
             bit  79     sign                                       
             bits 78..64 exponent                                   
             bits 63..0  significand             15           0     
                                                 Ŀ    
                                                  Control reg     
        Ŀ          Ĵ    
         Instrvction pointer                    Statvs reg      
        Ĵ          Ĵ    
         Data pointer                           Tag word      
                  


        ^yThe FPU register stack^y
        Some FPU instructions operate on the data registers explicitly,
        others access the registers as a stack. Those instructions
        operate on the top one or two stack elements. Here, the TOP
        field in the FPU status word identifies the current top-of-stack
        register. A push operation decrements TOP by one and loads a
        value into the new top register. A pop operation stores the
        value from the current top register and increments TOP by one.
        Like other x86 stacks, the FPU register stack grows downward,
        toward lower-numbered registers.

        Some FPU instructions address the data registers implicitly,
        while others address them explicitly. Many instructions operate
        on the register at the top of the stack by implicitly addressing
        the register at which TOP points. Other instructions let the
        programmer specify which register to use, ^yrelative^y to TOP,
        using the ST(i) notation.


        ^yStatus word^y
        The status word reflects the overall state of the FPU.

          15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
        Ŀ
         B  C3    TOP     C2 C1 C0 ES S  P  U  O  Z  D  I 
        

        B       busy; reflects the contents of the ES field
                (for 8087 compatibility)
        TOP     register number of the top of stack
        C3..C0  condition codes
        ES      error status, holds the logical OR of bits 0..5
                (a.k.a. IR field -- interrupt request pending)
        Exception flags:
        S       stack over/underflow (387+)
        P       precision lost
        U       underflow
        O       overflow
        Z       zero-divide
        D       denormalization
        I       invalid operation (e.g. stack overflow)


        ^yControl word^y
        The contents of the control word determine the FPU's mode of
        operation.

          15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
        Ŀ
                  IN  RC     PC    IE    P  U  O  Z  D  I 
        

        IN      infinity (8087 and 80287 only)
        RC      rounding control (default is 00)
                00  to nearest or even
                01  round down
                10  round up
                11  truncate (chop)
        PC      mantissa precision control
                00  24 bits
                01  reserved
                10  53 bits
                11  64 bits
        IE      FPU interrupt enable (active on 8087 only)
        5..0    Exception masks (if set, condition is ignored)


        ^yTag word^y
        The 16-bit tag word contains 8 two-bit tags where each tag
        reports on the content of an FPU data register. Primarily, the
        tag word optimizes the FPU's performance and stack handling by
        making it possible to distinguish between empty and non-empty
        register locations. It also enables exception handlers to check
        the contents of a stack location without having to decode the
        actual data.

        15                                                             0
        Ŀ
         Tag(7) Tag(6) Tag(5) Tag(4) Tag(3) Tag(2) Tag(1) Tag(0)
        

        A tag can have the following values:
                00  valid
                01  zero
                10  QNaN, SNaN, infinity, denormal and unsupported formats
                11  empty


        ^yInstruction and data pointers^y
        Because the FPU operates in parallel with the CPU's ALU
        (arithmetic and logic unit), an error the FPU detects may be
        reported after the ALU executes the instruction that caused the
        error. To allow a failing numeric instruction to be identified,
        the FPU contains two pointer registers that supply the address
        of the failing instruction and (if appropriate) the address of
        its numeric operand.
        Refer to Intel's documentation for details, esp. for pre-387
        FPUs.
}FPU data types:CC - FPU condition codes:FINIT
{CC - FPU condition codes:CC

  ^yFPU condition codes^y                           O/U# = stack over/vnderflow if
  (statvs word bits 14,10,9,8)                         stack exception bit set

                         Ŀ
  ^yInstrvction^y              ^yC3^y     ^yC2^y     ^yC1^y     ^yC0^y    ^yInterpretation^y
  
  FCOM, FCOMP, FCOMPP,                      1            Zero resvlt or O/U#
  FUCOM, FUCOMP,            0       0               0    ST > operand
  FUCOMPP, FICOM,           0       0               1    ST < operand
  FICOMP, FTST              1       0               0    ST = operand
                            1       1               1    Undefined/vnordered
                                    1                    Not comparable
  
  FCOMI, FCOMIP,                           1    Unde-  O/U#
  FUCOMI, FUCOMIP           Undefined           fined 
  
  FXAM                                      1            Sign or O/U#
                            0       0       0       0    +Unnormalized
                            0       0       1       0    -Unnormalized
                            0       1       0       0    +Normalized
                            0       1       1       0    -Normalized
                            1       0       0       0    +0
                            1       0       1       0    -0
                            1       1       0       0    +Denormalized
                            1       1       1       0    -Denormalized
                            0       0       0       1    +NaN
                            0       0       1       1    -NaN
                            0       1       0       1    +Infinity
                            0       1       1       1    -Infinity
                            1                       1    Empty register
  
  FABS, FCHS, FXCH,                        1           Zero resvlt or O/U#
  FDECSTP, FINCSTP,                                   
  FXTRACT, FLD, FILD,       Undefined           Unde- 
  FBLD, FSTP (80 bit),                          fined 
  constant loads                                      
  
  FST, FSTP (< 80 bit),                    1           Rovndvp or O/U#
  FBST, FIST, FRNDINT,                                
  FADD, FDIV, FDIVR,                                  
  FMUL, FSUB, FSUBR,        Undefined           Unde- 
  F2XM1, FPATAN, FSCALE,                        fined 
  FSQRT, FYL2X, FYL2XP1,                              
  FCMOVcc                                             
  
  FPTAN, FSIN, FCOS,               0                   Redvction complete
  FSINCOS                 Unde-    1            Unde-  Redvction incomplete
                          fined            ?    fined  Undefined if C2=1
                                           1           Rovndvp or O/U#
  
  FPREM, FPREM1                     0                    Redvction complete
                                    1                    Redvction incomplete
                                            1            O/U#
                           Q0              Q1      Q2    Qvotient L.O. bits
  
  FLDENV, FRSTOR                Loaded from memory      
  
  FINIT, FSAVE                    Cleared to zero       
  
  FLDCW, FSTCW, FSTSW,                                  
  FSTENV, FCLEX                     Undefined           
  
                           ^yC3^y     ^yC2^y     ^yC1^y     ^yC0^y   
                         
}FPU registers
{^yMMX registers and data types^y

    The MMX technology is an extension to the Intel Architecture (IA)
    instruction set. MMX adds 57 new opcodes, a new 64-bit quadword data
    type, and eight directly addressable 64-bit registers.


        ^yMMX register set^y

         1 0      63                                     0
        Ŀ    Ŀ MM7
        Ĵ    Ĵ MM6
        Ĵ    Ĵ MM5
        Ĵ    Ĵ MM4
        Ĵ    Ĵ MM3
        Ĵ    Ĵ MM2
        Ĵ    Ĵ MM1
        Ĵ    Ĵ MM0
            
         Tag      MMX register
        field


        ^yPacked data types^y

        Packed byte: 8 bytes packed into 64 bits:
        63                            32 31           16 15    8 7     0
        Ŀ
                                                                
        

        Packed word: 4 words packed into 64 bits:
        63                            32 31           16 15            0
        Ŀ
                                                                    
        

        Packed doubleword: 2 doublewords packed into 64 bits:
        63                            32 31                            0
        Ŀ
                                                                      
        
}
{^yMMX instrvction set svmmary^y

    ^yMMX instruction set summary^y (P55C and Klamath)

    The table below contains a summary of the MMX instruction set. The
    instruction mnemonics below are the base set of mnemonics; most
    instructions have multiple variations (e.g., packed-byte, -word, and
    -dword variations).
    Complete information on the MMX instructions may be found in the
    Intel Architecture MMX (tm) Technology Programmer's Reference Manual.


    ^yPacked Arithmetic       Wrap Around     Signed Sat      Unsigned Sat^y
    Addition                PADD            PADDS           PADDUS
    Subtraction             PSUB            PSUBS           PSUBUS
    Multiplication          PMULL/H
    Multiply & add          PMADD
    Shift right Arithmetic  PSRA
    Compare                 PCMPcc

    ^yConversions             Regular         Signed Sat      Unsigned Sat^y
    Pack                                    PACKSS          PACKUS
    Unpack                  PUNPCKL/H

    ^yLogical Operations      Packed          Full 64-bit^y
    And                                     PAND
    And not                                 PANDN
    Or                                      POR
    Exclusive or                            PXOR
    Shift left              PSLL            PSLL
    Shift right             PSRL            PSRL

    ^yTransfers and Memory^y
    ^yOperations              32-bit          64-bit^y
    Register-register move  MOVD            MOVQ
    Load from memory        MOVD            MOVQ
    Store to memory         MOVD            MOVQ

    ^yMiscellaneous^y
    Empty MMX state         EMMS


    The following functions are used in the algorithmic descriptions

    ZeroExtend (value)
                        returns a value zero-extended to the operand-
                        size attribute of the instruction. For example,
                        if OperandSize = 32, ZeroExtend of a byte value
                        of -10 converts the byte from 0F6h to doubleword
                        with hexadecimal value 000000F6h. If the value
                        passed to ZeroExtend and the operand-size
                        attribute are the same size, ZeroExtend returns
                        the value unaltered.

    SignExtend (value)
                        returns a value sign-extended to the operand-size
                        attribute of the instruction. For example, if
                        OperandSize = 32, SignExtend of a byte containing
                        the value -10 converts the byte from 0F6h to
                        doubleword with hexadecimal value 0FFFFFFF6h. If
                        the value passed to SignExtend and the operand-
                        size attribute are the same size, SignExtend
                        returns the value unaltered.

    SaturateSignedWordToSignedByte
                        converts a signed 16-bit value to a signed 8-bit
                        value. If the signed 16-bit value is less than
                        -128, it is represented by the saturated value
                        -128 (80h). If it is greater than 127, it is
                        represented by the saturated value 127 (7Fh).

    SaturateSignedDwordToSignedWord
                        converts a signed 32-bit value to a signed
                        16-bit value. If the signed 32-bit value is less
                        than -32768, it is represented by the saturated
                        value -32768 (8000h). If it is greater than
                        32767, it is represented by the saturated value
                        32767 (7FFFh).

    SaturateSignedWordToUnsignedByte
                        converts a signed 16-bit value to an unsigned
                        8-bit value. If the signed 16-bit value is less
                        than zero it is represented by the saturated
                        value zero (00h). If it is greater than 255
                        it is represented by the saturated value 255
                        (0FFh).

    SaturateToSignedByte
                        represents the result of an operation as a signed
                        8-bit value. If the result is less than -128, it
                        is represented by the saturated value -128 (80h).
                        If it is greater than 127, it is represented by
                        the saturated value 127 (7Fh).

    SaturateToSignedWord
                        represents the result of an operation as a signed
                        16-bit value. If the result is less than -32768,
                        it is represented by the saturated value -32768
                        (8000h). If it is greater than 32767, it is
                        represented by the saturated value 32767 (7FFFh).

    SaturateToUnsignedByte
                        represents the result of an operation as an
                        unsigned 8-bit value. If the result is less than
                        zero it is represented by the saturated value
                        zero (00h). If it is greater than 255, it is
                        represented by the saturated value 255 (0FFh).

    SaturateToUnsignedWord
                        represents the result of an operation as an
                        unsigned 16-bit value. If the result is less
                        than zero it is represented by the saturated
                        value zero (00h). If it is greater than 65535,
                        it is represented by the saturated value 65535
                        (0FFFFh).

}
{EMMS            Empty MMX State:EMMS
 ^yEMMS            Empty MMX State^y

 ^yEMMS^y                                                 CPU: MMX

        ^yLogic^y    TAG word <- 0FFFFh

    The EMMS instruction sets the values of the floating-point tag word
    to empty (all ones). EMMS marks the registers as available, so they
    can subsequently be used by FPU instructions.

    If an FP instruction loads into one of the registers before it has
    been reset by EMMS, an FP stack overflow can occur, which results in
    an FP exception or incorrect result.

    All other MMX instructions validate the entire FP tag word (all
    zeros).


    ^yNote^y
    This instruction must be used to clear the MMX state at the end of
    all MMX routines/code blocks, and before calling other routines that
    may execute floating-point instructions.


    ^yOpcode      Format^y
    0F 77       EMMS
}
{MOVD            Move Dovbleword to/from MMX Register:MOVD
 ^yMOVD            Move Doubleword to/from MMX Register^y

 ^yMOVD^y destination, source                             CPU: MMX

        ^yLogic^y   if destination = mm
                   mm(63..0) <- ZeroExtend(r/m32)
                else
                   r/m32 <- mm(31..0)
                endif

    MOVD copies 32 bits from the source operand to the destination
    operand.
    The destination and source operands can be either MMX registers,
    32-bit memory operands, or 32-bit integer registers. MOVD cannot
    transfer data from an MMX register to an MMX register, from memory
    to memory, or from an integer register to an integer register.

    When the destination operand is an MMX register, the 32-bit source
    operand is written to the low-order 32 bits of the 64-bit destination
    register. The destination register is zero-extended to 64 bits.

    When the source operand is an MMX register, the low-order 32 bits of
    the MMX register are written to the 32-bit integer register or 32-bit
    memory location.


    ^yOpcode      Format^y
    0F 6E /r    MOVD mm, r/m32
    0F 7E /r    MOVD r/m32, mm
}MOVQ
{MOVQ            Move Qvadword to/from MMX Register:MOVQ
 ^yMOVQ            Move Quadword to/from MMX Register^y

 ^yMOVQ^y destination, source                             CPU: MMX

        ^yLogic^y   destination <- source

    MOVQ copies 64 bits from the source operand to the destination
    operand. The destination and source operands can be either MMX
    registers or 64-bit memory operands. MOVQ cannot transfer data from
    memory to memory.

    When the destination is an MMX register and the source is a 64-bit
    memory operand, the 64 bits of data at the memory location are
    copied into the MMX register.

    When the destination is a 64-bit memory operand and the source is an
    MMX register, the 64 bits of data are copied from the MMX register
    into the memory location.

    When the destination and source are both MMX registers, the contents
    of the MMX register (source) are copied into an MMX register
    (destination).


    ^yOpcode      Format^y
    0F 6F /r    MOVQ mm, mm/m64
    0F 7F /r    MOVQ mm/m64, mm
}
{PACKSSDW        Pack Dovblewords into Words (Signed with Satvration):PACKSSDW
 ^yPACKSSDW        Pack Doublewords into Words (Signed with Saturation)^y

 ^yPACKSSDW^y destination, source                         CPU: MMX

        ^yLogic^y
        mm(15..0)  <- SaturateSignedDwordToSignedWord mm(31..0)
        mm(31..16) <- SaturateSignedDwordToSignedWord mm(63..32)
        mm(47..32) <- SaturateSignedDwordToSignedWord mm/m64(31..0)
        mm(63..48) <- SaturateSignedDwordToSignedWord mm/m64(63..32)

    PACKSSDW packs two signed doublewords from the source operand and
    two signed doublewords from the destination operand into four signed
    words in the destination register.
    If the signed value of a doubleword is larger or smaller than the
    range of a signed word, the value is saturated (in the case of an
    overflow to 7FFFh, and in the case of an underflow to 8000h).

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 6B /r    PACKSSDW mm, mm/m64
}
{PACKSSWB        Pack Words into Bytes (Signed with Saturation):PACKSSWB
 ^yPACKSSWB        Pack Words into Bytes (Signed with Saturation)^y

 ^yPACKSSWB^y destination, source                         CPU: MMX

        ^yLogic^y
        mm(7..0)   <- SaturateSignedWordToSignedByte mm(15..0)
        mm(15..8)  <- SaturateSignedWordToSignedByte mm(31..16)
        mm(23..16) <- SaturateSignedWordToSignedByte mm(47..32)
        mm(31..24) <- SaturateSignedWordToSignedByte mm(63..48)
        mm(39..32) <- SaturateSignedWordToSignedByte mm/m64(15..0)
        mm(47..40) <- SaturateSignedWordToSignedByte mm/m64(31..16)
        mm(55..48) <- SaturateSignedWordToSignedByte mm/m64(47..32)
        mm(63..56) <- SaturateSignedWordToSignedByte mm/m64(63..48)

    PACKSSWB packs four signed words from the source operand and four
    signed words from the destination operand into eight signed bytes in
    the destination register.
    If the signed value of a word is larger or smaller than the range of
    a signed byte, the value is saturated (in the case of an overflow to
    7Fh, and in the case of an underflow to 80h).

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 63 /r    PACKSSWB mm, mm/m64
}
{PACKUSWB        Pack Words into Bytes (Unsigned with Saturation):PACKUSWB
 ^yPACKUSWB        Pack Words into Bytes (Unsigned with Saturation)^y

 ^yPACKUSWB^y destination, source                         CPU: MMX

        ^yLogic^y
        mm(7..0)   <- SaturateSignedWordToUnsignedByte mm(15..0)
        mm(15..8)  <- SaturateSignedWordToUnsignedByte mm(31..16)
        mm(23..16) <- SaturateSignedWordToUnsignedByte mm(47..32)
        mm(31..24) <- SaturateSignedWordToUnsignedByte mm(63..48)
        mm(39..32) <- SaturateSignedWordToUnsignedByte mm/m64(15..0)
        mm(47..40) <- SaturateSignedWordToUnsignedByte mm/m64(31..16)
        mm(55..48) <- SaturateSignedWordToUnsignedByte mm/m64(47..32)
        mm(63..56) <- SaturateSignedWordToUnsignedByte mm/m64(63..48)

    PACKUSWB packs and saturates four signed words of the source operand
    and four signed words of the destination operand into eight unsigned
    bytes. The result is written to the destination operand.

    If the signed value of a word is larger or smaller than the range of
    an unsigned byte, the value is saturated (in the case of an overflow
    to 0FFh, and in the case of an underflow to 00h).

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 67 /r    PACKUSWB mm, mm/m64
}
{PADDB           Add with Wrap-around on Byte:PADDB
 ^yPADDB           Add with Wrap-around on Byte^y

 ^yPADDB^y destination, source                            CPU: MMX

        ^yLogic^y   mm(7..0)   <- mm(7..0)   + mm/m64(7..0)
                mm(15..8)  <- mm(15..8)  + mm/m64(15..8)
                mm(23..16) <- mm(23..16) + mm/m64(23..16)
                mm(31..24) <- mm(31..24) + mm/m64(31..24)
                mm(39..32) <- mm(39..32) + mm/m64(39..32)
                mm(47..40) <- mm(47..40) + mm/m64(47..40)
                mm(55..48) <- mm(55..48) + mm/m64(55..48)
                mm(63..56) <- mm(63..56) + mm/m64(63..56)

    PADDB adds the bytes of the source operand to the bytes of the
    destination operand and writes the results to the destination.

    When the result is too large to be represented in a packed byte
    (overflow), the result wraps around and the lower 8 bits are written
    to the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F FC /r    PADDB mm, mm/m64
}
{PADDD           Add with Wrap-around on Doubleword:PADDD
 ^yPADDD           Add with Wrap-around on Doubleword^y

 ^yPADDD^y destination, source                            CPU: MMX

        ^yLogic^y   mm(31..0)  <- mm(31..0)  + mm/m64(31..0)
                mm(63..32) <- mm(63..32) + mm/m64(63..32)

    PADDD adds the doublewords of the source operand to the doublewords
    of the destination operand and writes the results to destination.

    When the result is too large to be represented in a packed doubleword
    (overflow), the result wraps around and the lower 32 bits are written
    to the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F FE /r    PADDD mm, mm/m64
}
{PADDSB          Add Signed with Saturation on Byte:PADDSB
 ^yPADDSB          Add Signed with Saturation on Byte^y

 ^yPADDSB^y destination, source                           CPU: MMX

        ^yLogic^y
        mm(7..0)   <- SaturateToSignedByte( mm(7..0)   + mm/m64(7..0)   )
        mm(15..8)  <- SaturateToSignedByte( mm(15..8)  + mm/m64(15..8)  )
        mm(23..16) <- SaturateToSignedByte( mm(23..16) + mm/m64(23..16) )
        mm(31..24) <- SaturateToSignedByte( mm(31..24) + mm/m64(31..24) )
        mm(39..32) <- SaturateToSignedByte( mm(39..32) + mm/m64(39..32) )
        mm(47..40) <- SaturateToSignedByte( mm(47..40) + mm/m64(47..40) )
        mm(55..48) <- SaturateToSignedByte( mm(55..48) + mm/m64(55..48) )
        mm(63..56) <- SaturateToSignedByte( mm(63..56) + mm/m64(63..56) )

    PADDSB adds the signed bytes of the source operand to the signed
    bytes of the destination operand and writes the results to the
    destination MMX register.

    If the result is larger or smaller than the range of a signed byte,
    the value is saturated (in the case of an overflow to 7Fh, and in
    the case of an underflow to 80h).

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F EC /r    PADDSB mm, mm/m64
}
{PADDSW          Add Signed with Saturation on Word:PADDSW
 ^yPADDSW          Add Signed with Saturation on Word^y

 ^yPADDSW^y destination, source                           CPU: MMX

        ^yLogic^y
        mm(15..0)  <- SaturateToSignedWord( mm(15..0)  + mm/m64(15..0)  )
        mm(31..16) <- SaturateToSignedWord( mm(31..16) + mm/m64(31..16) )
        mm(47..32) <- SaturateToSignedWord( mm(47..32) + mm/m64(47..32) )
        mm(63..48) <- SaturateToSignedWord( mm(63..48) + mm/m64(63..48) )

    PADDSW adds the signed words of the source operand to the signed
    words of the destination operand and writes the results to the
    destination MMX register.

    If the result is larger or smaller than the range of a signed word,
    the value is saturated (in the case of an overflow to 7FFFh, and in
    the case of an underflow to 8000h).

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F ED /r    PADDSW mm, mm/m64
}
{PADDUSB         Add Unsigned with Saturation on Byte:PADDUSB
 ^yPADDUSB         Add Unsigned with Saturation on Byte^y

 ^yPADDUSB^y destination, source                          CPU: MMX

        ^yLogic^y
        mm(7..0)   <- SaturateToUnsignedByte( mm(7..0)   + mm/m64(7..0)   )
        mm(15..8)  <- SaturateToUnsignedByte( mm(15..8)  + mm/m64(15..8)  )
        mm(23..16) <- SaturateToUnsignedByte( mm(23..16) + mm/m64(23..16) )
        mm(31..24) <- SaturateToUnsignedByte( mm(31..24) + mm/m64(31..24) )
        mm(39..32) <- SaturateToUnsignedByte( mm(39..32) + mm/m64(39..32) )
        mm(47..40) <- SaturateToUnsignedByte( mm(47..40) + mm/m64(47..40) )
        mm(55..48) <- SaturateToUnsignedByte( mm(55..48) + mm/m64(55..48) )
        mm(63..56) <- SaturateToUnsignedByte( mm(63..56) + mm/m64(63..56) )

    PADDUSB adds the unsigned bytes of the source operand to the unsigned
    bytes of the destination operand and stores the result in destination.

    If the result is larger than the range of an unsigned byte (overflow),
    the value is saturated to 0FFh. If the result is smaller than the
    range of an unsigned byte (underflow), the value is saturated to 00h.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F DC /r    PADDUSB mm, mm/m64
}
{PADDUSW         Add Unsigned with Saturation on Word:PADDUSW
 ^yPADDUSW         Add Unsigned with Saturation on Word^y

 ^yPADDUSW^y destination, source                          CPU: MMX

        ^yLogic^y
        mm(15..0)  <- SaturateToUnsignedWord( mm(15..0)  + mm/m64(15..0)  )
        mm(31..16) <- SaturateToUnsignedWord( mm(31..16) + mm/m64(31..16) )
        mm(47..32) <- SaturateToUnsignedWord( mm(47..32) + mm/m64(47..32) )
        mm(63..48) <- SaturateToUnsignedWord( mm(63..48) + mm/m64(63..48) )

    PADDUSW adds the unsigned words of the source operand to the unsigned
    words of the destination operand and stores the result in destination.

    If the result is larger than the range of an unsigned word (overflow),
    the value is saturated to 0FFFFh. If the result is smaller than the
    range of an unsigned word (underflow), the value is saturated to zero.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F DD /r    PADDUSW mm, mm/m64
}
{PADDW           Add with Wrap-around on Word:PADDW
 ^yPADDW           Add with Wrap-around on Word^y

 ^yPADDW^y destination, source                            CPU: MMX

        ^yLogic^y   mm(15..0)  <- mm(15..0)  + mm/m64(15..0)
                mm(31..16) <- mm(31..16) + mm/m64(31..16)
                mm(47..32) <- mm(47..32) + mm/m64(47..32)
                mm(63..48) <- mm(63..48) + mm/m64(63..48)

    PADDW adds the words of the source operand to the words of the
    destination operand and writes the results to the destination.

    When the result is too large to be represented in a packed word
    (overflow), the result wraps around and the lower 16 bits are
    written to the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F FD /r    PADDW mm, mm/m64
}
{PAND            Bitwise AND:PAND
 ^yPAND            Bitwise AND^y

 ^yPAND^y destination, source                             CPU: MMX

        ^yLogic^y   destination <- destination AND source

    PAND performs a bitwise logical AND on 64 bits of the source and
    destination operands, and returns the result to destination.

    Each bit of the result of the PAND instruction is set to 1 if the
    corresponding bits of the operands are both 1. Otherwise, it is set
    to 0.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F DB /r    PAND mm, mm/m64
}
{PANDN           Bitwise AND NOT:PANDN
 ^yPANDN           Bitwise AND NOT^y

 ^yPANDN^y destination, source                            CPU: MMX

        ^yLogic^y   destination <- (NOT destination) AND source

    PANDN performs a bitwise logical NOT on the 64 bits of the
    destination operand. The NOT inverts each of the 64 bits of the
    destination register so that every 1 becomes a 0, and vice versa.
    The instruction then performs a bitwise logical AND on the inverted
    64 bits of the destination operand and on the source operand. Each
    bit of the result of the AND instruction is set to 1 if the
    corresponding bits are 1. Otherwise, it is set to 0. The result is
    written to the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F DF /r    PANDN mm, mm/m64
}
{PCMPEQB         Packed Compare for Equal, Byte:PCMPEQB
 ^yPCMPEQB         Packed Compare for Equal, Byte^y

 ^yPCMPEQB^y destination, source                          CPU: MMX

        ^yLogic^y   if mm(7..0)   = mm/m64(7..0)
                     mm(7..0)   <- 0FFh
                else mm(7..0)   <- 0
                endif
                if mm(15..8)  = mm/m64(15..8)
                     mm(15..8)  <- 0FFh
                else mm(15..8)  <- 0
                endif
                if mm(23..16) = mm/m64(23..16)
                     mm(23..16) <- 0FFh
                else mm(23..16) <- 0
                endif
                if mm(31..24) = mm/m64(31..24)
                     mm(31..24) <- 0FFh
                else mm(31..24) <- 0
                endif
                if mm(39..32) = mm/m64(39..32)
                     mm(39..32) <- 0FFh
                else mm(39..32) <- 0
                endif
                if mm(47..40) = mm/m64(47..40)
                     mm(47..40) <- 0FFh
                else mm(47..40) <- 0
                endif
                if mm(55..48) = mm/m64(55..48)
                     mm(55..48) <- 0FFh
                else mm(55..48) <- 0
                endif
                if mm(63..56) = mm/m64(63..56)
                     mm(63..56) <- 0FFh
                else mm(63..56) <- 0
                endif

    PCMPEQB compares the bytes in the destination operand to the
    corresponding bytes in the source operand. If the data elements are
    equal, the corresponding data element in destination is set to all
    ones. If they are not equal, the corresponding data element is set
    to all zeros.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 74 /r    PCMPEQB mm, mm/m64
}
{PCMPEQD         Packed Compare for Equal, Doubleword:PCMPEQD
 ^yPCMPEQD         Packed Compare for Equal, Doubleword^y

 ^yPCMPEQD^y destination, source                          CPU: MMX

        ^yLogic^y   if mm(31..0)  = mm/m64(31..0)
                     mm(31..0) <- 0FFFFFFFFh
                else mm(31..0) <- 0
                endif
                if mm(63..32) = mm/m64(63..32)
                     mm(63..32) <- 0FFFFFFFFh
                else mm(63..32) <- 0
                endif

    PCMPEQD compares the doublewords in the destination operand to the
    corresponding doublewords in the source operand. If the data elements
    are equal, the corresponding data element in destination is set to
    all ones. If they are not equal, the corresponding data element is
    set to all zeros.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 76 /r    PCMPEQD mm, mm/m64
}
{PCMPEQW         Packed Compare for Equal, Word:PCMPEQW
 ^yPCMPEQW         Packed Compare for Equal, Word^y

 ^yPCMPEQW^y destination, source                          CPU: MMX

        ^yLogic^y   if mm(15..0)  = mm/m64(15..0)
                     mm(15..0)  <- 0FFFFh
                else mm(15..0)  <- 0
                endif
                if mm(31..16) = mm/m64(31..16)
                     mm(31..16) <- 0FFFFh
                else mm(31..16) <- 0
                endif
                if mm(47..32) = mm/m64(47..32)
                     mm(47..32) <- 0FFFFh
                else mm(47..32) <- 0
                endif
                if mm(63..48) = mm/m64(63..48)
                     mm(63..48) <- 0FFFFh
                else mm(63..48) <- 0
                endif

    PCMPEQW compares the words in the destination operand to the
    corresponding words in the source operand. If the data elements are
    equal, the corresponding data element in the destination register is
    set to all ones. If they are not equal, the corresponding data
    element is set to all zeros.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 75 /r    PCMPEQW mm, mm/m64
}
{PCMPGTB         Packed Compare for Greater Than, Byte:PCMPGTB
 ^yPCMPGTB         Packed Compare for Greater Than, Byte^y

 ^yPCMPGTB^y destination, source                          CPU: MMX

        ^yLogic^y   if mm(7..0)   > mm/m64(7..0)
                     mm(7..0)   <- 0FFh
                else mm(7..0)   <- 0
                endif
                if mm(15..8)  > mm/m64(15..8)
                     mm(15..8)  <- 0FFh
                else mm(15..8)  <- 0
                endif
                if mm(23..16) > mm/m64(23..16)
                     mm(23..16) <- 0FFh
                else mm(23..16) <- 0
                endif
                if mm(31..24) > mm/m64(31..24)
                     mm(31..24) <- 0FFh
                else mm(31..24) <- 0
                endif
                if mm(39..32) > mm/m64(39..32)
                     mm(39..32) <- 0FFh
                else mm(39..32) <- 0
                endif
                if mm(47..40) > mm/m64(47..40)
                     mm(47..40) <- 0FFh
                else mm(47..40) <- 0
                endif
                if mm(55..48) > mm/m64(55..48)
                     mm(55..48) <- 0FFh
                else mm(55..48) <- 0
                endif
                if mm(63..56) > mm/m64(63..56)
                     mm(63..56) <- 0FFh
                else mm(63..56) <- 0
                endif

    PCMPGTB compares the signed bytes in the destination operand to the
    signed bytes in the source operand. If the signed data elements in
    destination are greater than those in the source operand, the
    corresponding data element in the destination operand is set to all
    ones. Otherwise, it is set to all zeros.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 64 /r    PCMPGTB mm, mm/m64
}
{PCMPGTD         Packed Compare for Greater Than, Doubleword:PCMPGTD
 ^yPCMPGTD         Packed Compare for Greater Than, Doubleword^y

 ^yPCMPGTD^y destination, source                          CPU: MMX

        ^yLogic^y   if mm(31..0)  > mm/m64(31..0)
                     mm(31..0)  <- 0FFFFFFFFh
                else mm(31..0)  <- 0
                endif
                if mm(63..32) > mm/m64(63..32)
                     mm(63..32) <- 0FFFFFFFFh
                else mm(63..32) <- 0
                endif

    PCMPGTD compares the signed doublewords in the destination operand
    to the signed doublewords in the source operand. If the signed data
    elements in destination are greater than those in the source operand,
    the corresponding data element in the destination operand is set to
    all ones. Otherwise, it is set to all zeros.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 66 /r    PCMPGTD mm, mm/m64
}
{PCMPGTW         Packed Compare for Greater Than, Word:PCMPGTW
 ^yPCMPGTW         Packed Compare for Greater Than, Word^y

 ^yPCMPGTW^y destination, source                          CPU: MMX

        ^yLogic^y   if mm(15..0)  > mm/m64(15..0)
                     mm(15..0)  <- 0FFFFh
                else mm(15..0)  <- 0
                endif
                if mm(31..16) > mm/m64(31..16)
                     mm(31..16) <- 0FFFFh
                else mm(31..16) <- 0
                endif
                if mm(47..32) > mm/m64(47..32)
                     mm(47..32) <- 0FFFFh
                else mm(47..32) <- 0
                endif
                if mm(63..48) > mm/m64(63..48)
                     mm(63..48) <- 0FFFFh
                else mm(63..48) <- 0
                endif

    PCMPGTW compares the signed words in the destination operand to the
    signed words in the source operand. If the signed data elements in
    destination are greater than those in the source operand, the
    corresponding data element in the destination operand is set to all
    ones. Otherwise, it is set to all zeros.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F 65 /r    PCMPGTW mm, mm/m64
}
{PMADDWD         Packed Multiply and Add:PMADDWD
 ^yPMADDWD         Packed Multiply and Add^y

 ^yPMADDWD^y destination, source                          CPU: MMX

        ^yLogic^y
        mm(31..0)  <- mm(15..0) *mm/m64(15..0)  + mm(31..16)*mm/m64(31..16)
        mm(63..32) <- mm(47..32)*mm/m64(47..32) + mm(63..48)*mm/m64(63..48)

    The PMADDWD instruction multiplies the four signed words of the
    destination operand by the four signed words of the source operand,
    giving four 32-bit products. The two high-order products are summed
    and stored in the upper doubleword of the destination operand. The
    two low-order products are summed and stored in the lower doubleword
    of the destination operand. The result, two 32-bit doublewords, is
    written to the destination operand.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    The PMADDWD instruction wraps around to 80000000h only when all four
    words of both the source and destination operands are 8000h.


    ^yOpcode      Format^y
    0F F5 /r    PMADDWD mm, mm/m64
}
{PMULHW          Packed Multiply High on Words:PMULHW
 ^yPMULHW          Packed Multiply High on Words^y

 ^yPMULHW^y destination, source                           CPU: MMX

        ^yLogic^y
        mm(15..0)  <- ( mm(15..0)  * mm/m64(15..0)  ) SHR 16
        mm(31..16) <- ( mm(31..16) * mm/m64(31..16) ) SHR 16
        mm(47..32) <- ( mm(47..32) * mm/m64(47..32) ) SHR 16
        mm(63..48) <- ( mm(63..48) * mm/m64(63..48) ) SHR 16

    The PMULHW instruction multiplies the four signed words of the
    destination operand with the four signed words of the source operand.
    The high-order 16 bits of the 32-bit intermediate results are written
    to the destination operand.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F E5 /r    PMULHW mm, mm/m64
}
{PMULLW          Packed Multiply Low on Words:PMULLW
 ^yPMULLW          Packed Multiply Low on Words^y

 ^yPMULLW^y destination, source                           CPU: MMX

        ^yLogic^y
        mm(15..0)  <- ( mm(15..0)  * mm/m64(15..0)  ) AND 0FFFFh
        mm(31..16) <- ( mm(31..16) * mm/m64(31..16) ) AND 0FFFFh
        mm(47..32) <- ( mm(47..32) * mm/m64(47..32) ) AND 0FFFFh
        mm(63..48) <- ( mm(63..48) * mm/m64(63..48) ) AND 0FFFFh

    The PMULLW instruction multiplies the four signed or unsigned words
    of the destination operand with the four signed or unsigned words of
    the source operand. The low-order 16 bits of the 32-bit intermediate
    results are written to the destination operand.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F D5 /r    PMULLW mm, mm/m64
}
{POR             Bitwise OR:POR
 ^yPOR             Bitwise OR^y

 ^yPOR^y destination, source                              CPU: MMX

        ^yLogic^y   mm <- mm OR mm/m64

    POR performs a bitwise logical OR on 64 bits of the destination and
    source operands, and writes the result to the destination register.

    Each bit of the result is set to 0 if the corresponding bits of the
    two operands are 0. Otherwise, the bit is 1.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F EB /r    POR mm, mm/m64
}
{PSLLD           Packed Shift Left Logical, Doubleword:PSLLD
 ^yPSLLD           Packed Shift Left Logical, Doubleword^y

 ^yPSLLD^y destination, count                             CPU: MMX

        ^yLogic^y   mm(31..0)  <- mm(31..0)  SHL count
                mm(63..32) <- mm(63..32) SHL count

    PSLLD shifts the bits of the first operand to the left by the amount
    of bits specified in the count operand and returns the result to the
    destination register. The empty low-order bits are cleared (set to
    zero).
    If the value specified by the count operand is greater than 31 (1Fh)
    the destination is set to all zeros.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F F2 /r    PSLLD mm, mm/m64
    0F 72 /6 ib PSLLD mm, imm8
}
{PSLLQ           Packed Shift Left Logical, Quadword:PSLLQ
 ^yPSLLQ           Packed Shift Left Logical, Quadword^y

 ^yPSLLQ^y destination, count                             CPU: MMX

        ^yLogic^y   mm <- mm SHL count

    PSLLQ shifts the bits of the first operand to the left by the amount
    of bits specified in the count operand and returns the result to the
    destination register. The empty low-order bits are cleared (set to
    zero).
    If the value specified by the second operand is greater than 63 (3Fh)
    the destination is set to all zeros.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F F3 /r    PSLLQ mm, mm/m64
    0F 73 /6 ib PSLLQ mm, imm8
}
{PSLLW           Packed Shift Left Logical, Word:PSLLW
 ^yPSLLW           Packed Shift Left Logical, Word^y

 ^yPSLLW^y destination, count                             CPU: MMX

        ^yLogic^y   mm(15..0)  <- mm(15..0)  SHL count
                mm(31..16) <- mm(31..16) SHL count
                mm(47..32) <- mm(47..32) SHL count
                mm(63..48) <- mm(63..48) SHL count

    PSLLW shifts the bits of the first operand to the left by the amount
    of bits specified in the count operand and returns the result to the
    destination register. The empty low-order bits are cleared (set to
    zero).
    If the value specified by the second operand is greater than 15 (0Fh)
    the destination is set to all zeros.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F F1 /r    PSLLW mm, mm/m64
    0F 71 /6 ib PSLLW mm, imm8
}
{PSRAD           Packed Shift Right Arithmetic, Doubleword:PSRAD
 ^yPSRAD           Packed Shift Right Arithmetic, Doubleword^y

 ^yPSRAD^y destination, count                             CPU: MMX

        ^yLogic^y   mm(31..0)  <- SignExtend (mm(31..0)  SAR count)
                mm(63..32) <- SignExtend (mm(63..32) SAR count)

    PSRAD performs a shift-arithmetic-right (SAR) operation on each of
    the two doublewords in the destination operand. The count operand
    determines the number of bits to right-shift. The new high-order
    bits of each doubleword are filled with the initial value of the
    sign bit of the doubleword.
    If the value specified by count is greater than 31 (1Fh), each
    doubleword is filled with the initial value of its sign bit.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F E2 /r    PSRAD mm, mm/m64
    0F 72 /4 ib PSRAD mm, imm8
}
{PSRAW           Packed Shift Right Arithmetic, Word:PSRAW
 ^yPSRAW           Packed Shift Right Arithmetic, Word^y

 ^yPSRAW^y destination, count                             CPU: MMX

        ^yLogic^y   mm(15..0)  <- SignExtend (mm(15..0)  SAR count)
                mm(31..16) <- SignExtend (mm(31..16) SAR count)
                mm(47..32) <- SignExtend (mm(47..32) SAR count)
                mm(63..48) <- SignExtend (mm(63..48) SAR count)

    PSRAW performs a shift-arithmetic-right (SAR) operation on each of
    the four words in the destination operand. The count operand
    determines the number of bits to right-shift. The new high-order
    bits of each word are filled with the initial value of the sign bit
    of the word.
    If the value specified by count is greater than 15 (0Fh), each word
    is filled with the initial value of its sign bit.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F E1 /r    PSRAW mm, mm/m64
    0F 71 /4 ib PSRAW mm, imm8
}
{PSRLD           Packed Shift Right Logical, Doubleword:PSRLD
 ^yPSRLD           Packed Shift Right Logical, Doubleword^y

 ^yPSRLD^y destination, count                             CPU: MMX

        ^yLogic^y   mm(31..0)  <- mm(31..0)  SHR count
                mm(63..32) <- mm(63..32) SHR count

    PSRLD performs a shift-logical-right operation on each of the two
    doublewords in the destination operand. The count operand determines
    how many bits to right-shift. The new high-order bits are cleared
    (set to zero).
    If the value specified by the count operand is greater than 31 (1Fh)
    the destination is set to all zeros.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F D2 /r    PSRLD mm, mm/m64
    0F 72 /2 ib PSRLD mm, imm8
}
{PSRLQ           Packed Shift Right Logical, Quadword:PSRLQ
 ^yPSRLQ           Packed Shift Right Logical, Quadword^y

 ^yPSRLQ^y destination, count                             CPU: MMX

        ^yLogic^y   mm <- mm SHR count

    PSRLQ performs a shift-logical-right operation on the quadword
    destination operand. The count operand determines how many bits to
    right-shift. The new high-order bits are cleared (set to zero).
    If the value specified by the count operand is greater than 63 (3Fh)
    the destination is set to all zeros.

    The destination operand is an MMX register. The count operand (source
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F D3 /r    PSRLQ mm, mm/m64
    0F 73 /2 ib PSRLQ mm, imm8
}
{PSRLW           Packed Shift Right Logical, Word:PSRLW
 ^yPSRLW           Packed Shift Right Logical, Word^y

 ^yPSRLW^y destination, count                             CPU: MMX

        ^yLogic^y   mm(15..0)  <- mm(15..0)  SHR count
                mm(31..16) <- mm(31..16) SHR count
                mm(47..32) <- mm(47..32) SHR count
                mm(63..48) <- mm(63..48) SHR count

    PSRLW performs a shift-logical-right operation on each of the four
    words in the destination operand. The count operand determines how
    many bits to right-shift. The new high-order bits are cleared (set
    to zero).
    If the value specified by the count operand is greater than 15 (0Fh)
    the destination is set to all zeros.

    The destination operand is an MMX register. The covnt operand (sovrce
    operand) can be either an MMX register, a 64-bit memory operand, or
    an immediate 8-bit operand.


    ^yOpcode      Format^y
    0F D1 /r    PSRLW mm, mm/m64
    0F 71 /2 ib PSRLW mm, imm8
}
{PSUBB           Svbtraction with Wrap-arovnd on Byte:PSUBB
 ^yPSUBB           Subtraction with Wrap-around on Byte^y

 ^yPSUBB^y destination, source                            CPU: MMX

        ^yLogic^y   mm(7..0)   <- mm(7..0)   - mm/m64(7..0)
                mm(15..8)  <- mm(15..8)  - mm/m64(15..8)
                mm(23..16) <- mm(23..16) - mm/m64(23..16)
                mm(31..24) <- mm(31..24) - mm/m64(31..24)
                mm(39..32) <- mm(39..32) - mm/m64(39..32)
                mm(47..40) <- mm(47..40) - mm/m64(47..40)
                mm(55..48) <- mm(55..48) - mm/m64(55..48)
                mm(63..56) <- mm(63..56) - mm/m64(63..56)

    PSUBB subtracts the bytes of the source operand from the bytes of
    the destination operand and returns the result to the destination.
    When the result is too large or too small to be represented in a
    byte, the result wraps around and the lower 8 bits are written to
    the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F F8 /r    PSUBB mm, mm/m64
}
{PSUBD           Svbtraction with Wrap-arovnd on Dovbleword:PSUBD
 ^yPSUBD           Subtraction with Wrap-around on Doubleword^y

 ^yPSUBD^y destination, source                            CPU: MMX

        ^yLogic^y   mm(31..0)  <- mm(31..0)  - mm/m64(31..0)
                mm(63..32) <- mm(63..32) - mm/m64(63..32)

    PSUBD subtracts the doublewords of the source operand from the
    doublewords of the destination operand and returns the result to
    the destination.
    When the result is too large or too small to be represented in a
    doubleword, the result wraps around and the lower 32 bits are
    written to the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F FA /r    PSUBD mm, mm/m64
}
{PSUBSB          Svbtract Signed with Satvration on Byte:PSUBSB
 ^yPSUBSB          Subtract Signed with Saturation on Byte^y

 ^yPSUBSB^y destination, source                           CPU: MMX

        ^yLogic^y
        mm(7..0)   <- SaturateToSignedByte (mm(7..0)   - mm/m64(7..0)  )
        mm(15..8)  <- SaturateToSignedByte (mm(15..8)  - mm/m64(15..8) )
        mm(23..16) <- SaturateToSignedByte (mm(23..16) - mm/m64(23..16))
        mm(31..24) <- SaturateToSignedByte (mm(31..24) - mm/m64(31..24))
        mm(39..32) <- SaturateToSignedByte (mm(39..32) - mm/m64(39..32))
        mm(47..40) <- SaturateToSignedByte (mm(47..40) - mm/m64(47..40))
        mm(55..48) <- SaturateToSignedByte (mm(55..48) - mm/m64(55..48))
        mm(63..56) <- SaturateToSignedByte (mm(63..56) - mm/m64(63..56))

    PSUBSB subtracts the signed bytes of the source operand from the
    signed bytes of the destination operand, and writes the results to
    the destination register.
    If the result is larger or smaller than the range of a signed byte,
    the value is saturated; in the case of an overflow - to 7Fh, and in
    the case of an underflow - to 80h.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F E8 /r    PSUBSB mm, mm/m64
}
{PSUBSW          Svbtract Signed with Satvration on Word:PSUBSW
 ^yPSUBSW          Subtract Signed with Saturation on Word^y

 ^yPSUBSW^y destination, source                           CPU: MMX

        ^yLogic^y
        mm(15..0)  <- SaturateToSignedWord (mm(15..0)  - mm/m64(15..0) )
        mm(31..16) <- SaturateToSignedWord (mm(31..16) - mm/m64(31..16))
        mm(47..32) <- SaturateToSignedWord (mm(47..32) - mm/m64(47..32))
        mm(63..48) <- SaturateToSignedWord (mm(63..48) - mm/m64(63..48))

    PSUBSW subtracts the signed words of the source operand from the
    signed words of the destination operand, and writes the results to
    the destination register.
    If the result is larger or smaller than the range of a signed word,
    the value is saturated; in the case of an overflow - to 7FFFh, and
    in the case of an underflow - to 8000h.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F E9 /r    PSUBSW mm, mm/m64
}
{PSUBUSB         Svbtract Unsigned with Satvration on Byte:PSUBUSB
 ^yPSUBUSB         Subtract Unsigned with Saturation on Byte^y

 ^yPSUBUSB^y destination, source                          CPU: MMX

        ^yLogic^y
        mm(7..0)   <- SaturateToUnsignedByte (mm(7..0)   - mm/m64(7..0)   )
        mm(15..8)  <- SaturateToUnsignedByte (mm(15..8)  - mm/m64(15..8)  )
        mm(23..16) <- SaturateToUnsignedByte (mm(23..16) - mm/m64(23..16) )
        mm(31..24) <- SaturateToUnsignedByte (mm(31..24) - mm/m64(31..24) )
        mm(39..32) <- SaturateToUnsignedByte (mm(39..32) - mm/m64(39..32) )
        mm(47..40) <- SaturateToUnsignedByte (mm(47..40) - mm/m64(47..40) )
        mm(55..48) <- SaturateToUnsignedByte (mm(55..48) - mm/m64(55..48) )
        mm(63..56) <- SaturateToUnsignedByte (mm(63..56) - mm/m64(63..56) )

    PSUBUSB subtracts the bytes of the source operand from the bytes of
    the destination operand and writes the results to the destination
    register.
    If the result element is less than zero (a negative value), it is
    saturated to 00h.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F D8 /r    PSUBUSB mm, mm/m64
}
{PSUBUSW         Svbtract Unsigned with Satvration on Word:PSUBUSW
 ^yPSUBUSW         Subtract Unsigned with Saturation on Word^y

 ^yPSUBUSW^y destination, source                          CPU: MMX

        ^yLogic^y
        mm(15..0)  <- SaturateToUnsignedWord (mm(15..0)  - mm/m64(15..0)  )
        mm(31..16) <- SaturateToUnsignedWord (mm(31..16) - mm/m64(31..16) )
        mm(47..32) <- SaturateToUnsignedWord (mm(47..32) - mm/m64(47..32) )
        mm(63..48) <- SaturateToUnsignedWord (mm(63..48) - mm/m64(63..48) )

    PSUBUSW subtracts the words of the source operand from the words of
    the destination operand and writes the results to the destination
    register.
    If the result element is less than zero (a negative value), it is
    saturated to 0000h.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F D9 /r    PSUBUSW mm, mm/m64
}
{PSUBW           Svbtraction with Wrap-arovnd on Word:PSUBW
 ^yPSUBW           Subtraction with Wrap-around on Word^y

 ^yPSUBW^y destination, source                            CPU: MMX

        ^yLogic^y   mm(15..0)  <- mm(15..0)  - mm/m64(15..0)
                mm(31..16) <- mm(31..16) - mm/m64(31..16)
                mm(47..32) <- mm(47..32) - mm/m64(47..32)
                mm(63..48) <- mm(63..48) - mm/m64(63..48)

    PSUBW subtracts the words of the source operand from the words of
    the destination operand and returns the result to the destination.
    When the result is too large or too small to be represented in a
    word, the result wraps around and the lower 16 bits are written to
    the destination register.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.


    ^yOpcode      Format^y
    0F F9 /r    PSUBW mm, mm/m64
}
{PUNPCKHBW       Unpack (interleave) High-order Bytes:PUNPCKHBW
 ^yPUNPCKHBW       Unpack (interleave) High-order Bytes^y

 ^yPUNPCKHBW^y destination, source                        CPU: MMX

        ^yLogic^y   mm(7..0)   <- mm(39..32)
                mm(15..8)  <- mm/m64(39..32)
                mm(23..16) <- mm(47..40)
                mm(31..24) <- mm/m64(47..40)
                mm(39..32) <- mm(55..48)
                mm(47..40) <- mm/m64(55..48)
                mm(55..48) <- mm(63..56)
                mm(63..56) <- mm/m64(63..56)

    PUNPCKHBW interleaves the four high-order bytes of the source operand
    and the four high-order bytes of the destination operand and writes
    them to the destination.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    When unpacking from a memory operand, the full 64-bit operand is
    accessed from memory. PUNPCKHBW uses only the high-order 32 bits.


    ^yNote^y
    If the source operand is all zeros, this instruction converts bytes
    to unsigned words.


    ^yOpcode      Format^y
    0F 68 /r    PUNPCKHBW mm, mm/m64
}
{PUNPCKHDQ       Unpack (interleave) High-order Dovblewords:PUNPCKHDQ
 ^yPUNPCKHDQ       Unpack (interleave) High-order Doublewords^y

 ^yPUNPCKHDQ^y destination, source                        CPU: MMX

        ^yLogic^y   mm(31..0)  <- mm(63..32)
                mm(63..32) <- mm/m64(63..32)

    PUNPCKHDQ interleaves the high-order doubleword of the source
    operand and the high-order doubleword of the destination operand
    and writes them to the destination.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    When unpacking from a memory operand, the full 64-bit operand is
    accessed from memory. PUNPCKHDQ uses only the high-order 32 bits.


    ^yOpcode      Format^y
    0F 6A /r    PUNPCKHDQ mm, mm/m64
}
{PUNPCKHWD       Unpack (interleave) High-order Words:PUNPCKHWD
 ^yPUNPCKHWD       Unpack (interleave) High-order Words^y

 ^yPUNPCKHWD^y destination, source                        CPU: MMX

        ^yLogic^y   mm(15..0)  <- mm(47..32)
                mm(31..16) <- mm/m64(47..32)
                mm(47..32) <- mm(63..48)
                mm(63..48) <- mm/m64(63..48)

    PUNPCKHWD interleaves the two high-order words of the source operand
    and the two high-order words of the destination operand and writes
    them to the destination.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    When unpacking from a memory operand, the full 64-bit operand is
    accessed from memory. PUNPCKHWD uses only the high-order 32 bits.


    ^yNote^y
    If the source operand is all zeros, this instruction converts words
    to unsigned doublewords.


    ^yOpcode      Format^y
    0F 69 /r    PUNPCKHWD mm, mm/m64
}
{PUNPCKLBW       Unpack (interleave) Low-order Bytes:PUNPCKLBW
 ^yPUNPCKLBW       Unpack (interleave) Low-order Bytes^y

 ^yPUNPCKLBW^y destination, source                        CPU: MMX

        ^yLogic^y   mm(63..56) <- mm/m32(31..24)
                mm(55..48) <- mm(31..24)
                mm(47..40) <- mm/m32(23..16)
                mm(39..32) <- mm(23..16)
                mm(31..24) <- mm/m32(15..8)
                mm(23..16) <- mm(15..8)
                mm(15..8)  <- mm/m32(7..0)
                mm(7..0)   <- mm(7..0)

    PUNPCKLBW interleaves the four low-order bytes of the source operand
    and the four low-order bytes of the destination operand and writes
    them to the destination.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    When unpacking from a memory operand, only 32 bits are accessed.


    ^yNote^y
    If the source operand is all zeros, this instruction converts bytes
    to unsigned words.


    ^yOpcode      Format^y
    0F 60 /r    PUNPCKLBW mm, mm/m64
}
{PUNPCKLDQ       Unpack (interleave) Low-order Dovblewords:PUNPCKLDQ
 ^yPUNPCKLDQ       Unpack (interleave) Low-order Doublewords^y

 ^yPUNPCKLDQ^y destination, source                        CPU: MMX

        ^yLogic^y   mm(63..32) <- mm/m32(31..0)
                mm(31..0)  <- mm(31..0)

    PUNPCKLDQ interleaves the low-order doubleword of the source
    operand and the low-order doubleword of the destination operand
    and writes them to the destination.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    When unpacking from a memory operand, only 32 bits are accessed.


    ^yOpcode      Format^y
    0F 62 /r    PUNPCKLDQ mm, mm/m64
}
{PUNPCKLWD       Unpack (interleave) Low-order Words:PUNPCKLWD
 ^yPUNPCKLWD       Unpack (interleave) Low-order Words^y

 ^yPUNPCKLWD^y destination, source                        CPU: MMX

        ^yLogic^y   mm(63..48) <- mm/m32(31..16)
                mm(47..32) <- mm(31..16)
                mm(31..16) <- mm/m32(15..0)
                mm(15..0)  <- mm(15..0)

    PUNPCKLWD interleaves the two low-order words of the source operand
    and the two low-order words of the destination operand and writes
    them to the destination.

    The destination operand is an MMX register. The source operand can
    either be an MMX register or a 64-bit memory operand.

    When unpacking from a memory operand, only 32 bits are accessed.


    ^yNote^y
    If the source operand is all zeros, this instruction converts words
    to unsigned doublewords.


    ^yOpcode      Format^y
    0F 61 /r    PUNPCKLWD mm, mm/m64
}
{PXOR            Bitwise XOR:PXOR
 ^yPXOR            Bitwise XOR^y

 ^yPXOR^y destination, source                             CPU: MMX

        ^yLogic^y   mm <- mm XOR mm/m64

    The PXOR instruction performs a bitwise logical XOR on the 64 bits
    of the destination with the source operand and writes the result to
    the destination register.

    Each bit of the result is 1 if the corresponding bits of the two
    operands are different. Each bit is 0 if the corresponding bits of
    the operands are the same.

    The source operand can either be an MMX register or a 64-bit memory
    operand.



    ^yOpcode      Format^y
    0F EF /r    PXOR mm, mm/m64
}
{Coding rvles and svggestions:Coding r

    ^yGuidelines for developing MMX code^y

    The following guidelines will help you develop fast and efficient
    MMX code that scales well across all processors with MMX technology.

    Section numbers refer to
    <URL:http://www.intel.com/drg/manuals/mmx/dg/>


    ^yRules^y
    - Use a current generation compiler that will produce an optimized
      application. This will help you generate good code from the start.

    - Avoid partial register stalls. See Section 3.2.4.
    - Pay attention to the branch prediction algorithm. See Section
      3.2.5.
      This is the most important optimization for dynamic execution
      (P6-family) processors. By improving branch predictability, your
      code will spend fewer cycles fetching instructions.
    - Schedule your code to maximize pairing. See Section 3.3.

    - Make sure all data are aligned. See Section 4.6.
    - Arrange code to minimize instruction cache misses and optimize
      prefetch. See Section 3.5.

    - Do not intermix MMX instructions and floating-point instructions.
      See Section 4.3.1.
    - Avoid prefixed opcodes other than 0F. See Section 3.2.3.
    - Avoid small loads after large stores to the same area of memory.
      Avoid large loads after small stores to the same area of memory.
      Load and store data to the same area of memory using the same data
      sizes and address alignments. See Section 3.6.1.

    - Use the OP REG, MEM format whenever possible. This format helps
      to free registers and reduce cycles without generating unnecessary
      loads. See Section 3.4.1.

    - Always put an EMMS at the end of all sections of MMX instructions.
      See Section 4.4.
    - Optimize cache data bandwidth to MMX registers. See Section 3.6.




    ^ySuggestions^y
    - Arrange code so that forward conditional branches are usually not
      taken, and backward conditional branches are usually taken.
    - Align frequently executed branch targets on 16-byte boundaries.

    - Unroll loops to schedule instructions.
    - Use software pipelining to schedule latencies and functional units.
    - Always pair CALL and RET (return) instructions.

    - Avoid self-modifying code.
    - Avoid placing data in the code segment.
    - Calculate store addresses as soon as possible.
    - Avoid instructions that contain three or more micro-ops or
      instructions that are more than 7 bytes long. If possible, use
      instructions that require one micro-op.
    - Avoid using two 8-bit loads to produce a 16-bit load.

    - Cleanse partial registers before calling callee-save procedures.
    - Resolve blocking conditions, such as store addresses, as far as
      possible away from loads they may block.

    - In general, an N-byte quantity which is directly supported by the
      processor (8-bit bytes, 16-bit words, 32-bit doublewords, and
      32-bit, 64-bit, and 80-bit floating-point numbers) should be
      aligned on the next highest power-of-two boundary. Avoid
      misaligned data.
    -- Align 8-bit data on any boundary.
    -- Align 16-bit data to be contained within an aligned 4-byte word.
    -- Align 32-bit data on any boundary which is a multiple of four.
    -- Align 64-bit data on any boundary which is a multiple of eight.
    -- Align 80-bit data on a 128-bit boundary (that is, any boundary
       which is a multiple of 16 bytes).
}
{AbbreviationsMMX

    ^yAbbreviations and legends^y
    imm8        an immediate signed byte value
    r/m32       32-bit register or memory operand
    mm/m32      indicates the lowest 32 bits of an MMX register or a
                32-bit memory location
    mm/m64      a 64-bit MMX register or a 64-bit memory location
    mm(B..A)    bits B through A (both included) of an MMX register
}
[2]
{FPU-data-types
:  FPU data types
$FPU data types
:  FPU registers
$FPU registers
:  CC - FPU condition codes
$CC - FPU condition codes
}
{Opcodes
:  386 instruction format
$386 instrvction format
:  Opcode format
$Opcode format
:  Operand symbols
$Operand symbols
:  Matrix symbols
$Matrix symbols
:  Instruction set matrix (I)
$Instrvction set matrix (I)
:  Instruction set matrix (II)
$Instrvction set matrix (II)
:  Instruction set matrix (III) - 80286+ extension (opcode 0F xx)
$Instrvction set matrix (III)
:  Instruction set matrix (IV)  - 80286+ extension (opcode 0F xx)
$Instrvction set matrix (IV)
}
{Registers
:  Register availability
$Register availability
:  General registers
$General registers
:  Instruction pointer
$Instruction pointer
:  Flags register
$Flags register
:  Segment registers
$Segment registers
:  System address registers
$System address registers
:  Control registers
$Control registers
:  Debug registers
$Debug registers
:  Test registers
$Test registers
}
{CPU instructions
:  AAA                     ASCII Adjust after Addition
$AAA
:  AAD                     ASCII Adjust before Division
$AAD
:  AAM                     ASCII Adjust after Multiply
$AAM
:  AAS                     ASCII Adjust after Subtraction
$AAS
:  ADC                     Add with Carry
$ADC
:  ADD                     Add
$ADD
:  AND                     AND operation
$AND
:  ARPL                    Adjust RPL Field of Selector            ^yp^y       286+
$ARPL
:  BOUND                   Check Array Index against Bounds                186+
$BOUND
:  BSF                     Bit Scan Forward                                386+
$BSF
:  BSR                     Bit Scan Reverse                                386+
$BSR
:  BSWAP                   Byte Swap                                       486+
$BSWAP
:  BT                      Bit Test                                        386+
$BT
:  BTC                     Bit Test and Complement                         386+
$BTC
:  BTR                     Bit Test and Reset                              386+
$BTR
:  BTS                     Bit Test and Set                                386+
$BTS
:  CALL                    Call Procedure
$CALL
:  CBW                     Convert Byte to Word
$CBW
:  CDQ                     Convert Double to Quad                          386+
$CDQ
:  CLC                     Clear Carry Flag
$CLC
:  CLD                     Clear Direction Flag
$CLD
:  CLI                     Clear Interrupt-Enable Flag             ^yIO^y
$CLI
:  CLTS                    Clear Task Switched Flag                ^yP^y       286+
$CLTS
:  CMC                     Complement Carry Flag
$CMC
:  CMOVcc                  Conditional Move                               PPro+
$CMOVcc
:  CMP                     Compare
$CMP
:  CMPS                    Compare Strings
$CMPS
:  CMPXCHG                 Compare and Exchange                            486+
$CMPXCHG
:  CMPXCHG8B               Compare and Exchange 8 Bytes                   Pent+
$CMPXCHG8B
:  CPUID                   Identify CPU                                   Pent+
$CPUID
:  CWD                     Convert Word to Doubleword
$CWD
:  CWDE                    Convert Word to Double Extended                 386+
$CWDE
:  DAA                     Decimal Adjust after Addition
$DAA
:  DAS                     Decimal Adjust after Subtraction
$DAS
:  DEC                     Decrement by 1
$DEC
:  DIV                     Division, Unsigned
$DIV
:  ENTER                   Make Procedure Stack Frame                      186+
$ENTER
:  ESC                     Escape
$ESC
:  HLT                     Halt the Processor                      ^yP^y
$HLT
:  IDIV                    Division, Signed
$IDIV
:  IMUL                    Multiplication, Signed
$IMUL
:  IN                      Input from Port                         ^yIOpm^y
$IN
:  INC                     Increment by 1
$INC
:  INS                     Input String From Port                  ^yIOpm^y    186+
$INS
:  INT                     Interrupt                               ^yIOv86^y
$INT
:  INTO                    Interrupt on Overflow                   ^yIOv86^y
$INTO
:  INVD                    Invalidate Internal Caches              ^yP^y       486+
$INVD
:  INVLPG                  Invalidate TLB Entry                    ^yP^y       486+
$INVLPG
:  IRET                    Interrupt Return                        ^yIOv86^y
$IRET
:  Jcc                     Jump on condition
$Jcc
:  JCXZ / JECXZ            Jump if Register CX is Zero
$JCXZ
:  JMP                     Jump Unconditionally
$JMP
:  LAHF                    Load Register AH from Flags
$LAHF
:  LAR                     Load Access Rights                      ^yp^y       286+
$LAR
:  LDS                     Load Pointer Using DS
$LDS
:  LEA                     Load Effective Address
$LEA
:  LEAVE                   High Level Procedure Exit                       186+
$LEAVE
:  LES                     Load Pointer Using ES
$LES
:  LFS                     Load Pointer Using FS                           386+
$LFS
:  LGDT                    Load Global Descriptor Table            ^yP^y       286+
$LGDT
:  LGS                     Load Pointer Using GS                           386+
$LGS
:  LIDT                    Load Interrupt Descriptor Table         ^yP^y       286+
$LIDT
:  LLDT                    Load Local Descriptor Table             ^yP^y       286+
$LLDT
:  LMSW                    Load Machine Status Word                ^yP^y       286+
$LMSW
:  LOCK                    Lock the Bus                            ^yIOv86^y
$LOCK
:  LODS                    Load String
$LODS
:  LOOP                    Loop on Count
$LOOP
:  LSL                     Load Segment Limit                      ^yp^y       286+
$LSL
:  LSS                     Load Pointer Using SS                           386+
$LSS
:  LTR                     Load Task Register                      ^yP^y       286+
$LTR
:  MOV                     Move Data
$MOV
:  MOVS                    Move String
$MOVS
:  MOVSX                   Move With Sign-Extend                           386+
$MOVSX
:  MOVZX                   Move With Zero-Extend                           386+
$MOVZX
:  MUL                     Multiplication, Unsigned
$MUL
:  NEG                     Negate
$NEG
:  NOP                     No Operation
$NOP
:  NOT                     NOT operation
$NOT
:  OR                      OR operation
$OR
:  OUT                     Output to Port                          ^yIOpm^y
$OUT
:  OUTS                    Output String to Port                   ^yIOpm^y    186+
$OUTS
:  POP                     Pop Operand from Stack
$POP
:  POPA                    Pop All General Registers                       186+
$POPA
:  POPF                    Pop Flags from Stack                    ^yIOv86^y
$POPF
:  PUSH                    Push Operand on Stack
$PUSH
:  PUSHA                   Push All General Registers                      186+
$PUSHA
:  PUSHF                   Push Flags on Stack                     ^yIOv86^y
$PUSHF
:  RCL                     Rotate through Carry Left
$RCL
:  RCR                     Rotate through Carry Right
$RCR
:  RDMSR                   Read from Model-Specific Register       ^yr^y      Pent+
$RDMSR
:  RDPMC                   Read Performance-Monitoring Counters    ^yr^y      PPro+
$RDPMC
:  RDTSC                   Read Time-Stamp Counter                 ^yr^y      Pent+
$RDTSC
:  REP / REPNE             Repeat String Instruction
$REP
:  RET                     Return from Procedure
$RET
:  RETF                    Return Far
$RETF
:  RETN                    Return Near
$RETN
:  ROL                     Rotate Left
$ROL
:  ROR                     Rotate Right
$ROR
:  RSM                     Resume from System Management Mode      ^yr^y      Pent+
$RSM
:  SAHF                    Store Register AH into Flags
$SAHF
:  SAL                     Shift Arithmetic Left
$SAL
:  SAR                     Shift Arithmetic Right
$SAR
:  SBB                     Subtract with Borrow
$SBB
:  SCAS                    Scan String
$SCAS
:  SETcc                   Set Byte on Condition                           386+
$SETcc
:  SGDT                    Store Global Descriptor Table           ^yp^y       286+
$SGDT
:  SHL                     Shift Logical Left
$SHL
:  SHLD                    Double Precision Shift Left                     386+
$SHLD
:  SHR                     Shift Logical Right
$SHR
:  SHRD                    Double Precision Shift Right                    386+
$SHRD
:  SIDT                    Store Interrupt Descriptor Table        ^yp^y       286+
$SIDT
:  SLDT                    Store Local Descriptor Table            ^yp^y       286+
$SLDT
:  SMSW                    Store Machine Status Word               ^yp^y       286+
$SMSW
:  STC                     Set Carry Flag
$STC
:  STD                     Set Direction Flag
$STD
:  STI                     Set Interrupt Enable Flag               ^yIO^y
$STI
:  STOS                    Store String
$STOS
:  STR                     Store Task Register                     ^yp^y       286+
$STR
:  SUB                     Subtract
$SUB
:  TEST                    Test
$TEST
:  UD2                     Undefined Instruction                          PPro+
$UD2
:  VERR                    Verify Read                             ^yp^y       286+
$VERR
:  VERW                    Verify Write                            ^yp^y       286+
$VERW
:  WAIT                    Wait
$WAIT
:  WBINVD                  Write-Back and Invalidate Cache         ^yP^y       486+
$WBINVD
:  WRMSR                   Write to Model Specific Register        ^yr^y      Pent+
$WRMSR
:  XADD                    Exchange and Add                                486+
$XADD
:  XCHG                    Exchange Registers
$XCHG
:  XLAT                    Translate
$XLAT
:  XOR                     XOR operation
$XOR
:  Abbreviations and legends
$Abbreviations
}
{FPU instructions
:  F2XM1                   Compute 2 to the Xth power -1
$F2XM1
:  FABS                    Absolute value
$FABS
:  FADD  | FADDP           Add real                | and pop
$FADD
:  FBLD                    Load BCD
$FBLD
:  FBSTP                   Store BCD and pop
$FBSTP
:  FCHS                    Change sign
$FCHS
:  FCLEX | FNCLEX          Clear exceptions        | no wait
$FCLEX
:  FCMOVcc                 FP Conditional Move                            PPro+
$FCMOVcc
:  FCOM  | FCOMP | FCOMPP  Compare real            | and pop | pop twice
$FCOM
:  FCOMI | FCOMIP          Compare real (set EFLAGS) | and pop            PPro+
$FCOMI
:  FCOS                    Cosine                                          387+
$FCOS
:  FDECSTP                 Decrement FP stack pointer
$FDECSTP
:  FDISI | FNDISI          Disable interrupts      | no wait          8087 only
$FDISI
:  FDIV  | FDIVP           Divide real             | and pop
$FDIV
:  FDIVR | FDIVRP          Divide real reversed    | and pop
$FDIVR
:  FENI  | FNENI           Enable interrupts       | no wait          8087 only
$FENI
:  FFREE                   Free register
$FFREE
:  FIADD                   Integer add
$FIADD
:  FICOM | FICOMP          Integer compare         | and pop
$FICOM
:  FIDIV | FIDIVR          Integer divide          | reversed
$FIDIV
:  FILD                    Load integer
$FILD
:  FIMUL                   Integer multiply
$FIMUL
:  FINCSTP                 Increment FP stack pointer
$FINCSTP
:  FINIT | FNINIT          Initialize FP unit      | no wait
$FINIT
:  FIST  | FISTP           Store integer           | and pop
$FIST
:  FISUB | FISUBR          Integer subtract        | reversed
$FISUB
:  FLD                     Load real
$FLD
:  FLDxx                   Load FP constant
$FLDxx
:  FLDCW                   Load control word
$FLDCW
:  FLDENV                  Load environment state
$FLDENV
:  FMUL  | FMULP           Multiply real           | and pop
$FMUL
:  FNOP                    No operation
$FNOP
:  FPATAN                  Partial arctangent
$FPATAN
:  FPREM | FPREM1          Partial remainder       | IEEE (387+)
$FPREM
:  FPTAN                   Partial tangent
$FPTAN
:  FRNDINT                 Round to integer
$FRNDINT
:  FRSTOR                  Restore saved state
$FRSTOR
:  FSAVE | FNSAVE          Save FPU State          | no wait
$FSAVE
:  FSCALE                  Scale by a power of 2
$FSCALE
:  FSETPM                  Set protected mode                          287 only
$FSETPM
:  FSIN  | FSINCOS         Sine                    | and cosine            387+
$FSIN
:  FSQRT                   Square root
$FSQRT
:  FST   | FSTP            Store real              | and pop
$FST
:  FSTCW | FNSTCW          Store control word      | no wait
$FSTCW
:  FSTENV| FNSTENV         Store FPU environment   | no wait
$FSTENV
:  FSTSW | FNSTSW          Store status word       | no wait
$FSTSW
:  FSUB  | FSUBP           Subtract real           | and pop
$FSUB
:  FSUBR | FSUBRP          Subtract real reversed  | and pop
$FSUBR
:  FTST                    Test for zero
$FTST
:  FUCOM | FUCOMP |FUCOMPP Unordered compare real  | and pop | pop twice   387+
$FUCOM
:  FUCOMI| FUCOMIP         Unordered compare real (set EFLAGS) | and pop  PPro+
$FUCOMI
:  FWAIT                   Wait
$FWAIT
:  FXAM                    Examine condition flags
$FXAM
:  FXCH                    Exchange register contents
$FXCH
:  FXTRACT                 Extract exponent and significand
$FXTRACT
:  FYL2X                   Compute Y * log2(x)
$FYL2X
:  FYL2XP1                 Compute Y * log2(x+1)
$FYL2XP1
:  Abbreviations and legends
$Abbreviations-FPU
}
{MMX instructions
:^z^yMMX registers and data types^y
$^yMMX registers and data types^y
:^z^yMMX instruction set summary^y
$^yMMX instrvction set svmmary^y
:  EMMS            Empty MMX State
$EMMS
:  MOVD            Move Doubleword to/from MMX Register
$MOVD
:  MOVQ            Move Quadword to/from MMX Register
$MOVQ
:  PACKSSDW        Pack Doublewords into Words (Signed with Saturation)
$PACKSSDW
:  PACKSSWB        Pack Words into Bytes (Signed with Saturation)
$PACKSSWB
:  PACKUSWB        Pack Words into Bytes (Unsigned with Saturation)
$PACKUSWB
:  PADDB           Add with Wrap-around on Byte
$PADDB
:  PADDD           Add with Wrap-around on Doubleword
$PADDD
:  PADDSB          Add Signed with Saturation on Byte
$PADDSB
:  PADDSW          Add Signed with Saturation on Word
$PADDSW
:  PADDUSB         Add Unsigned with Saturation on Byte
$PADDUSB
:  PADDUSW         Add Unsigned with Saturation on Word
$PADDUSW
:  PADDW           Add with Wrap-around on Word
$PADDW
:  PAND            Bitwise AND
$PAND
:  PANDN           Bitwise AND NOT
$PANDN
:  PCMPEQB         Packed Compare for Equal, Byte
$PCMPEQB
:  PCMPEQD         Packed Compare for Equal, Doubleword
$PCMPEQD
:  PCMPEQW         Packed Compare for Equal, Word
$PCMPEQW
:  PCMPGTB         Packed Compare for Greater Than, Byte
$PCMPGTB
:  PCMPGTD         Packed Compare for Greater Than, Doubleword
$PCMPGTD
:  PCMPGTW         Packed Compare for Greater Than, Word
$PCMPGTW
:  PMADDWD         Packed Multiply and Add
$PMADDWD
:  PMULHW          Packed Multiply High on Words
$PMULHW
:  PMULLW          Packed Multiply Low on Words
$PMULLW
:  POR             Bitwise OR
$POR
:  PSLLD           Packed Shift Left Logical, Doubleword
$PSLLD
:  PSLLQ           Packed Shift Left Logical, Quadword
$PSLLQ
:  PSLLW           Packed Shift Left Logical, Word
$PSLLW
:  PSRAD           Packed Shift Right Arithmetic, Doubleword
$PSRAD
:  PSRAW           Packed Shift Right Arithmetic, Word
$PSRAW
:  PSRLD           Packed Shift Right Logical, Doubleword
$PSRLD
:  PSRLQ           Packed Shift Right Logical, Quadword
$PSRLQ
:  PSRLW           Packed Shift Right Logical, Word
$PSRLW
:  PSUBB           Subtraction with Wrap-around on Byte
$PSUBB
:  PSUBD           Subtraction with Wrap-around on Doubleword
$PSUBD
:  PSUBSB          Subtract Signed with Saturation on Byte
$PSUBSB
:  PSUBSW          Subtract Signed with Saturation on Word
$PSUBSW
:  PSUBUSB         Subtract Unsigned with Saturation on Byte
$PSUBUSB
:  PSUBUSW         Subtract Unsigned with Saturation on Word
$PSUBUSW
:  PSUBW           Subtraction with Wrap-around on Word
$PSUBW
:  PUNPCKHBW       Unpack (interleave) High-order Bytes
$PUNPCKHBW
:  PUNPCKHDQ       Unpack (interleave) High-order Doublewords
$PUNPCKHDQ
:  PUNPCKHWD       Unpack (interleave) High-order Words
$PUNPCKHWD
:  PUNPCKLBW       Unpack (interleave) Low-order Bytes
$PUNPCKLBW
:  PUNPCKLDQ       Unpack (interleave) Low-order Doublewords
$PUNPCKLDQ
:  PUNPCKLWD       Unpack (interleave) Low-order Words
$PUNPCKLWD
:  PXOR            Bitwise XOR
$PXOR
:  Coding rules and suggestions
$Coding rvles and svggestions
:  Abbreviations and legends
$AbbreviationsMMX
}
{All
:  ^yCPU
:           Instruction set
$CPU instructions
:           Registers
$Registers
:           Protection, privilege
$Privilege and protection
:           Exceptions
$Interrvpt and exception ID assignments
:           Addressing modes
$Addressing modes
:           Opcodes
$Opcodes
:  ^yFPU
:           Instruction set
$FPU instructions
:           Registers, data types
$FPU-data-types
:  ^yMMX
:           Instruction set
$MMX instructions
}
[3]
:iAPx86
$All