	TITLE	egacopybits

	.286p

X	EQU	4
;X	EQU	6

IBM_TEXT	SEGMENT  WORD PUBLIC 'CODE'
IBM_TEXT	ENDS
IBM_TEXT	SEGMENT
	ASSUME	CS: IBM_TEXT

; egacopybits(psrc, pdst, soff, doff, n, op)
; egacopyinvert(psrc, pdst, soff, doff, n, op)
; u_char *psrc, *pdst;	/* byte pointers to src and dst memory */
; int soff, doff;	/* offset into src and dst 0 <= soff,doff <= 7 */
; int n;		/* number of bits to copy */
; int op;		/* VGA hardware rop */
;
; Copies n bits from psrc (starting soff bits into its first byte) to
; pdst (starting doff bits into its first byte).  Offsets count from the
; most significant bit: the first-byte mask is computed as 0ffh >> offset.
; The routine never shifts data in the CPU; it programs the graphics
; controller at port 03ceh (index 3 = data rotate/op, index 8 = bit mask)
; and then copies whole bytes, letting the mask select which destination
; bits each write may change.
;
; When assembled with "invert" defined the procedure is named
; _egacopyinvert and every source byte is complemented (not al) before
; it is written; otherwise it is named _egacopybits.
;
; Arguments are read relative to BP+X (X is defined earlier in the file):
;	[bp+X]		psrc	(far pointer, loaded with lds)
;	4[bp+X]		pdst	(far pointer, loaded with les)
;	8[bp+X]		soff
;	10[bp+X]	doff
;	12[bp+X]	n
;	14[bp+X]	op	(low byte only)
;
; Three cases are handled separately, selected by bitstoshift = doff - soff:
;	== 0	($L13)	bytes line up, plain masked copy
;	>  0	($L1)	data is rotated right by bitstoshift
;	<  0	($L6)	data is rotated left by -bitstoshift
;
; Register usage:
; DS:SI:	psrc
; ES:DI:	pdst
; CX:		n
; BH:		bitstoshift
; AX,DX		temporary registers
;
; BP, SI, DI and DS are saved and restored.  AX, BX, CX, DX and ES are
; modified and not restored.  No return value is set up.  On exit the
; data rotate register is reset to 0 and the bit mask to 0ffh.
;
; NOTE(review): lodsb/stosb are used without a cld, so the routine assumes
; the direction flag is clear on entry - confirm against the caller's
; calling convention.

IFDEF	invert
	PUBLIC	_egacopyinvert
_egacopyinvert	PROC NEAR
ELSE
	PUBLIC	_egacopybits
_egacopybits	PROC NEAR
ENDIF
	; standard frame; preserve the registers this routine repurposes
	push	bp
	mov	bp,sp
	push	si
	push	di
	push	ds
	lds	si,[bp+X]	; psrc
	les	di,4[bp+X]	; pdst
	mov	cx,12[bp+X]	; n
	mov	ah,14[bp+X]	; op
	shl	ah,3		; shift for register
	mov	al,3		; data rotate register
	mov	dx,03ceh	; graphics controller index port
	mov	bh,10[bp+X]	; doff
	sub	bh,8[bp+X]	; doff-soff
	je	$L13		; if (bitstoshift == 0)
	jg	$L1		; if (bitstoshift > 0)
	jmp	$L6		; if (bitstoshift < 0)
$L13:
; bitstoshift == 0
	out	dx,ax		; fix op
	mov	bx,cx		; save n in bx

	mov	cx,8[bp+X]	; cl = soff, ch = 0
	jcxz	$L2		; soff == 0: no partial first byte
	add	bx,cx		; n = n + soff
	mov	ah,0ffh
	shr	ah,cl		; AH = 0xff >> soff
	sub	bx,8		; n = n - 8
	jge	$L3		; copy reaches the end of the first byte
	; copy ends inside the first byte: also trim the mask on the right
	mov	cx,bx		; cx = n (negative), so ch = 0xff
	neg	cl		; cl = -n
	shl	ch,cl		; ch = 0xff << -n
	and	ah,ch
$L3:
	; mask is in ah, put it in the hardware
	mov	al,8
	out	dx,ax

	lodsb			; get source
IFDEF	invert
	not	al
ENDIF
	mov	ah,es:[di]	; preload
	stosb
	; cx is still soff (> 0) when the copy continues past this byte;
	; on the short path above ch has its top bit set, so cx is negative
	cmp	cx,0
	jg	$L2
	jmp	$L0
$L2:
	; whole bytes
	mov	cx,bx		; restore n to cx
	shr	cx,3		; cx = number of whole bytes
	jcxz	$L5

	sub	ah,ah		; set mask to all 1's
	not	ah
	mov	al,8
	out	dx,ax
$L4:
	lodsb
	mov	ah,es:[di]	; preload buffer
IFDEF	invert
	not	al
ENDIF
	stosb
	loop	$L4
$L5:
	; trailing partial byte
	mov	cx,bx		; restore n
	and	cx,7		; cl = n & 7, ch = 0
	jnz	$L12
	jmp	$L0
$L12:
	not	ch		; ch = 0xff
	mov	ah,ch
	shr	ah,cl
	not	ah		; ah = mask of the leftmost (n & 7) bits

	mov	al,8
	out	dx,ax

	lodsb
IFDEF	invert
	not	al
ENDIF
	mov	bl,es:[di]	; preload
	stosb
	jmp	$L0

; bitstoshift > 0, doff must be positive
$L1:
	or	ah,bh		; amount to shift right and op
	out	dx,ax

	mov	dx,cx		; n
	mov	cx,10[bp+X]	; cl = doff, ch = 0
	add	dx,cx		; n += doff, preparation for the n -= (8 - doff)
				; operation completed below
	not	ch		; ch = 0xff
	shr	ch,cl		; ch = 0xff >> doff
	mov	ah,ch

	sub	dx,8		; n -= 8
	jge	$L7		; if (n < 0)
	mov	cx,dx		; cx = n (negative), so ch = 0xff
	neg	cl		; -n
	shl	ch,cl		; CH = 0xff << -n
	and	ah,ch		; mask (AH) &= CH
	xor	dx,dx		; zero n
$L7:
	; mask is in ah - put it into the E/VGA hardware
	mov	cx,dx		; save n
	mov	dx,03ceh
	mov	al,8		; mask register
	out	dx,ax

	; si is not advanced here: this source byte is read again below
	; for the left part of the next destination byte
	mov	al,ds:[si]
	mov	ah,es:[di]	; preload buffer
IFDEF	invert
	not	al
ENDIF
	stosb			; write data out

	mov	bl,cl		; save LSB of n

	jcxz	$L9		; nothing left after the first byte

	shr	cx,3		; CX = n >> 3;
	jcxz	$L9		; if (n >> 3)
	; save si, di, and cx (count) for next loop
	push	cx
	push	si
	push	di
	; do the operation in two passes, the first will do the right part
	; of the bytes.
	inc	si		; right parts come from the following source byte
	mov	ah,0ffh
	xchg	cl,bh		; borrow cl for the shift count, keep the loop count
	shr	ah,cl		; ah = 0xff >> bitstoshift
	xchg	cl,bh		; swap back
	mov	al,8		; mask register
	mov	dx,03ceh
	out	dx,ax		; set mask register

$L10:
	lodsb
	mov	ah,es:[di]	; preload buffer
IFDEF	invert
	not	al
ENDIF
	stosb
	loop	$L10

	; now do the left part of the bytes.
	pop	di
	pop	si
	mov	ah,0ffh
	mov	cl,bh
	shr	ah,cl		; ah = 0xff >> bitstoshift
	not	ah		; ah = mask of the leftmost bitstoshift bits
	pop	cx		; get original (n >> 3)

	mov	al,8		; mask register
	mov	dx,03ceh
	out	dx,ax
	; data rotate register is already correct.
$L8:
	lodsb
	mov	ah,es:[di]	; preload buffers
IFDEF	invert
	not	al
ENDIF
	stosb
	loop	$L8

$L9:
	; take care of last remaining byte
	; (cx is 0 on every path that reaches here, so ch = 0)
	and	bl,7		; we only need this part
	je	$L01

	mov	dx,03ceh
	mov	al,8		; mask
	not	ch		; CH = 0xff

	cmp	bl,bh		; if (n > bitstoshift)
	jg	$L21
	; bitstoshift >= n: everything left fits in the left part
	mov	bh,bl
$L21:
	; set up mask register for last bits
	mov	cl,bh		; min(n & 7, bitstoshift)
	shr	ch,cl		; CH = 0xff >> cl
	not	ch		; CH = mask of the leftmost cl bits
	mov	ah,ch
	out	dx,ax
	lodsb
	mov	ah,es:[di]	; preload
IFDEF	invert
	not	al
ENDIF
	mov	es:[di],al	; di not advanced: same byte may be written again

	; check for more
	sub	bl,bh		; n -= bitstoshift
	jle	$L01

	mov	ah,0ffh
	mov	cl,bh
	shr	ah,cl		; AH = 0xff >> bitstoshift
	mov	cl,bl
	mov	ch,ah		; CH = copy of that, used to clip below
	shr	ah,cl		; AH = 0xff >> (bitstoshift + n)
	not	ah
	and	ah,ch		; final mask

	mov	al,8		; mask
	out	dx,ax

	lodsb			; temp = *psrc++
	mov	ah,es:[di]	; preload buffer
IFDEF	invert
	not	al
ENDIF
	stosb
$L01:	jmp	$L0		; last bits


; bitstoshift < 0, soff must be positive
$L6:
	mov	bl,bh
	and	bl,7		; low 3 bits of the negative shift = 8 - (soff - doff)
	or	ah,bl		; combine rotate count with op
	out	dx,ax
	neg	bh		; bh = amount to shift left

	mov	dx,cx		; save n

	mov	cx,10[bp+X]	; cl = doff, ch = 0
	not	ch		; ch = 0xff
	shr	ch,cl		; ch = 0xff >> doff
	mov	ah,ch

	mov	cx,8
	sub	cx,8[bp+X]	; 8 - soff
	sub	dx,cx		; n -= (8 - soff)
	mov	cl,0		; mov leaves the flags from the sub intact
	jge	$L22		; if (n < 8 - soff)
	; n is small so we have to mask less
	mov	cl,dl		; n - (8 - soff)
	xor	dx,dx		; zero n
	neg	cl		; 8 - soff - n
$L22:
	add	cl,bh		; 8 - soff - n + bitstoshift = 8 - doff - n
	mov	ch,ah
	shl	ch,cl		; clear the bits right of the end of the copy
	and	ah,ch

	mov	cx,dx		; save n in cx
	mov	dx,03ceh
	mov	al,8		; mask register
	out	dx,ax
	lodsb			; al = *psrc++
	mov	ah,es:[di]	; preload
IFDEF	invert
	not	al
ENDIF
	mov	es:[di],al	; di not advanced: the right part of this byte
				; is filled from the next source byte
	jcxz	$L02		; all done

	; We took care of the first byte of *psrc.  Now start the main
	; loop for the remaining whole bytes.
	mov	bl,cl		; save part of n
	shr	cx,3
	jcxz	$L27

	mov	dx,cx		; save n in dx
	mov	ah,0ffh
	mov	cl,bh
	shl	ah,cl
	not	ah		; right part of byte

	mov	cx,dx		; restore n in cx
	mov	dx,03ceh
	mov	al,8		; mask register
	out	dx,ax

	push	cx		; save for left part of bytes
	push	si
	push	di
$L11:
	; Note: this preload is only really necessary on the first
	; byte, but to make the code simpler, I'll leave it in for
	; every byte.
	lodsb
	mov	ah,es:[di]	; preload
IFDEF	invert
	not	al
ENDIF
	stosb
	loop	$L11

	; Now do left part of bytes
	mov	cl,bh
	mov	ah,0ffh
	shl	ah,cl		; ah = 0xff << bitstoshift
	mov	al,8		; mask register
	out	dx,ax		; set mask to left part of bytes

	pop	di
	inc	di		; left part of bytes
	pop	si
	pop	cx
$L24:
	lodsb
	mov	ah,es:[di]	; preload
IFDEF	invert
	not	al
ENDIF
	stosb
	loop	$L24

	dec	di		; back up one

$L27:
	; Now do whatever's left over in the last byte
	; (cx is 0 on both paths that reach here, so ch = 0)

	and	bl,7		; n &= 7
	jz	$L0

	not	ch		; ch = 0xff
	mov	cl,bh
	shl	ch,cl
	not	ch		; ch = mask of the rightmost bitstoshift bits
	mov	ah,ch

	; n is now in bl.
	sub	bl,bh
	jge	$L26
	; n is smaller than bitstoshift
	neg	bl		; bl = bitstoshift - n
	mov	cl,bl
	shl	ch,cl		; drop the bits past the end of the copy
	and	ah,ch
	xor	bl,bl		; nothing left for the following byte
$L26:
	mov	cl,bl
	xor	ch,ch		; put n into cx
	mov	al,8		; mask register
	out	dx,ax
	; si is not advanced: the same source byte supplies the left
	; part of the next destination byte below
	mov	al,ds:[si]
	mov	ah,es:[di]	; preload
IFDEF	invert
	not	al
ENDIF
	stosb
$L02:	jcxz	$L0

	; now do left part of last byte
	not	ch		; change to 0xff
	shr	ch,cl
	not	ch		; ch = mask of the leftmost n bits
	mov	al,8
	mov	ah,ch
	out	dx,ax
	lodsb
	mov	ah,es:[di]	; preload
IFDEF	invert
	not	al
ENDIF
	stosb
$L0:
	; put E/VGA hardware back to normal
	mov	dx,03ceh
	mov	al,3
	sub	ah,ah
	out	dx,ax		; set rotate register to 0
	mov	al,8
	not	ah		; restore mask to all 1's
	out	dx,ax
	; restore saved registers in reverse order of the pushes
	pop	ds
	pop	di
	pop	si
;	mov	sp,bp
	pop	bp
	ret
IFDEF	invert
_egacopyinvert	ENDP
ELSE
_egacopybits	ENDP
ENDIF
IBM_TEXT	ENDS
	END
