Optimisation Challenge, for the benefit of new programmers

The place for codemasters or beginners to talk about programming any language for the Spectrum.
User avatar
ketmar
Manic Miner
Posts: 700
Joined: Tue Jun 16, 2020 5:25 pm
Location: Ukraine

Re: Optimisation Challenge, for the benefit of new programmers

Post by ketmar »

Pobulous wrote: Thu Apr 28, 2022 7:04 pm I'm working on one, with the helpful optimisation of the graphic data being interleaved, so all 256 of the first lines, then all 256 of the 2nd line, etc, followed by the attr data.
you just stole my idea! ;-)
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

In that case, here's a crap version with no optimisation for a start.
It does the job, but not in any particular rush.
This uses PASMO syntax.

Code: Select all

ENTRY_POINT		EQU 32768

CHAR_MAP		EQU 40000
UDGs			EQU 50000
EMPTY_UDGs		EQU 2

SCREEN			EQU 16384
ATTRS			EQU 22528
SCREEN_X_OFFSET	EQU 2
SCREEN_Y_OFFSET	EQU 2
SCREEN_OFFSET	EQU SCREEN_X_OFFSET + (32 * SCREEN_Y_OFFSET)

MAP_COLS		EQU 32
MAP_ROWS		EQU 24
WINDOW_COLS		EQU 28
WINDOW_ROWS		EQU 20
WINDOW_X_OFFSET	EQU 2
WINDOW_Y_OFFSET	EQU 2
WINDOW_OFFSET	EQU WINDOW_X_OFFSET + (MAP_COLS * WINDOW_Y_OFFSET)


org ENTRY_POINT
	ld a,1
	out (254),a

	ld bc,SCREEN+SCREEN_OFFSET			; BC = Screen address
	ld de,ATTRS+SCREEN_OFFSET			; DE = Attribute address
	
	exx									; Alternate registers
		push hl							; Save HL' and restore before exit. Also don't mess with IY.
	
		ld de,CHAR_MAP+WINDOW_OFFSET	; DE' = Character Map address
	
		ld c,WINDOW_ROWS				; C' = Row loop counter
row_loop
		ld b,WINDOW_COLS				; B' = Column (character) loop counter
col_loop
		ld a,(de)		; Get character code from Character Map
		inc de
	exx					; Back to regular registers.
	
	ld ixh,b
	ld ixl,c			; Save Screen address
	
	ld hl,UDGs			; Calculate address of UDG in HL, messing with BC
	ld b,0
	ld c,a
	add hl,bc			; HL = UDGs + (1 x code)
	sla c
	rl b				; BC x 2
	sla c
	rl b				; BC x 4
	sla c
	rl b				; BC x 8
	add hl,bc			; HL = UDGs + (9 x code)
	
	ldi					; Copy attribute of UDG, advancing DE and HL
		
	ld b,ixh
	ld c,ixl			; Restore Screen address in BC
	
	cp EMPTY_UDGs
	jp c,skip_pixels	; Jump over pixel drawing for low UDG numbers
	
	REPT 7
	  ld a,(hl)
	  ld (bc),a
	  inc hl
	  inc b
	ENDM
	ld a,(hl)
	ld (bc),a			; Copy 8 bytes of UDG data
	ld b,ixh			; Restore Screen address in BC (hi-byte B) to before UDG was drawn
	
skip_pixels
	inc c				; Advance screen address to next character

	exx
		djnz col_loop	; Repeat for all chars in row (count in B').
						; Note we re-enter the loop with alternate registers active.
						
	exx					; Back to regular registers
	
	ld a,c
	add a,32-WINDOW_COLS	; Advance Screen address BC over skipped columns
	ld c,a
	jp nc,block_skip
	ld a,8					; If there was a carry from the lo-byte,
	add a,b					;  add 8 to the hi-byte for the next screen third
	ld b,a
block_skip
	
	ld a,e
	add a,32-WINDOW_COLS	; Advance Attribute address DE over skipped columns
	ld e,a
	ld a,0
	adc a,d					; Pickup any 8-bit carried bit from E in D
	ld d,a
	
	exx						; Alternate registers again
		ld a,e
		add a,MAP_COLS-WINDOW_COLS	; Advance Character Map address DE' over skipped columns
		ld e,a
		ld a,0
		adc a,d					; Pickup any 8-bit carried bit from E' in D'
		ld d,a
	
		dec c
		jp nz,row_loop	;Repeat for all rows (count in C')

		pop hl		; Restore HL' for BASIC
	exx
	ret				; And return.

org CHAR_MAP	
	defb	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	defb	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	defb	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  4,5,0,0,0,1,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,3,4,5,4,5,0,  1,0,3,4,5,1,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,1,1,1,1,7,3,  1,3,1,6,5,1,5,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,1,6,7,6,7,0,  1,0,1,6,7,1,1,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,6,7,0,0,0,0,6,  7,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	defb	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	defb	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	defb	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	
	
org UDGs
	; Attribute, then data.
	defb	00000000b,    00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b
	defb	01011011b,    00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b
	
	defb	01110000b,    11001100b,01100110b,00110011b,10011001b,11001100b,01100110b,00110011b,10011001b
	
	defb	01000011b,    00111100b,01111110b,11111111b,11111111b,11111111b,11111111b,01111110b,00111100b
	
	defb	01000011b,    00000000b,00000011b,00001111b,00011111b,00111111b,00111111b,01111111b,01111111b
	defb	01000011b,    00000000b,11000000b,11110000b,11111000b,11111100b,11111100b,11111110b,11111110b
	defb	01000011b,    01111111b,01111111b,00111111b,00111111b,00011111b,00001111b,00000011b,00000000b
	defb	01000011b,    11111110b,11111110b,11111100b,11111100b,11111000b,11110000b,11000000b,00000000b
	
	defs 9*(256-8),0

end ENTRY_POINT
dfzx
Manic Miner
Posts: 682
Joined: Mon Nov 13, 2017 6:55 pm
Location: New Forest, UK
Contact:

Re: Optimisation Challenge, for the benefit of new programmers

Post by dfzx »

How do we measure the speed? Is there a standard way of doing that?
Derek Fountain, author of the ZX Spectrum C Programmer's Getting Started Guide and various open source games, hardware and other projects, including an IF1 and ZX Microdrive emulator.
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

Good question! Don't know really! :D
For optimising, it'll be about counting the clock cycles of every operation.

For judging, I guess you'd have to do a standard CALL / RET of the routine, say, 1000 times (without any VBL synching) with a specific test-case image and just time it with a stopwatch. You should probably disable interrupts too (some routines might impose that anyway) so you can't rely on the Speccy for timing.
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

Pobulous wrote: Thu Apr 28, 2022 7:04 pm I'm working on one, with the helpful optimisation of the graphic data being interleaved, so all 256 of the first lines, then all 256 of the 2nd line, etc, followed by the attr data.

I suppose I ought to provide a routine to do the interleaving, as it's sposed to be simple to use!
Please do. Note that Rule #5, data expansion, does not apply or limit you here as you could easily have a UDG editor that saves data in your chosen format. The data is still the same size, even if re-ordered.

The screen character map should remain in linear order, so as not to be a burden on the programmer. If you want to pre-process that into a different order, or expand it (e.g convert bytes to word addresses) then that WILL come off your run time and 2K cache limit. Though even then the 2K is just a guide. If you need 2.5K to pull off something blisteringly fast, by all means make the case for it. Just don't make something that needs all 48K just to run once!

Now if some smart-arse wants to have a character map in vertical linear order, instead of horizontal, I guess that's technically allowed. Probably best in that particular case to expand it to 32 high so it's easier to calculate for printing on. The rendered portion would still be the same size.
dfzx
Manic Miner
Posts: 682
Joined: Mon Nov 13, 2017 6:55 pm
Location: New Forest, UK
Contact:

Re: Optimisation Challenge, for the benefit of new programmers

Post by dfzx »

Joefish wrote: Thu Apr 28, 2022 7:57 pm In that case, here's a crap version with no optimisation for a start.
It does the job, but not in any particular rush.
That takes just about exactly 5 stopwatch seconds to run 100 times (in a BASIC loop). So 50ms per iteration as a starting point. :)
Derek Fountain, author of the ZX Spectrum C Programmer's Getting Started Guide and various open source games, hardware and other projects, including an IF1 and ZX Microdrive emulator.
User avatar
Pobulous
Dynamite Dan
Posts: 1365
Joined: Wed Nov 15, 2017 12:51 pm

Re: Optimisation Challenge, for the benefit of new programmers

Post by Pobulous »

Here's my early entry.

I could do with trying to optimise away using IX and IY, possibly by using the alternate registers.
I managed to optimise away using B for the inner loop, which was nice.
As I'm using IY I have disabled interrupts, which makes speed hard to work out.

Graphics data is 256 attribs, followed by 256 first lines, 256 second lines, etc - does away with multiplying to find the start of the character's data, and just an INC H to get next line, although it does mean you need all 256 characters defined and it took some Excel and text editor acrobatics to convert to db statements.

Whole screen is always updated but UDG0 is a do nothing character, the next (defineable) few characters just update the ATTRs.
Using UDG0 in my CharMap I have defined a border/status area.

Sample output:
Image

Code: Select all

ORG 32768

	PUSH IY			;Need to preserve IY for return to Basic
	DI				;Need to disable interrupts since IY is being used
	
	LD IX, CharMap
	LD DE, 16384
	LD IY, 22528
	LD C, UDGs/256
	
	LD B,3			;Loop once for each screen third

PrintLoop:
	LD A,(IX+0)		;Get next CharMap character to print
	INC IX			;Advance to next CharMap character
	
	AND A			;if zero, skip over screen position
	JR Z, SkipPrinting
	
	CP 9			;chars 1-(n-1) (1-8 in this case) will only update attr data (set to 0 or 1 to disable this feature)
	
	LD H,C			;Get base address of UDGs
	LD L,A			;Select UDG 1-255
	LD A,(HL)		;copy attr data
	LD (IY+0),A
	
	JR C,SkipPrinting
	
	INC H
	PUSH DE			;Store current screen address

	LD A,(HL)		;copy 8 lines of UDG data to screen
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A
	INC H
	INC D
	LD A,(HL)
	LD (DE),A

	POP DE			;Restore screen address
SkipPrinting:
	INC IY			;Advance to next ATTR position
	INC E			;Advance screen address

	JR NZ, PrintLoop	;Loop until E wraps to zero - signals a third of the screen is completed
	
	
	LD A,D			;Adjust DE to point to next screen third
	ADD A,8
	LD D,A
	
	DJNZ PrintLoop	;Do next screen third
	
	POP IY			;Restore IY and re-enable interrupts before returning to BASIC
	EI
	RET
	
ORG 49152
CharMap:			;24 rows of 32 bytes of characters to render, aligned on 256 byte boundary
					;0s don't aren't printed, 1-8 only update the attributes
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0




UDGs:				;256 8 byte arrays of attribs abd characters aligned on 256 byte boundary, interleaved
					;256 ATTR bytes, followed by 256 top rows of UDGS, then 256 2nd rows, etc
	db  0,0,8,16,24,32,40,48,56,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
	db  24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
	db  50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75
	db  76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
	db  102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
	db  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153
	db  154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179
	db  180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205
	db  206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231
	db  232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12
	db  0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60
	db  0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60
	db  8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16
	db  0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0
	db  0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64
	db  16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60,0,32
	db  0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126
	db  60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0
	db  60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0
	db  0,0,0,0,0,0,0,0,0,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16
	db  60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66
	db  56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66
	db  24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56
	db  68,68,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120
	db  56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64
	db  0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66,56,32
	db  28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64
	db  64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68
	db  70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104
	db  0,0,0,0,0,0,0,0,0,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24
	db  68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66
	db  4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12
	db  40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16
	db  68,68,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68
	db  68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120
	db  48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66,4,60
	db  32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124
	db  124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68
	db  74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84
	db  0,0,0,0,0,0,0,0,0,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16
	db  68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62
	db  60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2
	db  72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16
	db  68,40,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68
	db  68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68
	db  16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62,60,34
	db  32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2
	db  66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40
	db  82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84
	db  0,0,0,0,0,0,0,0,0,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16
	db  60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2
	db  68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66
	db  126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16
	db  68,40,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68
	db  68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68
	db  16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2,68,34
	db  32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66
	db  66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40
	db  98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84
	db  0,0,0,0,0,0,0,0,0,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16
	db  4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60
	db  60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60
	db  8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12
	db  56,16,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68
	db  56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68
	db  56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60,60,60
	db  28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60
	db  60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16
	db  60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0
	db  0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0
	db  0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0	
dfzx
Manic Miner
Posts: 682
Joined: Mon Nov 13, 2017 6:55 pm
Location: New Forest, UK
Contact:

Re: Optimisation Challenge, for the benefit of new programmers

Post by dfzx »

This isn't what was asked for, but I'll post it here as a curio or a line in the sand. It's an extremely naive C implementation which uses a loop within a loop and calculations for all addresses. I wrote it to ensure I understood the requirement. I used joefish's data and it produces identical output.

Code: Select all

#include <stdint.h>
#include <arch/zx.h>

extern uint8_t char_map[];
extern uint8_t udgs[];

void main(void)
{
  uint8_t x,y;

  zx_border(INK_BLUE);

  for( y=2; y < 22; y++ )
  {
    for( x=2; x < 30; x++ )
    {
      uint8_t *screen_addr = zx_cxy2saddr(x, y);   // char x,y to screen pixel address   
      uint8_t *att_addr    = zx_cxy2aaddr(x, y);   // char x,y to attribute address
      uint8_t c            = char_map[(y*32)+x];   // pick the character out of the map

      // Fill in the 8 scan lines of the screen's char cell from the UDG data
      uint8_t i;
      for(i=0;i<8;i++)
      {
	// Find the UDG required, skip the attribute value, pick up the scan line byte
	*screen_addr = udgs[ (9*c) + 1 + i ];

	// Next screen row down
	screen_addr += 256;
      }
      
      // Set the colour byte. UDG's attribute value is first in the 9 bytes of data
      *att_addr = udgs[ 9*c ];
    }
  }
  return;
}
It takes about 600ms per iteration. i.e. you can actually see it drawing the screen. So arguably quite readable, but needs work to make it faster... :lol:
Derek Fountain, author of the ZX Spectrum C Programmer's Getting Started Guide and various open source games, hardware and other projects, including an IF1 and ZX Microdrive emulator.
User avatar
Sol_HSA
Microbot
Posts: 162
Joined: Thu Feb 04, 2021 11:03 am
Location: .fi
Contact:

Re: Optimisation Challenge, for the benefit of new programmers

Post by Sol_HSA »

If this is on the next... I wrote a dot command that can be used to time things at about 1 second granularity using the real time clock.. https://github.com/jarikomppa/specnext/tree/master/dt
User avatar
ketmar
Manic Miner
Posts: 700
Joined: Tue Jun 16, 2020 5:25 pm
Location: Ukraine

Re: Optimisation Challenge, for the benefit of new programmers

Post by ketmar »

so you didn't leave me a choice. ;-) 256-unrolled UDG version (not heavily optimised yet); normal->unrolled UDG converter routine included.

Code: Select all

DEBUG equ 0
DEBUG_LOOP equ 0

;; 0x300 bytes (768)
;; need not to be at a 256-byte boundary
TILEMAP equ 0xf000

;; 0x900 bytes (2304)
;; must be at a 256-byte boundary
UDGADDR equ 0xf300

TILE_SKIP_TOP equ 2
TILE_SKIP_BOTTOM equ 2
TILE_SKIP_LEFT equ 2
TILE_SKIP_RIGHT equ 2

TILE_ROW_SKIP equ TILE_SKIP_LEFT+TILE_SKIP_RIGHT

TILEMAP_LAST_VIS_TILE equ TILEMAP+32*24-1-TILE_SKIP_RIGHT-32*TILE_SKIP_BOTTOM

  org 32768

  IF DEBUG
    di  ;; for debug
  ELSE
    ;; for basic
    exx
    push  hl
  ENDIF


  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; to make things easier, i'm unrolling normal UDG here
  ;; normal UDG is 8 bitmap bytes and attribute
  ;; you can skip this unrolling, and use prepared data instead
  call  prepare_udg_set


again:
  IF !DEBUG && DEBUG_LOOP
  halt
  ld    a,1
  out   (#fe),a
  ENDIF

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; draw attributes
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
row_attr:
  ld    bc,TILEMAP_LAST_VIS_TILE
  ld    de,0x5800+32*24-1-TILE_SKIP_RIGHT-32*TILE_SKIP_BOTTOM
  ld    hl,UDGADDR+256*8
  exx
  ;; alternate set: counters
  ld    c,24-TILE_SKIP_TOP-TILE_SKIP_BOTTOM  ; height

print_one_row_loop:
  ;; alternate set: counters
  ld    b,32-TILE_ROW_SKIP  ; width

print_one_tile:
  ;; here we should be at "counters"
  exx
  ld    a,(bc)
  ld    l,a
  ldd
  inc   hl  ; restore high byte (required for zero tile)
  exx
  djnz  print_one_tile
  ; next row
  dec   c
  jp    z,attr_done
  ;; alternate set: counters
  exx
  ;; normal set: addresses
  ; skip invisible tiles
  DUP TILE_ROW_SKIP
    dec   bc
    dec   de
  EDUP
  exx
  jp    print_one_row_loop
attr_done:

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; draw bitmap
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

  ;; top 1/3
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*TILE_SKIP_TOP
  ld    hl,0x4000+TILE_SKIP_LEFT+32*TILE_SKIP_TOP

  exx
  ld    c,8-TILE_SKIP_TOP  ; row count
  call  do_one_row
  ;; alternate set (counters)

  ;; middle 1/3
  exx
  ;; main set (addresses)
  DUP TILE_ROW_SKIP
    inc   bc
  EDUP
  ld    hl,0x4800+TILE_SKIP_LEFT
  exx
  ;; alternate set (counters)
  ld    c,8  ; row count
  call  do_one_row
  ;; alternate set (counters)

  ;; bottom 1/3
  exx
  ;; main set (addresses)
  DUP TILE_ROW_SKIP
    inc   bc
  EDUP
  ld    hl,0x5000+TILE_SKIP_LEFT
  exx
  ;; alternate set (counters)
  ld    c,8-TILE_SKIP_BOTTOM  ; row count
  call  do_one_row
  ;; alternate set (counters)

  IF DEBUG
    jr    $
  ENDIF

  IF !DEBUG && DEBUG_LOOP
    xor   a
    out   (#fe),a
    jp    again
  ENDIF

  IF !DEBUG
    ;; for basic
    pop   hl
    exx
    ret
  ENDIF


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; in:
;;   C': height
;;   BC: tilemap
;;   HL: scraddr
;;   registers must be at the alternate set (counters)
;;
;; out:
;;   DE,HL,AF: dead
;;   BC: right after last read tilemap byte
;;   B'C': dead
;;   registers are at the alternate set (counters)
;;
do_one_row:
  ld    b,32-TILE_ROW_SKIP
  ;; do it tile by tile
do_one_tile:
  ;; alternate set (counters)
  exx
  ;; normal set (addresses)
  ld    a,(bc)
  inc   bc
  or    a
  jp    z,skip_one_tile
  ld    (do_one_tile_save_addr+1),hl
  ld    (one_tile_udg_addr+1),a
one_tile_udg_addr:
  ld    de,UDGADDR
  ; tile bitmap
  DUP 7
    ld    a,(de)
    ld    (hl),a
    inc   d
    inc   h
  EDUP
  ; avoid two useless increments
  ld    a,(de)
  ld    (hl),a
do_one_tile_save_addr:
  ld    hl,0  ; self-modifying code, fixed above
skip_one_tile:
  inc   l
  exx
  djnz  do_one_tile

  dec   c
  ret   z

  ; we are done with the row
  ;; alternate set (counters)
  exx
  ;; normal set (addresses)
  DUP TILE_ROW_SKIP
    inc   bc
    inc   l
  EDUP
  exx
  jp    do_one_row



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unroll linear UDG set to `UDGADDR`
;;
prepare_udg_set:
  ld    c,UDG_COUNT
  ld    hl,UDG_SET
  ld    de,UDGADDR
  ; char by char
prepare_next_char:
  push  de
  ld    b,9
prepare_char_loop:
  ld    a,(hl)
  ld    (de),a
  inc   hl
  inc   d
  djnz  prepare_char_loop
  pop   de
  inc   e
  dec   c
  jr    nz,prepare_next_char
  ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; linear UDG set
;;
UDG_COUNT equ 8

UDG_SET:
  ;; bitmap, then attribute
  defb  00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,  00000000b
  defb  00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,  01011011b

  defb  11001100b,01100110b,00110011b,10011001b,11001100b,01100110b,00110011b,10011001b,  01110000b

  defb  00111100b,01111110b,11111111b,11111111b,11111111b,11111111b,01111110b,00111100b,  01000011b

  defb  00000000b,00000011b,00001111b,00011111b,00111111b,00111111b,01111111b,01111111b,  01000011b
  defb  00000000b,11000000b,11110000b,11111000b,11111100b,11111100b,11111110b,11111110b,  01000011b
  defb  01111111b,01111111b,00111111b,00111111b,00011111b,00001111b,00000011b,00000000b,  01000011b
  defb  11111110b,11111110b,11111100b,11111100b,11111000b,11110000b,11000000b,00000000b,  01000011b


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; tilemap
;;
  org TILEMAP
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,0,2,0,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,0,0,2,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2

  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  4,5,0,0,0,1,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,3,4,5,4,5,0,  1,0,3,4,5,1,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,1,1,1,1,7,3,  1,3,1,6,5,1,5,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,1,6,7,6,7,0,  1,0,1,6,7,1,1,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,6,7,0,0,0,0,6,  7,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2

  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unrolled UDG set
;; contains first bytes for all 256 tiles, then next bytes, and so on
  org UDGADDR
  ;INCBIN "udg.bin"
compiled .tap demo
User avatar
ketmar
Manic Miner
Posts: 700
Joined: Tue Jun 16, 2020 5:25 pm
Location: Ukraine

Re: Optimisation Challenge, for the benefit of new programmers

Post by ketmar »

and slightly more optimised version for the case when tilemap is 256-byte aligned:

Code: Select all

DEBUG equ 0
DEBUG_LOOP equ 0

;; 0x300 bytes (768)
;; might be at a 256-byte boundary (slightly faster)
TILEMAP equ 0xf000

;; 0x900 bytes (2304)
;; must be at a 256-byte boundary
UDGADDR equ 0xf300

;; top/bottom skip: [0..7]
TILE_SKIP_TOP equ 2
TILE_SKIP_BOTTOM equ 2
;; left/right skip: any sane value, need to have at least one tile visible
TILE_SKIP_LEFT equ 2
TILE_SKIP_RIGHT equ 2

TILE_ROW_SKIP equ TILE_SKIP_LEFT+TILE_SKIP_RIGHT

TILEMAP_LAST_VIS_TILE equ TILEMAP+32*24-1-TILE_SKIP_RIGHT-32*TILE_SKIP_BOTTOM


  org 32768

  IF DEBUG
    di  ;; for debug
  ELSE
    ;; for basic
    exx
    push  hl
  ENDIF


  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; to make things easier, i'm unrolling normal UDG here
  ;; normal UDG is 8 bitmap bytes and attribute
  ;; you can skip this unrolling, and use prepared data instead
  call  prepare_udg_set


again:
  IF !DEBUG && DEBUG_LOOP
  halt
  ld    a,1
  out   (#fe),a
  ENDIF

  ;; call main blitting routine
  call blit_tilemap

  IF DEBUG
    jr    $
  ENDIF

  IF !DEBUG && DEBUG_LOOP
    xor   a
    out   (#fe),a
    jp    again
  ENDIF

  IF !DEBUG
    ;; for basic
    pop   hl
    exx
  ENDIF
  ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; in:
;;   none
;;
;; out:
;;   BC,DE,HL,AF: dead
;;   B'C', D'E', H'L': dead
;;
;; doesn't use IX/IY, or stack tricks
;;
;; tile #0 is "attribute print" (i.e. only attrs will be printed)
;;
blit_tilemap:
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; draw attributes
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ld    bc,TILEMAP_LAST_VIS_TILE
  ld    de,0x5800+32*24-1-TILE_SKIP_RIGHT-32*TILE_SKIP_BOTTOM
  ld    hl,UDGADDR+256*8
  exx
  ;; alternate set: counters
  ld    c,24-TILE_SKIP_TOP-TILE_SKIP_BOTTOM  ; height

print_one_row_loop:
  ;; alternate set: counters
  ld    b,32-TILE_ROW_SKIP  ; width

print_one_tile:
  ;; here we should be at "counters"
  exx
  ld    a,(bc)
  ld    l,a
  ldd
  inc   hl  ; restore high byte (required for zero tile)
  exx
  djnz  print_one_tile
  ; next row
  dec   c
  jp    z,attr_done
  ;; alternate set: counters
  exx
  ;; normal set: addresses
  ; skip invisible tiles
  DUP TILE_ROW_SKIP
    dec   bc
    dec   de
  EDUP
  exx
  jp    print_one_row_loop
attr_done:

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; draw bitmap
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

  ;; top 1/3
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*TILE_SKIP_TOP
  ld    hl,0x4000+TILE_SKIP_LEFT+32*TILE_SKIP_TOP

  exx
  ld    c,8-TILE_SKIP_TOP  ; row count
  call  do_one_row
  ;; alternate set (counters)

  ;; middle 1/3
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*8
  ld    hl,0x4800+TILE_SKIP_LEFT
  exx
  ;; alternate set (counters)
  ld    c,8  ; row count
  call  do_one_row
  ;; alternate set (counters)

  ;; bottom 1/3
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*8*2
  ld    hl,0x5000+TILE_SKIP_LEFT
  exx
  ;; alternate set (counters)
  ld    c,8-TILE_SKIP_BOTTOM  ; row count
  call  do_one_row
  ;; alternate set (counters)

  ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; in:
;;   C': height
;;   BC: tilemap
;;   HL: scraddr
;;   registers must be at the alternate set (counters)
;;
;; out:
;;   DE,HL,AF: dead
;;   BC: right after last read tilemap byte
;;   B'C': dead
;;   registers are at the alternate set (counters)
;;
do_one_row:
  ld    b,32-TILE_ROW_SKIP
  ;; do it tile by tile
do_one_tile:
  ;; alternate set (counters)
  exx
  ;; normal set (addresses)
  ld    a,(bc)
  IF TILEMAP&0xff
    inc   bc
  ELSE
    inc   c
  ENDIF
  or    a
  jp    z,skip_one_tile
  ld    (do_one_tile_save_addr+1),hl
  ld    (one_tile_udg_addr+1),a
one_tile_udg_addr:
  ld    de,UDGADDR
  ; tile bitmap
  DUP 7
    ld    a,(de)
    ld    (hl),a
    inc   d
    inc   h
  EDUP
  ; avoid two useless increments
  ld    a,(de)
  ld    (hl),a
do_one_tile_save_addr:
  ld    hl,0  ; self-modifying code, fixed above
skip_one_tile:
  inc   l
  exx
  djnz  do_one_tile

  dec   c
  ret   z

  ; we are done with the row
  ;; alternate set (counters)
  exx
  ;; normal set (addresses)
  IF TILE_ROW_SKIP > 3 && (TILEMAP&0xff) == 0
    ld    a,TILE_ROW_SKIP
    add   a,c
    ld    c,a
  ELSE
    DUP TILE_ROW_SKIP
      IF TILEMAP&0xff
        inc   bc
      ELSE
        inc   c
      ENDIF
    EDUP
  ENDIF
  IF TILE_ROW_SKIP > 3
    ld    a,TILE_ROW_SKIP
    add   a,l
    ld    l,a
  ELSE
    DUP TILE_ROW_SKIP
      inc   l
    EDUP
  ENDIF
  exx
  jp    do_one_row



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unroll linear UDG set to `UDGADDR`
;;
prepare_udg_set:
  ld    c,UDG_COUNT
  ld    hl,UDG_SET
  ld    de,UDGADDR
  ; char by char
prepare_next_char:
  push  de
  ld    b,9
prepare_char_loop:
  ld    a,(hl)
  ld    (de),a
  inc   hl
  inc   d
  djnz  prepare_char_loop
  pop   de
  inc   e
  dec   c
  jr    nz,prepare_next_char
  ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; linear UDG set
;;
UDG_COUNT equ 8

UDG_SET:
  ;; bitmap, then attribute
  defb  00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,  00000000b
  defb  00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,  01011011b

  defb  11001100b,01100110b,00110011b,10011001b,11001100b,01100110b,00110011b,10011001b,  01110000b

  defb  00111100b,01111110b,11111111b,11111111b,11111111b,11111111b,01111110b,00111100b,  01000011b

  defb  00000000b,00000011b,00001111b,00011111b,00111111b,00111111b,01111111b,01111111b,  01000011b
  defb  00000000b,11000000b,11110000b,11111000b,11111100b,11111100b,11111110b,11111110b,  01000011b
  defb  01111111b,01111111b,00111111b,00111111b,00011111b,00001111b,00000011b,00000000b,  01000011b
  defb  11111110b,11111110b,11111100b,11111100b,11111000b,11110000b,11000000b,00000000b,  01000011b


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; tilemap
;;
  org TILEMAP
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,0,2,0,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,0,0,2,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2

  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  4,5,0,0,0,1,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,3,4,5,4,5,0,  1,0,3,4,5,1,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,1,1,1,1,7,3,  1,3,1,6,5,1,5,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,1,6,7,6,7,0,  1,0,1,6,7,1,1,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,6,7,0,0,0,0,6,  7,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2

  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unrolled UDG set
;; contains first bytes for all 256 tiles, then next bytes, and so on
  org UDGADDR
needs UrAsm to be compiled (mostly for "IF"-conditions, so can be adapted to other asms manually, i believe).
User avatar
ketmar
Manic Miner
Posts: 700
Joined: Tue Jun 16, 2020 5:25 pm
Location: Ukraine

Re: Optimisation Challenge, for the benefit of new programmers

Post by ketmar »

and i optimised it slightly more, but let's wait for others first. ;-) (also, cleaned up the code too.)
dfzx
Manic Miner
Posts: 682
Joined: Mon Nov 13, 2017 6:55 pm
Location: New Forest, UK
Contact:

Re: Optimisation Challenge, for the benefit of new programmers

Post by dfzx »

Joefish wrote: Thu Apr 28, 2022 7:57 pm In that case, here's a crap version with no optimisation for a start.
It does the job, but not in any particular rush.
@Joefish I think you might have a dose of excessive average familiarity, so per this XKCD comic:

Image

Your "crap" version appears to use undocumented Z80 instructions, makes extensive use of the alternate registers, nicely sets up a LDI instruction, and unrolls a loop. That's all before it does something clever to handle the screen address, which I don't understand. I've a feeling your "crap" code is already streets ahead of what the vast majority of programmers could write, let alone "new" ones. :lol:

I managed to get my C version down from 600ms to 91ms, which is still close to double the time your "crap" version takes. I think I could make it faster if I understood how to work with the screen better. Given that a compiler can't do the clever things an assembly language programmer can I was quite pleased with what I've go to. :)
Derek Fountain, author of the ZX Spectrum C Programmer's Getting Started Guide and various open source games, hardware and other projects, including an IF1 and ZX Microdrive emulator.
User avatar
ketmar
Manic Miner
Posts: 700
Joined: Tue Jun 16, 2020 5:25 pm
Location: Ukraine

Re: Optimisation Challenge, for the benefit of new programmers

Post by ketmar »

dfzx wrote: Sat Apr 30, 2022 8:13 pmI think I could make it faster if I understood how to work with the screen better.
two basic tricks: interleaved tile data, and drawing the screen by 1/3 at a time can do wonders. ;-) with interleaved data you don't have to calculate tile data address at all, and working with only 1/3 of the screen at a time you don't need to calculate screen addresses too. i don't think that it is possible to do it much faster without dynamic codegen/code specialization anyway. at least that's where i hit the ceiling. but i'm not the best Z80 coder out there (not even a very good one), so you don't have to believe my words. ;-)

and let me spam you all with my Final RC version of the code, btw. ;-)

Code: Select all

DEBUG equ 0
DEBUG_LOOP equ 0
DEBUG_NO_ZERO_TILES equ 0


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; options
;;
;; should tile #0 change only attrs?
;; if set, tile #0 will change only attributes, leaving pixels intact
ZERO_TILE_ATTR_ONLY equ 1
;; this is marginally faster when there are many #0 tiles, and `ZERO_TILE_ATTR_ONLY` is set
;; if almost whole tilemap is non-zero, separate attr pass is marginally slower (600-800 tics)
;; note that separate attr pass is temporarily modifying SP
;; so if you're not doing this right after HALT, use combined pass instead
;; the default is safer combined pass
USE_SEPARATE_ATTR_PASS equ 0


;; 0x300 bytes (768)
;; might be at a 256-byte boundary (slightly faster)
TILEMAP equ 0xf000

;; 0x900 bytes (2304)
;; must be at a 256-byte boundary
UDGADDR equ 0xf300

;; top/bottom skip: [0..7]
TILE_SKIP_TOP equ 2
TILE_SKIP_BOTTOM equ 2
;; left/right skip: any sane value, need to have at least one tile visible
;; visible window size must be even for maximum speed, and must be >2
;; note that the code is optimised for at least `2+2` skips (i.e. 4 and more)
TILE_SKIP_LEFT equ 2
TILE_SKIP_RIGHT equ 2

TILE_ROW_SKIP equ TILE_SKIP_LEFT+TILE_SKIP_RIGHT

TILEMAP_LAST_VIS_TILE equ TILEMAP+32*24-1-TILE_SKIP_RIGHT-32*TILE_SKIP_BOTTOM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sanity checks
;;
IF TILE_SKIP_TOP < 0 || TILE_SKIP_TOP > 7
  FATAL "invalid TILE_SKIP_TOP"
ENDIF
IF TILE_SKIP_BOTTOM < 0 || TILE_SKIP_BOTTOM > 7
  FATAL "invalid TILE_SKIP_BOTTOM"
ENDIF
IF TILE_SKIP_LEFT < 0 || TILE_SKIP_LEFT > 30
  FATAL "invalid TILE_SKIP_LEFT"
ENDIF
IF TILE_SKIP_RIGHT < 0 || TILE_SKIP_RIGHT > 30
  FATAL "invalid TILE_SKIP_RIGHT"
ENDIF
IF TILE_ROW_SKIP < 0 || TILE_ROW_SKIP > 30 || (TILE_ROW_SKIP&1)
  FATAL "invalid TILE_SKIP_LEFT/TILE_SKIP_RIGHT combination"
ENDIF



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; main entry
;;
  org 32768

  IF !DEBUG
    ;; for basic
    exx
    push  hl
  ENDIF


  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; to make things easier, i'm unrolling normal UDG here
  ;; normal UDG is 8 bitmap bytes and attribute
  ;; you can skip this unrolling, and use prepared data instead
  call  prepare_udg_set

  IF DEBUG_NO_ZERO_TILES
  ld    hl,TILEMAP
  ld    bc,32*24
tilefix_loop:
  ld    a,(hl)
  or    a
  jp    nz,tilefix_no_fix
  inc   a
  ld    (hl),a
tilefix_no_fix:
  inc   hl
  dec   bc
  ld    a,b
  or    c
  jp    nz,tilefix_loop
  ENDIF


again:
  halt
  if DEBUG
    di
  ELSE
    IF DEBUG_LOOP
      ld    a,1
      out   (#fe),a
    ENDIF
  ENDIF

test0:
  ;; call main blitting routine
  call blit_tilemap
test1:

  IF DEBUG
    jr    $
  ENDIF

  IF !DEBUG && DEBUG_LOOP
    xor   a
    out   (#fe),a
    jp    again
  ENDIF

  IF !DEBUG
    ;; for basic
    pop   hl
    exx
  ENDIF
  ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; in:
;;   none
;;
;; out:
;;   BC,DE,HL,AF: dead
;;   B'C', D'E', H'L': dead
;;   if `USE_SEPARATE_ATTR_PASS` is zero, IX is dead too
;;
;; if `USE_SEPARATE_ATTR_PASS` is non-zero, uses stack to blit attributes
;; so either use HALT, or restore corrupted screen borders
;;
;; if `ZERO_TILE_ATTR_ONLY` is non-zero, then tile #0 is "attribute print"
;; (i.e. only attrs will be printed)
;;
blit_tilemap:
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; draw attributes
  ;; use stack to copy them
if USE_SEPARATE_ATTR_PASS
  ld    (blit_attr_saved_sp+1),sp

  ;; alternate set: counters
  ld    hl,0x5800+32*24-TILE_SKIP_RIGHT-32*TILE_SKIP_BOTTOM
  ld    de,0-32  ; constant
  ld    c,24-TILE_SKIP_TOP-TILE_SKIP_BOTTOM  ; height

  exx
  ;; normal set: addresses
  ld    bc,TILEMAP_LAST_VIS_TILE
  ld    hl,UDGADDR

print_one_row_loop:
  ;; normal set: addresses
  exx
print_one_row_loop_no_exx:
  ;; alternate set: counters
  ld    b,0+(32-TILE_ROW_SKIP)/2  ; width
  ;; set address to push
  ld    sp,hl

print_one_tile:
  ;; here we should be at "counters"
  ;; 4+7+4+7+6+7+4+7+6+11+4 = 67 tics per 2 tiles
  exx           ; 4
  ;; load two tiles
  ld    a,(bc)  ; 7
  ld    l,a     ; 4
  ld    d,(hl)  ; 7
  dec   bc      ; 6
  ld    a,(bc)  ; 7
  ld    l,a     ; 4
  ld    e,(hl)  ; 7
  dec   bc      ; 6
  ;; and push them
  push  de      ; 11
  exx           ; 4
  djnz  print_one_tile
  ; next row
  dec   c       ; 4
  jp    z,blit_attr_saved_sp
  ;; alternate set: counters
  ;; move hl to the previous screen line
  add   hl,de   ; 11
  ; skip invisible tiles
  IF TILE_ROW_SKIP
    exx           ; 4
    ;; normal set: addresses
    ; fixed 15 tics most of the time (w/o jp)
    ld    a,c                     ; 4
    sub   a,TILE_ROW_SKIP         ; 7
    ld    c,a                     ; 4
    jp    nc,print_one_row_loop   ; 10
    dec   b                       ; 4
    jp    print_one_row_loop      ; 10
  ELSE
    jp    print_one_row_loop_no_exx  ; 10
  ENDIF
blit_attr_saved_sp:
  ld    sp,0
endif

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; draw bitmap
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

  ;; top 1/3
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*TILE_SKIP_TOP
  ld    hl,0x4000+TILE_SKIP_LEFT+32*TILE_SKIP_TOP
  ;; normal set (addresses)
  exx
  ;; microoptimisation: load B' to skip one loading
  ld    bc,256*(32-TILE_ROW_SKIP)+8-TILE_SKIP_TOP  ; row count
  ld    e,b  ; E' is used to restore B'
  IF !USE_SEPARATE_ATTR_PASS
  ; IX is attributes address
  ld    ix,0x5800+TILE_SKIP_LEFT+32*TILE_SKIP_TOP
  ENDIF
  call  do_one_tile  ; HACK! jump into subroutine body
  ;; alternate set (counters)

  ;; middle 1/3
  ; B' will be set at do_one_third
  ld    c,8  ; row count
  IF !USE_SEPARATE_ATTR_PASS
  ; IX is attributes address
  ld    ix,0x5800+TILE_SKIP_LEFT+32*8
  ENDIF
  exx
  ;; normal set (addresses)
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*8
  ld    hl,0x4800+TILE_SKIP_LEFT
  call  do_one_third
  ;; alternate set (counters)

  ;; bottom 1/3
  ; B' will be set at do_one_third
  ld    c,8-TILE_SKIP_BOTTOM  ; row count
  IF !USE_SEPARATE_ATTR_PASS
  ; IX is attributes address
  ld    ix,0x5800+TILE_SKIP_LEFT+32*8*2
  ENDIF
  exx
  ;; normal set (addresses)
  ld    bc,TILEMAP+TILE_SKIP_LEFT+32*8*2
  ld    hl,0x5000+TILE_SKIP_LEFT
  ;jp    do_one_third


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; in:
;;   C': height
;;   E': width
;;   BC: tilemap
;;   HL: scraddr
;;   registers must be at the normal set (addresses)
;;
;; out:
;;   DE,HL,AF: dead
;;   BC: right after last read tilemap byte
;;   B'C': dead
;;   registers are at the alternate set (counters)
;;
do_one_third:
  ;; normal set (addresses)
  exx                   ; 4
do_one_third_no_exx:
  ;; alternate set (counters)
  ld    b,e             ; 4
  ;; do it tile by tile
do_one_tile:
  ;; alternate set (counters)
  exx                   ; 4
  ;; normal set (addresses)
  ld    a,(bc)          ; 7
  ; cheap micro-optimisation if tilemap is 256-byte aligned
  IF TILEMAP&0xff
    inc   bc            ; 6
  ELSE
    inc   c             ; 4
  ENDIF

  IF ZERO_TILE_ATTR_ONLY
  or    a               ; 4
  jp    z,skip_one_tile ; 10
  ENDIF

  ld    d,UDGADDR/256+USE_SEPARATE_ATTR_PASS ; 7
  ld    e,a             ; 4
  IF !USE_SEPARATE_ATTR_PASS
    ; tile attributes
    ld    a,(de)
    ld    (ix+0),a
    inc   d
    inc   ix
  ENDIF
  ; tile bitmap
  DUP 7
    ld    a,(de)        ; 7
    ld    (hl),a        ; 7
    inc   d             ; 4
    inc   h             ; 4
  EDUP
  ; avoid two useless increments
  ld    a,(de)          ; 7
  ld    (hl),a          ; 7
  ; faster than SMC (16+10) vs (10+11)
  ld    de,0-256*7+1    ; 10
  add   hl,de           ; 11
  exx                   ; 4
  ;; alternate set (counters)
  djnz  do_one_tile     ; 13/8

done_row:
  ; decrement row counter
  dec   c               ; 4

  ; we are done with the row
  IF TILE_ROW_SKIP
    ret   z               ; 11/5 (5 most of the time)
    ;; alternate set (counters)
    exx                   ; 4
    ;; normal set (addresses)
    ; skip screen addresses
    ; fixed 15 tics
    ld    a,TILE_ROW_SKIP   ; 7
    add   a,l               ; 4
    ld    l,a               ; 4
    IF !USE_SEPARATE_ATTR_PASS
      ; use the fact that we are working by 1/3
      ; i.e. it's enough to modify only IXL
      ld    a,TILE_ROW_SKIP   ; 7
      add   a,ixl             ; 4+4
      ld    ixl,a             ; 4+4
    ENDIF
    ; skip tilemap bytes
    ; fixed 15 tics
    ld    a,TILE_ROW_SKIP   ; 7
    add   a,c               ; 4
    ld    c,a               ; 4
    IF TILEMAP&0xff
      jp    nc,do_one_third   ; 10
      dec   b                 ; 4
    ENDIF
    jp    do_one_third
  ELSE
  ;; full width
  ;; alternate set (counters)
  jp    nz,do_one_third_no_exx
  ret
  ENDIF

; some pasta for tile skip
skip_one_tile:
  ;; normal set (addresses)
  ; tile attributes
  IF !USE_SEPARATE_ATTR_PASS
    ld    de,UDGADDR ; 11
    ld    a,(de)
    ld    (ix+0),a
    inc   ix
  ENDIF
  inc   l               ; 4
  exx                   ; 4
  ;; alternate set (counters)
  djnz  do_one_tile     ; 13/8
  jp    done_row        ; 10


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unroll linear UDG set to `UDGADDR`
;;
prepare_udg_set:
  ld    c,UDG_COUNT
  ld    hl,UDG_SET
  ld    de,UDGADDR
  ; char by char
prepare_next_char:
  push  de
  ld    b,9
prepare_char_loop:
  ld    a,(hl)
  ld    (de),a
  inc   hl
  inc   d
  djnz  prepare_char_loop
  pop   de
  inc   e
  dec   c
  jr    nz,prepare_next_char
  ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; linear UDG set
;;
UDG_COUNT equ 8

UDG_SET:
  ;; attribute, then bitmap
  defb  00000000b,  00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b
  defb  01011011b,  00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b,00000000b

  defb  01110000b,  11001100b,01100110b,00110011b,10011001b,11001100b,01100110b,00110011b,10011001b

  defb  01000011b,  00111100b,01111110b,11111111b,11111111b,11111111b,11111111b,01111110b,00111100b

  defb  01000011b,  00000000b,00000011b,00001111b,00011111b,00111111b,00111111b,01111111b,01111111b
  defb  01000011b,  00000000b,11000000b,11110000b,11111000b,11111100b,11111100b,11111110b,11111110b
  defb  01000011b,  01111111b,01111111b,00111111b,00111111b,00011111b,00001111b,00000011b,00000000b
  defb  01000011b,  11111110b,11111110b,11111100b,11111100b,11111000b,11110000b,11000000b,00000000b


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; tilemap
;;
  org TILEMAP
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,0,2,0,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,0,0,2,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2

  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  4,5,0,0,0,1,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,3,4,5,4,5,0,  1,0,3,4,5,1,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,1,1,1,1,7,3,  1,3,1,6,5,1,5,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,1,6,7,6,7,0,  1,0,1,6,7,1,1,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,6,7,0,0,0,0,6,  7,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2

  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
  defb  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unrolled UDG set
;; contains first bytes for all 256 tiles, then next bytes, and so on
  org UDGADDR
  ;INCBIN "udg.bin"
User avatar
Pobulous
Dynamite Dan
Posts: 1365
Joined: Wed Nov 15, 2017 12:51 pm

Re: Optimisation Challenge, for the benefit of new programmers

Post by Pobulous »

I've removed any need for IX and IY registers, converted all JRs to JPs.

I stil need to write the graphic interleaving routine, so it can use the simple graphic data from Joefish's example routine.

Be interesting to see how they all compare in different scenarios.

Code: Select all

ORG 32768

	DI				;Disable interrupts for speed
	
	LD DE, 16384
	LD BC, 22528
	
	LD HL, CharMap

PrintLoop:
			LD A,(HL)		;Get next CharMap character to print
					
			AND A			;if zero, skip over screen position
			JP Z, SkipPrinting
			
			CP 9			;chars 1-(n-1) (1-8 in this case) will only update attr data (set to 0 or 1 to disable this feature)
			
			PUSH HL			;Store CharMap location
			LD H,UDGs/256	;Get base address of UDGs
			LD L,A			;Select UDG 1-255
			LD A,(HL)		;copy ATTR data
			LD (BC),A
			
			JP C,PrintATTROnly
			
			INC H			;Get first line of chardata
			PUSH DE			;Store current screen address

			LD A,(HL)		;copy 8 lines of UDG data to screen
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A

			POP DE			;Restore screen address
			
PrintATTROnly:
			POP HL			;Restore CharMap location
			
SkipPrinting:
			INC BC			;Advance to next ATTR position
			INC HL			;Advance to next CharMap character
			INC E			;Advance screen address

		JP NZ, PrintLoop	;Loop until E wraps to zero - signals a third of the screen is completed
		
		
		LD A,D			;Adjust DE to point to next screen third
		ADD A,8
		LD D,A
		
		CP 88			;If MSB of screen addr is now 88, then we are done.
	JP C, PrintLoop	;Do next screen third
	
	EI
	RET
	
ORG 49152
CharMap:			;24 rows of 32 bytes of characters to render, aligned on 256 byte boundary
					;0s don't aren't printed, 1-8 only update the attributes
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,0,0,0
	db 0,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0




UDGs:				;256 8 byte arrays of attribs abd characters aligned on 256 byte boundary, interleaved
					;256 ATTR bytes, followed by 256 top rows of UDGS, then 256 2nd rows, etc
	db  0,0,8,16,24,32,40,48,56,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
	db  24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
	db  50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75
	db  76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
	db  102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
	db  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153
	db  154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179
	db  180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205
	db  206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231
	db  232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12
	db  0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60
	db  0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60
	db  8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16
	db  0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0
	db  0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64
	db  16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60,0,32
	db  0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126
	db  60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0
	db  60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0
	db  0,0,0,0,0,0,0,0,0,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16
	db  60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66
	db  56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66
	db  24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56
	db  68,68,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120
	db  56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64
	db  0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66,56,32
	db  28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64
	db  64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68
	db  70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104
	db  0,0,0,0,0,0,0,0,0,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24
	db  68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66
	db  4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12
	db  40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16
	db  68,68,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68
	db  68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120
	db  48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66,4,60
	db  32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124
	db  124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68
	db  74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84
	db  0,0,0,0,0,0,0,0,0,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16
	db  68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62
	db  60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2
	db  72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16
	db  68,40,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68
	db  68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68
	db  16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62,60,34
	db  32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2
	db  66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40
	db  82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84
	db  0,0,0,0,0,0,0,0,0,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16
	db  60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2
	db  68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66
	db  126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16
	db  68,40,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68
	db  68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68
	db  16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2,68,34
	db  32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66
	db  66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40
	db  98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84
	db  0,0,0,0,0,0,0,0,0,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16
	db  4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60
	db  60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60
	db  8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12
	db  56,16,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68
	db  56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68
	db  56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60,60,60
	db  28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60
	db  60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16
	db  60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0
	db  0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0
	db  0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0	
Ralf
Rick Dangerous
Posts: 2287
Joined: Mon Nov 13, 2017 11:59 am
Location: Poland

Re: Optimisation Challenge, for the benefit of new programmers

Post by Ralf »

Your "crap" version appears to use undocumented Z80 instructions, makes extensive use of the alternate registers, nicely sets up a LDI instruction, and unrolls a loop. That's all before it does something clever to handle the screen address, which I don't understand. I've a feeling your "crap" code is already streets ahead of what the vast majority of programmers could write, let alone "new" ones.
Yes, you shouldn't show a standard, decent code and call it crap.
User avatar
Pobulous
Dynamite Dan
Posts: 1365
Joined: Wed Nov 15, 2017 12:51 pm

Re: Optimisation Challenge, for the benefit of new programmers

Post by Pobulous »

OK, this version implements the border rule, and can be adjusted with the EQUs at the top, and I've included a routine to convert graphics from attr, line1-8, attr, line1-8 etc into attr*256, line1*256, line2*256, etc. USR entry point for conversion code is 32896.

All data starting addresses need to be 256 byte aligned.

I've no idea how fast it is compared to other solutions. On the plus side it's not too hard to follow and has some optimisations....

Code: Select all

TopEdge 	EQU 64 		;(NumRows*32)
BottomEdge	EQU 192		;(256-(NumRows*32)
LeftEdge	EQU 2		;NumCols
RightEdge	EQU 30		;32-NumCols


ORG 32768

	DI				;Disable interrupts for speed
	
	LD DE, 16384
	LD BC, 22528
	
	LD HL, CharMap

	LD A,E
	CP TopEdge
	JP NC, TestLeftSide
	LD A,TopEdge
	LD E,A
	LD L,A
	LD C,A

PrintLoop:

			LD A,D

TestBottomEdge:
			CP 80
			LD A,E
			JP NZ, TestLeftSide
			CP BottomEdge
			JP C, TestLeftSide
			EI				;Finished, Return
			RET

TestLeftSide:
			AND 31
			CP LeftEdge
			JP NC, TestRightSide
			LD A,LeftEdge
			ADD A,E
			LD E,A
			LD L,A
			LD C,A
			JP PrintLoop
			
TestRightSide:
			CP RightEdge
			JP C, PrintChar
			LD A, 31
			OR E
			LD E,A
			LD L,A
			LD C,A
			JP SkipPrinting	

PrintChar:
			LD A,(HL)		;Get next CharMap character to print
					
			CP 8			;chars 0-(n-1) (0-7 in this case) will only update attr data (set to 0 to disable this feature)
			
			PUSH HL			;Store CharMap location
			LD H,UDGs/256	;Get base address of UDGs
			LD L,A			;Select UDG 1-255
			LD A,(HL)		;copy ATTR data
			LD (BC),A
			
			JP C,PrintATTROnly
			
			INC H			;Get first line of chardata
			PUSH DE			;Store current screen address

			LD A,(HL)		;copy 8 lines of UDG data to screen
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A

			POP DE			;Restore screen address
			
PrintATTROnly:
			POP HL			;Restore CharMap location
			
SkipPrinting:
			INC BC			;Advance to next ATTR position
			INC HL			;Advance to next CharMap character
			INC E			;Advance screen address

		JP NZ, PrintLoop	;Loop until E wraps to zero - signals a third of the screen is completed
		
		
		LD A,D				;Adjust DE to point to next screen third
		ADD A,8
		LD D,A
		
		CP 88				;If MSB of screen addr is now 88, then we are done.
	JP C, TestBottomEdge	;Do next screen third
	
	EI
	RET


	
PrepUDGS:				;Run this before main code if required.
	LD DE,UDGs
	LD HL,RawUDGs		;Graphics in ATTR followed by 8 lines of pixels
	
UDGLineLoop:

		PUSH BC			;Save outer loop counter
		PUSH DE
		
		LD B,9
UDGLoop:
			LD A,(HL)
			LD (DE),A
			INC HL
			INC D
			
		DJNZ UDGLoop
				
		POP DE
		INC E
		
	JP NZ, UDGLineLoop
	
	RET


ORG 49152
CharMap:
	db 8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8
	db 8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8
	db 8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  10,11,0,0,0,1,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,9,10,11,10,11,0,  1,0,9,10,11,1,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,1,1,1,1,13,9,  1,9,1,12,11,1,11,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,1,12,13,12,13,0,  1,0,1,12,13,1,1,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,12,13,0,0,0,0,12,  13,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,8,8,8
	db 8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8
	db 8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8
	db 8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8,  8,8,8,8,8,8,8,8


UDGs:				;256 8 byte arrays of attribs abd characters aligned on 256 byte boundary, interleaved
					;256 ATTR bytes, followed by 256 top rows of UDGS, then 256 2nd rows, etc
	db  0,0,8,16,24,32,40,48,56,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
	db  24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
	db  50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75
	db  76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
	db  102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
	db  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153
	db  154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179
	db  180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205
	db  206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231
	db  232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12
	db  0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60
	db  0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60
	db  8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16
	db  0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0
	db  0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64
	db  16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126,60,126,60,60,0,32
	db  0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0,60,24,60,60,8,126
	db  60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0,0,0,0,0,0,0,16,0,0
	db  60,24,60,60,8,126,60,126,60,60,0,32,0,4,0,12,0,64,16,4,32,16,0
	db  0,0,0,0,0,0,0,0,0,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16
	db  60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66
	db  56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66
	db  24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56
	db  68,68,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120
	db  56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64
	db  0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64,64,2,66,66,56,32
	db  28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68,70,40,66,66,24,64
	db  64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104,120,56,120,60,28,56,56,68,68
	db  70,40,66,66,24,64,64,2,66,66,56,32,28,4,56,16,60,64,0,0,40,16,104
	db  0,0,0,0,0,0,0,0,0,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24
	db  68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66
	db  4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12
	db  40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16
	db  68,68,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68
	db  68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120
	db  48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124,124,4,60,66,4,60
	db  32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68,74,8,2,12,40,124
	db  124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84,68,68,68,68,32,64,16,68,68
	db  74,8,2,12,40,124,124,4,60,66,4,60,32,60,68,24,68,120,48,4,48,16,84
	db  0,0,0,0,0,0,0,0,0,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16
	db  68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62
	db  60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2
	db  72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16
	db  68,40,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68
	db  68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68
	db  16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2,66,8,66,62,60,34
	db  32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40,82,8,60,2,72,2
	db  66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84,68,68,68,68,32,56,16,68,40
	db  82,8,60,2,72,2,66,8,66,62,60,34,32,68,120,16,68,68,16,4,48,16,84
	db  0,0,0,0,0,0,0,0,0,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16
	db  60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2
	db  68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66
	db  126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16
	db  68,40,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68
	db  68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68
	db  16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66,66,16,66,2,68,34
	db  32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40,98,8,64,66,126,66
	db  66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84,68,68,120,60,32,4,16,68,40
	db  98,8,64,66,126,66,66,16,66,2,68,34,32,68,64,16,60,68,16,4,40,16,84
	db  0,0,0,0,0,0,0,0,0,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16
	db  4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60
	db  60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60
	db  8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12
	db  56,16,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68
	db  56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68
	db  56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60,60,16,60,60,60,60
	db  28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16,60,62,126,60,8,60
	db  60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84,68,56,64,4,32,120,12,56,16
	db  60,62,126,60,8,60,60,16,60,60,60,60,28,60,60,16,4,68,56,36,36,12,84
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0
	db  0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0
	db  0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0,0,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0,0,0,64,6,0,0,0,0,0
	db  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,24,0,0,0	


RawUDGs:
	db 9*0, 0,0,0,0,0,0,0,0
	db 9*3+64, 0,0,0,0,0,0,0,0
	db 9*2, 0,0,0,0,0,0,0,0
	db 9*3, 0,0,0,0,0,0,0,0
	db 9*4, 0,0,0,0,0,0,0,0
	db 9*5, 0,0,0,0,0,0,0,0
	db 9*6, 0,0,0,0,0,0,0,0
	db 9*7, 0,0,0,0,0,0,0,0

	db	01110000b,    11001100b,01100110b,00110011b,10011001b,11001100b,01100110b,00110011b,10011001b
	
	db	01000011b,    00111100b,01111110b,11111111b,11111111b,11111111b,11111111b,01111110b,00111100b
	
	db	01000011b,    00000000b,00000011b,00001111b,00011111b,00111111b,00111111b,01111111b,01111111b
	db	01000011b,    00000000b,11000000b,11110000b,11111000b,11111100b,11111100b,11111110b,11111110b
	db	01000011b,    01111111b,01111111b,00111111b,00111111b,00011111b,00001111b,00000011b,00000000b
	db	01000011b,    11111110b,11111110b,11111100b,11111100b,11111000b,11110000b,11000000b,00000000b
Output using Joefish's image data.
Image
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

dfzx wrote: Fri Apr 29, 2022 3:00 pm
Joefish wrote: Thu Apr 28, 2022 7:57 pm In that case, here's a crap version with no optimisation for a start.
It does the job, but not in any particular rush.
That takes just about exactly 5 stopwatch seconds to run 100 times (in a BASIC loop). So 50ms per iteration as a starting point. :)
Probably better to use a loop of machine code calls, as BASIC calls themselves are pretty slow.
User avatar
Pobulous
Dynamite Dan
Posts: 1365
Joined: Wed Nov 15, 2017 12:51 pm

Re: Optimisation Challenge, for the benefit of new programmers

Post by Pobulous »

Slight bit of optimisation, fixed a bug in the graphic preparing routine, included the output of that routine from joefish's UDGs, added a benchmark routine, added more comments.

Benchmarks at around 455 50ths of a second for 256 passes in 48K in Fuse. 128K machines are slightly faster at 449 in 128K or 48K mode.
Either way it's just over 35ms per pass.

Code: Select all

ScreenAddr	EQU	16384		;These could be changed to target 2nd Screen on 128K machines
ATTRAddr	EQU 22528

;These four are used to set the non-printing margins
TopEdge 	EQU 64 		;(NumRows*32)  (Maximum 7 lines)
BottomEdge	EQU 192		;(256-(NumRows*32) (Maximum 8 lines)
LeftEdge	EQU 2		;NumCols
RightEdge	EQU 30		;32-NumCols
EmptyUDGs	EQU 2		;UDGs 0-(n-1) (0-1 in this case) will only update attr data (set to 0 to disable this feature)



ORG 32768

	DI					;Disable interrupts for speed
DisplayScreen:	
	LD DE, ScreenAddr	;Initialise the main registers - Worthy of note is that the LSB of ScreenAddr, ATTRAddr and CharMap will be equal throughout the runtime.
	LD BC, ATTRAddr
	LD HL, CharMap		;Must be divisible by 256

	LD A,TopEdge		;Check for non-printing margin
	AND A
	JP Z, TestLeftSide	;If TopEdge is larger than current position, then set current position print ScreenAddr LSB to TopEdge
	LD E,A			
	LD L,A				;ATTRAddr and CharMap LSB can be updated likewise
	LD C,A

PrintLoop:

			LD A,D				;We will be testing if we are in bottom third of screen and if so, testing if we are in non-printing border area

TestBottomEdge:					;Entry point for if A already contains MSB of ScreenAddr
			CP 80				;Are we in bottom 3rd of screen?
			LD A,E				;LSB is needed to be tested whatever portion of the screen we are in
			JP NZ, TestLeftSide	;Jump if not in bottom third
			CP BottomEdge		;Test if we are in bottom non-printing area
			JP C, TestLeftSide	;Jump if not
			EI					;Finished printing, Enable Interrupts and return
			RET

TestLeftSide:
			AND 31					;A contains LSB of ScreenAddr, lowest 5 bits contains X position
			CP LeftEdge				;Are we in left non-printing border?
			JP NC, TestRightSide	;Jump to next test if not
			LD A,LeftEdge			;Add border offset to LSB ScreenAddr
			ADD A,E
			LD E,A					
			LD L,A					;ATTRAddr and CharMap LSB can be updated likewise
			LD C,A
			JP PrintLoop			;Skip over right border test
			
TestRightSide:
			CP RightEdge			;compare screen X position with right-hand non-printing border
			JP C, PrintChar			;Jump into printing routine if not in border
			LD A, 31				;Set ScreenAddr position to x=31 on the current line
			OR E
			LD E,A
			LD L,A					;ATTRAddr and CharMap LSB can be updated likewise
			LD C,A
			JP SkipPrinting			;Skip Printing, via checking for end of third of screen.

PrintChar:
			LD A,(HL)			;Get next CharMap character to print
					
			CP EmptyUDGs		;Test if will only be updating ATTR	
			
			PUSH HL				;Store CharMap location
			LD H,UDGs/256		;Get base address of UDGs
			LD L,A				;Select UDG 1-255
			LD A,(HL)			;copy ATTR data
			LD (BC),A
			
			JP C,PrintATTROnly	;Jump if updating ATTR only
			
			INC H				;Get first line of chardata
			PUSH DE				;Store current screen address

			LD A,(HL)			;copy 8 lines of UDG data to screen
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A
			INC H
			INC D
			LD A,(HL)
			LD (DE),A

			POP DE			;Restore screen address
			
PrintATTROnly:
			POP HL			;Restore CharMap location
			
SkipPrinting:
			INC BC			;Advance to next ATTR position
			INC HL			;Advance to next CharMap character
			INC E			;Advance screen address

		JP NZ, PrintLoop	;Loop until E wraps to zero - signals a third of the screen is completed
		
		
		LD A,D				;Adjust DE to point to next screen third
		ADD A,8
		LD D,A
		
		CP 88				;If MSB of screen addr is now 88, then we are done.
	JP C, TestBottomEdge	;Do next screen third
	
	EI
	RET


	
PrepUDGS:				;Run this before main code if required.
	LD DE,UDGs
	LD HL,RawUDGs		;Graphics in ATTR followed by 8 lines of pixels
	
UDGLineLoop:
		PUSH DE
		
		LD B,9
UDGLoop:
			LD A,(HL)
			LD (DE),A
			INC HL
			INC D
			
		DJNZ UDGLoop
				
		POP DE
		INC E
		
	JP NZ, UDGLineLoop	;Loop will finish after 256 iterations when E overflows back to zero
	
	RET
	
Benchmark:				;Run the DisplayScreen routine 256 times and return elapsed time in 50ths of a second
	LD BC,0
	LD (23672),BC		;Set lower 2 bytes of Frames to 0
SpeedLoop:
	PUSH BC				;Run Routine 256 times
	CALL DisplayScreen
	POP BC
	DJNZ SpeedLoop
	LD BC,(23672)		;Return elapsed time in 50ths of a second
	RET
	
ORG 49152
CharMap:
	db 	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	db 	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	db 	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  4,5,0,0,0,1,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,3,4,5,4,5,0,  1,0,3,4,5,1,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,1,1,1,1,7,3,  1,3,1,6,5,1,5,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,1,6,7,6,7,0,  1,0,1,6,7,1,1,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,6,7,0,0,0,0,6,  7,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,2,2,2
	db 	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	db 	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2
	db 	2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2


UDGs:				;256 8 byte arrays of attribs and characters aligned on 256 byte boundary, interleaved
					;256 ATTR bytes, followed by 256 top rows of UDGS, then 256 2nd rows, etc
	db 0,91,112,67,67,67,67,67,0,0,0,0,0,0,0,0			;attribs
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,204,60,0,0,127,254,0,0,0,0,0,0,0,0			;1st row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,102,126,3,192,127,254,0,0,0,0,0,0,0,0		;2nd row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,51,255,15,240,63,252,0,0,0,0,0,0,0,0			;3rd row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,153,255,31,248,63,252,0,0,0,0,0,0,0,0		;4th row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,204,255,63,252,31,248,0,0,0,0,0,0,0,0		;5th row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,102,255,63,252,15,240,0,0,0,0,0,0,0,0		;6th row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,51,126,127,254,3,192,0,0,0,0,0,0,0,0			;7th row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	
	db 0,0,153,60,127,254,0,0,0,0,0,0,0,0,0,0			;8th row of gfx data
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


RawUDGs:
	db 9*0, 0,0,0,0,0,0,0,0
	db 9*3+64, 0,0,0,0,0,0,0,0

	db	01110000b,    11001100b,01100110b,00110011b,10011001b,11001100b,01100110b,00110011b,10011001b
	
	db	01000011b,    00111100b,01111110b,11111111b,11111111b,11111111b,11111111b,01111110b,00111100b
	
	db	01000011b,    00000000b,00000011b,00001111b,00011111b,00111111b,00111111b,01111111b,01111111b
	db	01000011b,    00000000b,11000000b,11110000b,11111000b,11111100b,11111100b,11111110b,11111110b
	db	01000011b,    01111111b,01111111b,00111111b,00111111b,00011111b,00001111b,00000011b,00000000b
	db	01000011b,    11111110b,11111110b,11111100b,11111100b,11111000b,11110000b,11000000b,00000000b
Now I need to find a way to utilise the routine in the crap game compo.
User avatar
Pobulous
Dynamite Dan
Posts: 1365
Joined: Wed Nov 15, 2017 12:51 pm

Re: Optimisation Challenge, for the benefit of new programmers

Post by Pobulous »

I'm looking at building on this routine to actually display some gameplay.

One thing that struck me is that the other parts that need to be done (outside the routine) are:

Have a background map of tiles for non-moving elements of the screen and initialise the CharMap from this
Have some code to put moveable elements into the CharMap

Then every frame:

Display CharMap (using above created routine)
Execute program logic
Copy background map tiles to CharMap
Copy sprites to CharMap
Loop to DisplayCharMap

A useful optimisation is to have UDG 0 be a do nothing rather than just update ATTRs (I had it this way originally)

Now the method is:
Copy levelmap to background map
Initialise CharMap by LDIRing background map tiles to CharMap

Display CharMap (using above created routine, skipping over zero UDGs)
Execute program logic
Copy background map tiles to CharMap, setting any unchanged tiles to zero in CharMap, and leaving any zeroed tiles as are
Copy sprites to CharMap
Loop to DisplayCharMap


In most game types a lot of the map won't change, but may contain background graphics. Could be the dots and walls in pacman, or the mushrooms in centipede, or ladders and platforms, etc.
Setting those to zero and skipping drawing them will speed things up - kind of a simplified dirty rectangles.
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

I think trying to keep track of unchanged tiles just makes the program far more complicated than it needs to be.

Certainly if you've got a scrolling background, you don't want 'holes' in the rendering, which is why I don't bother with a skippable UDG #0.

The simplest program loop is simply to copy in your background again across the whole character map. The hidden border area lets you do this from a block-based scenery map, without having to worry about trimming those blocks to fit exactly.

Then add your sprites over the top. Now when copying the sprites into the character map (or even copying parallax layers of foreground scenery), then you probably want a transparent, 'don't draw me', character 0, so you can leave gaps in your sprites to show the background.

But then when you finally call the render function, it renders the whole screen without gaps.
User avatar
ketmar
Manic Miner
Posts: 700
Joined: Tue Jun 16, 2020 5:25 pm
Location: Ukraine

Re: Optimisation Challenge, for the benefit of new programmers

Post by ketmar »

for the real game engine, you actually can get away with the backbuffer only for 32x8 pixels (and attrs), and some flags in tilemap. the idea is that your tiles has flags "not changed", "blit", "dirty", and each character row has a list of sprites touching it. now, the main loop is something like this:

Code: Select all

clear sprite lists
loop over all sprites, put each sprite in row lists
loop over all tile rows, for each row (using our 1-row backbuffer):
  render all sprites into backbuffer, also mark all touched tiles as "dirty" here
  now loop over all 32 tiles:
    "not changed": ignore
    "blit": copy tile from gfx data directly to screen, mark it as "not changed"
    "dirty": copy backbuffer gfx, mark tile as "blit"
by decrementing tile flag (dirty->blit->unchanged), you also get automatic erasing of printed sprites for free, no need to track them. ;-)

this way you can support a lot of sprites, background tile animations, and still render only changed screen parts. i'm planning to write a demo engine based on this… for several years, lol.

p.s.: and thanks to "row sorting" step, you can do fast collision detection for many sprites too: you already have almost all the info ready. i.e. each sprite marks tiles as "dirty", so you can use that to perform "broad phase" of coldet (checking if there is a possible collision), and use precise check only when necessary. i.e. time spent on collision checking is almost independed of the number of active sprites.
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

dfzx wrote: Sat Apr 30, 2022 8:13 pm @Joefish I think you might have a dose of excessive average familiarity, so per this XKCD comic:

Image

Your "crap" version appears to use undocumented Z80 instructions, makes extensive use of the alternate registers, nicely sets up a LDI instruction, and unrolls a loop. That's all before it does something clever to handle the screen address, which I don't understand. I've a feeling your "crap" code is already streets ahead of what the vast majority of programmers could write, let alone "new" ones. :lol:
Well, yes, 'crap' for me is to just dump some stuff to get to the first thing that actually seems to work. For some reason I'm averse to in-memory variable storage and the stack and try to do everything in registers (hence extensive use of the 'undocumented' IXH , IXL etc. instructions as caches where someone else might use PUSH and POP). Although in my defence, they are some of the most well-documented 'undocumented' features of anything ever! :lol:

The only 'optimisation' change I made was realising that the attributes should come before the pixels in the UDG definition rather than after, to make it easier to copy the attribute but skip the pixel step. In assembly it's perfectly possible to write functions that skip operations by entering the function later rather than exiting it early, but you really don't want to go there...
User avatar
Joefish
Rick Dangerous
Posts: 2058
Joined: Tue Nov 14, 2017 10:26 am

Re: Optimisation Challenge, for the benefit of new programmers

Post by Joefish »

Anyway, my idea for a scrolling game using this system is to split the UDGs roughly in half, the first 128 for scenery and the rest for sprites and animations. The first half are then split again; 64 for foreground stuff and the other 64 split, yet again, four ways for the parallax. That leaves 16 UDGs, so either an 8x2 or 4x4 grid, with 4 frames of 2-pixel pre-shifting, rotated in-place.

With your foreground character map you have 0s in it where you want the background scroll to show through, so you copy up the foreground, then insert the relevant background characters in the zero spaces. Anything that's still 0 just gets rendered as a black (blue?) square UDG at the final stage.

The slight drawback with this is if you want bands of colour in your background, your sprite UDGs can't easily change PAPER colours to match. So if your parallax is a band between two colours (e.g. sky blue to black), best keep it high up the screen where your character can't jump. If it's lower down, stick with all black PAPER.
...Or stick with very chunky sprites that always have black PAPER.
...Or modify the renderer to OR in different PAPER colours per row as it renders the screen. Or AND the colours in, then if your UDGs have either white or black PAPER you can control which ones show background colour.
...Or re-write the code so that there are two bytes per cell in the character map, one for UDGs (pixels only) and the second byte for attributes.
But we'll leave those as an exercise for anyone who thinks they've mastered this renderer first!
Last edited by Joefish on Fri May 06, 2022 3:20 pm, edited 1 time in total.
User avatar
PeterJ
Site Admin
Posts: 6877
Joined: Thu Nov 09, 2017 7:19 pm
Location: Surrey, UK

Re: Optimisation Challenge, for the benefit of new programmers

Post by PeterJ »

It would be nice to have some more of these types of things, though perhaps aimed at a rather lower level of experience. The sorts of assumptions that Jonathan makes in his creating arcade game book would be perfect.

Image

I'm only on chapter 5, but I understand his code (and feel comfortable adapting it), and if I'm struggling I consult the Z80 documentation. It's amazing what you can achieving with load, inc, dec, pop, push, compare, call and jumps!
Post Reply