BurnMMX used to be a more than decent memory tester. It was an improvement over BurnBX, which was implemented as test 5 in memtest86/memtest86+.
However, the burnMMX utility only supports 64 MB. I would love it if some of the quick brains in here were able to fix it so that it supports more modern memory sizes; testing 2 GB or above would be nice.

The source code is this:

Code:
#  cpuburn-1.4:	 burnMMX   Chipset/DRAM Loading Utility
#  Copyright 2000  Robert J. Redelmeier.  All Right Reserved
#  Licensed under GNU General Public Licence 2.0.  No warrantee.
#  *** USE AT YOUR OWN RISK ***
 
.text
#ifdef WINDOWS
#  Windows entry point: the argument count is read from 4(%esp) and the
#  table of argument string addresses from 8(%esp).  Leaves the size
#  code in %ecx and reserves 12 bytes of stack scratch space.
.globl _main
_main:
	movl	$6, %ecx		# no parameter: size code 6, f = 64 kB
	movl	4(%esp), %eax
	cmpl	$1, %eax		# a count of one means no parameter
	je	no_size
	movl	8(%esp), %eax		# table of argument string addresses
	movl	4(%eax), %eax		# address of the first parameter
	movzbl	(%eax), %ecx		# its first byte is the size letter
no_size:
	subl	$12, %esp		# stack scratch space
#else
#  Non-Windows entry point.  Leaves the size code in %ecx and reserves
#  12 bytes of stack scratch space.
.globl _start
_start:
	movl	8(%esp), %eax		# first parameter address, zero if absent
	subl	$12, %esp		# stack scratch space
	movl	$6, %ecx		# no parameter: size code 6, f = 64 kB
	testl	%eax, %eax
	je	no_size
	movl	(%eax), %ecx		# four bytes; only the low bits get used
no_size:
#endif
	emms
	movq	rt, %mm0
	decl	%ecx		
	andl	$15, %ecx	# mask off ASCII bits
	movl	$256, %eax
	shll	%cl, %eax
	movl	%eax, 4(%esp)	# save blocksize
	movl	$256*1024, %eax
	shrl	%cl, %eax
	movl	%eax, 8(%esp)	# save count blks / 512 MB

#  Initial fill of the first block of buffer.  The block size saved at
#  4(%esp) is shifted right by 4 to give the loop count, and each pass
#  of the loop stores 64 bytes (two groups of 32).  Every 8-byte pair
#  holds the same dword twice, which the later data check relies on.
	movl	4(%esp), %ecx	# initial fill of 2 cachelines
	shrl	$4, %ecx	# loop count = block size / 16
	movl	$buffer, %edi
	xorl	%eax, %eax	# also clears CF for the first rcll
	notl	%eax		# %eax = all ones; notl leaves flags alone
more:
	movl	%eax, %edx	# qwords F-F-0-F , F-0-F-0 
	notl	%edx		# %edx = complement of the pattern
	movl	%eax,  0(%edi)
	movl	%eax,  4(%edi)
	movl	%eax,  8(%edi)
	movl	%eax, 12(%edi)
	movl	%edx, 16(%edi)
	movl	%edx, 20(%edi)
	movl	%eax, 24(%edi)
	movl	%eax, 28(%edi)

	movl	%eax, 32(%edi)
	movl	%eax, 36(%edi)
	movl	%edx, 40(%edi)
	movl	%edx, 44(%edi)
	movl	%eax, 48(%edi)
	movl	%eax, 52(%edi)
	movl	%edx, 56(%edi)
	movl	%edx, 60(%edi)
	rcll	$1, %eax	# walking zero, 33 cycle
	leal	64(%edi), %edi	# odd inst to preserve CF
	decl	%ecx		# dec leaves CF intact for the next rcll
	jnz	more

#  Main work loop.  Each pass of mov_again copies one block from buffer
#  to buf2, copies it back to buffer + 32, and moves the last 32 bytes
#  of buf2 to the start of buffer, so the block ends up rotated by 32
#  bytes.  While copying, %mm1 and %mm2 receive identical pmaddwd
#  sequences so that they can be compared after the round.
thrash:				# OUTER LOOP
	movl	8(%esp), %edx	#   reset count for 512 MB
mov_again:			
	movq	%mm0, %mm1	# restart both accumulators from the constant
	movq	%mm0, %mm2
	movl	$buffer, %esi
	movl	$buf2, %edi
	movl	4(%esp), %ecx
	shll	$2, %ecx	# move block up
	addl	%ecx, %esi	# point both at the end of the block
	addl	%ecx, %edi
	negl	%ecx		# negative byte index counting up to zero
.align 16, 0x90
0:				    # WORKLOOP 7 uops/ 3 clks in L1
	movq	0(%esi,%ecx),%mm7
	pmaddwd %mm0, %mm1
	pmaddwd %mm0, %mm2
	movq	%mm7, 0(%edi,%ecx)
	addl	$8, %ecx
	jnz	0b

	movl	$buffer + 32, %edi	# move block back
	movl	$buf2, %esi		# shifting by
	movl	4(%esp), %ecx		# one cacheline
	subl	$8, %ecx		# all but the last 8 dwords
	shll	$2, %ecx
	addl	%ecx, %esi		# %esi ends up at the last 32 bytes of buf2
	addl	%ecx, %edi
	negl	%ecx
.align 16, 0x90
0:				   # second workloop
	movq	0(%esi,%ecx),%mm7
	pmaddwd %mm0, %mm1
	pmaddwd %mm0, %mm2
	movq	%mm7, 0(%edi,%ecx)
	addl	$8, %ecx
	jnz	0b

	movl	$buffer, %edi	# %esi still points at the tail of buf2
	movsl			# replace last c line
	movsl			# (8 dwords = 32 bytes; assumes DF is clear)
	movsl
	movsl
	movsl
	movsl
	movsl
	movsl
	decl	%edx		# do again for 512 MB.
	jnz	mov_again

	xorl	%ebx ,%ebx	# DATA CHECK
	decl	%ebx
	pcmpeqd %mm2, %mm1
	psrlq	$16, %mm1
	movd	%mm1, %eax
	incl	%eax
	jnz	error		# MMX calcs OK?

	decl	%ebx
	subl	$32, %edi	
	xorl	%ecx, %ecx
test:				# Check data (NOT optimized)
	mov	0(%edi,%ecx,4), %eax
	cmp	%eax, 4(%edi,%ecx,4)
	jnz	error
	incl	%ecx
	incl	%ecx
	cmpl	4(%esp), %ecx
	jc	test
	jmp	thrash

#  Abnormal exit path.  Reached from the data check with %ebx already
#  holding the status: -1 when the MMX results differ, -2 on a data
#  miscompare.
error:				# error abend
	emms
	movl	$1, %eax	# return value on Windows, request number otherwise
#ifdef WINDOWS
	addl $12, %esp		# deallocate stack
	ret
#else
#  NOTE(review): presumably request 1 is exit with %ebx as the status,
#  and the two pushes serve kernels that take arguments on the stack.
#  Confirm against the target system call convention.
	push	%ebx
	push	%eax
	int	$0x80
#endif
#  Constant loaded into %mm0 at startup and used as the pmaddwd operand.
rt:	.long	0x7fffffff, 0x7fffffff

#  Two work buffers of 32 MB each.  The largest block the size code can
#  select is 256 << 15 dwords, which is exactly 32 MB, so testing more
#  memory needs both larger buffers here and a wider size mask in the
#  setup code.
.bss				# Data allocation
.align 32
.lcomm	buffer,	 32 <<20	# reduce both to 8 <<20 for only
.lcomm	buf2,	 32 <<20	# 16 MB virtual memory available

#

My thanks go to anyone looking into this.