@ ***********************************************************************
@ *									*
@ *                   Copy an Array of Words En-masse                   *
@ *									*
@ ***********************************************************************
@ Author:   John Zaitseff <J.Zaitseff@unsw.edu.au>
@ Date:     9th September, 2002
@ Version:  1.3
@ This program is a more sophisticated version of "wordcopy.s".  Instead
@ of copying an array one word at a time, it uses the "ldmia" and "stmia"
@ instructions to copy eight words at a time.  Of special note is the
@ method of setting up an independent stack.
	.text
	.global main
	.equ	num, 20		@ Number of words to be copied
main:
@ Copy "num" words from "src" to "dst": first in bursts of eight words
@ (ldmia/stmia), then one word at a time for the remainder.  The copy
@ runs on a private stack ("stack_top"); the stack pointer received on
@ entry is parked on that private stack and reinstated before "debug"
@ is called, so the return address saved below is popped from the
@ original stack.
        stmfd   sp!, {lr}               @ Return address goes on the entry stack
        mov     r0, sp                  @ R0 = stack pointer as received on entry
        ldr     sp, =stack_top          @ R13 now points at the private stack
        stmfd   sp!, {r0}               @ Remember the entry stack pointer there
@ A program would normally keep using R13 exactly as it was on entry.
@ Installing a stack of your own (right at the start) pays off when the
@ program needs a lot of stack space, e.g. large parameters or recursion.
        ldr     r0, =src                @ R0 -> source block
        ldr     r1, =dst                @ R1 -> destination block
        mov     r2, #num                @ R2 = words still to be copied
blockcopy:
        lsrs    r3, r2, #3              @ R3 = count of complete eight-word groups
        beq     copywords               @ Fewer than eight words: skip the burst loop
        stmfd   sp!, {r4-r11}           @ R4-R11 are used as the burst buffer; save them
octcopy:
        ldmia   r0!, {r4-r11}           @ Fetch eight words, advancing R0 by 32
        stmia   r1!, {r4-r11}           @ Write them out, advancing R1 by 32
        subs    r3, r3, #1              @ One group fewer to go
        bne     octcopy
        ldmfd   sp!, {r4-r11}           @ Give R4-R11 back their previous contents
copywords:
        ands    r2, r2, #7              @ R2 = words left over (num mod 8)
        beq     done
wordcopy:
        ldr     r3, [r0], #4            @ Fetch one word, advancing R0
        str     r3, [r1], #4            @ Write it out, advancing R1
        subs    r2, r2, #1
        bne     wordcopy
done:
        ldr     r1, [r1, #-4]           @ R1 = last word written (argument for "debug")
        ldmfd   sp!, {r0}               @ R0 = entry stack pointer saved above
        mov     sp, r0                  @ Back on the original stack
        bl      debug                   @ Print the last word copied
exit:   ldmfd   sp!, {pc}               @ Return to whoever called main

debug:  stmfd   sp!, {lr}               @ printf overwrites LR, so keep the return address
@ Prints the value passed in R1 through the "dbg_msg" format string.
        ldr     r0, =dbg_msg            @ R0 = format string; R1 = value to print
        bl      printf
        ldmfd   sp!, {pc}               @ Return to the caller

	.data			        @ Read/write data follows
	.align			        @ Make sure data is aligned on 32-bit  boundaries
dbg_msg:.asciz "debug message last word copied: %u\n"
.align
src:	.word	 1,  2,  3,  4,  5,  6,  7,  8,  9, 10
	.word	11, 12, 13, 14, 15, 16, 17, 18, 19, 20
dst:	.skip	num * 4	    @ Reserve 80 bytes (num 32-bit words)
	.section .bss		    @ Uninitialised storage space follows
	.align
@ The ".section" assembler directive allows you to switch to any arbitrary
@ section in your program (in fact, ".text" is really ".section .text" and
@ ".data" is ".section .data").  Be warned, however, that it is the GNU
@ Linker's job of putting those sections in some sort of order... and it
@ can't do so with sections it does not know about.  In this case, the
@ ".bss" section is reserved by the linker for uninitialised storage
@ space: nothing is stored in the executable file apart from a note
@ saying, in effect, "reserve this many bytes of space for me".
@ Pages 23-27 of the GNU Assembler Manual have more information about
@ sections in a program.  See page 52 of that manual for a fuller
@ description of the ".section" assembler directive.

stack:		.skip	1024	@ Allow 1KB for the local stack
stack_top:		        @ The stack grows downwards in memory, so we need a label to its top
	.end

@ ***********************************************************************
@ *									*
@ *               Function: Copy a NUL-terminated String                *
@ *									*
@ ***********************************************************************

@ Author:   John Zaitseff <J.Zaitseff@unsw.edu.au>
@ Date:     9th September, 2002
@ Version:  1.4

@ This file contains a function, written in assembly language, to copy a
@ C-style string from one location to another.  C-style strings are ASCII
@ strings with a NUL character (code 0) at the end.  This function, along
@ with the files "strcopy-a.s" and "strcopy-c.c", is a demonstration of
@ using multiple source modules in a single executable.

@ You can use "make" to create the executables that depend on this source
@ file; the source modules that make use of this file are "strcopy-a.s"
@ (assembly language version) and "strcopy-c.c" (C language version).


	.text

	.equ	NUL, 0		@ ASCII code for NUL (end of string indicator)

	.global	strcopy		@ Make the label visible to all modules

@ The function "strcopy" needs to agree with its callers about what to
@ expect (in registers and in memory) on entry and on exit.  ARM defines
@ the ARM-Thumb Procedure Call Standard to help you with this---this
@ standard is on the CD-ROM in the "reference" directory.

@ On entry to this function, R0 = pointer to (ie, address of) the
@ destination string; R1 = pointer to the source string.  Nothing (at
@ least nothing worthwhile) is returned on exit.  This corresponds to the C
@ definition:
@     void strcopy (char *dest, const char *src)
@ and is very similar to the C function "strcpy".

@ DANGER, Will Robinson!  This function should not be used in real code.
@ What happens if the source string is longer than the buffer set aside
@ for the destination?  Search for "buffer overflow" on www.google.com to
@ find out...

strcopy:
@ void strcopy (char *dest, const char *src)
@ In:       R0 = address of destination string
@           R1 = address of source string (NUL-terminated)
@ Out:      nothing; R0 and R1 end up one byte past the copied NUL
@ Clobbers: R2, flags
@ No bounds checking is done: the destination must be large enough.
copyloop:
        ldrb    r2, [r1], #1            @ Load byte into R2 and advance R1
        strb    r2, [r0], #1            @ Store the byte and advance R0
        cmp     r2, #NUL                @ Was that the NUL terminator?
        bne     copyloop                @ No: copy the next byte

        bx      lr                      @ Return; unlike "mov pc,lr" this also
                                        @ works when the caller is Thumb code

@ By the way, symbols are case-sensitive: if you replace "cmp r2,#NUL"
@ above with "cmp r2,#nul", the GNU Assembler will complain:
@     Error: internal relocation (type 232) not fixed up (IMMEDIATE)
@ Yes, this is cryptic!  The reason is that the GNU Assembler treats all
@ undefined symbols (in this case, "nul") as being defined in an external
@ module; that is not allowed for #-type parameters.

	.end
	.text
	.global main	
@******************************************************************
        .equ    NUL, 0          @ NUL is used for end of string
strcopy:
@ Byte-by-byte copy of a NUL-terminated string, terminator included.
@ In:       R0 = destination address, R1 = source address
@ Clobbers: R2 and the flags; R0/R1 are left just past the copied NUL
next_byte:
        ldrb    r2, [r1], #1            @ R2 = *src++
        strb    r2, [r0], #1            @ *dest++ = R2
        cmp     r2, #NUL                @ Did we just copy the terminator?
        bne     next_byte               @ Not yet: keep going
        bx      lr                      @ Done: back to the caller
@*******************************************************************
main:
@ Prints both strings, copies "srcstr" over "dststr" with strcopy, then
@ prints both strings again.
@ R4 is pushed together with LR only to keep SP 8-byte aligned at the
@ calls to puts, as the AAPCS requires at public interfaces (pushing LR
@ alone would leave SP at 4 modulo 8).
        push    {r4, lr}
        ldr     r0, =str_before         @ "Before copying:"
        bl      puts
        ldr     r0, =srcstr
        bl      puts
        ldr     r0, =dststr
        bl      puts
        ldr     r0, =dststr             @ R0 := address of destination string
        ldr     r1, =srcstr             @ R1 := address of source string
        bl      strcopy                 @ Copy source over destination
        ldr     r0, =str_after          @ "After copying:"
        bl      puts
        ldr     r0, =srcstr
        bl      puts
        ldr     r0, =dststr
        bl      puts
        pop     {r4, pc}                @ Return to the caller
	.data			@ Read/write data follows
	.align			@ Align to a 32-bit boundary
srcstr:	.asciz	"First (source) string"
dststr:	.asciz	"Second (destination) string"
str_before:
	.asciz	"Before copying:"	@ ".asciz" appends the NUL; main prints this with puts
str_after:
	.asciz	"After copying:"	@ Likewise NUL-terminated and printed with puts
	.end
/* Test of an integer divide-by-10 subroutine.
   udiv10 computes the quotient and remainder of an unsigned integer divided by 10;
   only one multiply instruction plus shifts are used.
Prof. Célio Guimarães   MC 404 - 2nd semester 2012
*/

.align 2
.text
.global main
debug_msg: .asciz "quoc= %u rem= %u dividend= %u\n"
.align 2
main:
@ Divides 0xffffffff by 10 with udiv10 and prints quotient, remainder
@ and dividend.
@ R4 is saved with LR for two reasons: the pair keeps SP 8-byte aligned
@ at the printf call (AAPCS), and R4 is a callee-saved register that
@ must reach main's caller unchanged whatever udiv10 does with it.
        push    {r4, lr}
        ldr     r2, =0xffffffff         @ Dividend = 4294967295
        push    {r2}                    @ Keep the dividend for printing
        bl      udiv10                  @ R2 = quotient, R3 = remainder
        mov     r1, r2                  @ printf arg 1: quotient
        mov     r2, r3                  @ printf arg 2: remainder
        pop     {r3}                    @ printf arg 3: dividend (SP aligned again)
        ldr     r0, =debug_msg
        bl      printf
        pop     {r4, pc}
@***********************************************************
udiv10:
@ Unsigned 32-bit divide by 10 using one multiply and shifts.
@ In:       R2 = dividend
@ Out:      R2 = quotient, R3 = remainder
@ Clobbers: R1, IP (R12) -- both caller-saved scratch registers, so no
@           callee-saved register (previously R4) is disturbed.
        mov     ip, r2                  @ Keep the dividend for the remainder
        ldr     r3, =0xCCCCCCCD         @ Same value as -858993459: 2^35/10 rounded up
        umull   r1, r3, r2, r3          @ R3:R1 = dividend * constant (RdHi/RdLo/Rm distinct)
        lsr     r2, r3, #3              @ Quotient = product >> 35
        add     r3, r2, r2, lsl #2      @ R3 = quotient * 5
        sub     r3, ip, r3, lsl #1      @ Remainder = dividend - quotient * 10
        bx      lr
@***********************************************************

.data
.align 2
msg:	.asciz "dividendo= %d divisor= %d quociente= %d resto= %d\n"
.align 2
.text
.global main
main:
@ Computes 1000 / 13 with "div" and prints dividend, divisor, quotient
@ and remainder.  printf takes its fifth argument (the remainder) on
@ the stack.
@ R4 is callee-saved, so the caller's value is saved on entry and
@ restored on exit; the stack-argument slot is 8 bytes so that SP stays
@ 8-byte aligned at the printf call.
        push    {r4, lr}
        ldr     r1, =1000               @ Dividend
        mov     r2, #13                 @ Divisor
        push    {r1, r2}                @ div destroys both; keep copies
        bl      div                     @ R0 = quotient, R1 = remainder
        mov     r3, r0                  @ printf arg 3: quotient
        mov     r4, r1                  @ Hold the remainder for a moment
        pop     {r1, r2}                @ printf args 1, 2: dividend, divisor
        sub     sp, sp, #8              @ Aligned slot for the stacked argument
        str     r4, [sp]                @ printf arg 4: remainder
        ldr     r0, =msg
        bl      printf
        add     sp, sp, #8              @ Drop the stacked argument
        pop     {r4, pc}

div:
@ Unsigned 32-bit shift-and-subtract division.
@ In:       R1 = dividend, R2 = divisor
@ Out:      R0 = quotient, R1 = remainder
@ Clobbers: R2, R3, flags
@ Divide by zero returns quotient 0 and leaves the dividend in R1.
        MOV     R0, #0                  @ Quotient accumulator (also the /0 result)
        MOV     R3, #1                  @ Quotient bit matching the current R2
        CMP     R2, #0
        BEQ     done                    @ Divide by zero: nothing to do

start:
@ Scale the divisor up until it is no longer below the dividend.  The
@ first compare stops the scaling once bit 31 of R2 is set; without it
@ R2 would be shifted out to zero for dividends >= 2^31 and this loop
@ would never end.
        CMP     R2, #0x80000000
        CMPCC   R2, R1
        MOVCC   R2, R2, LSL #1
        MOVCC   R3, R3, LSL #1          @ Track how far we have shifted
        BCC     start

next:
        CMP     R1, R2                  @ Carry set when R1 >= R2 (unsigned)
        SUBCS   R1, R1, R2              @ Subtract when it fits...
        ADDCS   R0, R0, R3              @ ...and record that quotient bit

        MOVS    R3, R3, LSR #1          @ Next lower bit; carry = bit shifted out
        MOVCC   R2, R2, LSR #1          @ Bit still inside R3: scale divisor down too
        BCC     next                    @ Carry set means bit 0 has been handled
done:
        bx      lr

/* ex1.s 	MC404  Oct 2012  Celio G
 *******************************************************
*/
.data
scan_format:	.asciz "%d"
out_format: .asciz "Sum: %d    Difference: %d    Product: %d\n"
instr1: .asciz "Enter first integer: "
instr2: .asciz "Enter second integer: "
.align	2
num1: .word 0xaa
num2: .word 0
sum: .word 0
difference: .word 0
product: .word 0
.text
.align 2
.global	main	
main:
@ Reads two integers with scanf, stores their sum, difference and
@ product in memory, then prints the three results.
@ R4 is pushed with LR only so that SP stays 8-byte aligned at every
@ printf/scanf call, as the AAPCS requires.
        push    {r4, lr}
        ldr     r0, =instr1             @ Prompt for the first integer
        bl      printf
        ldr     r1, =num1               @ scanf stores the value at num1
        ldr     r0, =scan_format
        bl      scanf
        ldr     r0, =instr2             @ Prompt for the second integer
        bl      printf
        ldr     r1, =num2               @ scanf stores the value at num2
        ldr     r0, =scan_format
        bl      scanf
        ldr     r1, =num1
        ldr     r1, [r1]                @ R1 = first input
        ldr     r2, =num2
        ldr     r2, [r2]                @ R2 = second input
        add     r0, r1, r2              @ Sum
        ldr     r3, =sum
        str     r0, [r3]                @ "str" lists the source register first
        sub     r0, r1, r2              @ Difference
        ldr     r3, =difference
        str     r0, [r3]
        mul     r0, r1, r2              @ Product (16-bit Thumb would need Rd == Rm)
        ldr     r3, =product
        str     r0, [r3]
        ldr     r1, =sum                @ Reload the three results as printf args
        ldr     r1, [r1]
        ldr     r2, =difference
        ldr     r2, [r2]
        ldr     r3, =product
        ldr     r3, [r3]
        ldr     r0, =out_format
        bl      printf                  @ Print sum, difference, product
        pop     {r4, pc}
 	.end

    .data
    .align 2
Hellomessage:
    .string "Hello World!"
    .text
    .align 2
    .global main
main:
    @ Prints Hellomessage with puts.  R4 is pushed alongside LR so that
    @ SP is 8-byte aligned at the call, as the AAPCS requires.
    push {r4, lr}
    ldr r0, =Hellomessage
    bl puts
    pop {r4, pc}

@hellothumb.s
@adaptado de hello.s para gerar código thumb2
@************************************************ 
.thumb
.syntax unified   @ use thumb2 extensions
.data
.align 2
wtword: .word 0
.Lmessage:
    .string "Hello World! %d\n"
    .text
    .align 2
    .global main
    .type main, %function   @required by thumb compiler
main:
    @ Thumb-2 version: stores 10 in "wtword", reads it back, doubles it
    @ and prints the result through the .Lmessage format string.
    @ R4 is pushed with LR so that SP is 8-byte aligned at the printf call.
    push {r4, lr}
    ldr r3, myword              @ r3 = address of wtword (from the literal below)
    mov r2, #10
    str r2, [r3]                @ wtword = 10
    ldr r1, [r3]                @ r1 = wtword
    lsl r1, r1, #1              @ r1 = wtword * 2 (unified-syntax shift mnemonic)
    ldr r0, .Laddr_message      @ r0 = address of the format string
    bl printf
    pop {r4, pc}
    .align 2
    @ Address literals reached by the PC-relative loads above
.Laddr_message:
    .word   .Lmessage
myword:    .word  wtword

@ ***********************************************************************
@ *									*
@ *                   A Multi-way Branch (Jump Table)                   *
@ *									*
@ ***********************************************************************

@ Author:   John Zaitseff <J.Zaitseff@unsw.edu.au>
@ Date:     9th September, 2002
@ Version:  1.3

@ This program illustrates a multi-way branch (also called a jump table),
@ as well as using pointers to functions.  If you know BASIC, you might
@ remember the ON ... GOTO statement; this is the ARM assembly language
@ equivalent.  The actual example is somewhat contrived (think about
@ operating system calls dispatch code for a better example); the real
@ meat is in the technique.
.data
add_msg: .asciz	"Addition executed!"
sub_msg: .asciz	"Subtracion executed!"
mul_msg: .asciz "Multiplication executed!"

	.text
	.global	main

@ Function call identifiers

	.equ	num_func, 3	@ Number of functions available
	.equ	f_add, 0	@   0 = addition
	.equ	f_sub, 1	@   1 = subtraction
	.equ	f_mul, 2	@   2 = multiplication

main:
@ Calls the function selected by an index through "dispatch".
@ main now returns through its own epilogue.  Previously execution fell
@ through from the "bl" into "dispatch" itself, so an out-of-range index
@ made "movhs pc,lr" jump straight back to "dispatch" forever, and the
@ do_* routines returned by popping the LR that main had pushed.
        push    {r4, lr}                @ Two registers keep SP 8-byte aligned
        mov     r0, #f_sub              @ R0 = function number (an index number)
        mov     r1, #218                @ R1 = first parameter
        mov     r2, #34                 @ R2 = second parameter
        bl      dispatch                @ Call the function identified by R0
        mov     r0, #0                  @ Exit status
        pop     {r4, pc}


dispatch:                               @ Multi-way branch function
@ In:  R0 = function number, R1/R2 = operands
@ Out: R0 = result of the selected function (unchanged if R0 was out of range)
@ The selected routine is entered by a jump, so it returns directly to
@ dispatch's caller through LR.
        cmp     r0, #num_func           @ Is the index inside the table?
        bxhs    lr                      @ No (R0 >= num_func): just return
        adr     r3, func_table          @ Get the address of the function table
        ldr     pc, [r3, r0, lsl #2]    @ Jump to the routine (PC = [R3 + R0*4])

func_table:                             @ The actual table of function addresses
        .word   do_add                  @   for entry 0 (f_add)
        .word   do_sub                  @   for entry 1 (f_sub)
        .word   do_mul                  @   for entry 2 (f_mul)

@ The table "func_table" contains a series of addresses (ie, pointers) to
@ functions.  Each address occupies four bytes (hence the R0*4 above).
@ The "dispatch" function simply uses the function number (in R0) as an
@ index into this table, retrieves the corresponding address, then jumps
@ to that address: all this in four statements!

@ Each routine below saves its own LR (puts overwrites it), keeps the
@ result in R4 across the call and returns it in R0.

do_add:                                 @ Function 0: f_add
        push    {r4, lr}
        add     r4, r1, r2              @ R4 := R1 + R2
        ldr     r0, =add_msg
        bl      puts
        mov     r0, r4                  @ Return the sum
        pop     {r4, pc}

do_sub:                                 @ Function 1: f_sub
        push    {r4, lr}
        sub     r4, r1, r2              @ R4 := R1 - R2
        ldr     r0, =sub_msg
        bl      puts
        mov     r0, r4                  @ Return the difference
        pop     {r4, pc}

do_mul:                                 @ Function 2: f_mul
        push    {r4, lr}
        mul     r4, r1, r2              @ R4 := R1 * R2
        ldr     r0, =mul_msg
        bl      puts
        mov     r0, r4                  @ Return the product
        pop     {r4, pc}

@ By the way, the "dispatch" routine can be rewritten to take only THREE
@ instructions instead of four.  Are you up to the challenge?  Hint: page
@ A9-9 of the ARM Architecture Reference Manual (page 431 of the PDF
@ document) might give you a clue...  In real life, however, do remember
@ that "premature optimisation is the root of all evil" (Donald Knuth).

	.end
/* ndiv.s	subrotina de divisão para o ARM otimizada
	http://www.tofla.iconbar.com/tofla/arm/arm02/index.htm
O algoritmo de divisão por subtrações sucessivas do link acima possui 2 fases:
fase 1: laço para obter o maior valor de n tal que 2**n x divisor < dividendo
fase 2 : laço de subtrações do dividendo por divisor x 2**n, decrementando n até 0
O algoritmo executa log2(dividendo/divisor) iterações ao contrário de
dividendo/divisor subtrações sucessivas do algoritmo "burro".
As melhorias do presente algoritmo consistiram em: 
(i) eliminação do laço da fase 1 utilizando a instrução CLZ (Count Leading Zeros)
reduzindo pela metade o numero de iterações.
(ii)suporta inteiros de 32 bits sem sinal (o original suporta 31 bits sem sinal)
O código usa o poderoso recurso de "execução condicional" do ARM.
Estatísticas de um teste: divisão de 0x7fffffff (2**31 - 1) por 1:
Algoritmo burro: subtrai 1 e incrementa contador: 2*10**9 iterações
Algoritmo do link: 64 iterações
Algoritmo melhorado: 32 iterações
Prof. Célio Guimarães - MC404 2012
*************************************************************************/
.data
.align 2
msg:	.asciz "dividendo= %x divisor= %x quociente= %x resto= %x iterações: %d\n"
dbgmsg: .asciz "debug message"
.align 2
.text
.global main
main:
@ Divides 0x7fffffff by 1 with "div" and prints dividend, divisor,
@ quotient, remainder and the count that div leaves in R7.
@ R4, R5 and R7 are callee-saved registers that div and this routine
@ overwrite, so they are saved here and restored on exit.  The four
@ pushed registers plus the two stacked printf arguments also keep SP
@ 8-byte aligned at the printf call.
        push    {r4, r5, r7, lr}
        ldr     r1, =0x7fffffff         @ Dividend
        ldr     r2, =0x1                @ Divisor
        push    {r1, r2}                @ div destroys both; keep copies
        bl      div                     @ R0 = quotient, R1 = remainder, R7 = count
        mov     r3, r0                  @ printf arg 3: quotient
        mov     r4, r1                  @ Remainder
        pop     {r1, r2}                @ printf args 1, 2: dividend, divisor
        push    {r4, r7}                @ printf args 4, 5 go on the stack
        ldr     r0, =msg                @ Five values are printed
        bl      printf
        add     sp, sp, #8              @ Drop the two stacked arguments
exit:   pop     {r4, r5, r7, pc}

/**************************************************************************
division subroutine (32 bit unsigned integers)
Obs: upper case instructions are from original program
***************************************************************************/
div:
@ Unsigned 32-bit shift-and-subtract division, using CLZ to line the
@ divisor up with the dividend and to skip positions where no
@ subtraction is possible.
@ In:       R1 = dividend, R2 = divisor
@ Out:      R0 = quotient, R1 = remainder,
@           R7 = 1 + number of subtractions performed (statistics)
@ Clobbers: R2, R3, R4, R5, flags
@ Divide by zero returns quotient 0, R7 = 1, and the dividend in R1.
        mov     r0, #0                  @ Quotient accumulator
        mov     r7, #1                  @ Statistics counter
        cmp     r2, #0
        beq     done                    @ Divide by zero
        mov     r3, #1                  @ Quotient bit matching the current R2
        clz     r4, r1
        clz     r5, r2
        subs    r5, r5, r4              @ Bits by which the divisor sits below the dividend
        movgt   r2, r2, lsl r5          @ Line the top bits up
        movgt   r3, r3, lsl r5
next:
        cmp     r1, r2                  @ Carry set when R1 >= R2 (unsigned)
        subcs   r1, r1, r2              @ Subtract when it fits...
        addcs   r0, r0, r3              @ ...and record that quotient bit
        addcs   r7, r7, #1
        movs    r3, r3, lsr #1          @ Next lower bit; carry = bit shifted out
        bcs     done                    @ Bit 0 has just been handled
        mov     r2, r2, lsr #1

@ Skip ahead.  When the remainder has more leading zeros than the scaled
@ divisor, it is smaller than the divisor at each of the next R5
@ positions, so those quotient bits are zero.  (The earlier code took
@ this difference with the operands reversed, which shifted the quotient
@ bit out to zero -- for example 5/3 never terminated.)
        clz     r4, r1                  @ 32 when the remainder is zero
        clz     r5, r2
        subs    r5, r4, r5              @ Positions that can be skipped
        ble     next                    @ None: carry on bit by bit
        clz     r4, r3
        rsb     r4, r4, #31             @ R4 = index of the current quotient bit
        cmp     r5, r4
        bgt     done                    @ Remainder is below the divisor itself
        mov     r2, r2, lsr r5
        mov     r3, r3, lsr r5
        b       next
done:
        bx      lr
/*************************************************************************************/

debug:
@ Prints the fixed string "dbgmsg" with puts, then returns.
        stmfd   sp!, {lr}               @ puts overwrites LR
        ldr     r0, =dbgmsg
        bl      puts
        ldmfd   sp!, {lr}               @ Recover the return address
        bx      lr
.align 2
.text
msgyes: .asciz "%u is a power of 2\n"
msgno: .asciz "%u is not a power of 2\n"
.align 2
.global main
/******************************************************
unsigned int powerof2(unsigned int n){
	return n != 0 && (n & (n - 1)) == 0;
}
(The earlier test n == (n & -n) also accepted n == 0.)
******************************************************/
pwof2:	@ check if r0 is a power of 2
	@ In:  r0 = n
	@ Out: Z flag set if n is a power of 2, clear otherwise (including n == 0)
	@ no registers changed; only the Z flag is meaningful on return
	push {r1}
	subs r1,r0,#1	@ r1:= n-1; for n == 0 this borrows: C clear, Z clear
	tstcs r0,r1	@ n != 0: Z set when n & (n-1) == 0
	pop {r1}	@ pop leaves the flags alone
	bx lr
/******************************************************/
main:
	@ Tests 65536 with pwof2 and prints the matching message.
	@ r4 is pushed with lr so that sp is 8-byte aligned at the printf call.
	push {r4, lr}
	ldr r0,=65536
	bl pwof2		@ Z set if r0 is a power of 2
	mov r1,r0		@ value to print ("mov" leaves the flags alone)
	ldreq r0,=msgyes	@ pick the format string according to Z
	ldrne r0,=msgno
	bl printf
	pop {r4, pc}
@ ***********************************************************************
@ *									*
@ *                    Copy a NUL-terminated String                     *
@ *									*
@ ***********************************************************************

@ Author:   John Zaitseff <J.Zaitseff@unsw.edu.au>
@ Date:     9th September, 2002
@ Version:  1.4

@ This program copies a NUL-terminated string from one location to
@ another.  The main features of this program are the use of a separate
@ module, "copy.s", for the actual copy-string function and the use of SWI
@ software interrupt instructions.  Please make sure that you read the
@ source code to "copy.s"!


	.text
	.global main	

@ ASCII codes
	.equ	NUL, 0		@ NUL is used for end of string
	.equ	LF, 10		@ Line Feed (end of line) character

@ External definitions (this section is optional but recommended)
	.extern	strcopy


main:
@ Prints both strings, copies "srcstr" over "dststr" with the external
@ function strcopy, then prints both strings again.
@ R4 is pushed together with LR only to keep SP 8-byte aligned at the
@ calls to puts and strcopy, as the AAPCS requires at public interfaces.
        push    {r4, lr}
        ldr     r0, =str_before         @ "Before copying:"
        bl      puts
        ldr     r0, =srcstr
        bl      puts
        ldr     r0, =dststr
        bl      puts
        ldr     r0, =dststr             @ R0 := address of destination string
        ldr     r1, =srcstr             @ R1 := address of source string
        bl      strcopy                 @ Call the function "strcopy" (in "copy.s")
        ldr     r0, =str_after          @ "After copying:"
        bl      puts
        ldr     r0, =srcstr
        bl      puts
        ldr     r0, =dststr
        bl      puts
        pop     {r4, pc}


	.data			@ Read/write data follows
	.align			@ Align to a 32-bit boundary

srcstr:	.asciz	"First (source) string"
dststr:	.asciz	"Second (destination) string"

@ The source string "srcstr" could have been placed in the ".text"
@ section, as it is read-only.  Note that ".asciz" places a NUL character
@ at the end of the string.

	.data			@ Back to the data section (although these
				@ strings could have been left in ".text")

str_before:
	.asciz	"Before copying:"	@ ".asciz" appends the NUL; main prints this with puts
str_after:
	.asciz	"After copying:"	@ Likewise NUL-terminated and printed with puts
@ The three strings below are not referenced by the code in this module.
str_srcis:
	.asciz	"srcstr = \""		@ Using \" escape for double-quotes
str_dstis:
	.asciz	"\"\ndststr = \""	@ Closing quote, newline, then the next label
str_end:
	.asciz	"\"\n\n"		@ Closing quote followed by two newlines

	.end
.align 2
.text
sum_msg:    .asciz "Sum 15 + 32= %d  "
sub_msg:    .asciz "Sub 47 - 5= %d\n"
debug_msg:  .asciz "R1= %d R2= %d R3= %d R4= %d\n"
.align 2
	.global	main
main:
@ Prints 15 + 32 computed by f_add, then (that sum) - 5 computed by f_sub.
@ R4 is saved with LR so SP stays 8-byte aligned at both printf calls;
@ it also carries the sum across the first printf.  (Previously the sum
@ was pushed on its own, which left SP at 4 modulo 8 for the second call.)
        push    {r4, lr}
        mov     r0, #15                 @ Set up parameters
        mov     r1, #32
        bl      f_add                   @ R0 = 15 + 32
        mov     r4, r0                  @ Keep the sum; printf may overwrite R0-R3
        mov     r1, r0
        ldr     r0, =sum_msg
        bl      printf
        mov     r0, r4                  @ Retrieve the sum (47)
        mov     r1, #5                  @ Set up the second parameter
        bl      f_sub                   @ R0 = sum - 5
        mov     r1, r0
        ldr     r0, =sub_msg
        bl      printf
exit:   pop     {r4, pc}                @ Terminate the program

@********************************************************************
f_add:				@ Function "f_add" for addition
@ In:  R0, R1 = the two addends
@ Out: R0 = R0 + R1; no other register or flag is modified
	add	r0,r0,r1	@ Perform R0 := R0 + R1
	bx	lr		@ Return as f_sub does: "bx lr" also works
				@ when the caller is Thumb code
@************************************************************************
f_sub:				@ Subtraction helper
@ In:  R0 = minuend, R1 = subtrahend
@ Out: R0 = R0 - R1; flags are not modified
	rsb	r0,r1,r0	@ Reverse subtract: R0 := R0 - R1
	bx	lr		@ Back to the caller
	.end

/* Test of an integer divide-by-10 subroutine.
   udiv10 computes the quotient of an unsigned integer divided by 10
   using the constant from the paper "ARM Programming Techniques", pp. 5-12 to 5-15;
   only one multiply instruction plus shifts are used.
Prof. Célio Guimarães   MC 404 - 2nd semester 2012
*/

.align 2
.text
.global main
debug_msg: .asciz "quoc= %u rem= %u dividend= %u\n"
mul_msg: .asciz "product low= %x high= %x\n"
.align 2
main:
@ Divides 0xffffffff by 10 with udiv10 and prints quotient, remainder
@ and dividend.
@ udiv10 returns the remainder in R4, a callee-saved register, so the
@ caller's R4 is saved here and restored on exit; pushing it with LR
@ also keeps SP 8-byte aligned at the printf call.
        push    {r4, lr}
        ldr     r2, =0xffffffff         @ Dividend = 4294967295
@ Disabled experiment with the umull instruction (kept for reference):
@       ldr   r3, =2                    @ r3 x r2 = (1,fffffffe)
@       umull r1, r3, r3, r2
@       mov   r2, r3
@       ldr   r0, =mul_msg              @ r1 = product low, r2 = product high
@       bl    printf
@       pop   {pc}
@ End of disabled experiment
        push    {r2}                    @ Keep the dividend for printing
        bl      udiv10                  @ R2 = quotient, R4 = remainder
        mov     r1, r2                  @ printf arg 1: quotient
        mov     r2, r4                  @ printf arg 2: remainder
        pop     {r3}                    @ printf arg 3: dividend (SP aligned again)
        ldr     r0, =debug_msg          @ Print quotient, remainder, dividend
        bl      printf
        pop     {r4, pc}
@***********************************************************
udiv10:
@ Unsigned divide by 10 via one multiply: a quotient estimate is taken
@ from the high word of dividend * 0x19999999, then corrected by one
@ where the sign of (dividend - 10 - estimate*10) calls for it.
@ In:       R2 = dividend
@ Out:      R2 = quotient, R4 = remainder
@ Clobbers: R1, R3, flags
        ldr     r3, =0x19999999         @ Constant = 2^32 / 10, rounded down
        sub     r4, r2, #10             @ R4 = dividend - 10
        umull   r1, r3, r2, r3          @ R3 = high word = quotient estimate
        mov     r2, r3                  @ R2 = estimate
        add     r3, r3, r3, lsl #2      @ R3 = estimate * 5
        mov     r3, r3, lsl #1          @ R3 = estimate * 10
        subs    r4, r4, r3              @ R4 = dividend - 10 - estimate*10
        addpl   r2, r2, #1              @ Not negative: estimate was one too low
        addmi   r4, r4, #10             @ Negative: undo the -10 to get the remainder
        bx      lr
@***********************************************************
@ ***********************************************************************
@ *                       Copy an Array of Words                        *
@ ***********************************************************************
@ Author:   John Zaitseff <J.Zaitseff@unsw.edu.au>
@ This program is, in the main, a demonstration of the ARM "ldr" and "str"
@ instructions.  Do pay attention, however, at the use of the ".data"
@ section, which contains read/write data.
	.text
	.global main	
	.equ	num, 20		@ Number of words to be copied
main:
@ Copies "num" words from "src" to "dst" one word at a time, then
@ prints the last word copied.
@ R4 is pushed with LR so that SP is 8-byte aligned at the printf call,
@ as the AAPCS requires.
        push    {r4, lr}
        ldr     r0, =src                @ R0 = pointer to source block
        ldr     r1, =dst                @ R1 = pointer to destination block
        mov     r2, #num                @ R2 = number of words to copy
loop:
        ldr     r3, [r0], #4            @ Load a word into R3 and update R0
                                        @ (post-indexed: R0 := R0 + 4)
        str     r3, [r1], #4            @ Store the word and update R1
        subs    r2, r2, #1              @ Decrement the word counter
        bne     loop                    @ and repeat loop if not finished
        ldr     r1, [r1, #-4]           @ R1 = last value copied
        ldr     r0, =printvalue
        bl      printf
        pop     {r4, pc}
	.data			@ Read/write data follows
	.align			@ Make sure data is aligned on 32-bit boundaries
src:	.word	 1,  2,  3,  4,  5,  6,  7,  8,  9, 10
	.word	11, 12, 13, 14, 15, 16, 17, 18, 19, 20
@ The ".data" section can contain pre-initialised variables, as shown above
dst:	.word	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0
	.word	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0
printvalue:
	.asciz "Last value copied= %d\n"
@ An alternative to directly including twenty "0"'s for the destination
@ variable "dst" would be to use the ".skip" directive:
@ dst:	.skip	num * 4		@ Reserve 80 bytes (num 32-bit words)
	.end