--- /usr/avr/include/util/delay.h 2006-05-01 06:04:50.000000000 -0400 +++ delay.h 2006-08-20 16:31:03.000000000 -0400 @@ -83,6 +83,7 @@ #if !defined(__DOXYGEN__) static inline void _delay_loop_1(uint8_t __count) __attribute__((always_inline)); static inline void _delay_loop_2(uint16_t __count) __attribute__((always_inline)); +static inline void _delay_loop_2b(uint16_t __count) __attribute__((always_inline)); static inline void _delay_us(double __us) __attribute__((always_inline)); static inline void _delay_ms(double __ms) __attribute__((always_inline)); #endif @@ -131,6 +132,33 @@ ); } +/** \ingroup util_delay + + Just like _delay_loop_2(), but "plan b" explicitly uses r24/r25 and a clobber + list to (hopefully) repel any mov shenanigans that may eat clocks. + Delay loop using a 16-bit counter \c __count, so up to 65536 + iterations are possible. (The value 65536 would have to be + passed as 0.) The loop executes four CPU cycles per iteration, + not including the two ldi instructions that load the counter register pair; + \c __count is passed via an "n" operand, so it must be a compile-time constant. NOTE(review): a static inline prototype of the same name is also added, but only this macro defines it. + + Thus, at a CPU speed of 1 MHz, delays of up to about 262.1 + milliseconds can be achieved. + */ +#define _delay_loop_2b( __count)\ +{\ + __asm__ volatile (\ + "ldi r24,lo8(%0) \n\t"\ + "ldi r25,hi8(%0) \n\t"\ + "1: sbiw r24,1 \n\t"\ + "brne 1b \n\t"\ + :\ + : "n" (__count)\ + : "r24", "r25"\ + );\ +} + + #ifndef F_CPU /* prevent compiler error by supplying a default */ # warning "F_CPU not defined for " @@ -140,25 +168,46 @@ /** \ingroup util_delay - Perform a delay of \c __us microseconds, using _delay_loop_1(). - + Perform a delay of \c __us microseconds, using _delay_loop_2b(). + This routine will use nop shims to be as precise as possible, down to just 1 nop. + Or, in the case of 0.9 clocks-worth of delay, this will do NOTHING (IOW, it rounds down). + It needs gcc optimization (-O) in order to be inlined properly. + Optimization will clash with using -g to hand-count a listing, sorry.
The macro F_CPU is supposed to be defined to a constant defining the CPU clock frequency (in Hertz). - The maximal possible delay is 768 us / F_CPU in MHz. + The maximal possible delay is ~262.14 ms (not us!) / F_CPU in MHz. */ void -_delay_us(double __us) +_delay_us(const double __us) { - uint8_t __ticks; - double __tmp = ((F_CPU) / 3e6) * __us; - if (__tmp < 1.0) - __ticks = 1; - else if (__tmp > 255) - __ticks = 0; /* i.e. 256 */ - else - __ticks = (uint8_t)__tmp; - _delay_loop_1(__ticks); + const double __clocks_per_us=((F_CPU)/1e6); + const double __clocks_delay = __clocks_per_us * __us; + const char __loop_2b_fixed=7; // fixed overhead, assumed to be 4 ldi clocks plus the 3clk last loop; NOTE(review): the AVR instruction set lists ldi as 1 clock each (2 for the pair) -- confirm this 7 + const uint32_t __loop_2b_runs = ((__clocks_delay - __loop_2b_fixed) / 4)+1; // whole 4clk passes that fit (fraction truncated by the integer conversion); the 1 added is for the final 3clk pass + const double __remainder_clocks= __clocks_delay - ((__loop_2b_runs-1) * 4 + __loop_2b_fixed); // clocks still owed after the loop; the minus 1 un-considers the 3clk last loop, which is already counted in __loop_2b_fixed + + if(__loop_2b_runs < 65536) { + if (__clocks_delay < 1) {/* less than one clock requested: emit nothing */} + else if (__clocks_delay < 2) {asm volatile ("nop");} + else if (__clocks_delay < 3) {asm volatile ("rjmp +0");} /* rjmp +0 is intended as a jump to the next instruction (implicit PC+1), used as a 2-clock nop */ + else if (__clocks_delay < 4) {asm volatile ("rjmp +0\n\t nop");} + else if (__clocks_delay < 5) {asm volatile ("rjmp +0\n\t rjmp +0");} + else if (__clocks_delay < 6) {asm volatile ("rjmp +0\n\t rjmp +0 \n\t nop");} + else if (__clocks_delay < 7) {asm volatile ("rjmp +0\n\t rjmp +0 \n\t rjmp +0");} + else if (__remainder_clocks < 1) { + _delay_loop_2b((uint16_t)__loop_2b_runs); + } else if (__remainder_clocks < 2) { + _delay_loop_2b((uint16_t)__loop_2b_runs); + asm volatile ("nop"); + } else if (__remainder_clocks < 3) { + _delay_loop_2b((uint16_t)__loop_2b_runs); + asm volatile ("rjmp +0"); + } else if (__remainder_clocks < 4) { + _delay_loop_2b((uint16_t)__loop_2b_runs); + asm volatile ("rjmp +0\n\t nop"); + } + } else {/* run count does not fit the 16-bit counter: no delay code is emitted at all */} }