core: Simplify changes to SREG
[simavr] / simavr / sim / sim_core.c
index 53271e1..31c2ab3 100644 (file)
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
-#include "simavr.h"
+#include "sim_avr.h"
 #include "sim_core.h"
+#include "avr_flash.h"
+#include "avr_watchdog.h"
 
 // SREG bit names
 const char * _sreg_bit_name = "cznvshti";
@@ -34,6 +36,7 @@ const char * _sreg_bit_name = "cznvshti";
  * This is used only for debugging purposes to be able to
  * print the effects of each instructions on registers
  */
+#if CONFIG_SIMAVR_TRACE
 #define REG_TOUCH(a, r) (a)->touched[(r) >> 5] |= (1 << ((r) & 0x1f))
 #define REG_ISTOUCHED(a, r) ((a)->touched[(r) >> 5] & (1 << ((r) & 0x1f)))
 
@@ -57,7 +60,7 @@ int donttrace = 0;
 
 #define STATE(_f, args...) { \
        if (avr->trace) {\
-               if (avr->codeline[avr->pc>>1]) {\
+               if (avr->codeline && avr->codeline[avr->pc>>1]) {\
                        const char * symn = avr->codeline[avr->pc>>1]->symbol; \
                        int dont = 0 && dont_trace(symn);\
                        if (dont!=donttrace) { \
@@ -76,6 +79,46 @@ int donttrace = 0;
                printf("%c", avr->sreg[_sbi] ? toupper(_sreg_bit_name[_sbi]) : '.');\
        printf("\n");\
 }
+#else
+#define REG_TOUCH(a, r)
+#define STATE(_f, args...)
+#define SREG()
+#endif
+
+/* Checked write of v to data[addr]. A target above ramend or below 32 (the low
+ * registers) is reported with PC, SP and the opcode word at PC, then CRASH() is
+ * invoked. NOTE(review): the store still runs if CRASH() returns - confirm. */
+void avr_core_watch_write(avr_t *avr, uint16_t addr, uint8_t v)
+{
+       if (addr > avr->ramend) {
+               printf("*** Invalid write address PC=%04x SP=%04x O=%04x Address %04x=%02x out of ram\n",
+                               avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8), addr, v);
+               CRASH();
+       }
+       if (addr < 32) {
+               printf("*** Invalid write address PC=%04x SP=%04x O=%04x Address %04x=%02x low registers\n",
+                               avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8), addr, v);
+               CRASH();
+       }
+#if AVR_STACK_WATCH
+       /* Writing above the SP saved in stack_frame[index-2] is a sign that the current
+        * "function" overran its stack frame and is munching on its own return address. */
+       if (avr->stack_frame_index > 1 && addr > avr->stack_frame[avr->stack_frame_index-2].sp) {
+               printf("\e[31m%04x : munching stack SP %04x, A=%04x <= %02x\e[0m\n", avr->pc, _avr_sp_get(avr), addr, v);
+       }
+#endif
+       avr->data[addr] = v;
+}
+
+/* Checked read of data[addr]; an address above ramend is first reported (PC, SP, opcode word at PC) and CRASH() called. */
+uint8_t avr_core_watch_read(avr_t *avr, uint16_t addr)
+{
+       if (addr > avr->ramend) {
+               printf("*** Invalid read address PC=%04x SP=%04x O=%04x Address %04x out of ram (%04x)\n", avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8), addr, avr->ramend);
+               CRASH();
+       }
+       return avr->data[addr];
+}
 
 /*
  * Set a register (r < 256)
@@ -87,18 +130,23 @@ static inline void _avr_set_r(avr_t * avr, uint8_t r, uint8_t v)
        REG_TOUCH(avr, r);
 
        if (r == R_SREG) {
-               avr->data[r] = v;
+               avr->data[R_SREG] = v;
                // unsplit the SREG
                for (int i = 0; i < 8; i++)
-                       avr->sreg[i] = (avr->data[R_SREG] & (1 << i)) != 0;
+                       avr->sreg[i] = (v & (1 << i)) != 0;
                SREG();
        }
        if (r > 31) {
                uint8_t io = AVR_DATA_TO_IO(r);
-               if (avr->iow[io].w)
-                       avr->iow[io].w(avr, r, v, avr->iow[io].param);
+               if (avr->io[io].w.c)
+                       avr->io[io].w.c(avr, r, v, avr->io[io].w.param);
                else
                        avr->data[r] = v;
+               if (avr->io[io].irq) {
+                       avr_raise_irq(avr->io[io].irq + AVR_IOMEM_IRQ_ALL, v);
+                       for (int i = 0; i < 8; i++)
+                               avr_raise_irq(avr->io[io].irq + i, (v >> i) & 1);                               
+               }
        } else
                avr->data[r] = v;
 }
@@ -133,16 +181,37 @@ static inline void _avr_set_ram(avr_t * avr, uint16_t addr, uint8_t v)
  */
 static inline uint8_t _avr_get_ram(avr_t * avr, uint16_t addr)
 {
-       if (addr > 31 && addr < 256) {
+       if (addr == R_SREG) {   // every path below ends in the range-checked avr_core_watch_read()
+               /*
+                * SREG is special: the core keeps its flags in the sreg[] "shortcut"
+                * array, so data[R_SREG] is reconstructed from it on every read
+                */
+               avr->data[R_SREG] = 0;  // start clean, set one bit per non-zero flag below
+               for (int i = 0; i < 8; i++)
+                       if (avr->sreg[i] > 1) { // a flag entry must be exactly 0 or 1
+                               printf("** Invalid SREG!!\n");
+                               CRASH();
+                       } else if (avr->sreg[i])
+                               avr->data[R_SREG] |= (1 << i);
+               
+       } else if (addr > 31 && addr < 256) {
                uint8_t io = AVR_DATA_TO_IO(addr);
-               if (avr->ior[io].r)
-                       avr->data[addr] = avr->ior[io].r(avr, addr, avr->ior[io].param);
+               
+               if (avr->io[io].r.c)    // a registered read callback supplies the fresh value of data[addr]
+                       avr->data[addr] = avr->io[io].r.c(avr, addr, avr->io[io].r.param);
+               
+               if (avr->io[io].irq) {  // raise the AVR_IOMEM_IRQ_ALL irq with the byte, then one irq per bit
+                       uint8_t v = avr->data[addr];
+                       avr_raise_irq(avr->io[io].irq + AVR_IOMEM_IRQ_ALL, v);
+                       for (int i = 0; i < 8; i++)
+                               avr_raise_irq(avr->io[io].irq + i, (v >> i) & 1);                               
+               }
        }
        return avr_core_watch_read(avr, addr);
 }
 
 /*
- * Stack oush accessors. Push/pop 8 and 16 bits
+ * Stack push accessors. Push/pop 8 and 16 bits
  */
 static inline void _avr_push8(avr_t * avr, uint16_t v)
 {
@@ -202,10 +271,16 @@ const char * avr_regname(uint8_t reg)
  */
 static void _avr_invalid_opcode(avr_t * avr)
 {
+#if CONFIG_SIMAVR_TRACE        /* prints PC, symbol, SP and opcode word. NOTE(review): codeline is dereferenced here without the NULL check STATE() performs */
        printf("\e[31m*** %04x: %-25s Invalid Opcode SP=%04x O=%04x \e[0m\n",
                        avr->pc, avr->codeline[avr->pc>>1]->symbol, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8));
+#else  /* non-trace build: same report without the symbol name */
+       printf("\e[31m*** %04x: Invalid Opcode SP=%04x O=%04x \e[0m\n",
+                       avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8));
+#endif
 }
 
+#if CONFIG_SIMAVR_TRACE
 /*
  * Dump changed registers when tracing
  */
@@ -235,11 +310,12 @@ void avr_dump_state(avr_t * avr)
                }
        printf("\n");
 }
+#endif
 
 #define get_r_d_10(o) \
                const uint8_t r = ((o >> 5) & 0x10) | (o & 0xf); \
                const uint8_t d = (o >> 4) & 0x1f;\
-               const uint8_t vd = avr->data[d], vr =avr->data[r];
+               const uint8_t vd = avr->data[d], vr = avr->data[r];
 #define get_k_r16(o) \
                const uint8_t r = 16 + ((o >> 4) & 0xf); \
                const uint8_t k = ((o & 0x0f00) >> 4) | (o & 0xf);
@@ -247,15 +323,36 @@ void avr_dump_state(avr_t * avr)
 /*
  * Add a "jump" address to the jump trace buffer
  */
+#if CONFIG_SIMAVR_TRACE        /* jump and stack-frame bookkeeping is only compiled into trace builds */
 #define TRACE_JUMP()\
        avr->old[avr->old_pci].pc = avr->pc;\
        avr->old[avr->old_pci].sp = _avr_sp_get(avr);\
        avr->old_pci = (avr->old_pci + 1) & (OLD_PC_SIZE-1);\
 
+#if AVR_STACK_WATCH    /* PUSH records (pc, sp) per call, POP drops the last record; NOTE(review): PUSH does not bound stack_frame_index */
+#define STACK_FRAME_PUSH()\
+       avr->stack_frame[avr->stack_frame_index].pc = avr->pc;\
+       avr->stack_frame[avr->stack_frame_index].sp = _avr_sp_get(avr);\
+       avr->stack_frame_index++; 
+#define STACK_FRAME_POP()\
+       if (avr->stack_frame_index > 0) \
+               avr->stack_frame_index--;
+#else  /* !AVR_STACK_WATCH: frame tracking expands to nothing */
+#define STACK_FRAME_PUSH()
+#define STACK_FRAME_POP()
+#endif
+#else /* !CONFIG_SIMAVR_TRACE: all three hooks expand to nothing */
+
+#define TRACE_JUMP()
+#define STACK_FRAME_PUSH()
+#define STACK_FRAME_POP()
+
+#endif /* CONFIG_SIMAVR_TRACE */
+
 /****************************************************************************\
  *
  * Helper functions for calculating the status register bit values.
- * See the Atmel data sheet for the instuction set for more info.
+ * See the Atmel data sheet for the instruction set for more info.
  *
 \****************************************************************************/
 
@@ -313,40 +410,51 @@ get_compare_overflow (uint8_t res, uint8_t rd, uint8_t rr)
     return (rd & ~rr & ~res) | (~rd & rr & res);
 }
 
+/* True when the opcode word at flash[pc] is a two-word (32 bit) instruction. Only the
+ * operand bits 8..4 are masked off; also dropping bit 9 would make ADIW/SBIW match JMP/CALL. */
+static inline int _avr_is_instruction_32_bits(avr_t * avr, uint32_t pc)
+{
+       uint16_t o = (avr->flash[pc] | (avr->flash[pc+1] << 8)) & 0xfe0f;
+       return  o == 0x9200 || // STS Store Direct to Data Space
+                       o == 0x9000 || // LDS Load Direct from Data Space
+                       o == 0x940c || o == 0x940d || // JMP Long Jump
+                       o == 0x940e || o == 0x940f; // CALL Long Call to sub
+}
+
 /*
  * Main opcode decoder
  * 
  * The decoder was written by following the datasheet in no particular order.
  * As I went along, I noticed "bit patterns" that could be used to factor opcodes
- * However, a lot of these only becane apparent later on, so SOME instructions
+ * However, a lot of these only became apparent later on, so SOME instructions
  * (skip of bit set etc) are compact, and some could use some refactoring (the ALU
  * ones scream to be factored).
  * I assume that the decoder could easily be 2/3 of it's current size.
  * 
- * The core 'almost' work. There is a case where it munches the stack, problem to be
- * debugged.
- * 
- * It lacks a couple of multiply instructions, and the "extended" XMega jumps.
+ * + It lacks the "extended" XMega jumps. 
+ * + It also doesn't check whether the core it's
+ *   emulating is supposed to have the fancy instructions, like multiply and such.
  * 
  * for now all instructions take "one" cycle, the cycle+=<extra> needs to be added.
  */
 uint16_t avr_run_one(avr_t * avr)
 {
+#if CONFIG_SIMAVR_TRACE
        /*
-        * this traces spurious reset or bad jump/opcodes and dumps the last 32 "jumps" to track it down
+        * this traces spurious reset or bad jumps
         */
        if ((avr->pc == 0 && avr->cycle > 0) || avr->pc >= avr->codeend) {
                avr->trace = 1;
                STATE("RESET\n");
                CRASH();
        }
+       avr->touched[0] = avr->touched[1] = avr->touched[2] = 0;
+#endif
 
        uint32_t        opcode = (avr->flash[avr->pc + 1] << 8) | avr->flash[avr->pc];
        uint32_t        new_pc = avr->pc + 2;   // future "default" pc
        int             cycle = 1;
 
-       avr->touched[0] = avr->touched[1] = avr->touched[2] = 0;
-
        switch (opcode & 0xf000) {
                case 0x0000: {
                        switch (opcode) {
@@ -408,6 +516,56 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        _avr_set_r(avr, d, avr->data[r]);
                                                                        _avr_set_r(avr, d+1, avr->data[r+1]);
                                                                }       break;
+                                                               case 0x0200: {  // MULS – Multiply Signed 0000 0010 dddd rrrr
+                                                                       int8_t r = opcode & 0xf;
+                                                                       int8_t d = (opcode >> 4) & 0xf;
+                                                                       int16_t res = ((int8_t)avr->data[r]) * ((int8_t)avr->data[d]);
+                                                                       STATE("muls %s[%d], %s[%02x] = %d\n", avr_regname(d), ((int8_t)avr->data[d]), avr_regname(r), ((int8_t)avr->data[r]), res);
+                                                                       _avr_set_r(avr, 0, res);
+                                                                       _avr_set_r(avr, 1, res >> 8);
+                                                                       avr->sreg[S_C] = (res >> 15) & 1;
+                                                                       avr->sreg[S_Z] = res == 0;
+                                                                       SREG();
+                                                               }       break;
+                                                               case 0x0300: {  // multiplications
+                                                                       int8_t r = 16 + (opcode & 0x7);
+                                                                       int8_t d = 16 + ((opcode >> 4) & 0x7);
+                                                                       int16_t res = 0;
+                                                                       uint8_t c = 0;
+                                                                       const char * name = "";
+                                                                       switch (opcode & 0x88) {
+                                                                               case 0x00:      // MULSU – Multiply Signed Unsigned 0000 0011 0ddd 0rrr
+                                                                                       res = ((uint8_t)avr->data[r]) * ((int8_t)avr->data[d]);
+                                                                                       c = (res >> 15) & 1;
+                                                                                       name = "mulsu";
+                                                                                       break;
+                                                                               case 0x08:      // FMUL Fractional Multiply Unsigned 0000 0011 0ddd 1rrr
+                                                                                       res = ((uint8_t)avr->data[r]) * ((uint8_t)avr->data[d]);
+                                                                                       c = (res >> 15) & 1;
+                                                                                       res <<= 1;
+                                                                                       name = "fmul";
+                                                                                       break;
+                                                                               case 0x80:      // FMULS – Multiply Signed  0000 0011 1ddd 0rrr
+                                                                                       res = ((int8_t)avr->data[r]) * ((int8_t)avr->data[d]);
+                                                                                       c = (res >> 15) & 1;
+                                                                                       res <<= 1;
+                                                                                       name = "fmuls";
+                                                                                       break;
+                                                                               case 0x88:      // FMULSU – Fractional Multiply Signed with Unsigned 0000 0011 1ddd 1rrr
+                                                                                       res = ((uint8_t)avr->data[r]) * ((int8_t)avr->data[d]);
+                                                                                       c = (res >> 15) & 1;
+                                                                                       res <<= 1;
+                                                                                       name = "fmulsu";
+                                                                                       break;
+                                                                       }
+                                                                       cycle++;
+                                                                       STATE("%s %s[%d], %s[%02x] = %d\n", name, avr_regname(d), ((int8_t)avr->data[d]), avr_regname(r), ((int8_t)avr->data[r]), res);
+                                                                       _avr_set_r(avr, 0, res);
+                                                                       _avr_set_r(avr, 1, res >> 8);
+                                                                       avr->sreg[S_C] = c;
+                                                                       avr->sreg[S_Z] = res == 0;
+                                                                       SREG();
+                                                               }       break;
                                                                default: _avr_invalid_opcode(avr);
                                                        }
                                        }
@@ -434,8 +592,13 @@ uint16_t avr_run_one(avr_t * avr)
                                        get_r_d_10(opcode);
                                        uint16_t res = vd == vr;
                                        STATE("cpse %s[%02x], %s[%02x]\t; Will%s skip\n", avr_regname(d), avr->data[d], avr_regname(r), avr->data[r], res ? "":"not ");
-                                       if (res)
-                                               new_pc += 2;
+                                       if (res) {
+                                               if (_avr_is_instruction_32_bits(avr, new_pc)) {
+                                                       new_pc += 4; cycle += 2;
+                                               } else {
+                                                       new_pc += 2; cycle++;
+                                               }
+                                       }
                                }       break;
                                case 0x1400: {  // CP Compare 0000 10 rd dddd rrrr
                                        get_r_d_10(opcode);
@@ -466,21 +629,7 @@ uint16_t avr_run_one(avr_t * avr)
                                        avr->sreg[S_S] = avr->sreg[S_N] ^ avr->sreg[S_V];
                                        SREG();
                                }       break;
-                               default:
-                                       switch (opcode & 0xff00) {
-                                               case 0x0200: {  // MULS – Multiply Signed 0000 0010 dddd rrrr
-                                                       int8_t r = opcode & 0xf;
-                                                       int8_t d = (opcode >> 4) & 0xf;
-                                                       int16_t res = ((int8_t)avr->data[r]) * ((int8_t)avr->data[d]);
-                                                       STATE("muls %s[%d], %s[%02x] = %d\n", avr_regname(d), ((int8_t)avr->data[d]), avr_regname(r), ((int8_t)avr->data[r]), res);
-                                                       _avr_set_r(avr, 0, res);
-                                                       _avr_set_r(avr, 1, res >> 8);
-                                                       avr->sreg[S_C] = (res >> 15) & 1;
-                                                       avr->sreg[S_Z] = res == 0;
-                                                       SREG();
-                                               }       break;
-                                               default: _avr_invalid_opcode(avr);
-                                       }
+                               default: _avr_invalid_opcode(avr);
                        }
                }       break;
 
@@ -609,7 +758,7 @@ uint16_t avr_run_one(avr_t * avr)
                                case 0x8000: {  // LD (LDD) – Load Indirect using Z 10q0 qq0r rrrr 0qqq
                                        uint16_t v = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
                                        uint8_t r = (opcode >> 4) & 0x1f;
-                                       uint8_t q = ((opcode & 0x2000) >> 7) | ((opcode & 0x0c00) >> 7) | (opcode & 0x7);
+                                       uint8_t q = ((opcode & 0x2000) >> 8) | ((opcode & 0x0c00) >> 7) | (opcode & 0x7);
 
                                        if (opcode & 0x0200) {
                                                STATE("st (Z+%d[%04x]), %s[%02x]\n", q, v+q, avr_regname(r), avr->data[r]);
@@ -618,12 +767,13 @@ uint16_t avr_run_one(avr_t * avr)
                                                STATE("ld %s, (Z+%d[%04x])=[%02x]\n", avr_regname(r), q, v+q, avr->data[v+q]);
                                                _avr_set_r(avr, r, _avr_get_ram(avr, v+q));
                                        }
+                                       cycle += 2;
                                }       break;
                                case 0xa008:
                                case 0x8008: {  // LD (LDD) – Load Indirect using Y 10q0 qq0r rrrr 1qqq
                                        uint16_t v = avr->data[R_YL] | (avr->data[R_YH] << 8);
                                        uint8_t r = (opcode >> 4) & 0x1f;
-                                       uint8_t q = ((opcode & 0x2000) >> 7) | ((opcode & 0x0c00) >> 7) | (opcode & 0x7);
+                                       uint8_t q = ((opcode & 0x2000) >> 8) | ((opcode & 0x0c00) >> 7) | (opcode & 0x7);
 
                                        if (opcode & 0x0200) {
                                                STATE("st (Y+%d[%04x]), %s[%02x]\n", q, v+q, avr_regname(r), avr->data[r]);
@@ -632,6 +782,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                STATE("ld %s, (Y+%d[%04x])=[%02x]\n", avr_regname(r), q, v+q, avr->data[v+q]);
                                                _avr_set_r(avr, r, _avr_get_ram(avr, v+q));
                                        }
+                                       cycle += 2;
                                }       break;
                                default: _avr_invalid_opcode(avr);
                        }
@@ -651,31 +802,48 @@ uint16_t avr_run_one(avr_t * avr)
                                }       break;
                                case 0x9598: { // BREAK
                                        STATE("break\n");
+                                       if (avr->gdb) {
+                                               // if gdb is on, we break here as in here
+                                               // and we do so until gdb restores the instruction
+                                               // that was here before
+                                               avr->state = cpu_StepDone;
+                                               new_pc = avr->pc;
+                                               cycle = 0;
+                                       }
                                }       break;
                                case 0x95a8: { // WDR
                                        STATE("wdr\n");
+                                       avr_ioctl(avr, AVR_IOCTL_WATCHDOG_RESET, 0);
+                               }       break;
+                               case 0x95e8: { // SPM
+                                       STATE("spm\n");
+                                       avr_ioctl(avr, AVR_IOCTL_FLASH_SPM, 0);
                                }       break;
                                case 0x9409: { // IJMP Indirect jump
                                        uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
                                        STATE("ijmp Z[%04x]\n", z << 1);
                                        new_pc = z << 1;
+                                       cycle++;
                                        TRACE_JUMP();
                                }       break;
                                case 0x9509: { // ICALL Indirect Call to Subroutine
                                        uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
                                        STATE("icall Z[%04x]\n", z << 1);
-
                                        _avr_push16(avr, new_pc >> 1);
                                        new_pc = z << 1;
+                                       cycle += 2;
                                        TRACE_JUMP();
+                                       STACK_FRAME_PUSH();
                                }       break;
                                case 0x9518:    // RETI
                                case 0x9508: {  // RET
                                        new_pc = _avr_pop16(avr) << 1;
                                        if (opcode & 0x10)      // reti
                                                avr->sreg[S_I] = 1;
+                                       cycle += 3;
                                        STATE("ret%s\n", opcode & 0x10 ? "i" : "");
                                        TRACE_JUMP();
+                                       STACK_FRAME_POP();
                                }       break;
                                case 0x95c8: {  // LPM Load Program Memory R0 <- (Z)
                                        uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
@@ -700,6 +868,14 @@ uint16_t avr_run_one(avr_t * avr)
                                }       break;
                                default:  {
                                        switch (opcode & 0xfe0f) {
+                                               case 0x9000: {  // LDS Load Direct from Data Space, 32 bits
+                                                       uint8_t r = (opcode >> 4) & 0x1f;
+                                                       uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
+                                                       new_pc += 2;
+                                                       STATE("lds %s[%02x], 0x%04x\n", avr_regname(r), avr->data[r], x);
+                                                       _avr_set_r(avr, r, _avr_get_ram(avr, x));
+                                                       cycle++;
+                                               }       break;
                                                case 0x9005:
                                                case 0x9004: {  // LPM Load Program Memory 1001 000d dddd 01oo
                                                        uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
@@ -712,6 +888,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                                _avr_set_r(avr, R_ZH, z >> 8);
                                                                _avr_set_r(avr, R_ZL, z);
                                                        }
+                                                       cycle += 2;
                                                }       break;
                                                case 0x900c:
                                                case 0x900d:
@@ -734,7 +911,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t x = (avr->data[R_XH] << 8) | avr->data[R_XL];
                                                        STATE("st %sX[%04x]%s, %s[%02x] \n", op == 2 ? "--" : "", x, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
-
+                                                       cycle++;
                                                        if (op == 2) x--;
                                                        _avr_set_ram(avr, x, avr->data[r]);
                                                        if (op == 1) x++;
@@ -747,7 +924,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t y = (avr->data[R_YH] << 8) | avr->data[R_YL];
                                                        STATE("ld %s, %sY[%04x]%s\n", avr_regname(r), op == 2 ? "--" : "", y, op == 1 ? "++" : "");
-
+                                                       cycle++;
                                                        if (op == 2) y--;
                                                        _avr_set_r(avr, r, _avr_get_ram(avr, y));
                                                        if (op == 1) y++;
@@ -760,7 +937,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t y = (avr->data[R_YH] << 8) | avr->data[R_YL];
                                                        STATE("st %sY[%04x]%s, %s[%02x] \n", op == 2 ? "--" : "", y, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
-
+                                                       cycle++;
                                                        if (op == 2) y--;
                                                        _avr_set_ram(avr, y, avr->data[r]);
                                                        if (op == 1) y++;
@@ -780,7 +957,6 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t z = (avr->data[R_ZH] << 8) | avr->data[R_ZL];
                                                        STATE("ld %s, %sZ[%04x]%s\n", avr_regname(r), op == 2 ? "--" : "", z, op == 1 ? "++" : "");
-
                                                        if (op == 2) z--;
                                                        _avr_set_r(avr, r, _avr_get_ram(avr, z));
                                                        if (op == 1) z++;
@@ -793,7 +969,6 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t z = (avr->data[R_ZH] << 8) | avr->data[R_ZL];
                                                        STATE("st %sZ[%04x]%s, %s[%02x] \n", op == 2 ? "--" : "", z, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
-
                                                        if (op == 2) z--;
                                                        _avr_set_ram(avr, z, avr->data[r]);
                                                        if (op == 1) z++;
@@ -805,19 +980,14 @@ uint16_t avr_run_one(avr_t * avr)
                                                        _avr_set_r(avr, r, _avr_pop8(avr));
                                                        uint16_t sp = _avr_sp_get(avr);
                                                        STATE("pop %s (@%04x)[%02x]\n", avr_regname(r), sp, avr->data[sp]);
+                                                       cycle++;
                                                }       break;
                                                case 0x920f: {  // PUSH 1001 001d dddd 1111
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        _avr_push8(avr, avr->data[r]);
                                                        uint16_t sp = _avr_sp_get(avr);
                                                        STATE("push %s[%02x] (@%04x)\n", avr_regname(r), avr->data[r], sp);
-                                               }       break;
-                                               case 0x9000: {  // LDS Load Direct from Data Space, 32 bits
-                                                       uint8_t r = (opcode >> 4) & 0x1f;
-                                                       uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
-                                                       new_pc += 2;
-                                                       STATE("lds %s[%02x], 0x%04x\n", avr_regname(r), avr->data[r], x);
-                                                       _avr_set_r(avr, r, _avr_get_ram(avr, x));
+                                                       cycle++;
                                                }       break;
                                                case 0x9400: {  // COM – One’s Complement
                                                        uint8_t r = (opcode >> 4) & 0x1f;
@@ -917,9 +1087,9 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint32_t a = ((opcode & 0x01f0) >> 3) | (opcode & 1);
                                                        uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
                                                        a = (a << 16) | x;
-                                               //      printf("jmp %06x\n", a << 1);
                                                        STATE("jmp 0x%06x\n", a);
                                                        new_pc = a << 1;
+                                                       cycle += 2;
                                                        TRACE_JUMP();
                                                }       break;
                                                case 0x940e:
@@ -927,12 +1097,13 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint32_t a = ((opcode & 0x01f0) >> 3) | (opcode & 1);
                                                        uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
                                                        a = (a << 16) | x;
-                                               //      printf("call %06x\n", a << 1);
                                                        STATE("call 0x%06x\n", a);
                                                        new_pc += 2;
                                                        _avr_push16(avr, new_pc >> 1);
                                                        new_pc = a << 1;
+                                                       cycle += 3;     // 4 cycles
                                                        TRACE_JUMP();
+                                                       STACK_FRAME_PUSH();
                                                }       break;
 
                                                default: {
@@ -977,14 +1148,20 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        uint8_t res = _avr_get_ram(avr, io) & ~(1 << b);
                                                                        STATE("cbi %s[%04x], 0x%02x = %02x\n", avr_regname(io), avr->data[io], 1<<b, res);
                                                                        _avr_set_ram(avr, io, res);
+                                                                       cycle++;
                                                                }       break;
                                                                case 0x9900: {  // SBIC - Skip if Bit in I/O Register is Cleared 1001 0111 AAAA Abbb
                                                                        uint8_t io = ((opcode >> 3) & 0x1f) + 32;
                                                                        uint8_t b = opcode & 0x7;
                                                                        uint8_t res = _avr_get_ram(avr, io) & (1 << b);
                                                                        STATE("sbic %s[%04x], 0x%02x\t; Will%s branch\n", avr_regname(io), avr->data[io], 1<<b, !res?"":"not ");
-                                                                       if (!res)
-                                                                               new_pc += 2;
+                                                                       if (!res) {
+                                                                               if (_avr_is_instruction_32_bits(avr, new_pc)) {
+                                                                                       new_pc += 4; cycle += 2;
+                                                                               } else {
+                                                                                       new_pc += 2; cycle++;
+                                                                               }
+                                                                       }
                                                                }       break;
                                                                case 0x9a00: {  // SBI - Set Bit in I/O Register 1001 1000 AAAA Abbb
                                                                        uint8_t io = ((opcode >> 3) & 0x1f) + 32;
@@ -992,14 +1169,20 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        uint8_t res = _avr_get_ram(avr, io) | (1 << b);
                                                                        STATE("sbi %s[%04x], 0x%02x = %02x\n", avr_regname(io), avr->data[io], 1<<b, res);
                                                                        _avr_set_ram(avr, io, res);
+                                                                       cycle++;
                                                                }       break;
                                                                case 0x9b00: {  // SBIS - Skip if Bit in I/O Register is Cleared 1001 0111 AAAA Abbb
                                                                        uint8_t io = (opcode >> 3) & 0x1f;
                                                                        uint8_t b = opcode & 0x7;
                                                                        uint8_t res = _avr_get_ram(avr, io + 32) & (1 << b);
                                                                        STATE("sbis %s[%04x], 0x%02x\t; Will%s branch\n", avr_regname(io), avr->data[io], 1<<b, res?"":"not ");
-                                                                       if (res)
-                                                                               new_pc += 2;
+                                                                       if (res) {
+                                                                               if (_avr_is_instruction_32_bits(avr, new_pc)) {
+                                                                                       new_pc += 4; cycle += 2;
+                                                                               } else {
+                                                                                       new_pc += 2; cycle++;
+                                                                               }
+                                                                       }
                                                                }       break;
                                                                default:
                                                                        switch (opcode & 0xfc00) {
@@ -1028,15 +1211,12 @@ uint16_t avr_run_one(avr_t * avr)
                                        uint8_t r = (opcode >> 4) & 0x1f;
                                        uint8_t A = ((((opcode >> 9) & 3) << 4) | ((opcode) & 0xf)) + 32;
                                        STATE("out %s, %s[%02x]\n", avr_regname(A), avr_regname(r), avr->data[r]);
-                                       // todo: store to IO register
                                        _avr_set_ram(avr, A, avr->data[r]);
-                               //      avr->data[A] = ;
                                }       break;
                                case 0xb000: {  // IN Rd,A 1011 0AAr rrrr AAAA
                                        uint8_t r = (opcode >> 4) & 0x1f;
                                        uint8_t A = ((((opcode >> 9) & 3) << 4) | ((opcode) & 0xf)) + 32;
                                        STATE("in %s, %s[%02x]\n", avr_regname(r), avr_regname(A), avr->data[A]);
-                                       // todo: get the IO register
                                        _avr_set_r(avr, r, _avr_get_ram(avr, A));
                                }       break;
                                default: _avr_invalid_opcode(avr);
@@ -1048,6 +1228,7 @@ uint16_t avr_run_one(avr_t * avr)
                        short o = ((short)(opcode << 4)) >> 4;
                        STATE("rjmp .%d [%04x]\n", o, new_pc + (o << 1));
                        new_pc = new_pc + (o << 1);
+                       cycle++;
                        TRACE_JUMP();
                }       break;
 
@@ -1057,7 +1238,12 @@ uint16_t avr_run_one(avr_t * avr)
                        STATE("rcall .%d [%04x]\n", o, new_pc + (o << 1));
                        _avr_push16(avr, new_pc >> 1);
                        new_pc = new_pc + (o << 1);
-                       TRACE_JUMP();
+                       cycle += 2;
+                       // 'rcall .1' is used as a cheap "push 16 bits of room on the stack"
+                       if (o != 0) {
+                               TRACE_JUMP();
+                               STACK_FRAME_PUSH();
+                       }
                }       break;
 
                case 0xe000: {  // LDI Rd, K 1110 KKKK RRRR KKKK -- aka SER (LDI r, 0xff)
@@ -1086,8 +1272,10 @@ uint16_t avr_run_one(avr_t * avr)
                                        } else {
                                                STATE("%s%c .%d [%04x]\t; Will%s branch\n", set ? "brbs" : "brbc", _sreg_bit_name[s], o, new_pc + (o << 1), branch ? "":" not");
                                        }
-                                       if (branch)
+                                       if (branch) {
+                                               cycle++;
                                                new_pc = new_pc + (o << 1);
+                                       }
                                }       break;
                                case 0xf800:
                                case 0xf900: {  // BLD – Bit Store from T into a Bit in Register 1111 100r rrrr 0bbb