misc: Update NO_COLOR define switch
[simavr] / simavr / sim / sim_core.c
index 94de8f6..5e7e2c8 100644 (file)
@@ -25,6 +25,9 @@
 #include <ctype.h>
 #include "sim_avr.h"
 #include "sim_core.h"
+#include "sim_gdb.h"
+#include "avr_flash.h"
+#include "avr_watchdog.h"
 
 // SREG bit names
 const char * _sreg_bit_name = "cznvshti";
@@ -35,12 +38,15 @@ const char * _sreg_bit_name = "cznvshti";
  * print the effects of each instructions on registers
  */
 #if CONFIG_SIMAVR_TRACE
-#define REG_TOUCH(a, r) (a)->touched[(r) >> 5] |= (1 << ((r) & 0x1f))
-#define REG_ISTOUCHED(a, r) ((a)->touched[(r) >> 5] & (1 << ((r) & 0x1f)))
+
+#define T(w) w
+
+#define REG_TOUCH(a, r) (a)->trace_data->touched[(r) >> 5] |= (1 << ((r) & 0x1f))
+#define REG_ISTOUCHED(a, r) ((a)->trace_data->touched[(r) >> 5] & (1 << ((r) & 0x1f)))
 
 /*
- * This allows a "special case" to skip indtruction tracing when in these
- * symbols. since printf() is useful to have, but generates a lot of cycles
+ * This allows a "special case" to skip instruction tracing when in these
+ * symbols since printf() is useful to have, but generates a lot of cycles.
  */
 int dont_trace(const char * name)
 {
@@ -58,8 +64,8 @@ int donttrace = 0;
 
 #define STATE(_f, args...) { \
        if (avr->trace) {\
-               if (avr->codeline[avr->pc>>1]) {\
-                       const char * symn = avr->codeline[avr->pc>>1]->symbol; \
+               if (avr->trace_data->codeline && avr->trace_data->codeline[avr->pc>>1]) {\
+                       const char * symn = avr->trace_data->codeline[avr->pc>>1]->symbol; \
                        int dont = 0 && dont_trace(symn);\
                        if (dont!=donttrace) { \
                                donttrace = dont;\
@@ -78,14 +84,60 @@ int donttrace = 0;
        printf("\n");\
 }
 #else
+#define T(w)
 #define REG_TOUCH(a, r)
 #define STATE(_f, args...)
 #define SREG()
 #endif
 
+/*
+ * Checked write of one byte into the data space.
+ *
+ * - an address above avr->ramend, or inside the 32 low registers (addr < 32),
+ *   is logged as an error and CRASH() is called;
+ * - with AVR_STACK_WATCH, a write landing above the stack pointer recorded
+ *   two stack frames back is reported as "munching stack";
+ * - when a gdb session is attached (avr->gdb), the write watchpoints are
+ *   evaluated for 'addr';
+ * - finally 'v' is stored in avr->data[addr].
+ *
+ * NOTE(review): the store is still executed after CRASH(); if CRASH() can
+ * return, an address above ramend is written anyway -- confirm.
+ */
+void avr_core_watch_write(avr_t *avr, uint16_t addr, uint8_t v)
+{
+       if (addr > avr->ramend) {
+               // O= is the opcode at pc, assembled low byte first as the opcode fetch of the core does
+               AVR_LOG(avr, LOG_ERROR, "CORE: *** Invalid write address PC=%04x SP=%04x O=%04x Address %04x=%02x out of ram\n",
+                               avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc + 1]<<8), addr, v);
+               CRASH();
+       }
+       if (addr < 32) {
+               AVR_LOG(avr, LOG_ERROR, "CORE: *** Invalid write address PC=%04x SP=%04x O=%04x Address %04x=%02x low registers\n",
+                               avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc + 1]<<8), addr, v);
+               CRASH();
+       }
+#if AVR_STACK_WATCH
+       /*
+        * this checks that the current "function" is not doctoring the stack frame that is located
+        * higher on the stack than it should be. It's a sign of code that has overrun its stack
+        * frame and is munching on its own return address.
+        */
+       if (avr->trace_data->stack_frame_index > 1 && addr > avr->trace_data->stack_frame[avr->trace_data->stack_frame_index-2].sp) {
+               printf( FONT_RED "%04x : munching stack SP %04x, A=%04x <= %02x\n" FONT_DEFAULT, avr->pc, _avr_sp_get(avr), addr, v);
+       }
+#endif
+
+       // let an attached debugger see the access before the value changes
+       if (avr->gdb) {
+               avr_gdb_handle_watchpoints(avr, addr, AVR_GDB_WATCH_WRITE);
+       }
+
+       avr->data[addr] = v;
+}
+
+/*
+ * Checked read of one byte from the data space.
+ *
+ * An address above avr->ramend is logged as an error and CRASH() is called.
+ * When a gdb session is attached (avr->gdb), the read watchpoints are
+ * evaluated for 'addr'. Returns avr->data[addr].
+ *
+ * NOTE(review): the read is still executed after CRASH(); if CRASH() can
+ * return, an address above ramend is read anyway -- confirm.
+ */
+uint8_t avr_core_watch_read(avr_t *avr, uint16_t addr)
+{
+       if (addr > avr->ramend) {
+               // O= is the opcode at pc, assembled low byte first as the opcode fetch of the core does
+               AVR_LOG(avr, LOG_ERROR, FONT_RED "CORE: *** Invalid read address PC=%04x SP=%04x O=%04x Address %04x out of ram (%04x)\n" FONT_DEFAULT,
+                               avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc + 1]<<8), addr, avr->ramend);
+               CRASH();
+       }
+
+       if (avr->gdb) {
+               avr_gdb_handle_watchpoints(avr, addr, AVR_GDB_WATCH_READ);
+       }
+
+       return avr->data[addr];
+}
+
 /*
  * Set a register (r < 256)
- * if it's an IO regisrer (> 31) also (try to) call any callback that was
+ * if it's an IO register (> 31) also (try to) call any callback that was
  * registered to track changes to that register.
  */
 static inline void _avr_set_r(avr_t * avr, uint8_t r, uint8_t v)
@@ -93,10 +145,9 @@ static inline void _avr_set_r(avr_t * avr, uint8_t r, uint8_t v)
        REG_TOUCH(avr, r);
 
        if (r == R_SREG) {
-               avr->data[r] = v;
+               avr->data[R_SREG] = v;
                // unsplit the SREG
-               for (int i = 0; i < 8; i++)
-                       avr->sreg[i] = (avr->data[R_SREG] & (1 << i)) != 0;
+               SET_SREG_FROM(avr, v);
                SREG();
        }
        if (r > 31) {
@@ -144,8 +195,16 @@ static inline void _avr_set_ram(avr_t * avr, uint16_t addr, uint8_t v)
  */
 static inline uint8_t _avr_get_ram(avr_t * avr, uint16_t addr)
 {
-       if (addr > 31 && addr < 256) {
+       if (addr == R_SREG) {
+               /*
+                * SREG is special it's reconstructed when read
+                * while the core itself uses the "shortcut" array
+                */
+               READ_SREG_INTO(avr, avr->data[R_SREG]);
+               
+       } else if (addr > 31 && addr < 256) {
                uint8_t io = AVR_DATA_TO_IO(addr);
+               
                if (avr->io[io].r.c)
                        avr->data[addr] = avr->io[io].r.c(avr, addr, avr->io[io].r.param);
                
@@ -179,14 +238,14 @@ static inline uint8_t _avr_pop8(avr_t * avr)
 
 inline void _avr_push16(avr_t * avr, uint16_t v)
 {
-       _avr_push8(avr, v >> 8);
        _avr_push8(avr, v);
+       _avr_push8(avr, v >> 8);
 }
 
 static inline uint16_t _avr_pop16(avr_t * avr)
 {
-       uint16_t res = _avr_pop8(avr);
-       res |= _avr_pop8(avr) << 8;
+       uint16_t res = _avr_pop8(avr) << 8;
+       res |= _avr_pop8(avr);
        return res;
 }
 
@@ -221,10 +280,10 @@ const char * avr_regname(uint8_t reg)
 static void _avr_invalid_opcode(avr_t * avr)
 {
 #if CONFIG_SIMAVR_TRACE
-       printf("\e[31m*** %04x: %-25s Invalid Opcode SP=%04x O=%04x \e[0m\n",
-                       avr->pc, avr->codeline[avr->pc>>1]->symbol, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8));
+       printf( FONT_RED "*** %04x: %-25s Invalid Opcode SP=%04x O=%04x \n" FONT_DEFAULT,
+                       avr->pc, avr->trace_data->codeline[avr->pc>>1]->symbol, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8));
 #else
-       printf("\e[31m*** %04x: Invalid Opcode SP=%04x O=%04x \e[0m\n",
+       AVR_LOG(avr, LOG_ERROR, FONT_RED "CORE: *** %04x: Invalid Opcode SP=%04x O=%04x \n" FONT_DEFAULT,
                        avr->pc, _avr_sp_get(avr), avr->flash[avr->pc] | (avr->flash[avr->pc+1]<<8));
 #endif
 }
@@ -241,7 +300,7 @@ void avr_dump_state(avr_t * avr)
        int doit = 0;
 
        for (int r = 0; r < 3 && !doit; r++)
-               if (avr->touched[r])
+               if (avr->trace_data->touched[r])
                        doit = 1;
        if (!doit)
                return;
@@ -264,7 +323,11 @@ void avr_dump_state(avr_t * avr)
 #define get_r_d_10(o) \
                const uint8_t r = ((o >> 5) & 0x10) | (o & 0xf); \
                const uint8_t d = (o >> 4) & 0x1f;\
-               const uint8_t vd = avr->data[d], vr =avr->data[r];
+               const uint8_t vd = avr->data[d], vr = avr->data[r];
+#define get_r_dd_10(o) \
+               const uint8_t r = ((o >> 5) & 0x10) | (o & 0xf); \
+               const uint8_t d = (o >> 4) & 0x1f;\
+               const uint8_t vr = avr->data[r];
 #define get_k_r16(o) \
                const uint8_t r = 16 + ((o >> 4) & 0xf); \
                const uint8_t k = ((o & 0x0f00) >> 4) | (o & 0xf);
@@ -274,18 +337,18 @@ void avr_dump_state(avr_t * avr)
  */
 #if CONFIG_SIMAVR_TRACE
 #define TRACE_JUMP()\
-       avr->old[avr->old_pci].pc = avr->pc;\
-       avr->old[avr->old_pci].sp = _avr_sp_get(avr);\
-       avr->old_pci = (avr->old_pci + 1) & (OLD_PC_SIZE-1);\
+       avr->trace_data->old[avr->trace_data->old_pci].pc = avr->pc;\
+       avr->trace_data->old[avr->trace_data->old_pci].sp = _avr_sp_get(avr);\
+       avr->trace_data->old_pci = (avr->trace_data->old_pci + 1) & (OLD_PC_SIZE-1);\
 
 #if AVR_STACK_WATCH
 #define STACK_FRAME_PUSH()\
-       avr->stack_frame[avr->stack_frame_index].pc = avr->pc;\
-       avr->stack_frame[avr->stack_frame_index].sp = _avr_sp_get(avr);\
-       avr->stack_frame_index++; 
+       avr->trace_data->stack_frame[avr->trace_data->stack_frame_index].pc = avr->pc;\
+       avr->trace_data->stack_frame[avr->trace_data->stack_frame_index].sp = _avr_sp_get(avr);\
+       avr->trace_data->stack_frame_index++; 
 #define STACK_FRAME_POP()\
-       if (avr->stack_frame_index > 0) \
-               avr->stack_frame_index--;
+       if (avr->trace_data->stack_frame_index > 0) \
+               avr->trace_data->stack_frame_index--;
 #else
 #define STACK_FRAME_PUSH()
 #define STACK_FRAME_POP()
@@ -359,7 +422,7 @@ get_compare_overflow (uint8_t res, uint8_t rd, uint8_t rr)
     return (rd & ~rr & ~res) | (~rd & rr & res);
 }
 
-static inline int _avr_is_instruction_32_bits(avr_t * avr, uint32_t pc)
+static inline int _avr_is_instruction_32_bits(avr_t * avr, avr_flashaddr_t pc)
 {
        uint16_t o = (avr->flash[pc] | (avr->flash[pc+1] << 8)) & 0xfc0f;
        return  o == 0x9200 || // STS ! Store Direct to Data Space
@@ -384,10 +447,12 @@ static inline int _avr_is_instruction_32_bits(avr_t * avr, uint32_t pc)
  * + It also doesn't check whether the core it's
  *   emulating is supposed to have the fancy instructions, like multiply and such.
  * 
- * for now all instructions take "one" cycle, the cycle+=<extra> needs to be added.
+ * The number of cycles taken by instruction has been added, but might not be
+ * entirely accurate.
  */
-uint16_t avr_run_one(avr_t * avr)
+avr_flashaddr_t avr_run_one(avr_t * avr)
 {
+#if CONFIG_SIMAVR_TRACE
        /*
         * this traces spurious reset or bad jumps
         */
@@ -396,15 +461,13 @@ uint16_t avr_run_one(avr_t * avr)
                STATE("RESET\n");
                CRASH();
        }
-
-       uint32_t        opcode = (avr->flash[avr->pc + 1] << 8) | avr->flash[avr->pc];
-       uint32_t        new_pc = avr->pc + 2;   // future "default" pc
-       int             cycle = 1;
-
-#if CONFIG_SIMAVR_TRACE
-       avr->touched[0] = avr->touched[1] = avr->touched[2] = 0;
+       avr->trace_data->touched[0] = avr->trace_data->touched[1] = avr->trace_data->touched[2] = 0;
 #endif
 
+       uint32_t                opcode = (avr->flash[avr->pc + 1] << 8) | avr->flash[avr->pc];
+       avr_flashaddr_t new_pc = avr->pc + 2;   // future "default" pc
+       int                     cycle = 1;
+
        switch (opcode & 0xf000) {
                case 0x0000: {
                        switch (opcode) {
@@ -443,7 +506,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        avr->sreg[S_S] = avr->sreg[S_N] ^ avr->sreg[S_V];
                                                        SREG();
                                                }       break;
-                                               case 0x0800: {  // SBC substract with carry 0000 10rd dddd rrrr
+                                               case 0x0800: {  // SBC subtract with carry 0000 10rd dddd rrrr
                                                        get_r_d_10(opcode);
                                                        uint8_t res = vd - vr - avr->sreg[S_C];
                                                        STATE("sbc %s[%02x], %s[%02x] = %02x\n", avr_regname(d), avr->data[d], avr_regname(r), avr->data[r], res);
@@ -467,8 +530,8 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        _avr_set_r(avr, d+1, avr->data[r+1]);
                                                                }       break;
                                                                case 0x0200: {  // MULS – Multiply Signed 0000 0010 dddd rrrr
-                                                                       int8_t r = opcode & 0xf;
-                                                                       int8_t d = (opcode >> 4) & 0xf;
+                                                                       int8_t r = 16 + (opcode & 0xf);
+                                                                       int8_t d = 16 + ((opcode >> 4) & 0xf);
                                                                        int16_t res = ((int8_t)avr->data[r]) * ((int8_t)avr->data[d]);
                                                                        STATE("muls %s[%d], %s[%02x] = %d\n", avr_regname(d), ((int8_t)avr->data[d]), avr_regname(r), ((int8_t)avr->data[r]), res);
                                                                        _avr_set_r(avr, 0, res);
@@ -477,35 +540,35 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        avr->sreg[S_Z] = res == 0;
                                                                        SREG();
                                                                }       break;
-                                                               case 0x0300: {  // multiplications
+                                                               case 0x0300: {  // MUL Multiply 0000 0011 fddd frrr
                                                                        int8_t r = 16 + (opcode & 0x7);
                                                                        int8_t d = 16 + ((opcode >> 4) & 0x7);
                                                                        int16_t res = 0;
                                                                        uint8_t c = 0;
-                                                                       const char * name = "";
+                                                                       T(const char * name = "";)
                                                                        switch (opcode & 0x88) {
                                                                                case 0x00:      // MULSU – Multiply Signed Unsigned 0000 0011 0ddd 0rrr
                                                                                        res = ((uint8_t)avr->data[r]) * ((int8_t)avr->data[d]);
                                                                                        c = (res >> 15) & 1;
-                                                                                       name = "mulsu";
+                                                                                       T(name = "mulsu";)
                                                                                        break;
                                                                                case 0x08:      // FMUL Fractional Multiply Unsigned 0000 0011 0ddd 1rrr
                                                                                        res = ((uint8_t)avr->data[r]) * ((uint8_t)avr->data[d]);
                                                                                        c = (res >> 15) & 1;
                                                                                        res <<= 1;
-                                                                                       name = "fmul";
+                                                                                       T(name = "fmul";)
                                                                                        break;
                                                                                case 0x80:      // FMULS – Multiply Signed  0000 0011 1ddd 0rrr
                                                                                        res = ((int8_t)avr->data[r]) * ((int8_t)avr->data[d]);
                                                                                        c = (res >> 15) & 1;
                                                                                        res <<= 1;
-                                                                                       name = "fmuls";
+                                                                                       T(name = "fmuls";)
                                                                                        break;
-                                                                               case 0x88:      // FMULSU – Multiply Signed Unsigned 0000 0011 1ddd 0rrr
+                                                                               case 0x88:      // FMULSU – Multiply Signed Unsigned 0000 0011 1ddd 1rrr
                                                                                        res = ((uint8_t)avr->data[r]) * ((int8_t)avr->data[d]);
                                                                                        c = (res >> 15) & 1;
                                                                                        res <<= 1;
-                                                                                       name = "fmulsu";
+                                                                                       T(name = "fmulsu";)
                                                                                        break;
                                                                        }
                                                                        cycle++;
@@ -538,10 +601,10 @@ uint16_t avr_run_one(avr_t * avr)
                                        avr->sreg[S_S] = avr->sreg[S_N] ^ avr->sreg[S_V];
                                        SREG();
                                }       break;
-                               case 0x1000: {  // CPSE Compare, skip if equal 0000 10 rd dddd rrrr
+                               case 0x1000: {  // CPSE Compare, skip if equal 0000 00 rd dddd rrrr
                                        get_r_d_10(opcode);
                                        uint16_t res = vd == vr;
-                                       STATE("cpse %s[%02x], %s[%02x]\t; Will%s skip\n", avr_regname(d), avr->data[d], avr_regname(r), avr->data[r], res ? "":"not ");
+                                       STATE("cpse %s[%02x], %s[%02x]\t; Will%s skip\n", avr_regname(d), avr->data[d], avr_regname(r), avr->data[r], res ? "":" not");
                                        if (res) {
                                                if (_avr_is_instruction_32_bits(avr, new_pc)) {
                                                        new_pc += 4; cycle += 2;
@@ -550,7 +613,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                }
                                        }
                                }       break;
-                               case 0x1400: {  // CP Compare 0000 10 rd dddd rrrr
+                               case 0x1400: {  // CP Compare 0000 01 rd dddd rrrr
                                        get_r_d_10(opcode);
                                        uint8_t res = vd - vr;
                                        STATE("cp %s[%02x], %s[%02x] = %02x\n", avr_regname(d), vd, avr_regname(r), vr, res);
@@ -627,9 +690,9 @@ uint16_t avr_run_one(avr_t * avr)
                                        SREG();
                                }       break;
                                case 0x2c00: {  // MOV  0010 11rd dddd rrrr
-                                       get_r_d_10(opcode);
+                                       get_r_dd_10(opcode);
                                        uint8_t res = vr;
-                                       STATE("mov %s[%02x], %s[%02x] = %02x\n", avr_regname(d), vd, avr_regname(r), vr, res);
+                                       STATE("mov %s, %s[%02x] = %02x\n", avr_regname(d), avr_regname(r), vr, res);
                                        _avr_set_r(avr, d, res);
                                }       break;
                                default: _avr_invalid_opcode(avr);
@@ -657,7 +720,8 @@ uint16_t avr_run_one(avr_t * avr)
                        uint8_t res = vr - k - avr->sreg[S_C];
                        STATE("sbci %s[%02x], 0x%02x = %02x\n", avr_regname(r), avr->data[r], k, res);
                        _avr_set_r(avr, r, res);
-                       avr->sreg[S_Z] = res  == 0;
+                       if (res)
+                               avr->sreg[S_Z] = 0;
                        avr->sreg[S_N] = (res >> 7) & 1;
                        avr->sreg[S_C] = (k + avr->sreg[S_C]) > vr;
                        avr->sreg[S_S] = avr->sreg[S_N] ^ avr->sreg[S_V];
@@ -717,7 +781,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                STATE("ld %s, (Z+%d[%04x])=[%02x]\n", avr_regname(r), q, v+q, avr->data[v+q]);
                                                _avr_set_r(avr, r, _avr_get_ram(avr, v+q));
                                        }
-                                       cycle += 2;
+                                       cycle += 1; // 2 cycles, 3 for tinyavr
                                }       break;
                                case 0xa008:
                                case 0x8008: {  // LD (LDD) – Load Indirect using Y 10q0 qq0r rrrr 1qqq
@@ -732,7 +796,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                STATE("ld %s, (Y+%d[%04x])=[%02x]\n", avr_regname(r), q, v+q, avr->data[v+q]);
                                                _avr_set_r(avr, r, _avr_get_ram(avr, v+q));
                                        }
-                                       cycle += 2;
+                                       cycle += 1; // 2 cycles, 3 for tinyavr
                                }       break;
                                default: _avr_invalid_opcode(avr);
                        }
@@ -748,7 +812,12 @@ uint16_t avr_run_one(avr_t * avr)
                        } else switch (opcode) {
                                case 0x9588: { // SLEEP
                                        STATE("sleep\n");
-                                       avr->state = cpu_Sleeping;
+                                       /* Don't sleep if there are interrupts about to be serviced.
+                                        * Without this check, it was possible to incorrectly enter a state
+                                        * in which the cpu was sleeping and interrupts were disabled. For more
+                                        * details, see the commit message. */
+                                       if (!avr_has_pending_interrupts(avr) || !avr->sreg[S_I])
+                                               avr->state = cpu_Sleeping;
                                }       break;
                                case 0x9598: { // BREAK
                                        STATE("break\n");
@@ -763,22 +832,31 @@ uint16_t avr_run_one(avr_t * avr)
                                }       break;
                                case 0x95a8: { // WDR
                                        STATE("wdr\n");
+                                       avr_ioctl(avr, AVR_IOCTL_WATCHDOG_RESET, 0);
                                }       break;
-                               case 0x9409: { // IJMP Indirect jump
-                                       uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
-                                       STATE("ijmp Z[%04x]\n", z << 1);
-                                       new_pc = z << 1;
-                                       cycle++;
-                                       TRACE_JUMP();
+                               case 0x95e8: { // SPM
+                                       STATE("spm\n");
+                                       avr_ioctl(avr, AVR_IOCTL_FLASH_SPM, 0);
                                }       break;
-                               case 0x9509: { // ICALL Indirect Call to Subroutine
+                               case 0x9409:   // IJMP Indirect jump                                    1001 0100 0000 1001
+                               case 0x9419:   // EIJMP Indirect jump                                   1001 0100 0001 1001   bit 4 is "indirect"
+                               case 0x9509:   // ICALL Indirect Call to Subroutine             1001 0101 0000 1001
+                               case 0x9519: { // EICALL Indirect Call to Subroutine    1001 0101 0001 1001   bit 8 is "push pc"
+                                       int e = opcode & 0x10;
+                                       int p = opcode & 0x100;
+                                       if (e && !avr->eind)
+                                               _avr_invalid_opcode(avr);
                                        uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
-                                       STATE("icall Z[%04x]\n", z << 1);
-                                       _avr_push16(avr, new_pc >> 1);
+                                       if (e)
+                                               z |= avr->data[avr->eind] << 16;
+                                       STATE("%si%s Z[%04x]\n", e?"e":"", p?"call":"jmp", z << 1);
+                                       if (p) {
+                                               cycle++;
+                                               _avr_push16(avr, new_pc >> 1);
+                                       }
                                        new_pc = z << 1;
-                                       cycle += 2;
+                                       cycle++;
                                        TRACE_JUMP();
-                                       STACK_FRAME_PUSH();
                                }       break;
                                case 0x9518:    // RETI
                                case 0x9508: {  // RET
@@ -793,6 +871,7 @@ uint16_t avr_run_one(avr_t * avr)
                                case 0x95c8: {  // LPM Load Program Memory R0 <- (Z)
                                        uint16_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8);
                                        STATE("lpm %s, (Z[%04x])\n", avr_regname(0), z);
+                                       cycle += 2; // 3 cycles
                                        _avr_set_r(avr, 0, avr->flash[z]);
                                }       break;
                                case 0x9408:case 0x9418:case 0x9428:case 0x9438:case 0x9448:case 0x9458:case 0x9468:
@@ -804,8 +883,8 @@ uint16_t avr_run_one(avr_t * avr)
                                        SREG();
                                }       break;
                                case 0x9488:case 0x9498:case 0x94a8:case 0x94b8:case 0x94c8:case 0x94d8:case 0x94e8:
-                               case 0x94f8:
-                               {       // BSET 1001 0100 0ddd 1000
+                               case 0x94f8:    // bit 7 is 'clear vs set'
+                               {       // BCLR 1001 0100 1ddd 1000
                                        uint8_t b = (opcode >> 4) & 7;
                                        avr->sreg[b] = 0;
                                        STATE("bclr %c\n", _sreg_bit_name[b]);
@@ -819,7 +898,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        new_pc += 2;
                                                        STATE("lds %s[%02x], 0x%04x\n", avr_regname(r), avr->data[r], x);
                                                        _avr_set_r(avr, r, _avr_get_ram(avr, x));
-                                                       cycle++;
+                                                       cycle++; // 2 cycles
                                                }       break;
                                                case 0x9005:
                                                case 0x9004: {  // LPM Load Program Memory 1001 000d dddd 01oo
@@ -833,8 +912,33 @@ uint16_t avr_run_one(avr_t * avr)
                                                                _avr_set_r(avr, R_ZH, z >> 8);
                                                                _avr_set_r(avr, R_ZL, z);
                                                        }
-                                                       cycle += 2;
+                                                       cycle += 2; // 3 cycles
                                                }       break;
+                                               case 0x9006:
+                                               case 0x9007: {  // ELPM Extended Load Program Memory 1001 000d dddd 011o
+                                                       // a zero avr->rampz is reported as an invalid opcode
+                                                       // NOTE(review): execution still continues below in that case -- confirm intended
+                                                       if (!avr->rampz)
+                                                               _avr_invalid_opcode(avr);
+                                                       // the flash address is RAMPZ:ZH:ZL, wider than 16 bits: a 16 bits variable
+                                                       // would drop the RAMPZ part and clear RAMPZ on the write back below
+                                                       uint32_t z = avr->data[R_ZL] | (avr->data[R_ZH] << 8) | ((uint32_t)avr->data[avr->rampz] << 16);
+                                                       uint8_t r = (opcode >> 4) & 0x1f;
+                                                       int op = opcode & 3;    // 2 = 0x9006 (Z unchanged), 3 = 0x9007 (Z post incremented)
+                                                       STATE("elpm %s, (Z[%02x:%04x]%s)\n", avr_regname(r), z >> 16, z&0xffff, op == 3 ? "+" : "");
+                                                       _avr_set_r(avr, r, avr->flash[z]);
+                                                       if (op == 3) {
+                                                               // post increment carries into RAMPZ
+                                                               z++;
+                                                               _avr_set_r(avr, avr->rampz, z >> 16);
+                                                               _avr_set_r(avr, R_ZH, z >> 8);
+                                                               _avr_set_r(avr, R_ZL, z);
+                                                       }
+                                                       cycle += 2; // 3 cycles
+                                               }       break;
+                                               /*
+                                                * Load store instructions
+                                                *
+                                                * 1001 00sr rrrr iioo
+                                                * s = 0 = load, 1 = store
+                                                * ii = 16 bits register index, 11 = Z, 10 = Y, 00 = X
+                                                * oo = 1) post increment, 2) pre-decrement
+                                                */
                                                case 0x900c:
                                                case 0x900d:
                                                case 0x900e: {  // LD Load Indirect from Data using X 1001 000r rrrr 11oo
@@ -842,7 +946,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t x = (avr->data[R_XH] << 8) | avr->data[R_XL];
                                                        STATE("ld %s, %sX[%04x]%s\n", avr_regname(r), op == 2 ? "--" : "", x, op == 1 ? "++" : "");
-
+                                                       cycle++; // 2 cycles (1 for tinyavr, except with inc/dec 2)
                                                        if (op == 2) x--;
                                                        _avr_set_r(avr, r, _avr_get_ram(avr, x));
                                                        if (op == 1) x++;
@@ -856,7 +960,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t x = (avr->data[R_XH] << 8) | avr->data[R_XL];
                                                        STATE("st %sX[%04x]%s, %s[%02x] \n", op == 2 ? "--" : "", x, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
-                                                       cycle++;
+                                                       cycle++; // 2 cycles, except tinyavr
                                                        if (op == 2) x--;
                                                        _avr_set_ram(avr, x, avr->data[r]);
                                                        if (op == 1) x++;
@@ -869,7 +973,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t y = (avr->data[R_YH] << 8) | avr->data[R_YL];
                                                        STATE("ld %s, %sY[%04x]%s\n", avr_regname(r), op == 2 ? "--" : "", y, op == 1 ? "++" : "");
-                                                       cycle++;
+                                                       cycle++; // 2 cycles, except tinyavr
                                                        if (op == 2) y--;
                                                        _avr_set_r(avr, r, _avr_get_ram(avr, y));
                                                        if (op == 1) y++;
@@ -881,7 +985,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        int op = opcode & 3;
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t y = (avr->data[R_YH] << 8) | avr->data[R_YL];
-                                                       STATE("st %sY[%04x]%s, %s[%02x] \n", op == 2 ? "--" : "", y, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
+                                                       STATE("st %sY[%04x]%s, %s[%02x]\n", op == 2 ? "--" : "", y, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
                                                        cycle++;
                                                        if (op == 2) y--;
                                                        _avr_set_ram(avr, y, avr->data[r]);
@@ -894,6 +998,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
                                                        new_pc += 2;
                                                        STATE("sts 0x%04x, %s[%02x]\n", x, avr_regname(r), avr->data[r]);
+                                                       cycle++;
                                                        _avr_set_ram(avr, x, avr->data[r]);
                                                }       break;
                                                case 0x9001:
@@ -902,6 +1007,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t z = (avr->data[R_ZH] << 8) | avr->data[R_ZL];
                                                        STATE("ld %s, %sZ[%04x]%s\n", avr_regname(r), op == 2 ? "--" : "", z, op == 1 ? "++" : "");
+                                                       cycle++;; // 2 cycles, except tinyavr
                                                        if (op == 2) z--;
                                                        _avr_set_r(avr, r, _avr_get_ram(avr, z));
                                                        if (op == 1) z++;
@@ -914,6 +1020,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint16_t z = (avr->data[R_ZH] << 8) | avr->data[R_ZL];
                                                        STATE("st %sZ[%04x]%s, %s[%02x] \n", op == 2 ? "--" : "", z, op == 1 ? "++" : "", avr_regname(r), avr->data[r]);
+                                                       cycle++; // 2 cycles, except tinyavr
                                                        if (op == 2) z--;
                                                        _avr_set_ram(avr, z, avr->data[r]);
                                                        if (op == 1) z++;
@@ -923,14 +1030,14 @@ uint16_t avr_run_one(avr_t * avr)
                                                case 0x900f: {  // POP 1001 000d dddd 1111
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        _avr_set_r(avr, r, _avr_pop8(avr));
-                                                       uint16_t sp = _avr_sp_get(avr);
+                                                       T(uint16_t sp = _avr_sp_get(avr);)
                                                        STATE("pop %s (@%04x)[%02x]\n", avr_regname(r), sp, avr->data[sp]);
                                                        cycle++;
                                                }       break;
                                                case 0x920f: {  // PUSH 1001 001d dddd 1111
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        _avr_push8(avr, avr->data[r]);
-                                                       uint16_t sp = _avr_sp_get(avr);
+                                                       T(uint16_t sp = _avr_sp_get(avr);)
                                                        STATE("push %s[%02x] (@%04x)\n", avr_regname(r), avr->data[r], sp);
                                                        cycle++;
                                                }       break;
@@ -946,7 +1053,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                        avr->sreg[S_S] = avr->sreg[S_N] ^ avr->sreg[S_V];
                                                        SREG();
                                                }       break;
-                                               case 0x9401: {  // NEG – One’s Complement
+                                               case 0x9401: {  // NEG – Two’s Complement
                                                        uint8_t r = (opcode >> 4) & 0x1f;
                                                        uint8_t rd = avr->data[r];
                                                        uint8_t res = 0x00 - rd;
@@ -1029,7 +1136,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                }       break;
                                                case 0x940c:
                                                case 0x940d: {  // JMP Long Call to sub, 32 bits
-                                                       uint32_t a = ((opcode & 0x01f0) >> 3) | (opcode & 1);
+                                                       avr_flashaddr_t a = ((opcode & 0x01f0) >> 3) | (opcode & 1);
                                                        uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
                                                        a = (a << 16) | x;
                                                        STATE("jmp 0x%06x\n", a);
@@ -1039,14 +1146,14 @@ uint16_t avr_run_one(avr_t * avr)
                                                }       break;
                                                case 0x940e:
                                                case 0x940f: {  // CALL Long Call to sub, 32 bits
-                                                       uint32_t a = ((opcode & 0x01f0) >> 3) | (opcode & 1);
+                                                       avr_flashaddr_t a = ((opcode & 0x01f0) >> 3) | (opcode & 1);
                                                        uint16_t x = (avr->flash[new_pc+1] << 8) | avr->flash[new_pc];
                                                        a = (a << 16) | x;
                                                        STATE("call 0x%06x\n", a);
                                                        new_pc += 2;
                                                        _avr_push16(avr, new_pc >> 1);
                                                        new_pc = a << 1;
-                                                       cycle += 3;     // 4 cycles
+                                                       cycle += 3;     // 4 cycles; FIXME 5 on devices with 22 bit PC
                                                        TRACE_JUMP();
                                                        STACK_FRAME_PUSH();
                                                }       break;
@@ -1087,7 +1194,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        SREG();
                                                                        cycle++;
                                                                }       break;
-                                                               case 0x9800: {  // CBI - Clear Bit in I/O Registe 1001 1000 AAAA Abbb
+                                                               case 0x9800: {  // CBI - Clear Bit in I/O Register 1001 1000 AAAA Abbb
                                                                        uint8_t io = ((opcode >> 3) & 0x1f) + 32;
                                                                        uint8_t b = opcode & 0x7;
                                                                        uint8_t res = _avr_get_ram(avr, io) & ~(1 << b);
@@ -1099,7 +1206,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        uint8_t io = ((opcode >> 3) & 0x1f) + 32;
                                                                        uint8_t b = opcode & 0x7;
                                                                        uint8_t res = _avr_get_ram(avr, io) & (1 << b);
-                                                                       STATE("sbic %s[%04x], 0x%02x\t; Will%s branch\n", avr_regname(io), avr->data[io], 1<<b, !res?"":"not ");
+                                                                       STATE("sbic %s[%04x], 0x%02x\t; Will%s branch\n", avr_regname(io), avr->data[io], 1<<b, !res?"":" not");
                                                                        if (!res) {
                                                                                if (_avr_is_instruction_32_bits(avr, new_pc)) {
                                                                                        new_pc += 4; cycle += 2;
@@ -1116,11 +1223,11 @@ uint16_t avr_run_one(avr_t * avr)
                                                                        _avr_set_ram(avr, io, res);
                                                                        cycle++;
                                                                }       break;
-                                                               case 0x9b00: {  // SBIS - Skip if Bit in I/O Register is Cleared 1001 0111 AAAA Abbb
-                                                                       uint8_t io = (opcode >> 3) & 0x1f;
+                                                               case 0x9b00: {  // SBIS - Skip if Bit in I/O Register is Set 1001 1011 AAAA Abbb
+                                                                       uint8_t io = ((opcode >> 3) & 0x1f) + 32;
                                                                        uint8_t b = opcode & 0x7;
-                                                                       uint8_t res = _avr_get_ram(avr, io + 32) & (1 << b);
-                                                                       STATE("sbis %s[%04x], 0x%02x\t; Will%s branch\n", avr_regname(io), avr->data[io], 1<<b, res?"":"not ");
+                                                                       uint8_t res = _avr_get_ram(avr, io) & (1 << b);
+                                                                       STATE("sbis %s[%04x], 0x%02x\t; Will%s branch\n", avr_regname(io), avr->data[io], 1<<b, res?"":" not");
                                                                        if (res) {
                                                                                if (_avr_is_instruction_32_bits(avr, new_pc)) {
                                                                                        new_pc += 4; cycle += 2;
@@ -1135,6 +1242,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                                                        get_r_d_10(opcode);
                                                                                        uint16_t res = vd * vr;
                                                                                        STATE("mul %s[%02x], %s[%02x] = %04x\n", avr_regname(d), vd, avr_regname(r), vr, res);
+                                                                                       cycle++;
                                                                                        _avr_set_r(avr, 0, res);
                                                                                        _avr_set_r(avr, 1, res >> 8);
                                                                                        avr->sreg[S_Z] = res == 0;
@@ -1156,15 +1264,12 @@ uint16_t avr_run_one(avr_t * avr)
                                        uint8_t r = (opcode >> 4) & 0x1f;
                                        uint8_t A = ((((opcode >> 9) & 3) << 4) | ((opcode) & 0xf)) + 32;
                                        STATE("out %s, %s[%02x]\n", avr_regname(A), avr_regname(r), avr->data[r]);
-                                       // todo: store to IO register
                                        _avr_set_ram(avr, A, avr->data[r]);
-                               //      avr->data[A] = ;
                                }       break;
                                case 0xb000: {  // IN Rd,A 1011 0AAr rrrr AAAA
                                        uint8_t r = (opcode >> 4) & 0x1f;
                                        uint8_t A = ((((opcode >> 9) & 3) << 4) | ((opcode) & 0xf)) + 32;
                                        STATE("in %s, %s[%02x]\n", avr_regname(r), avr_regname(A), avr->data[A]);
-                                       // todo: get the IO register
                                        _avr_set_r(avr, r, _avr_get_ram(avr, A));
                                }       break;
                                default: _avr_invalid_opcode(avr);
@@ -1173,7 +1278,8 @@ uint16_t avr_run_one(avr_t * avr)
 
                case 0xc000: {
                        // RJMP 1100 kkkk kkkk kkkk
-                       short o = ((short)(opcode << 4)) >> 4;
+//                     int16_t o = ((int16_t)(opcode << 4)) >> 4; // CLANG BUG!
+                       int16_t o = ((int16_t)((opcode << 4)&0xffff)) >> 4;
                        STATE("rjmp .%d [%04x]\n", o, new_pc + (o << 1));
                        new_pc = new_pc + (o << 1);
                        cycle++;
@@ -1182,7 +1288,8 @@ uint16_t avr_run_one(avr_t * avr)
 
                case 0xd000: {
                        // RCALL 1101 kkkk kkkk kkkk
-                       short o = ((short)(opcode << 4)) >> 4;
+//                     int16_t o = ((int16_t)(opcode << 4)) >> 4; // CLANG BUG!
+                       int16_t o = ((int16_t)((opcode << 4)&0xffff)) >> 4;
                        STATE("rcall .%d [%04x]\n", o, new_pc + (o << 1));
                        _avr_push16(avr, new_pc >> 1);
                        new_pc = new_pc + (o << 1);
@@ -1207,7 +1314,7 @@ uint16_t avr_run_one(avr_t * avr)
                                case 0xf200:
                                case 0xf400:
                                case 0xf600: {  // All the SREG branches
-                                       short o = ((short)(opcode << 6)) >> 9; // offset
+                                       int16_t o = ((int16_t)(opcode << 6)) >> 9; // offset
                                        uint8_t s = opcode & 7;
                                        int set = (opcode & 0x0400) == 0;               // this bit means BRXC otherwise BRXS
                                        int branch = (avr->sreg[s] && set) || (!avr->sreg[s] && !set);
@@ -1221,7 +1328,7 @@ uint16_t avr_run_one(avr_t * avr)
                                                STATE("%s%c .%d [%04x]\t; Will%s branch\n", set ? "brbs" : "brbc", _sreg_bit_name[s], o, new_pc + (o << 1), branch ? "":" not");
                                        }
                                        if (branch) {
-                                               cycle++;
+                                               cycle++; // 2 cycles if taken, 1 otherwise
                                                new_pc = new_pc + (o << 1);
                                        }
                                }       break;
@@ -1248,8 +1355,13 @@ uint16_t avr_run_one(avr_t * avr)
                                        int set = (opcode & 0x0200) != 0;
                                        int branch = ((avr->data[r] & (1 << s)) && set) || (!(avr->data[r] & (1 << s)) && !set);
                                        STATE("%s %s[%02x], 0x%02x\t; Will%s branch\n", set ? "sbrs" : "sbrc", avr_regname(r), avr->data[r], 1 << s, branch ? "":" not");
-                                       if (branch)
-                                               new_pc = new_pc + 2;
+                                       if (branch) {
+                                               if (_avr_is_instruction_32_bits(avr, new_pc)) {
+                                                       new_pc += 4; cycle += 2;
+                                               } else {
+                                                       new_pc += 2; cycle++;
+                                               }
+                                       }
                                }       break;
                                default: _avr_invalid_opcode(avr);
                        }