#include        "defs.h"

/* Addresses of the C entry points, stored as pointer-sized integers.
 * NOTE(review): presumably these exist so that hand-written assembly stubs
 * can jump/call through them -- confirm against the stubs that use them. */
ULONG_PTR       g_traceProcessNextBuffer = (ULONG_PTR)traceProcessNextBuffer;
ULONG_PTR       g_traceInitThread        = (ULONG_PTR)traceInitThread;
ULONG_PTR       g_postSysenter           = (ULONG_PTR)tracePostSysenter;

/* Spin lock taken by traceProcessNextBuffer() after it has dropped its
 * reference on the basic block it was executing. */
lock_t          trace_lock;

/* NOTE(review): not referenced in the part of the file visible here; the name
 * suggests a time-stamp-counter sample -- confirm usage before relying on it. */
LARGE_INTEGER   rdtsc;

/*
 * Make the stack bounds recorded at fs:[4] (base) and fs:[8] (limit) describe
 * the tracer's private stack.
 *
 * If pctx->thread_stack already lies inside the bounds currently stored there,
 * nothing is changed. Otherwise the current bounds are remembered in
 * pctx->real_thread_stack_base / real_thread_stack_limit (so that
 * traceVmExitFixStack() can put them back) and replaced with
 * [thread_stack_base, thread_stack_base + THREAD_STACK_SIZE].
 *
 * pctx - per-thread tracer context.
 */
VOID    WINAPI  traceVmEnterFixStack(__in pcontext pctx){
        const ULONG_PTR teb_stack_base  = __readfsdword(0x4);
        const ULONG_PTR teb_stack_limit = __readfsdword(0x8);

        /* Only touch the recorded bounds when the private stack is outside them. */
        if (pctx->thread_stack < teb_stack_limit || pctx->thread_stack > teb_stack_base){
                pctx->real_thread_stack_base  = teb_stack_base;
                pctx->real_thread_stack_limit = teb_stack_limit;

                __writefsdword(0x4, pctx->thread_stack_base + THREAD_STACK_SIZE);
                __writefsdword(0x8, pctx->thread_stack_base);
        }
}

/*
 * Write the stack bounds saved in pctx->real_thread_stack_base and
 * pctx->real_thread_stack_limit back to fs:[4] and fs:[8] respectively.
 *
 * pctx - per-thread tracer context holding the saved bounds.
 */
VOID    WINAPI traceVmExitFixStack(__in pcontext pctx){
        ULONG_PTR saved_bound;

        saved_bound = pctx->real_thread_stack_base;
        __writefsdword(0x4, saved_bound);

        saved_bound = pctx->real_thread_stack_limit;
        __writefsdword(0x8, saved_bound);
}

/*
 * Look up an already built basic block that starts at `eip`.
 *
 * Two places are consulted in the area returned by vmmap_get_area():
 *   1. the area's pbbl_next block, accepted only if its bbl_start equals eip;
 *   2. the area's pbbl_array, indexed by the offset of eip from vmmap_base.
 *
 * eip     - address to look up.
 * returns - the matching block, or NULL when neither lookup produces one.
 */
pbbl_struct traceFindBbl(__in unsigned long eip){
        pvmmap_area area;
        pbbl_struct candidate;

        area = vmmap_get_area((void *)eip);

        candidate = area->pbbl_next;
        if (candidate && candidate->bbl_start == eip){
                return candidate;
        }

        /* Fall back to the per-offset table, if the area has one. */
        if (area->pbbl_array){
                candidate = area->pbbl_array[eip - area->vmmap_base];
                if (candidate){
                        return candidate;
                }
        }

        return NULL;
}

/*
 * Entry point reached when a traced thread arrives at one of the hooked
 * dispatcher entry points. Loads the captured register state into the
 * per-thread context, performs per-entry-point fix-ups and then hands control
 * to traceProcessNextBuffer().
 *
 * pregs - register snapshot taken at the hook; copied into the thread context.
 *
 * Entry points recognised by comparing reg_eip with the shadow addresses:
 *   - pLdrInitializeThunkShadow: clears the pending-exception fields and, if
 *     the dword at [esp+8] is writable and equals g_ntdllbase, replaces it
 *     with REBASED_NTDLL_BASE.
 *   - pKiUserExceptionDispatcherShadow: esp is treated as an
 *     EXCEPTION_POINTERS pair. A registered internal handler (read from
 *     fs:[TEB_THREAD_EXCEPTION]) is called first; then addresses that point
 *     into the translated copy of the current basic block are mapped back to
 *     the original code before the exception is passed to
 *     instrumentException().
 *   - pKiUserApcDispatcherShadow / pKiUserCallbackDispatcherShadow: only the
 *     pending-exception fields are cleared.
 * Any other reg_eip goes straight to traceProcessNextBuffer().
 */
VOID    traceInitThread(__in px86regs pregs){
        context *pctx;
        PEXCEPTION_POINTERS     pExceptionPointers;
        pbbl_struct      pbbl;
        unsigned long    bbl_size;
        unsigned long    eip;
        ULONG_PTR        exception_address;
        FN_HANDLE_EXCEPTION     fnHandleException;
        context         ctx;
        
        //if (trace_base)
        //        __asm jmp $
                        
        InitThread(0);        
        pctx = (context *)__readfsdword(TEB_THREAD_CONTEXT);
        traceVmEnterFixStack(pctx);
        
        //save context, as there could be Exception in our code
        //which will destroy original context. In this case we
        //will restore context if exception is handled inside
        //of our code...
        memcpy(&ctx, pctx, sizeof(ctx));
        
        pctx->reg_eax = pregs->reg_eax;
        pctx->reg_ecx = pregs->reg_ecx;
        pctx->reg_edx = pregs->reg_edx;
        pctx->reg_ebx = pregs->reg_ebx;
        pctx->reg_esp = pregs->reg_esp;
        pctx->reg_ebp = pregs->reg_ebp;
        pctx->reg_esi = pregs->reg_esi;
        pctx->reg_edi = pregs->reg_edi;
        pctx->reg_eflags = pregs->reg_eflags;
        pctx->reg_eip = pregs->reg_eip;
        //traceProcessNextBuffer() reloads reg_eax from this TEB slot
        __writefsdword(TEB_THREAD_SAVE_EAX, pctx->reg_eax);
        
                      
        if (pctx->reg_eip == pLdrInitializeThunkShadow){ 
                DbgPrint(("%s -- LdrInitializeThunk hit...", __FUNCTION__));
                
                pctx->exception_code = 0;
                pctx->exception_eip  = 0;
                
                //swap the real ntdll base found at [esp+8] for the rebased one
                if (STATUS_SUCCESS == ProbeForWrite((void *)(pctx->reg_esp+8), 1, sizeof(ULONG_PTR))){
                        if (*(ULONG_PTR *)(pctx->reg_esp+8) == g_ntdllbase)
                                *(ULONG_PTR *)(pctx->reg_esp+8) = REBASED_NTDLL_BASE;           
                }
                
                goto __ExecBuffer;
        }else if (pctx->reg_eip == pKiUserExceptionDispatcherShadow){
                DbgPrint(("%s -- KiUserExceptionDispatcher hit...", __FUNCTION__));   
                /****************************************************
                 * fix eip properly so it points to original code
                 * and deliver exception to an application...
                 ****************************************************/
                pExceptionPointers = (PEXCEPTION_POINTERS)pctx->reg_esp;
                DbgPrint(("%s -- exception in code at eip     : %.08X\n", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionAddress));
                DbgPrint(("%s -- exception code               : %.08X\n", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionCode));
                fnHandleException = (FN_HANDLE_EXCEPTION)__readfsdword(TEB_THREAD_EXCEPTION);
                if (fnHandleException){
                        //an internal handler is registered: undo the register copy above
                        //by restoring the context saved on entry, then let it run
                        memcpy(pctx, &ctx, sizeof(ctx));
                        DbgPrint(("%s -- internal exceptio handled on read/write", __FUNCTION__));
                        DbgPrint(("%s -- EIP : %.08X", __FUNCTION__, pExceptionPointers->ContextRecord->Eip));
                        DbgPrint(("%s -- Faulting Access at : %.08X", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionInformation[1]));
                        fnHandleException(pExceptionPointers);
                }
 
                /**************************************************************************
                 * single step exception should continue in KiUserExceptionDispatcher at
                 * this point, as it wasn't injected, which means exception happened while
                 * instruction was executing...
                 **************************************************************************/
                if (pctx->tf_set == 1){
                        pExceptionPointers->ContextRecord->EFlags |= 0x100;     //0x100 == EFLAGS.TF
                        pctx->tf_inject = 0;            //clear tf_inject, as we don't inject TF right now
                                                        //but after execution of next instruction!!!
                }
                
                 /***************************************************************************
                 * If tf was set, and we receive exception at KiUserExceptionDispatcherHook
                 * as type SINGLE_STEP exception, we need to update CONTEXT on stack with 
                 * Eflags |= TF, and execute one instruction of KiUserExceptionDispatcher
                 ***************************************************************************/                
                pbbl = (pbbl_struct)pctx->current_bbl;
                if (!pbbl){
                        //no basic block was being executed: nothing to translate,
                        //just record the exception and forward it
                        DbgPrint(("%s -- No BBL for this code... RaiseException maybe???", __FUNCTION__));
                        DbgPrint(("%s -- exception code                    : %.08X\n", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionCode));   
                        DbgPrint(("%s -- EAX : %.08X ECX : %.08X EDX : %.08X EBX : %.08X\n", __FUNCTION__,
                                                                                             pExceptionPointers->ContextRecord->Eax,
                                                                                             pExceptionPointers->ContextRecord->Ecx,
                                                                                             pExceptionPointers->ContextRecord->Edx,
                                                                                             pExceptionPointers->ContextRecord->Ebx));
                                                                                             
                        DbgPrint(("%s -- ESP : %.08X EBP : %.08X ESI : %.08X EDI : %.08X\n", __FUNCTION__,
                                                                                             pExceptionPointers->ContextRecord->Esp,
                                                                                             pExceptionPointers->ContextRecord->Ebp,
                                                                                             pExceptionPointers->ContextRecord->Esi,
                                                                                             pExceptionPointers->ContextRecord->Edi));
                        DbgPrint(("%s -- EIP : %.08X",                                       __FUNCTION__,
                                                                                             pExceptionPointers->ContextRecord->Eip));
                        pctx->exception_code      = (unsigned long)pExceptionPointers->ExceptionRecord->ExceptionCode;
                        pctx->exception_eip       = (unsigned long)pExceptionPointers->ExceptionRecord->ExceptionAddress;
                        instrumentException(pExceptionPointers);
                        goto __ExecBuffer;
                }
                bbl_size = pbbl->bbl_end - pbbl->bbl_start;
                eip = pExceptionPointers->ContextRecord->Eip;
                
                if (eip >= trace_base && eip < trace_end){
                        //the fault is inside [trace_base, trace_end), i.e. in the tracer itself:
                        //dump everything we know about it
                        DbgPrint(("%s -- internal exception in code at eip     : %.08X\n", __FUNCTION__, eip));
                        DbgPrint(("%s -- internal exception in code at eip rva : %.08X\n", __FUNCTION__, eip - trace_base)); 
                        DbgPrint(("%s -- exception code                        : %.08X\n", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionCode));
                        if (pExceptionPointers->ExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION){
                                char *reason;
                                switch (pExceptionPointers->ExceptionRecord->ExceptionInformation[0])
                                {
                                case 0:
                                        reason = "READ";
                                        break;
                                case 1: 
                                        reason = "WRITE";
                                        break;
                                case 8:
                                        reason = "EXEC";
                                        break;
                                default:
                                        reason = "UNKNOWN";
                                        break;
                                }
                                DbgPrint(("%s -- exception happened becase of          : %s - %d\n", __FUNCTION__, reason, pExceptionPointers->ExceptionRecord->ExceptionInformation[0]));
                                DbgPrint(("%s -- exception happened by accessing       : %.08X", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionInformation[1]));                
                        }
                        DbgPrint(("%s -- EAX : %.08X ECX : %.08X EDX : %.08X EBX : %.08X\n", __FUNCTION__,
                                                                                             pExceptionPointers->ContextRecord->Eax,
                                                                                             pExceptionPointers->ContextRecord->Ecx,
                                                                                             pExceptionPointers->ContextRecord->Edx,
                                                                                             pExceptionPointers->ContextRecord->Ebx));
                                                                                             
                        DbgPrint(("%s -- ESP : %.08X EBP : %.08X ESI : %.08X EDI : %.08X\n", __FUNCTION__,
                                                                                             pExceptionPointers->ContextRecord->Esp,
                                                                                             pExceptionPointers->ContextRecord->Ebp,
                                                                                             pExceptionPointers->ContextRecord->Esi,
                                                                                             pExceptionPointers->ContextRecord->Edi));
                        DbgPrint(("%s -- EIP : %.08X",                                       __FUNCTION__,
                                                                                             pExceptionPointers->ContextRecord->Eip));
                        //jmp $ branches to itself: the thread spins here (so a debugger can
                        //be attached) and the three statements below are not reached while
                        //this instruction is in place
                        __asm jmp $
                        memcpy(pctx, &ctx, sizeof(ctx));
                        NtContinue(pExceptionPointers->ContextRecord, FALSE);
                        NtTerminateProcess((HANDLE)(ULONG_PTR)-1, 0);
                }
                
                if (eip >= pbbl->bbl_buffer && eip <= pbbl->bbl_buffer + bbl_size){
                        //exception happened inside of our BasicBlock, thus we alter
                        //EIP in context a little bit...
                        pExceptionPointers->ContextRecord->Eip -= pbbl->bbl_buffer;
                        pExceptionPointers->ContextRecord->Eip += pbbl->bbl_start;
                        
                        //Rebase ExceptionAddress the same way, but only when it really
                        //points into the translated buffer; any other value would be
                        //turned into a meaningless address by the subtraction.
                        exception_address = (ULONG_PTR)pExceptionPointers->ExceptionRecord->ExceptionAddress;
                        if (exception_address >= pbbl->bbl_buffer &&
                            exception_address <= pbbl->bbl_buffer + bbl_size)
                        {
                                pExceptionPointers->ExceptionRecord->ExceptionAddress =
                                        (PVOID)(exception_address - pbbl->bbl_buffer + pbbl->bbl_start);
                        }
                        //pExceptionPointers->ExceptionRecord->ExceptionAddress = (PVOID)pExceptionPointers->ContextRecord->Eip;        
                        DbgPrint(("%s -- handled exception inside of bbl...", __FUNCTION__));
                        DbgPrint(("%s -- bbl_start : %.08X bbl_end : %.08X", __FUNCTION__, pbbl->bbl_start, pbbl->bbl_end));
                        DbgPrint(("%s -- exception code            : %.08X", __FUNCTION__, pExceptionPointers->ExceptionRecord->ExceptionCode));
                        //If tf was set here, we need to execute one instruction from KiUserExceptionDispatcher
                        //and inject exception. This is very important, as for example INTO with TF will generate Exception for INTO
                        //but TF won't be executed. Thus TF will be executed only after 1st instruction from KiUserExceptionDispatcher
                        //is executed. This is a must have for proper handling of exception.
                        pctx->exception_code      = (unsigned long)pExceptionPointers->ExceptionRecord->ExceptionCode;
                        pctx->exception_eip       = (unsigned long)pExceptionPointers->ExceptionRecord->ExceptionAddress;
                        //traceFreeBblRange((PVOID)pKiUserExceptionDispatcherShadow, 0x1000);
                        ////build 1 instruction, and set exception after it's executed.
                        //pbbl = traceBuildBbl((PVOID)pKiUserExceptionDispatcherShadow, TRUE);
                }
                
                instrumentException(pExceptionPointers);
                goto __ExecBuffer;
        }else if (pctx->reg_eip == pKiUserApcDispatcherShadow){
                DbgPrint(("%s -- KiUserApcDispatcher hit...", __FUNCTION__));
                pctx->exception_code = 0;
                pctx->exception_eip  = 0;
                goto __ExecBuffer;
        }else if (pctx->reg_eip == pKiUserCallbackDispatcherShadow){
                pctx->exception_code = 0;
                pctx->exception_eip  = 0;
                //ntOutputDebugString("KiUserCallbackDispatcher hit...");
                goto __ExecBuffer;
        }
        
__ExecBuffer:
        traceProcessNextBuffer(pctx);               
}

/***************************************************************
 * Release one basic block: wait until its reference count has
 * dropped to zero, wipe and free its code buffers (if any), then
 * wipe and free the descriptor itself.
 *
 * For blocks flagged bbl_eip_redirection the main buffer holds a
 * single x86dis record, otherwise it is wiped over the length of
 * the original code range.
 ***************************************************************/
static VOID traceReleaseBblStorage(__in pbbl_struct pbbl){
        wait_zero(&pbbl->bbl_refcount);
        if (pbbl->bbl_buffer){
                if (pbbl->bbl_eip_redirection)
                        memset((void *)pbbl->bbl_buffer, 0, sizeof(x86dis));
                else
                        memset((void *)pbbl->bbl_buffer, 0, pbbl->bbl_end - pbbl->bbl_start);
                dlfree((void *)pbbl->bbl_buffer);
                memset((void *)pbbl->bbl_alternate_buffer, 0, pbbl->bbl_end - pbbl->bbl_start);
                dlfree((void *)pbbl->bbl_alternate_buffer);
        }
        memset(pbbl, 0, sizeof(bbl_struct));
        dlfree(pbbl);
}

/***************************************************************
 * Free every basic block built for the pages in
 * [lpBaseAddress, lpBaseAddress + dwSize) and delete the vmmap
 * areas of those pages.
 *
 * The function simply waits for blocks to finish executing and
 * then frees the memory they occupy. A thread cannot be looping
 * inside a basic block, as jmps are not part of a basic block.
 * The only basic block which can block is int 2b with
 * CallbackReturn, but that one is executed outside of a basic
 * block, thus this block actually doesn't "exist".
 *
 * lpBaseAddress - start of the range; the range is walked in
 *                 0x1000-byte steps starting at this address.
 * dwSize        - size of the range in bytes.
 *
 * For every page this releases the blocks on the area's bbl_list
 * and the blocks referenced by pbbl_next / pbbl_prev, clearing
 * the matching link in the neighbouring area so that the block is
 * not released a second time.
 ***************************************************************/
VOID    traceFreeBblRange(__in PVOID lpBaseAddress, __in ULONG dwSize){
        ULONG_PTR     index;
        pbbl_struct   pbbl;
        pvmmap_area   parea;
        pvmmap_area   parea_prev;
        pvmmap_area   parea_next;
        
        //DbgPrint(("%s -- freeing range : %.08X - %.08X", __FUNCTION__, lpBaseAddress, dwSize));
        
        index = (ULONG_PTR)lpBaseAddress;
        while (index < ((ULONG_PTR)lpBaseAddress + dwSize)){
                //look up the area of the page being processed in THIS iteration;
                //using lpBaseAddress here would only ever clean the first page
                parea = vmmap_get_area((void *)index);
                while (NULL != (pbbl = (pbbl_struct)RemoveHeadList(&parea->bbl_list))){
                        traceReleaseBblStorage(pbbl);
                }
                
                if (parea->pbbl_next){
                        //block shared with the following page
                        parea_next = vmmap_get_area((void *)(index + 0x1000));
                        traceReleaseBblStorage(parea->pbbl_next);
                        parea_next->pbbl_prev = 0;
                        parea->pbbl_next = 0;      
                }
                
                if (parea->pbbl_prev){
                        //block shared with the preceding page
                        parea_prev = vmmap_get_area((void *)(index - 0x1000));
                        traceReleaseBblStorage(parea->pbbl_prev);
                        parea_prev->pbbl_next = 0;
                        parea->pbbl_prev = 0;            
                }
                
                vmmap_delete_area((void *)index);                        
                index += 0x1000;        
        }

}

/*
 * Build a basic block (bbl_struct) for the code starting at inst_ptr.
 *
 * Instructions are decoded one at a time and their bytes copied into a heap
 * buffer (bbl_buffer) until one of the stop conditions below is met. Unless
 * the block ends in an EIP-redirecting instruction handled specially, an exit
 * stub is appended that records the address of the next instruction in the
 * thread context and jumps back into the tracer.
 *
 * inst_ptr          - address of the first instruction of the block.
 * b_one_instruction - when TRUE, stop after the first copied instruction.
 *
 * returns - the new bbl_struct (as VOID *), or NULL when the loop ended
 *           without accepting any instruction (e.g. the first decode failed).
 *
 * Stop conditions, as implemented below:
 *   - decodeInstructionWrapper() fails;
 *   - the next instruction starts on, or reaches into, another page;
 *   - the page is writable and a memory-writing instruction is met
 *     (such an instruction always gets a block of its own);
 *   - an EIP-redirecting instruction is met;
 *   - b_one_instruction is TRUE and one instruction has been copied.
 *
 * NOTE(review): results of dlmalloc/dlrealloc are used without a NULL check
 * throughout this function.
 */
VOID    *traceBuildBbl(__in unsigned char *inst_ptr, __in unsigned long b_one_instruction){
        unsigned long index = (unsigned long)inst_ptr;
        unsigned char *ptr = inst_ptr;          //address of the instruction being decoded
        unsigned char *exec_buffer;             //write cursor used while emitting stubs
        
        unsigned long index_hi, index_low;
        PVOID   *psubarray;
        unsigned long inst_count = 0;           //instructions accepted into the block
        unsigned long   len;
        unsigned long   iclass;
        unsigned long   bbl_size;               //bytes of original code covered so far
        unsigned long   write_ptr;              //offset in bbl_buffer of the next copied byte
        px86dis         px86;
        x86dis          temp_buffer;
        NTSTATUS        status;
        unsigned long   vmmap_prot;
        unsigned long   int2e_handle = 0;       //set when the block is an "int 2Eh"
        unsigned long   store_as_next = 0;      //set when the first instruction leaves its page
        unsigned long   b_eip_redirection = 0;  //set when no exit stub must be appended
        pvmmap_area     parea;
        pbbl_struct     pbbl;  
        unsigned long   page_mask;              //page base of inst_ptr
        unsigned long   dest;
        unsigned long   protection;
        
        px86  = &temp_buffer;
                
        /******************************************
         * Allocate new vmmap area, or get existing
         * one...
         ******************************************/
        parea   = vmmap_get_area(inst_ptr);
        if (parea->pbbl_array == NULL){
                //0x1000 entries of 4 bytes; the table is indexed below by the
                //offset of an instruction from vmmap_base
                parea->pbbl_array = dlmalloc(0x1000 * 4);
                memset(parea->pbbl_array, 0, 0x1000 * 4);
        }
        
        protection = vmmapGetProtection(inst_ptr);
        
        pbbl = dlmalloc(sizeof(bbl_struct));
        memset(pbbl, 0, sizeof(bbl_struct));
        
        page_mask = (unsigned long)inst_ptr & 0xFFFFF000;
        
        /*************************************
         * check how many instructions we have
         * here...
         *************************************/        
        pbbl->bbl_start         = (ULONG_PTR)inst_ptr;
        pbbl->bbl_buffer        = (ULONG_PTR)dlmalloc(1);       //placeholder, grown with dlrealloc below
        pbbl->bbl_syscall_int2e = 0;
        bbl_size   = 0;
        write_ptr  = 0;
        inst_count = 0;
        
        exec_buffer = (unsigned char *)pbbl->bbl_buffer;
        write_ptr = (ULONG_PTR)exec_buffer - pbbl->bbl_buffer;  //evaluates to 0
        bbl_size  = write_ptr;
        while (1){
                memset(px86, 0, sizeof(x86dis));
                status = decodeInstructionWrapper(ptr, px86);
                if (status != STATUS_SUCCESS) break;
                
                len    = px86->len;
                iclass = px86->iclass;
                               
                //ptr is already on another page: fine only if it sits exactly on
                //the page start (block ends here); anything else is a logic error
                //and the thread is parked in an endless loop for debugging
                if (((unsigned long)ptr  & 0xFFFFF000) != page_mask){
                        if (((unsigned long)ptr & 0xFFF) != 0){
                                DbgPrint(("%s -- wtf... how did we cross page boundary...", __FUNCTION__));        
                                __asm jmp $
                        }else{
                                break;
                        }
                }
                
                //check if current instruction overlaps page boundary...
                //if it is not the first instruction the block ends before it; if it
                //is the first one it becomes a single-instruction block stored as
                //pbbl_next of this area (see the end of the function)
                //NOTE(review): ptr + len is the address AFTER the instruction, so an
                //instruction ending exactly on the last byte of the page is treated
                //as overlapping too -- confirm this is intended
                if ((((unsigned long)ptr + len) & 0xFFFFF000) != page_mask){
                        if (inst_count != 0){
                                //DbgPrint(("%s -- code block out of page boundary...", __FUNCTION__));
                                break;
                        }
                        store_as_next = 1;        
                }
                
                /***************************************************************
                 * If instruction has memory write, as is done in writable 
                 * memory, we isolate it. Eg. for this kind of code:
                 *              call    __delta
                 *__delta:      pop     ebp                     <---------------+
                 *              sub     ebp, offset __delta                     |
                 *              mov     eax, offset __write                     |
                 *              mov     byte ptr[ebp+eax], 0c3h                 +--- this would be one basic block
                 *__write:      nop                                             |    which would lose control when
                 *              mov     ecx, edx                <---------------+    executed live
                 ****************************************************************/          
                                
                if (protection & VMMAP_WRITE && bbl_size == 0 && px86->mem_write == 1){
                        b_one_instruction = TRUE;    
                        //DbgPrint(("%s -- memwrite in writable memory", __FUNCTION__));    
                }else if (protection & VMMAP_WRITE && bbl_size != 0 && px86->mem_write == 1){
                        //DbgPrint(("%s -- memwrite reached, end of block", __FUNCTION__));
                        break;        
                }
                                      
                //every path through this branch ends the block (see the final break)
                if (decodeIsEipRedirection(px86)){
                        //if bbl_size == 0 and EIP redirection is INT, we make it as a part of bbl
                        //int3/int1 are emulated properly later on
                        if (bbl_size == 0 && (px86->iclass == XED_ICLASS_INT || px86->iclass == XED_ICLASS_INT1 || px86->iclass == XED_ICLASS_INT3)){
                                if (px86->num_of_operands == 1){
                                        if (px86->operand1_flags & C_IMM && px86->operand1 == 0x2E){
                                                int2e_handle = TRUE;   
                                                pbbl->bbl_syscall_int2e = 1;     
                                        }else if (px86->operand1_flags & C_IMM && px86->operand1 == 0x2B){
                                                //for 2b also don't reference code block...
                                                //this is important as this int should never return
                                                //if it returns, well code will continue, but code block
                                                //is never marked as "used"
                                                pbbl->bbl_syscall_int2b = 1;
                                        }
                                }
                                //copy the raw INT bytes; the normal exit stub is appended after the loop
                                bbl_size += len;
                                pbbl->bbl_buffer = (ULONG_PTR)dlrealloc((void *)pbbl->bbl_buffer, bbl_size);
                                memcpy((void *)(pbbl->bbl_buffer + write_ptr), ptr, len);        
                                write_ptr+=len;
                                inst_count++;
                                ptr+=len;
                        }else if (bbl_size == 0 && !decodeIsJcc(px86)){
                                //EIP redirection instructions are set as px86dis in bbl_buffer
                                //so those are emulated properly
                                pbbl->bbl_eip_redirection = 1;
                                dlfree((void *)pbbl->bbl_buffer);
                                
                                pbbl->bbl_buffer = (ULONG_PTR)dlmalloc(sizeof(x86dis));
                                memcpy((void *)pbbl->bbl_buffer, px86, sizeof(x86dis));        
                                b_eip_redirection = 1;
                                
                                if (store_as_next == 0)
                                        parea->pbbl_array[(unsigned long)ptr - parea->vmmap_base] = pbbl;
                                
                                inst_count++;
                                bbl_size += len;
                                ptr+=len;
                                write_ptr += len;       
                        }
                        //remove code which inlines jcc at the end of buffer
                        //and emulate them...
                        else if (decodeIsJcc(px86)){
                                //assemble valid jcc with 2 exits thus we don't emulate jcc
                                //anymore... (lets see if it's a big speedup)....
                                //operand1 is used as the displacement relative to the next instruction
                                dest = (unsigned long)ptr + px86->len + px86->operand1;
                        
                                //Judging by the asm_* helper names, the emitted stub is:
                                //      mov  fs:[TEB_THREAD_SAVE_EAX], eax
                                //      mov  eax, fs:[TEB_THREAD_CONTEXT]
                                //      jcc  short +15
                                //      mov  [eax + reg_eip], <next instruction> ; jmp g_vm_enter
                                //      mov  [eax + reg_eip], <branch target>    ; jmp g_vm_enter
                                //NOTE(review): the size added below must cover these bytes;
                                //verify against the encoders' actual output sizes
                                pbbl->bbl_buffer = (ULONG_PTR)dlrealloc((void *)pbbl->bbl_buffer, bbl_size + 7 + 7 + 2 + 10 + 5 + 10 + 5);
                                exec_buffer = (void *)(pbbl->bbl_buffer + bbl_size);                 
                                exec_buffer = asm_add_fs_prefix(exec_buffer);
                                exec_buffer = asm_mov_mem32_reg(exec_buffer, TEB_THREAD_SAVE_EAX, REG_EAX);
                                exec_buffer = asm_add_fs_prefix(exec_buffer);
                                exec_buffer = asm_mov_reg_mem32(exec_buffer, REG_EAX, TEB_THREAD_CONTEXT);
                                exec_buffer = asm_jcc_short(exec_buffer, px86->iclass, 15);
                                exec_buffer = asm_mov_reg_mem32_imm(exec_buffer, REG_EAX, FIELD_OFFSET(context, reg_eip), (unsigned long)ptr + px86->len); 
                                make_jmp(exec_buffer, (void *)g_vm_enter);
                                exec_buffer = (void *)((ULONG_PTR)exec_buffer + 5);     //step over the 5 bytes written by make_jmp
                                exec_buffer = asm_mov_reg_mem32_imm(exec_buffer, REG_EAX, FIELD_OFFSET(context, reg_eip), (unsigned long)dest); 
                                make_jmp(exec_buffer, (void *)g_vm_enter);
                                
                                if (store_as_next == 0)
                                        parea->pbbl_array[(unsigned long)ptr - parea->vmmap_base] = pbbl;
                                
                                b_eip_redirection = 1;
                                inst_count++;
                                bbl_size += len;
                                ptr+=len;
                                write_ptr += len;
                                break;
                        }
                        break;        
                }
                //ordinary instruction: append its bytes to the buffer
                bbl_size += len;
                pbbl->bbl_buffer = (ULONG_PTR)dlrealloc((void *)pbbl->bbl_buffer, bbl_size);
                memcpy((void *)(pbbl->bbl_buffer + write_ptr), ptr, len);
                write_ptr += len;
                inst_count++;
                
                if (store_as_next == 0)
                        parea->pbbl_array[(unsigned long)ptr - parea->vmmap_base] = pbbl;
                
                ptr+= len;
                
                if (store_as_next == 1)
                        break; 
                if (b_one_instruction == TRUE && inst_count == 1) break;               
        }
        
        //nothing was accepted: undo the allocations and report failure
        if (inst_count == 0){
                DbgPrint(("%s -- no instructions in bbl...", __FUNCTION__));
                dlfree((void *)pbbl->bbl_buffer);
                memset(pbbl, 0, sizeof(bbl_struct));
                dlfree(pbbl);
                return NULL;
        }
                
        pbbl->bbl_end = (ULONG_PTR)ptr;
        
        //keep an untouched copy of the original bytes [bbl_start, bbl_end)
        pbbl->bbl_alternate_buffer = (ULONG_PTR)dlmalloc(pbbl->bbl_end - pbbl->bbl_start);
        memcpy((void *)pbbl->bbl_alternate_buffer, (void *)pbbl->bbl_start, pbbl->bbl_end - pbbl->bbl_start);
        
        pbbl->bbl_inst_count = inst_count;
        if (b_eip_redirection == 0){
                //append the exit stub after the copied instructions; judging by the
                //asm_* helper names it saves eax in the TEB, loads the context pointer,
                //stores the address of the next instruction in context.reg_eip and
                //jumps to the tracer entry (the int 2Eh variant for int 2Eh blocks)
                pbbl->bbl_buffer = (ULONG_PTR)dlrealloc((void *)pbbl->bbl_buffer, bbl_size + 0x20);
                exec_buffer = (unsigned char *)(pbbl->bbl_buffer + write_ptr);
        
        
                exec_buffer = asm_add_fs_prefix(exec_buffer);
                exec_buffer = asm_mov_mem32_reg(exec_buffer, TEB_THREAD_SAVE_EAX, REG_EAX);
                exec_buffer = asm_add_fs_prefix(exec_buffer);
                exec_buffer = asm_mov_reg_mem32(exec_buffer, REG_EAX, TEB_THREAD_CONTEXT);
                exec_buffer = asm_mov_reg_mem32_imm(exec_buffer, REG_EAX, FIELD_OFFSET(context, reg_eip), (unsigned long)ptr); 
                if (int2e_handle)
                        //exec_buffer = asm_jmp_mem32(exec_buffer, (unsigned long)&g_vm_int2e_vm_enter);
                        make_jmp(exec_buffer, (void *)g_vm_int2e_vm_enter);
                else 
                        make_jmp(exec_buffer, (void *)g_vm_enter);   
        }
                
        //register the block: a page-crossing block is linked from both areas
        //(pbbl_next here, pbbl_prev in the area containing bbl_end); any other
        //block goes on the area's list and into its lookup table
        if (store_as_next == 1){
                parea->pbbl_next = pbbl;
                parea = vmmap_get_area((void *)pbbl->bbl_end);
                parea->pbbl_prev = pbbl;
        }else{
                InsertTailList(&parea->bbl_list, (PLIST)pbbl);
                parea->pbbl_array[pbbl->bbl_start - parea->vmmap_base] = pbbl;
        }
        
        return pbbl;
}

/******************************************************************************
 * traceProcessNextBuffer
 *
 * Reached whenever control comes back into the tracer with the traced
 * thread's register state stored in pctx. Resolves the basic block (bbl) for
 * pctx->reg_eip -- building or rebuilding it when needed -- and then either
 *   - emulates the instruction here (blocks flagged bbl_eip_redirection)
 *     and loops back to __doagain with the updated eip, or
 *   - stores the translated address in TEB_THREAD_JMP and leaves through
 *     g_vm_exit.
 *
 * pctx -- per-thread trace context; registers, TF bookkeeping (tf_set /
 *         tf_inject), syscall fields and current_bbl are updated in place.
 *
 * trace_lock is taken below and released just before fnExecBbl().
 * NOTE(review): nothing follows fnExecBbl(), and several handlers keep going
 * after traceInjectException() as if it never came back; both presumably do
 * not return here -- confirm.
 ******************************************************************************/
VOID    __cdecl traceProcessNextBuffer(__in context *pctx){
        unsigned long prot;
        pbbl_struct   pbbl;
        px86dis       px86;
        x86dis        temp_buffer;
        EXECBBL       fnExecBbl;
        NTSTATUS      status;
        unsigned long dest;
        unsigned long *pstack;
        
        /*****************************************************
         * if exception happened inside of writable BBL, and
         * other thread has lock, and is waiting for bbl to be
         * free, we will deadlock, thus we will decrement ptr
         * and only then try to acquire lock...
         *****************************************************/
        traceVmEnterFixStack(pctx);
         
        if (pctx->current_bbl){
                pbbl = (pbbl_struct)pctx->current_bbl;
                _InterlockedDecrement(&pbbl->bbl_refcount); 
                pbbl = NULL;
        }   
        
        acquire_spin_lock(&trace_lock);   
                        
        /* the entry stub parked the traced EAX in the TEB; pull it back */
        pctx->reg_eax = __readfsdword(TEB_THREAD_SAVE_EAX);
        px86 = &temp_buffer;
                
__doagain:      
        /***********************************************************************
         * A single-step exception is pending from the previous instruction:
         * clear the bookkeeping and deliver it.
         ***********************************************************************/
        if (pctx->tf_inject){
                pctx->tf_inject = 0;
                pctx->tf_set    = 0;
                DbgPrint(("%s -- injecting TF for exception...", __FUNCTION__)); 
                traceInjectException(pctx, STATUS_SINGLE_STEP, 0,0);
        }
        
        /***********************************************************************
         * If eip somehow ended up in real ntdll.dll, rebase it to go for shadow
         * ntdll.dll
         ***********************************************************************/          
        if (pctx->reg_eip >= g_ntdllbase && pctx->reg_eip < g_ntdllend){
                pctx->reg_eip -= g_ntdllbase;
                pctx->reg_eip += REBASED_NTDLL_BASE;        
        }
                      
        /* eip inside the tracer itself: log and hang (EB FE encodes jmp $) */
        if (pctx->reg_eip > trace_base && pctx->reg_eip < trace_end){
                DbgPrint(("%s -- Wooops trace ended up in insturmentation code...", __FUNCTION__));
                DbgPrint(("%s -- pbbl : %.08X", __FUNCTION__, pctx->current_bbl));
                pbbl = (pbbl_struct)pctx->current_bbl;
                DbgPrint(("%s -- bbl_start : %.08X bbl_end : %.08X", __FUNCTION__, pbbl->bbl_start, pbbl->bbl_end));
                __asm _emit 0xEB
                __asm _emit 0xFE
        }
                        
        pbbl = traceFindBbl(pctx->reg_eip);
        /*************************************************
         * if we have TF set, we must make sure that we 
         * build basic block with only one instruction,
         * and just before we execute this block tf_inject
         * will be set, thus on next run code knows what 
         * to do...
         *************************************************/
        if (pctx->tf_set){
                if (pbbl){
                        traceFreeBblRange((void *)(pctx->reg_eip & 0xFFFFF000), 0x1000);        
                }
                pbbl = traceBuildBbl((void *)pctx->reg_eip, TRUE);
        }
        
        if (!pbbl){
                pbbl = traceBuildBbl((void *)pctx->reg_eip, FALSE);
        }else{
                /************************************************************************
                 * If basic block exists, check it's protection to know if we need to
                 * compare it, or we need to rebuild it if content has changed. This
                 * affects only VMMAP_WRITE and VMMAP_WAS_WRITE
                 ************************************************************************/
                prot = vmmapGetProtection((void *)pctx->reg_eip);
                if (prot == 0){
                        DbgPrint(("%s -- don't have protection for : %.08X", __FUNCTION__, pctx->reg_eip));
                        /* jmp $ -- as written the injection below is never reached */
                        __asm _emit 0xEB
                        __asm _emit 0xFE
                        traceInjectException(pctx, STATUS_ACCESS_VIOLATION, pctx->reg_eip, 0); 

                }
                if (prot & VMMAP_WRITE || prot & VMMAP_WAS_WRITE){
                        /*********************************************************
                         * check if writable code has changed, if so rebuild it...
                         * (bbl_alternate_buffer holds the bytes the block was
                         * built from; compare from eip to the end of the block)
                         *********************************************************/
                        if (ntmemcmp((void *)(pbbl->bbl_alternate_buffer + pctx->reg_eip - pbbl->bbl_start), (void *)pctx->reg_eip, pbbl->bbl_end - pctx->reg_eip)){
                                traceFreeBblRange((void *)(pctx->reg_eip & 0xFFFFF000), 0x1000);
                                pbbl = traceBuildBbl((void *)pctx->reg_eip, FALSE);
                        }   
                        /*********************************************************
                         * remove was write thus we don't touch this code anymore
                         * for comparation...
                         *********************************************************/                     
                        if (prot & VMMAP_WAS_WRITE)
                                vmmapRemoveWasWrite((void *)pctx->reg_eip);
                }
        }
                
        if (pbbl == NULL){
                DbgPrint(("%s -- pbbl == NULL for eip : %.08X" ,__FUNCTION__, pctx->reg_eip));
                //now do ProbeForRead to throw proper exception...
                status = ProbeForRead((PVOID)pctx->reg_eip, 1, 1);
                if (status != STATUS_SUCCESS){
                        traceInjectException(pctx, status, pctx->reg_eip, 0);
                }    
                DbgPrint(("%s -- There was no exception when reading eip but no bbl for eip : %.08X", __FUNCTION__, pctx->reg_eip));     
                __asm jmp $
        }
        
        /***********************************************************************
         * Blocks flagged bbl_eip_redirection are not executed: bbl_buffer is
         * read as the decoded instruction and the instruction is emulated on
         * pctx right here.
         ***********************************************************************/
        if (pbbl->bbl_eip_redirection){
                px86 = (px86dis)pbbl->bbl_buffer;        

                if (px86->iclass == XED_ICLASS_CALL_NEAR || px86->iclass == XED_ICLASS_JMP){
                        dest = emuCallAndJmp(pctx, px86, (void *)pctx->reg_eip);
                        instrumentCallJmpRet(px86, dest, pctx->reg_eip);                                                 
                        pctx->reg_eip = dest;
                        goto __check_tf_injection;        
                }else if (px86->iclass == XED_ICLASS_RET_NEAR){
                        dest = emuRet(pctx, px86, (void *)pctx->reg_eip);  
                        instrumentCallJmpRet(px86, dest, pctx->reg_eip);                       
                        pctx->reg_eip = dest;
                        goto __check_tf_injection;        
                }else if (px86->iclass == XED_ICLASS_SYSENTER){
                        pctx->syscall_number = pctx->reg_eax;
                        /* NOTE(review): +8 presumably skips two return addresses above the arguments -- confirm */
                        pctx->syscall_arguments = pctx->reg_edx + 8;
                                                
                        /* non-zero: the syscall was emulated, step over sysenter */
                        if (tracePreSyscall(pctx)){
                                pctx->reg_eip += px86->len;
                                goto __doagain;
                        }
                                               
                        pctx->current_bbl = 0;                        
                        __writefsdword(TEB_THREAD_JMP, (ULONG_PTR)g_vm_sysenter_buffer);
                        fnExecBbl = (EXECBBL)g_vm_exit;
                        
                        /*****************************************
                         * release lock and execute BBL... usually
                         * it's done from traceBuildAndExecute 
                         *****************************************/                        
                        goto __ExecuteBbl;                        
                }else if (px86->iclass == XED_ICLASS_IRETD){
                        DbgPrint(("%s -- IRETD about to be executed", __FUNCTION__));
                        status = ProbeForRead((void *)pctx->reg_esp, 3, sizeof(DWORD));
                        if (status != STATUS_SUCCESS){
                                /* frame is not readable: report the fault to the traced
                                 * code instead of faulting inside the tracer, same as
                                 * the POPFD handler does */
                                traceInjectException(pctx, status, pctx->reg_esp, 0);
                        }
                        
                        /* frame read here: [0] = eip, [2] = eflags; [1] is skipped */
                        pstack = (unsigned long *)pctx->reg_esp;
                        
                        pctx->reg_eip = pstack[0];
                        if (pctx->tf_set == 1)
                                pctx->tf_inject = 1;
                        pctx->reg_eflags = pstack[2];
                        pctx->reg_esp += 0xC;
                        
                        /* never keep TF (0x100) in the emulated flags; track it in tf_set */
                        if (TestBit(pctx->reg_eflags, F_TF)){
                                DbgPrint(("%s -- iretd cotaines TF...", __FUNCTION__));
                                pctx->reg_eflags &= 0xFFFFFEFF;
                                pctx->tf_set = 1;
                        }
                        goto __doagain;
                }else if (px86->iclass == XED_ICLASS_POPFD){
                        DbgPrint(("%s -- popfd executed...", __FUNCTION__));
                        status = ProbeForRead((void *)pctx->reg_esp, 1, sizeof(DWORD));
                        if (status != STATUS_SUCCESS){
                                traceInjectException(pctx, status, pctx->reg_esp, 0);
                        }
                        pctx->reg_eflags = *(unsigned long *)pctx->reg_esp;
                        pctx->reg_esp+=4;
                        
                        /***************************************************
                         * tf was set prior to this popfd, thus we need to
                         * inject exception once it's executed...
                         ***************************************************/
                        if (pctx->tf_set == 1)
                                pctx->tf_inject = 1;
                        
                        if (TestBit(pctx->reg_eflags, F_TF)){
                                DbgPrint(("%s -- popfd contains TF...", __FUNCTION__));
                                pctx->reg_eflags &= 0xFFFFFEFF;
                                pctx->tf_set = 1;
                        }
                        pctx->reg_eip += px86->len;
                        goto __doagain;
                }else if (px86->iclass == XED_ICLASS_PUSHFD){
                        DbgPrint(("%s -- pushfd executed...", __FUNCTION__));
                        pctx->reg_esp -= 4;
                        status = ProbeForWrite((void *)pctx->reg_esp, 1, sizeof(DWORD));
                        if (status != STATUS_SUCCESS){
                                traceInjectException(pctx, status, pctx->reg_esp, 1);
                        }
                        
                        /* TF is kept out of reg_eflags, so put it back in the pushed image */
                        if (pctx->tf_set)
                                *(unsigned long *)pctx->reg_esp = pctx->reg_eflags | 0x100;
                        else
                                *(unsigned long *)pctx->reg_esp = pctx->reg_eflags;
                        pctx->reg_eip += px86->len;
                        goto __check_tf_injection;                
                }else if (px86->iclass == XED_ICLASS_RDTSC){
                        /* RDTSC returns the counter in EDX:EAX; hand out the fake
                         * counter and advance it by a fixed step per read */
                        pctx->reg_eax = rdtsc.LowPart;
                        pctx->reg_edx = rdtsc.HighPart;
                        rdtsc.QuadPart += 0x10;
                        pctx->reg_eip += px86->len;
                        goto __check_tf_injection;        
                }else if (px86->iclass == XED_ICLASS_UD2 || px86->iclass == XED_ICLASS_INVALID){
                        traceInjectException(pctx, STATUS_ILLEGAL_INSTRUCTION, 0, 0);  
                /****************************************************************************************
                 * XED_ICLASS_INT is never handled here as int is executed inside of bbl
                 * (int 2e is handled through bbl_syscall_int2e further below).
                 ******************************************************************************************/
                }else if (px86->iclass == XED_ICLASS_INT3 || px86->iclass == XED_ICLASS_INT1){
                        if (px86->iclass == XED_ICLASS_INT3)
                                traceInjectException(pctx, STATUS_BREAKPOINT, 0,0);
                        else
                                traceInjectException(pctx, STATUS_ACCESS_VIOLATION, pctx->reg_eip, 0);        
                        
                }else if (px86->iclass == XED_ICLASS_LOOP   ||
                          px86->iclass == XED_ICLASS_LOOPE  ||
                          px86->iclass ==  XED_ICLASS_LOOPNE){
                        pctx->reg_eip = emuLoop(pctx, px86, (void *)pctx->reg_eip);
                        goto __check_tf_injection;
                }else if (px86->iclass == XED_ICLASS_JMP_FAR){
                        /* NOTE(review): eip is left unchanged here, so the loop comes
                         * straight back to this branch */
                        DbgPrint(("XED_ICLASS_JMP_FAR...needs to be handled properly..."));
                }else{
                        /* everything else flagged for redirection goes through emuJcc */
                        pctx->reg_eip = emuJcc(pctx, px86, (void *)pctx->reg_eip);
                        goto __check_tf_injection;       
                }
__check_tf_injection:
                /* one instruction was emulated under TF: deliver single-step next round */
                if (pctx->tf_set == 1){
                        pctx->tf_inject = 1;                
                }
                goto __doagain;        
        }
  
        /* translated address that corresponds to eip inside this block */
        __writefsdword(TEB_THREAD_JMP, (ULONG_PTR)pbbl->bbl_buffer + (pctx->reg_eip - pbbl->bbl_start));
            
        if (pbbl->bbl_syscall_int2e == 1 || pbbl->bbl_syscall_int2b == 1){
                /* no refcount is taken on these blocks */
                pctx->current_bbl = 0;
                if (pbbl->bbl_syscall_int2e){
                        pctx->syscall_number    = pctx->reg_eax;
                        pctx->syscall_arguments = pctx->reg_edx;
                        /* return value ignored on this path */
                        tracePreSyscall(pctx);        
                }
        }else{
                /* pin the block while this thread runs inside it; the reference
                 * is dropped at the top of this function on the next entry */
                pctx->current_bbl = (ULONG_PTR)pbbl;
                _InterlockedIncrement(&pbbl->bbl_refcount);
        }
        
        /* redirection blocks were all handled above; hang if one gets here */
        if (pbbl->bbl_eip_redirection){
                DbgPrint(("%s -- error pbbl with eip redirection goes out?!?!?!", __FUNCTION__));
                __asm _emit 0xEB
                __asm _emit 0xFE        
        }
        
        fnExecBbl = (EXECBBL)g_vm_exit;
__ExecuteBbl:
        if (pctx->tf_set){
                pctx->tf_inject = 1;
        }
        
        release_spin_lock(&trace_lock);
        traceVmExitFixStack(pctx);
        fnExecBbl(); 
}

/******************************************************************************
 * tracePreSyscall
 *
 * Looks at the system call described by pctx->syscall_number and
 * pctx->syscall_arguments before it is issued and emulates the ones the
 * tracer has to take over (NtContinue, NtTerminateThread on the current
 * thread, NtTerminateProcess with a NULL handle, NtSuspendThread).
 *
 * pctx -- per-thread trace context; reg_eax receives the NTSTATUS of an
 *         emulated call, tf_set / tf_inject / current_bbl may be updated.
 *
 * Returns non-zero when the call was performed here, so the caller must not
 * issue it again, and 0 when the real system call still has to run.
 ******************************************************************************/
unsigned long tracePreSyscall(__in context *pctx){
        CONTEXT *pc;
        pbbl_struct     pbbl;
        PULONG_PTR      stack;
        unsigned long   b_handled = 0;
        NTSTATUS        status;
        BOOLEAN         Alertable;
        THREAD_BASIC_INFORMATION tbi;
        ULONG           cbNeeded;
        
        /* stack[n] is the n-th argument of the system call */
        stack = (PULONG_PTR)pctx->syscall_arguments;
                
        if (pctx->syscall_number == NtContinue_sn){
                pc = (CONTEXT *)stack[0];
                Alertable = (BOOLEAN)stack[1];
                DbgPrint(("%s -- NtContinue executed...", __FUNCTION__));
                DbgPrint(("%s -- NtContinue target eip : %.08X", __FUNCTION__, pc->Eip));
                DbgPrint(("%s -- NtContinue eflags     : %.08X", __FUNCTION__, pc->EFlags));
                if (TestBit(pc->EFlags, F_TF)){
                        DbgPrint(("%s -- NtContinue injecting TF...", __FUNCTION__));
                }
                DbgPrint(("%s -- Dr0 : %.08X Dr1 : %.08X Dr2 : %.08X Dr3 : %.08X", __FUNCTION__,
                                                                                   pc->Dr0,
                                                                                   pc->Dr1,
                                                                                   pc->Dr2,
                                                                                   pc->Dr3));
                DbgPrint(("%s -- Dr6 : %.08X Dr7 : %.08X", __FUNCTION__,
                                                           pc->Dr6,
                                                           pc->Dr7));
                                                           
                /* send thread start through the shadow copy */
                if (pc->Eip == pRtlUserThreadStart && pRtlUserThreadStart != 0)
                        pc->Eip = pRtlUserThreadStartShadow;
                                                       
                pbbl = traceFindBbl(pc->Eip);
                if (!pbbl)
                        pbbl = traceBuildBbl((unsigned char *)pc->Eip, FALSE);
                
                /* TF requested by the new context: strip it from EFlags and
                 * rebuild the target as a one-instruction block */
                if (pbbl && TestBit(pc->EFlags, F_TF)){
                        traceFreeBblRange((PVOID)pc->Eip, 0x1000);
                        pc->EFlags &= 0xFFFFFEFF;
                        pctx->tf_set = 1;               //say that TF is set
                        pctx->tf_inject = 1;            //say that TF has to be injected...
                        pbbl = traceBuildBbl((unsigned char *)pc->Eip, TRUE);  
                }
                
                if (pbbl){
                        if (pbbl->bbl_eip_redirection){
                                if (pc->Eip != pKiFastSystemCallRet){
                                        unsigned char *exec_buffer, *ptr;
                                        DbgPrint(("%s -- NtContinue goes to eip_redirection instruction...", __FUNCTION__));
                                        DbgPrint(("%s -- NtContinue redirection to : %.08X", __FUNCTION__, pc->Eip));
                                        /* small stub: save eax in the TEB, store the target in
                                         * context.reg_eip and jump to g_vm_enter.
                                         * NOTE(review): this allocation is not freed in this function */
                                        exec_buffer = ptr = dlmalloc(0x20);
                                        exec_buffer = asm_add_fs_prefix(exec_buffer);
                                        exec_buffer = asm_mov_mem32_reg(exec_buffer, TEB_THREAD_SAVE_EAX, REG_EAX);
                                        exec_buffer = asm_add_fs_prefix(exec_buffer);
                                        exec_buffer = asm_mov_reg_mem32(exec_buffer, REG_EAX, TEB_THREAD_CONTEXT);
                                        exec_buffer = asm_mov_reg_mem32_imm(exec_buffer, REG_EAX, FIELD_OFFSET(context, reg_eip), (unsigned long)pc->Eip); 
                                        make_jmp(exec_buffer, (void *)g_vm_enter);
                                        pc->Eip = (unsigned long)ptr; 
                                }
                                pctx->tf_inject = 0;                              
                        }else{
                                /* continue inside the translated copy of the block */
                                pc->Eip = pbbl->bbl_buffer + pc->Eip - pbbl->bbl_start;                         
                        }
                        pctx->current_bbl = (unsigned long)pbbl;
                        _InterlockedIncrement(&pbbl->bbl_refcount);     
                }
                release_spin_lock(&trace_lock);
                traceVmExitFixStack(pctx);
                status = NtContinue(pc, Alertable);
                /* only reached when NtContinue came back, i.e. it failed */
                acquire_spin_lock(&trace_lock);
                traceVmEnterFixStack(pctx);
                DbgPrint(("%s -- failed to execute NtContinue...", __FUNCTION__));
                pctx->reg_eax = status;
                b_handled = TRUE;
        }else if (pctx->syscall_number == NtTerminateThread_sn){
                /* The query fails for the handle values tested below (e.g. 0), so
                 * never look at tbi unless it was really filled in: zeroed ids
                 * cannot match the current thread or process by accident. */
                memset(&tbi, 0, sizeof(tbi));
                status = NtQueryInformationThread((HANDLE)stack[0], ThreadBasicInformation, &tbi, sizeof(tbi), &cbNeeded);
                if (status != STATUS_SUCCESS)
                        memset(&tbi, 0, sizeof(tbi));
                        
                if (stack[0] == 0 || stack[0] == (ULONG_PTR)-2 || (ULONG)(ULONG_PTR)tbi.ClientId.UniqueThread == get_currentthreadid()){
                        DbgPrint(("%s -- killing all data for a thread... NtTerminateThread detected...", __FUNCTION__));
                        release_spin_lock(&trace_lock);
                        traceVmExitFixStack(pctx);
                        TerminateThreadAsm(dlfree, (PVOID)pctx->reg_esp, stack[1], 3, pctx->thread_stack_base, pctx->psyscall_ret, pctx);   
                }else if ((ULONG)(ULONG_PTR)tbi.ClientId.UniqueProcess == get_currentprocessid()){
                        DbgPrint(("%s -- NtTerminateThread executed to kill different thread : %d...", __FUNCTION__, tbi.ClientId.UniqueThread));
                        //Grab pctx for this thread                        
                }else{
                        DbgPrint(("%s -- killing thread which is not part of this process... no need to simulate...", __FUNCTION__));        
                }

        }else if (pctx->syscall_number == NtMapViewOfSection_sn){
                /* nothing to do before the call */
        }else if (pctx->syscall_number == NtTerminateProcess_sn){
                DbgPrint(("%s -- called NtTerminateProcess...", __FUNCTION__));
                if (stack[0] == 0){
                        DbgPrint(("%s -- called NtTerminateProcess to kill all threads...", __FUNCTION__));        
                        pctx->reg_eax = NtTerminateProcess((HANDLE)(ULONG_PTR)0, stack[1]);
                        b_handled = TRUE;
                }  
        }else if (pctx->syscall_number == NtRaiseException_sn){
                DbgPrint(("%s -- raising exception...", __FUNCTION__));
        }else if (pctx->syscall_number == NtSuspendThread_sn){
                DbgPrint(("%s -- NtSuspendThread called...", __FUNCTION__));
                status = NtSuspendThread((HANDLE)stack[0], (PULONG)stack[1]);
                b_handled = TRUE;
                pctx->reg_eax = status;
        }
        
        return b_handled;
}

/******************************************************************************
 * tracePostSyscall
 *
 * Runs after a system call has returned (status in pctx->reg_eax) and keeps
 * the tracer's bookkeeping in sync with what the call did: vmmap entries,
 * the image list, cached basic blocks, and injection into child processes.
 *
 * pctx -- per-thread trace context; syscall_number selects the handler and
 *         syscall_arguments points at the call's argument list, so stack[n]
 *         below is the n-th argument.
 *
 * Failed calls are ignored. The virtual-memory handlers (allocate, protect,
 * free) also ignore calls whose process handle argument is not (HANDLE)-1.
 ******************************************************************************/
VOID    tracePostSyscall(__in context *pctx){
        PULONG_PTR      stack;
        MEMORY_BASIC_INFORMATION        mbi;
        ULONG_PTR       mem;
        ULONG_PTR       size;
        ULONG_PTR       prot;
        ULONG_PTR       alloc_type;
        HANDLE          hProcess;

        stack = (PULONG_PTR)pctx->syscall_arguments;
        
        if (pctx->syscall_number == NtMapViewOfSection_sn){
                if (pctx->reg_eax != STATUS_SUCCESS && pctx->reg_eax != STATUS_IMAGE_NOT_AT_BASE) return;        
                /* argument 2 is PVOID *BaseAddress; read the address the view got */
                mem = *(ULONG_PTR *)stack[2];
                ntVirtualQuery((PVOID)mem, &mbi, sizeof(mbi));
                if (mbi.Type != MEM_IMAGE){
                        //add this memory to vmmaps as it can be later accessed and executed...
                        if (!(mbi.State & MEM_COMMIT)){
                                return;        
                        }
                        vmmapAdd((PVOID)mem, mbi.RegionSize, mbi.Protect);
                        return;         
                }   
                imageAdd((PVOID)mem);                       
        }else if (pctx->syscall_number == NtUnmapViewOfSection_sn){
                if (pctx->reg_eax != STATUS_SUCCESS) return;
                mem = stack[1];
                imageRemove((PVOID)mem);     
                /* drop cached blocks for whatever range the vmmap had there */
                size = vmmapRemoveRange((PVOID)mem);
                if (size != 0){
                        traceFreeBblRange((PVOID)mem, size);
                }           
        }else if (pctx->syscall_number == NtAllocateVirtualMemory_sn){
                if (pctx->reg_eax != STATUS_SUCCESS) return;        
                if (stack[0] != (ULONG_PTR)-1) return;                        
                /* NtAllocateVirtualMemory(ProcessHandle, *BaseAddress, ZeroBits,
                 *                         *RegionSize, AllocationType, Protect):
                 * AllocationType is argument 4 and Protect is argument 5 */
                mem  = *(ULONG_PTR *)stack[1];
                size = *(ULONG_PTR *)stack[3];
                alloc_type = stack[4];
                prot = stack[5];        
                
                /* only committed memory is tracked */
                if (!(alloc_type & MEM_COMMIT)){
                        return;
                }
                
                vmmapAdd((void *)mem, size, prot);
        }else if (pctx->syscall_number == NtProtectVirtualMemory_sn){
                if (pctx->reg_eax != STATUS_SUCCESS) return;
                if (stack[0] != (ULONG_PTR)-1) return;
                        
                mem  = *(ULONG_PTR *)stack[1];
                size = *(ULONG_PTR *)stack[2];
                prot = stack[3];
                
                vmmapChangeProtection((PVOID)mem, size, prot);
        }else if (pctx->syscall_number == NtFreeVirtualMemory_sn){
                if (pctx->reg_eax != STATUS_SUCCESS) return;
                if (stack[0] != (ULONG_PTR)-1) return;
                        
                mem  =  *(ULONG_PTR *)stack[1];
                size =  *(ULONG_PTR *)stack[2];  
                traceFreeBblRange((PVOID)mem, size);
        }else if (pctx->syscall_number == NtCreateProcess_sn){
                if (pctx->reg_eax != STATUS_SUCCESS) return;
                /* argument 0 is PHANDLE ProcessHandle; fetch the new handle */
                hProcess = (HANDLE)*(ULONG_PTR *)stack[0];
                DbgPrint(("%s -- NtCreateProcess with handle : %.08X", __FUNCTION__, hProcess));
                do_child_inject(hProcess);
        }else if (pctx->syscall_number == NtCreateProcessEx_sn && NtCreateProcessEx_sn != 0){
                if (pctx->reg_eax != STATUS_SUCCESS) return;
                hProcess = (HANDLE)*(ULONG_PTR *)stack[0];
                DbgPrint(("%s -- NtCreateProcessEx with handle : %.08X", __FUNCTION__, hProcess));
                do_child_inject(hProcess);
        }else if (pctx->syscall_number == NtCreateUserProcess_sn && NtCreateUserProcess_sn != 0){
                if (pctx->reg_eax != STATUS_SUCCESS) return;
                hProcess = (HANDLE)*(ULONG_PTR *)stack[0];
                DbgPrint(("%s -- NtCreateUserProcess with handle : %.08X", __FUNCTION__, hProcess));
                do_child_inject(hProcess);
        }
}

/******************************************************************************
 * tracePostInt2e
 *
 * Re-entry path used after an int 2e system call: reloads the traced EAX
 * from the TEB save slot, runs the post-syscall bookkeeping and hands
 * control to traceProcessNextBuffer for the next block.
 *
 * pctx -- per-thread trace context.
 *
 * NOTE(review): traceProcessNextBuffer calls traceVmEnterFixStack and
 * acquire_spin_lock(&trace_lock) again, so trace_lock is acquired a second
 * time by the same thread -- confirm the lock tolerates that.
 ******************************************************************************/
VOID    tracePostInt2e(__in context *pctx){
        /* switch the TEB stack bounds first, then serialize with other threads */
        traceVmEnterFixStack(pctx);
        acquire_spin_lock(&trace_lock);

        /* EAX of the traced code is parked in the TEB */
        pctx->reg_eax = __readfsdword(TEB_THREAD_SAVE_EAX);

        tracePostSyscall(pctx);
        traceProcessNextBuffer(pctx);
}
/******************************************************************************
 * tracePostSysenter
 *
 * Re-entry path used after a sysenter system call: reloads the traced EAX
 * from the TEB save slot, stores pKiFastSystemCallRetShadow in the saved
 * ECX, runs the post-syscall bookkeeping and hands control to
 * traceProcessNextBuffer for the next block.
 *
 * pctx -- per-thread trace context.
 *
 * NOTE(review): traceProcessNextBuffer calls traceVmEnterFixStack and
 * acquire_spin_lock(&trace_lock) again, so trace_lock is acquired a second
 * time by the same thread -- confirm the lock tolerates that.
 ******************************************************************************/
VOID    tracePostSysenter(__in context *pctx){
        /* switch the TEB stack bounds first, then serialize with other threads */
        traceVmEnterFixStack(pctx);
        acquire_spin_lock(&trace_lock);

        /* register state as the traced code should see it after the call */
        pctx->reg_eax = __readfsdword(TEB_THREAD_SAVE_EAX);
        pctx->reg_ecx = pKiFastSystemCallRetShadow;

        /* in case we need to handle stuff after we returned from syscall... */
        tracePostSyscall(pctx);
        traceProcessNextBuffer(pctx);
}


/******************************************************************************
 * traceInjectException
 *
 * Delivers an exception to the traced thread: builds EXCEPTION_POINTERS,
 * EXCEPTION_RECORD and CONTEXT on the traced stack just below pctx->reg_esp,
 * points reg_esp at that frame and reg_eip at pKiUserExceptionDispatcherShadow,
 * then continues through traceProcessNextBuffer.
 *
 * pctx             -- per-thread trace context (register state is the source
 *                     of the CONTEXT that gets written).
 * ExceptionCode    -- code stored in the exception record.
 * ExceptionAddress -- faulting data address; stored as ExceptionInformation[1]
 *                     only for STATUS_ACCESS_VIOLATION and
 *                     STATUS_PAGE_FAULT_GUARD_PAGE.
 * b_write          -- stored as ExceptionInformation[0] for the same two codes.
 *
 * If the frame cannot be written to the traced stack the thread is terminated
 * through TerminateThreadAsm.
 * NOTE(review): callers carry on after this call as if it never returned;
 * presumably traceProcessNextBuffer does not come back -- confirm.
 ******************************************************************************/
VOID    traceInjectException(__in context *pctx, __in DWORD ExceptionCode, __in DWORD ExceptionAddress, __in DWORD b_write){
        PEXCEPTION_POINTERS     pExceptionPointers;
        EXCEPTION_RECORD        ExceptionRecord;
        CONTEXT                 ctx;
        ULONG_PTR               pExceptionStack;
        NTSTATUS                status;
        
        
        memset(&ExceptionRecord, 0, sizeof(ExceptionRecord));
        memset(&ctx,0, sizeof(CONTEXT));
        
        /* the record always reports the current traced eip as the exception address */
        ExceptionRecord.ExceptionCode    = ExceptionCode;
        ExceptionRecord.ExceptionAddress = (PVOID)pctx->reg_eip; 
        if (ExceptionCode == STATUS_ACCESS_VIOLATION || ExceptionCode == STATUS_PAGE_FAULT_GUARD_PAGE){
                ExceptionRecord.ExceptionInformation[0] = b_write;                       
                ExceptionRecord.ExceptionInformation[1] = ExceptionAddress;
                ExceptionRecord.NumberParameters        = 2;
        }
        
        /* frame layout, low to high: EXCEPTION_POINTERS, EXCEPTION_RECORD, CONTEXT */
        pExceptionStack = pctx->reg_esp - sizeof(EXCEPTION_POINTERS) - sizeof(EXCEPTION_RECORD) - sizeof(CONTEXT);
        
        /* the frame is written below, so the region has to be writable, not
         * merely readable -- otherwise the tracer itself would fault while
         * holding trace_lock */
        status = ProbeForWrite((PVOID)pExceptionStack, sizeof(EXCEPTION_POINTERS) + sizeof(EXCEPTION_RECORD) + sizeof(CONTEXT), 1);
        if (status != STATUS_SUCCESS){
                DbgPrint(("Failed to allocate stack for exception injection..."));
                release_spin_lock(&trace_lock);
                traceVmExitFixStack(pctx);
                TerminateThreadAsm(dlfree, (PVOID)pctx->reg_esp, 0, 3, pctx->thread_stack_base, pctx->psyscall_ret, pctx);        
        }
        
        //needed to fill seg regs, as I don't touch them at all...
        ctx.ContextFlags = CONTEXT_ALL;
        NtGetContextThread((HANDLE)(ULONG_PTR)CURRENT_THREAD, &ctx);
        
        /* general purpose state comes from the traced context, not from the tracer */
        ctx.Eax          = pctx->reg_eax;
        ctx.Ecx          = pctx->reg_ecx;
        ctx.Edx          = pctx->reg_edx;
        ctx.Ebx          = pctx->reg_ebx;
        ctx.Esp          = pctx->reg_esp;
        ctx.Ebp          = pctx->reg_ebp;
        ctx.Esi          = pctx->reg_esi;
        ctx.Edi          = pctx->reg_edi;
        ctx.Eip          = pctx->reg_eip;
        ctx.EFlags       = pctx->reg_eflags;
        
        /************************************************************
         * Exception was delivered while TF was set, thus we need
         * to mark Eflags with 0x100 also...
         ************************************************************/
        if (pctx->tf_set == 1)
                ctx.EFlags |= 0x100;
        
        pExceptionPointers = (PEXCEPTION_POINTERS)pExceptionStack;
        
        pExceptionPointers->ExceptionRecord = (PEXCEPTION_RECORD)(pExceptionStack + sizeof(EXCEPTION_POINTERS));
        pExceptionPointers->ContextRecord   = (PCONTEXT)(pExceptionStack + sizeof(EXCEPTION_POINTERS) + sizeof(EXCEPTION_RECORD));
        
        memcpy(pExceptionPointers->ExceptionRecord, &ExceptionRecord, sizeof(ExceptionRecord));
        memcpy(pExceptionPointers->ContextRecord, &ctx, sizeof(ctx));
        
        /* resume the traced thread in the shadow exception dispatcher, esp on the frame */
        pctx->reg_esp = (ULONG_PTR)pExceptionPointers;
        pctx->reg_eip = (ULONG_PTR)pKiUserExceptionDispatcherShadow;
        
        /***********************************************************
         * pKiUserExceptionDispatcher is already in BasicBlocks...
         * so it will never be rebuilt... well maybe...
         ***********************************************************/
        pctx->current_bbl = 0;
        DbgPrint(("%s -- injecting exception... %.08X at %.08X", __FUNCTION__, ExceptionCode, ctx.Eip));
        instrumentException(pExceptionPointers);
        traceProcessNextBuffer(pctx);
}