Changeset 15229


Ignore:
Timestamp:
Mar 4, 2012, 2:55:10 PM (7 years ago)
Author:
gb
Message:

When the GC tries to zero out recycled pages, it's traditionally
done so by calling memset(p,0,n). Profiling output indicates that
a lot of time is spent in this call to memset. When the GC uses
it, the address p is dnode-aligned and n is a multiple of the dnode
size, so try to exploit this by using a new function zero_dnodes().
The initial implementations may be faster than memset and there's
lots of room for easy improvement. (The PPC and ARM versions aren't
tested yet.)

Location:
trunk/source/lisp-kernel
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/source/lisp-kernel/arm-asmutils.s

    r15137 r15229  
    196196_endfn               
    197197
     198/* zero N (r1) dnodes, starting at the dnode-aligned address in r0; when N is 0 nothing is stored */
     199_exportfn(C(zero_dnodes))
     200        __(cmp r1,#0)                   /* set Z up front so that N == 0 falls straight through the bne at 1: */
     201        __(adr r2,2f)                   /* r2 = address of the zero constant at 2: */
     202        __(fldd d0,[r2,#0])             /* d0 = 64 bits of zero */
     203        __(b 1f)                        /* enter the loop at its test */
     2040:      __(subs r1,r1,#1)               /* count down; these are the flags the bne at 1: tests on later passes */
     205        __(fstd d0,[r0])                /* one doubleword store of zero; NOTE(review): assumes a dnode is 8 bytes on ARM - confirm */
     206        __(add r0,r0,#dnode_size)       /* step to the next dnode; plain add (no s suffix) leaves the flags alone */
     2071:      __(bne 0b)                      /* keep going while the remaining count is nonzero */
     208        __(bx lr)                       /* return to the caller */
     209        .align 3                        /* 8-byte align the constant that fldd reads */
     2102:      .long 0                         /* low word of the zero doubleword loaded into d0 */
     211        .long 0                         /* high word of the same doubleword */
     212_endfn                       
    198213                               
    199214        _endfile
  • trunk/source/lisp-kernel/gc-common.c

    r15202 r15229  
    13531353}
    13541354
     1355extern void zero_dnodes(void *,natural); /* (dnode-aligned start address, number of dnodes to zero); the per-architecture versions in this changeset are in the *-asmutils.s files */
     1356
    13551357void
    13561358gc(TCR *tcr, signed_natural param)
     
    17051707      last_zeroed_addr = a->high;
    17061708    }
    1707     zero_memory_range(a->active, last_zeroed_addr);
     1709    zero_dnodes(a->active, area_dnode(last_zeroed_addr,a->active));
    17081710
    17091711    /*
  • trunk/source/lisp-kernel/ppc-asmutils.s

    r15137 r15229  
    428428_endfn
    429429
    430 
     430/* zero N (r4) dnodes, starting at the dnode-aligned address in r3; when N is 0 nothing is stored */
     431_exportfn(C(zero_dnodes))
     432        __(cmpri(r4,0))                 /* presumably sets the condition tested by the bne at 1: so that N == 0 skips the loop - confirm the macro */
     433        __(li r5,0)                     /* r5 = 0, value stored into the first node of each dnode */
     434        __(li r6,0)                     /* r6 = 0, value stored into the second node of each dnode */
     435        __(b 1f)                        /* enter the loop at its test */
     4360:      __(subi. r4,r4,1)               /* count down and record the result for the bne at 1:. NOTE(review): addi/subi have no record (dot) form in the PowerPC ISA; subic. is probably what was meant - confirm, the commit message says this version is untested */
     437        __(str(r5,0(r3)))               /* zero the node at offset 0 (str is presumably the natural-width store macro) */
     438        __(str(r6,node_size(r3)))       /* zero the node one node further on */
     439        __(la r3,dnode_size(r3))        /* step r3 to the next dnode; la does not touch the condition register */
     4401:      __(bne 0b)                      /* keep going while the remaining count is nonzero */
     441        __(blr)                         /* return to the caller */
     442_endfn               
    431443        _endfile
  • trunk/source/lisp-kernel/x86-asmutils32.s

    r15137 r15229  
    280280_endfn                                       
    281281        __endif
     282
     283/* zero arg1 dnodes, starting at the dnode-aligned address in arg0; both arguments are read from the stack, and a count of 0 stores nothing */
     284_exportfn(C(zero_dnodes)) 
     285        __(xorl %eax,%eax)              /* eax = 0, the value stored into every word */
     286        __(mov 4(%esp),%edx)            /* edx = arg0, the start address */
     287        __(mov 8(%esp),%ecx)            /* ecx = arg1, the number of dnodes */
     288        __(testl %ecx,%ecx)             /* set ZF up front so that a count of 0 falls straight through the jne at 1: */
     289        __(jmp 1f)                      /* enter the loop at its test */
     2900:      __(mov %eax,0(%edx))            /* zero the first 4 bytes of the dnode */
     291        __(mov %eax,4(%edx))            /* zero the next 4 bytes; NOTE(review): two 4-byte stores assume an 8-byte dnode - confirm */
     292        __(lea dnode_size(%edx),%edx)   /* step to the next dnode; lea leaves the flags alone */
     293        __(subl $1,%ecx)                /* count down; these are the flags the jne at 1: tests on later passes */
     2941:      __(jne 0b)                      /* keep going while the remaining count is nonzero */
     295        __(repret)                      /* return; repret is presumably the rep-prefixed ret macro - confirm */
     296_endfn       
    282297        _endfile
    283298
  • trunk/source/lisp-kernel/x86-asmutils64.s

    r15137 r15229  
    276276_endfn                                       
    277277        __endif
     278
     279/* zero N (%rsi) dnodes, starting at the dnode-aligned address in %rdi; when N is 0 nothing is stored */
     280_exportfn(C(zero_dnodes))
     281        __(pxor %xmm0,%xmm0)            /* xmm0 = 128 bits of zero */
     282        __(cmpq $0,%rsi)                /* set ZF up front so that N == 0 falls straight through the jne at 1: */
     283        __(jmp 1f)                      /* enter the loop at its test */
     2840:      __(movdqa %xmm0,(%rdi))         /* one aligned 16-byte store of zero; movdqa faults on a misaligned address, hence the alignment requirement */
     285        __(lea 16(%rdi),%rdi)           /* step 16 bytes; lea leaves the flags alone. NOTE(review): literal 16 assumes a 16-byte dnode, while the 32-bit version uses the symbolic size - confirm */
     286        __(subq $1,%rsi)                /* count down; these are the flags the jne at 1: tests on later passes */
     2871:      __(jne 0b)                      /* keep going while the remaining count is nonzero */
     288        __(repret)                      /* return; repret is presumably the rep-prefixed ret macro - confirm */
     289_endfn       
    278290        _endfile
Note: See TracChangeset for help on using the changeset viewer.