Opened 5 years ago

Last modified 5 years ago

#1215 new defect

Bignum issue

Reported by: uchida Owned by:
Priority: normal Milestone:
Component: other Version: 1.9
Keywords: Windows bignum Cc:

Description

The following program crashes on Windows.

;;; bignum-test.lisp

;; Call ccl:egc with NIL; (run) later prints the resulting setting via
;; ccl:egc-enabled-p.
(ccl:egc nil)

;; Install a debugger hook that prints the condition and then exits the
;; Lisp with status 1, so an error ends the run instead of stopping in
;; the debugger.
(setf *debugger-hook* #'(lambda (c v)
                          (declare (ignore v))
                          (princ c)
                          (ccl:quit 1)))

;; (defun test ()
;;   (do ((i 1 (1+ i))
;;        (end (+ 500000000)))
;;       ((eql i end))))

;; Count I upward from (1+ most-positive-fixnum) until it is EQL to
;; (+ most-positive-fixnum 500000000).  The loop body is empty; every
;; value of I lies above most-positive-fixnum, so each (1+ i) step
;; operates on a bignum.
(defun test ()
  (do ((i (1+ most-positive-fixnum) (1+ i))
       (end (+ most-positive-fixnum 500000000)))
      ((eql i end))))

;; Entry point: print the implementation version, EGC state and CPU
;; count, start two threads that each run TEST, wait for both to finish,
;; then exit with status 0.
(defun run ()
  (format t "~A egc=~A cpu-count=~A~%" (lisp-implementation-version) (ccl:egc-enabled-p) (ccl:cpu-count))
  (let ((threads))
    ;; The thread count is fixed at 2; the #|...|# block comment keeps the
    ;; alternative of using (ccl:cpu-count) threads.
    (loop repeat 2 #|(ccl:cpu-count)|# do
         (push (ccl:process-run-function "test" #'test) threads))
    (dolist (thread threads)
      (ccl:join-process thread)))
  (ccl:quit 0))
>c:\lispbox-0.7\ccl-1.9x\wx86cl64 -n -l bignum-test.lisp -e (run)
Version 1.9-r15765  (WindowsX8664) egc=NIL cpu-count=8
Fault during read of memory address #x26A9FE0

>c:\lispbox-0.7\ccl-1.9x\wx86cl -n -l bignum-test.lisp -e (run)
Version 1.9-r15764  (WindowsX8632) egc=NIL cpu-count=8
Fault during read of memory address #xD35AFF0

>

Change History (6)

comment:1 Changed 5 years ago by uchida

I've tracked down the problem a bit.

;;; bignum-test2.lisp

;; Call ccl:egc with NIL; (run) later prints the resulting setting via
;; ccl:egc-enabled-p.
(ccl:egc nil)

;; Install a debugger hook that prints the condition and then exits the
;; Lisp with status 1, so an error ends the run instead of stopping in
;; the debugger.
(setf *debugger-hook* #'(lambda (c v)
                          (declare (ignore v))
                          (princ c)
                          (ccl:quit 1)))

;; copied from l0-bignum32.lisp
;; Expands into a ccl::%alloc-misc call that is passed NDIGITS and
;; target::subtag-bignum.
(defmacro %allocate-bignum (ndigits)
  `(ccl::%alloc-misc ,ndigits target::subtag-bignum))

;; Reduced test case: the only thing this function does is evaluate
;; (%allocate-bignum 2) and return its result.
(defun stripped-down-version-of-add-bignums ()
  (%allocate-bignum 2))

;; Semaphore shared by the TEST threads.  It is signalled twice up front,
;; so two threads can get past ccl:wait-on-semaphore at the same time.
(ccl:defstatic *sem* (ccl:make-semaphore))
(ccl:signal-semaphore *sem*)
(ccl:signal-semaphore *sem*) ;; Removing this line prevents the error.

;; Thread body: loop forever (the DO has no end test).  Each iteration
;; waits on *sem*, calls stripped-down-version-of-add-bignums 1000 times,
;; and then signals *sem* again.
(defun test ()
  (do ()
      ()
    (ccl:wait-on-semaphore *sem*)
    (dotimes (i 1000)
      (stripped-down-version-of-add-bignums))
    (ccl:signal-semaphore *sem*)))

;; Entry point: print the implementation version, EGC state and CPU
;; count, start two threads that each run TEST, then join them.  TEST
;; loops forever, so the final (ccl:quit 0) is only reached if the
;; threads terminate.
(defun run ()
  (format t "~A egc=~A cpu-count=~A~%" (lisp-implementation-version) (ccl:egc-enabled-p) (ccl:cpu-count))
  (let ((threads))
    ;; The thread count is fixed at 2; the #|...|# block comment keeps the
    ;; alternative of using (ccl:cpu-count) threads.
    (loop repeat 2 #|(ccl:cpu-count)|# do
         (push (ccl:process-run-function "test" #'test) threads))
    (dolist (thread threads)
      (ccl:join-process thread)))
  (ccl:quit 0))

Last edited 5 years ago by uchida (previous) (diff)

comment:2 Changed 5 years ago by uchida

  • Component changed from IDE to other

comment:3 Changed 5 years ago by uchida

The error does not require %allocate-bignum; simply calling cons, as shown below, is enough to trigger it.

;; Variant of the reduced test case whose body only allocates a single
;; cons cell, with no bignum allocation.
(defun stripped-down-version-of-add-bignums ()
  (cons nil nil))

comment:4 Changed 5 years ago by uchida

I have not yet been able to get to the root of the problem. The following are the results of some trial and error.

Here is an example that uses a simpler program taken from #571.

;;; debug1215.lisp

;; Call ccl:egc with NIL; (run) later prints the resulting setting via
;; ccl:egc-enabled-p.
(ccl:egc nil)

;; Define invalid-memory-access in the CCL package with a report function
;; that prints the faulting address and, additionally, the value of
;; (%current-tcr) shifted left by 3 bits.
;; NOTE(review): presumably this replaces CCL's own definition of the
;; condition so that the fault message gains the "tcr=" field -- confirm.
(in-package :ccl)
(define-condition invalid-memory-access (storage-condition)
  ((address :initarg :address)
   (write-p :initform nil :initarg :write-p))
  (:report (lambda (c s)
             (with-slots (address write-p) c
               (format s "Fault during ~a memory address #x~x tcr=#x~x" (if write-p "write to" "read of") address
		       (ash (%current-tcr) 3))))))
(in-package :cl-user)

;; Install a debugger hook that prints the condition and then exits the
;; Lisp with status 1, so an error ends the run instead of stopping in
;; the debugger.
(setf *debugger-hook* #'(lambda (c v)
                          (declare (ignore v))
                          (princ c)
                          (ccl:quit 1)))

;; Entry point: print the implementation version, EGC state and CPU
;; count, then start two "test" threads that each loop forever calling
;; (cons nil nil).  This version neither joins the threads nor calls
;; ccl:quit, so RUN returns right after starting them.
(defun run ()
  (format t "~A egc=~A cpu-count=~A~%" (lisp-implementation-version) (ccl:egc-enabled-p) (ccl:cpu-count))
  ;; Copied from #571
  (loop for i from 1 to 2 do 
	(process-run-function "test" (lambda nil (tagbody start (if (cons nil nil) 69 nil) (go start))))))

With the following debugging code embedded in the kernel, the results are as shown below.

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel
$ svn diff
Index: thread_manager.c
===================================================================
--- thread_manager.c    (revision 16155)
+++ thread_manager.c    (working copy)
@@ -2036,6 +2036,7 @@
            basically want to get to he handler and have it notice
            the pending exception request, and suspend the thread at that
            point. */
+       printf("tcr=%llx where=%llx\n",tcr,where);fflush(stdout); //debug
         if (!((where < (pc)lisp_global(HEAP_END)) &&
               (where >= (pc)lisp_global(HEAP_START))) &&
            (!((where < (pc)(managed_static_area->active)) &&

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel
$

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$ ../../wx86cl64 -n -l ../../../debug1215.lisp -e "(run)"
Version 1.9  (WindowsX8664) egc=NIL cpu-count=8
Welcome to Clozure Common Lisp Version 1.9  (WindowsX8664)!
? tcr=21930e00 where=21006f3bea
tcr=21925a50 where=7ffadeaccb40
tcr=21925a50 where=21006a8b5a
tcr=21930e00 where=21006a8b40
tcr=21930e00 where=21006a8b5a
Fault during read of memory address #x26ADFF8 tcr=#x21930E00

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$ ../../wx86cl64 -n -l ../../../debug1215.lisp -e "(run)"
Version 1.9  (WindowsX8664) egc=NIL cpu-count=8
Welcome to Clozure Common Lisp Version 1.9  (WindowsX8664)!
? tcr=21950e00 where=21006f3bea
tcr=21950e00 where=21006a8c4a
tcr=21950e00 where=21006a8b40
tcr=21945a50 where=21006a8b5a
tcr=21950e00 where=21006a8b40
tcr=21950e00 where=21006a8b5a
tcr=21945a50 where=21006a8b47
tcr=21950e00 where=21006a8b5e
tcr=21950e00 where=21006a8b5a
tcr=21950e00 where=21006a8b5a
Fault during read of memory address #x26B6FF8 tcr=#x21950E00

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$ ../../wx86cl64 -n -l ../../../debug1215.lisp -e "(run)"
Version 1.9  (WindowsX8664) egc=NIL cpu-count=8
Welcome to Clozure Common Lisp Version 1.9  (WindowsX8664)!
? tcr=21920e00 where=21006f3bea
tcr=21915a50 where=21006a8b50
tcr=21920e00 where=21006a8b40
tcr=21915a50 where=21006a8b5e
tcr=21920e00 where=21006a8b40
tcr=21920e00 where=21006a8b40
tcr=21920e00 where=21006a8b52
tcr=21915a50 where=21006a8b5a
tcr=21915a50 where=21006a8aca
Fault during read of memory address #x26A0FF8 tcr=#x21915A50


uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$ ../../wx86cl64 -n -l ../../../debug1215.lisp -e "(run)"
Version 1.9  (WindowsX8664) egc=NIL cpu-count=8
Welcome to Clozure Common Lisp Version 1.9  (WindowsX8664)!
? tcr=219e0e00 where=21006a8b5a
tcr=219e0e00 where=21006a8b40
tcr=219e0e00 where=21006a8b5a
tcr=219e0e00 where=21006a8b5a
tcr=219e0e00 where=21006a8b47
tcr=219e0e00 where=21006a8b40
tcr=219e0e00 where=21006a8b40
tcr=219d5a50 where=21006a8b40
tcr=219e0e00 where=21006a8b5a
Fault during read of memory address #x26AEFF8 tcr=#x219E0E00

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$ ../../wx86cl64 -n -l ../../../debug1215.lisp -e "(run)"
Version 1.9  (WindowsX8664) egc=NIL cpu-count=8
Welcome to Clozure Common Lisp Version 1.9  (WindowsX8664)!
? tcr=21935a50 where=21006a8c4a
tcr=21935a50 where=21006a8b5a
Fault during read of memory address #x26A1FF8 tcr=#x21935A50


uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$ ../../wx86cl64 -n -l ../../../debug1215.lisp -e "(run)"
Version 1.9  (WindowsX8664) egc=NIL cpu-count=8
Welcome to Clozure Common Lisp Version 1.9  (WindowsX8664)!
? tcr=219c0e00 where=21006f3bea
tcr=219b5a50 where=21006a8b47
tcr=219c0e00 where=21006a8b5a
tcr=219c0e00 where=21006a8b40
tcr=219b5a50 where=21006a8b33
tcr=219c0e00 where=21006a8b5a
tcr=219b5a50 where=21006a8b52
tcr=219c0e00 where=21006a8b5a
tcr=219b5a50 where=21006a8b47
tcr=219b5a50 where=21006a8aca
tcr=219c0e00 where=21006a8ab7
tcr=219b5a50 where=21006a8ab0
tcr=219c0e00 where=21006a8ac2
tcr=219b5a50 where=21006a8ab0
tcr=219c0e00 where=21006a8aca
tcr=219c0e00 where=21006a8aca
tcr=219c0e00 where=21006a8ab0
tcr=219b5a50 where=21006a8aca
Fault during read of memory address #x26A8FF8 tcr=#x219B5A50

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel/win64
$

Please note that the last program counter ("where") printed before the error occurred was always 21006a8b5a or 21006a8aca.

If you change the kernel as shown below, the error will not occur.

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel
$ svn diff
Index: thread_manager.c
===================================================================
--- thread_manager.c    (revision 16155)
+++ thread_manager.c    (working copy)
@@ -2036,7 +2036,10 @@
            basically want to get to he handler and have it notice
            the pending exception request, and suspend the thread at that
            point. */
-        if (!((where < (pc)lisp_global(HEAP_END)) &&
+       printf("tcr=%llx where=%llx\n",tcr,where);fflush(stdout); //debug
+        if (where == (pc) 0x21006a8b5a ||
+           where == (pc) 0x21006a8aca ||
+           !((where < (pc)lisp_global(HEAP_END)) &&
               (where >= (pc)lisp_global(HEAP_START))) &&
            (!((where < (pc)(managed_static_area->active)) &&
              (where >= (pc)(readonly_area->low)))) &&

uchida@g-tune /cygdrive/c/lisp/ccl-1.9/lisp-kernel
$
Last edited 5 years ago by uchida (previous) (diff)

comment:5 Changed 5 years ago by uchida

For reasons I do not yet understand, the error no longer occurs in the 64-bit version if you make the change shown below.

uchida@g-tune /cygdrive/c/lisp/ccl-trunk/lisp-kernel
$ svn diff
Index: pmcl-kernel.c
===================================================================
--- pmcl-kernel.c       (revision 16180)
+++ pmcl-kernel.c       (working copy)
@@ -444,7 +444,7 @@
   UnCommitMemory(start, len);
 }

-#define TOUCH_PAGES_ON_COMMIT 0
+#define TOUCH_PAGES_ON_COMMIT 1 /* 0 */

 Boolean
 touch_all_pages(void *start, size_t len)

uchida@g-tune /cygdrive/c/lisp/ccl-trunk/lisp-kernel
$

However, with this change the 32-bit version fails to build, as shown below.

uchida@g-tune /cygdrive/c/lisp/ccl-trunk/lisp-kernel/win32
$ make
i686-w64-mingw32-gcc -m32 -include ../platform-win32.h -c ../pmcl-kernel.c -DWIN_32 -DWINDOWS -D_REENTRANT -DX86 -DX8632 -D_GNU_SOURCE  -D__MSVCRT__ -D__MSVCRT_VERSION__=0x700 -D_WIN32_WINNT=0x0502 -DSVN_REVISION="16180" -g -O -Wno-format  -o pmcl-kernel.o
i686-w64-mingw32-gcc -m32 -o ../../wx86cl.exe  -Wl,--image-base=0x10000 \
-Wl,--enable-auto-import \
x86-spjump32.o x86-spentry32.o x86-subprims32.o pmcl-kernel.o gc-common.o x86-gc.o bits.o  x86-exceptions.o x86-utils.o image.o thread_manager.o lisp-debug.o memory.o windows-calls.o x86-asmutils32.o  imports.o lispdcmd.o plprint.o plsym.o xlbt.o x86_print.o -lpsapi -lws2_32
pmcl-kernel.o: In function `touch_all_pages':
/cygdrive/c/lisp/ccl-trunk/lisp-kernel/win32/../pmcl-kernel.c:457: undefined reference to `touch_page'
collect2: error: ld returned 1 exit status
Makefile:101: recipe for target '../../wx86cl.exe' failed
make: *** [../../wx86cl.exe] Error 1

uchida@g-tune /cygdrive/c/lisp/ccl-trunk/lisp-kernel/win32
$

This is the result on trunk. The result was the same with ccl-1.9.

I think this change would also resolve #1179.

Last edited 5 years ago by uchida (previous) (diff)

comment:6 Changed 5 years ago by uchida

The following code seems to work for the 32-bit version.

uchida@g-tune /cygdrive/c/lisp/ccl-trunk/lisp-kernel
$ svn diff
Index: pmcl-kernel.c
===================================================================
--- pmcl-kernel.c       (revision 16180)
+++ pmcl-kernel.c       (working copy)
@@ -444,8 +444,16 @@
   UnCommitMemory(start, len);
 }

-#define TOUCH_PAGES_ON_COMMIT 0
+#define TOUCH_PAGES_ON_COMMIT 1 /* 0 */

+#ifdef WIN_32
+Boolean touch_page(void* p)
+{
+  *(char*)p = 0;
+  return 1;
+}
+#endif
+
 Boolean
 touch_all_pages(void *start, size_t len)
 {

uchida@g-tune /cygdrive/c/lisp/ccl-trunk/lisp-kernel
$

Note: See TracTickets for help on using tickets.