From dc5ed1463fadfe141ec4f140438286d6fdbe4c0c Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Wed, 3 Jul 2024 13:56:14 -0400 Subject: [PATCH 1/4] Move shared VM code into shared system Reduces code duplication. I still don't think this is very artful but it's better than having a bunch of huge local functions. --- maclina.asd | 12 ++- vm-cross.lisp | 205 +++++++++++++---------------------------------- vm-native.lisp | 210 +++++++++++++------------------------------------ vm-shared.lisp | 131 ++++++++++++++++++++++++++++++ 4 files changed, 253 insertions(+), 305 deletions(-) create mode 100644 vm-shared.lisp diff --git a/maclina.asd b/maclina.asd index b53b495..e76ef6a 100644 --- a/maclina.asd +++ b/maclina.asd @@ -70,12 +70,20 @@ :depends-on (:maclina/base :ieee-floats) :components ((:file "loadltv"))) +(asdf:defsystem #:maclina/vm-shared + :description "Code shared by VM implementations." + :author ("Charles Zhang" "Bike ") + :maintainer "Bike ") :maintainer "Bike " - :depends-on (:maclina/base :trucler) ; trucler only needed for client class - remove? + ;; trucler only needed for client class - remove? + :depends-on (:maclina/vm-shared :trucler) :components ((:file "vm-native"))) (asdf:defsystem #:maclina/vm-cross @@ -83,7 +91,7 @@ :author ("Charles Zhang" "Bike ") :maintainer "Bike " - :depends-on (:maclina/base :clostrum :clostrum-trucler) + :depends-on (:maclina/vm-shared :clostrum :clostrum-trucler) :components ((:file "vm-cross"))) (asdf:defsystem #:maclina/test diff --git a/vm-cross.lisp b/vm-cross.lisp index 98a554f..f7669e4 100644 --- a/vm-cross.lisp +++ b/vm-cross.lisp @@ -1,6 +1,7 @@ (defpackage #:maclina.vm-cross (:use #:cl) (:local-nicknames (#:m #:maclina.machine) + (#:vm #:maclina.vm-shared) (#:arg #:maclina.argparse) (#:cmp #:maclina.compile)) (:export #:client) @@ -42,8 +43,7 @@ (defvar *trace* nil) (defstruct (cell (:constructor make-cell (value))) value) -(defstruct unbound-marker) -(defvar *unbound* (make-unbound-marker)) +(defvar *unbound* (vm:make-unbound-marker)) (defstruct dynenv) (defstruct (entry-dynenv (:include dynenv) @@ -323,61 +323,10 @@ ((0) (call nargs)) (t (mapcar #'spush (subseq (multiple-value-list (call nargs)) 0 mvals))))) - (mv-call () (call (spop))) - (check-arg-count-<= (n) - (unless (<= (vm-arg-count vm) n) - (error 'arg:wrong-number-of-arguments - :given-nargs (vm-arg-count vm) - :max-nargs n))) - (check-arg-count->= (n) - (unless (>= (vm-arg-count vm) n) - (error 'arg:wrong-number-of-arguments - :given-nargs (vm-arg-count vm) - :min-nargs n))) - (check-arg-count-= (n) - (unless (= (vm-arg-count vm) n) - (error 'arg:wrong-number-of-arguments - :given-nargs (vm-arg-count vm) - :min-nargs n :max-nargs n))) - (bind-required-args (nargs) - ;; Use memcpy for this. - (let* ((args (vm-args vm)) - (args-end (+ args nargs))) - (do ((arg-index args (1+ arg-index)) - (frame-slot 0 (1+ frame-slot))) - ((>= arg-index args-end)) - (setf (local frame-slot) (stack arg-index))))) - (bind-optional-args (required-count optional-count) - (let* ((args (vm-args vm)) - (optional-start (+ args required-count)) - (args-end (+ args (vm-arg-count vm))) - (end (+ optional-start optional-count)) - (optional-frame-offset required-count) - (optional-frame-end (+ optional-frame-offset optional-count))) - (if (<= args-end end) - ;; Could be coded as memcpy in C. - (do ((arg-index optional-start (1+ arg-index)) - (frame-slot optional-frame-offset (1+ frame-slot))) - ((>= arg-index args-end) - ;; memcpy or similar. (blit bit pattern?) - (do ((frame-slot frame-slot (1+ frame-slot))) - ((>= frame-slot optional-frame-end)) - (setf (local frame-slot) (make-unbound-marker)))) - (setf (local frame-slot) (stack arg-index))) - ;; Could also be coded as memcpy. - (do ((arg-index optional-start (1+ arg-index)) - (frame-slot optional-frame-offset (1+ frame-slot))) - ((>= arg-index end)) - (setf (local frame-slot) (stack arg-index)))))) - (listify-rest-args (nfixed) - (setf (local nfixed) - (loop for index from nfixed below (vm-arg-count vm) - collect (stack (+ (vm-args vm) index)))))) + (mv-call () (call (spop)))) (declare (inline stack (setf stack) local (setf local) spush spop bind code next-code next-long constant closure - call mv-call check-arg-count-<= - check-arg-count->= check-arg-count-= - bind-required-args bind-optional-args listify-rest-args)) + call mv-call)) (prog ((end (length bytecode)) (trace *trace*) ;; KLUDGE: we can't use bp directly since catch uses eq. @@ -456,65 +405,42 @@ ((#.m:jump-if-16) (incf ip (if (spop) (next-code-signed-16) 3))) ((#.m:jump-if-24) (incf ip (if (spop) (next-code-signed-24) 4))) ((#.m:check-arg-count-<=) - (check-arg-count-<= (next-code)) (incf ip)) + (vm:check-arg-count-<= (vm-arg-count vm) (next-code)) + (incf ip)) ((#.m:check-arg-count->=) - (check-arg-count->= (next-code)) (incf ip)) + (vm:check-arg-count->= (vm-arg-count vm) (next-code)) + (incf ip)) ((#.m:check-arg-count-=) - (check-arg-count-= (next-code)) (incf ip)) + (vm:check-arg-count-= (vm-arg-count vm) (next-code)) + (incf ip)) ((#.m:jump-if-supplied-8) - (incf ip (if (typep (local (next-code)) 'unbound-marker) + (incf ip (if (typep (local (next-code)) 'vm:unbound-marker) 2 (1- (next-code-signed))))) ((#.m:jump-if-supplied-16) - (incf ip (if (typep (local (next-code)) 'unbound-marker) + (incf ip (if (typep (local (next-code)) 'vm:unbound-marker) 3 (1- (next-code-signed-16))))) ((#.m:bind-required-args) - (bind-required-args (next-code)) (incf ip)) + (vm:bind-required-args (next-code) stack bp (vm-args vm)) + (incf ip)) ((#.m:bind-optional-args) - (bind-optional-args (next-code) (next-code)) (incf ip)) + (vm:bind-optional-args (next-code) (next-code) + stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) ((#.m:listify-rest-args) - (listify-rest-args (next-code)) (incf ip)) + (vm:listify-rest-args + (next-code) stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) ((#.m:parse-key-args) - (let* ((args (vm-args vm)) - (end (+ args (vm-arg-count vm))) - (more-start (+ args (next-code))) - (key-count-info (next-code)) - (key-count (logand key-count-info #x7f)) - (key-literal-start (next-code)) - (key-literal-end (+ key-literal-start key-count)) - (key-frame-start (next-code)) - (unknown-keys nil) - (allow-other-keys-p nil)) - ;; Initialize all key values to # - (loop for index from key-frame-start below (+ key-frame-start key-count) - do (setf (local index) (make-unbound-marker))) - (when (> end more-start) - (do ((arg-index (- end 1) (- arg-index 2))) - ((< arg-index more-start) - (cond ((= arg-index (1- more-start))) - ((= arg-index (- more-start 2)) - (error 'arg:odd-keywords)) - (t - (error "BUG! This can't happen!")))) - (let ((key (stack (1- arg-index)))) - (when (eq key :allow-other-keys) - (setf allow-other-keys-p (stack arg-index))) - (loop for key-index from key-literal-start - below key-literal-end - for offset of-type (unsigned-byte 16) - from key-frame-start - do (when (eq (constant key-index) key) - (setf (local offset) (stack arg-index)) - (return)) - finally (unless (or allow-other-keys-p - (eq key :allow-other-keys)) - (push key unknown-keys)))))) - (when (and (not (or (logbitp 7 key-count-info) - allow-other-keys-p)) - unknown-keys) - (error 'arg:unrecognized-keyword-argument - :unrecognized-keywords unknown-keys))) + (let ((nfixed (next-code)) (key-count-info (next-code)) + (key-literal-start (next-code)) + (key-frame-start (next-code))) + (vm:parse-key-args + nfixed + (logand key-count-info #x7f) (logbitp 7 key-count-info) + key-literal-start key-frame-start + stack bp (vm-arg-count vm) (vm-args vm) constants)) (incf ip)) ((#.m:save-sp) (setf (local (next-code)) sp) @@ -667,63 +593,44 @@ (#.m:bind (bind (next-long) (next-long)) (incf ip)) (#.m:set (setf (local (next-long)) (spop)) (incf ip)) (#.m:bind-required-args - (bind-required-args (next-long)) (incf ip)) + (vm:bind-required-args (next-long) + stack bp (vm-args vm)) + (incf ip)) (#.m:bind-optional-args - (bind-optional-args (next-long) (next-long)) (incf ip)) + (vm:bind-optional-args + (next-long) (next-long) + stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) (#.m:listify-rest-args - (listify-rest-args (next-long)) (incf ip)) + (vm:listify-rest-args + (next-long) stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) (#.m:parse-key-args - (let* ((args (vm-args vm)) - (end (+ args (vm-arg-count vm))) - (more-start (+ args (next-long))) - (key-count-info (next-long)) - (key-count (logand key-count-info #x7fff)) - (key-literal-start (next-long)) - (key-literal-end (+ key-literal-start key-count)) - (key-frame-start (next-long)) - (unknown-keys nil) - (allow-other-keys-p nil)) - ;; Initialize all key values to # - (loop for index from key-frame-start below (+ key-frame-start key-count) - do (setf (local index) (make-unbound-marker))) - (when (> end more-start) - (do ((arg-index (- end 1) (- arg-index 2))) - ((< arg-index more-start) - (cond ((= arg-index (1- more-start))) - ((= arg-index (- more-start 2)) - (error 'arg:odd-keywords)) - (t - (error "BUG! This can't happen!")))) - (let ((key (stack (1- arg-index)))) - (when (eq key :allow-other-keys) - (setf allow-other-keys-p (stack arg-index))) - (loop for key-index from key-literal-start - below key-literal-end - for offset of-type (unsigned-byte 16) - from key-frame-start - do (when (eq (constant key-index) key) - (setf (local offset) (stack arg-index)) - (return)) - finally (unless (or allow-other-keys-p - (eq key :allow-other-keys)) - (push key unknown-keys)))))) - (when (and (not (or (logbitp 15 key-count-info) - allow-other-keys-p)) - unknown-keys) - (error 'arg:unrecognized-keyword-argument - :unrecognized-keywords unknown-keys)))) + (let ((nfixed (next-long)) (key-count-info (next-long)) + (key-literal-start (next-long)) + (key-frame-start (next-long))) + (vm:parse-key-args + nfixed + (logand key-count-info #x7fff) + (logbitp 15 key-count-info) + key-literal-start key-frame-start + stack bp (vm-arg-count vm) (vm-args vm) constants)) + (incf ip)) (#.m:check-arg-count-<= - (check-arg-count-<= (next-long)) (incf ip)) + (vm:check-arg-count-<= (vm-arg-count vm) (next-long)) + (incf ip)) (#.m:check-arg-count->= - (check-arg-count->= (next-long)) (incf ip)) + (vm:check-arg-count->= (vm-arg-count vm) (next-long)) + (incf ip)) (#.m:check-arg-count-= - (check-arg-count-= (next-long)) (incf ip)) + (vm:check-arg-count-= (vm-arg-count vm) (next-long)) + (incf ip)) (#.m:jump-if-supplied-8 - (incf ip (if (typep (local (next-long)) 'unbound-marker) + (incf ip (if (typep (local (next-long)) 'vm:unbound-marker) 2 (- (next-code-signed) 3)))) (#.m:jump-if-supplied-16 - (incf ip (if (typep (local (next-long)) 'unbound-marker) + (incf ip (if (typep (local (next-long)) 'vm:unbound-marker) 3 (- (next-code-signed-16) 3)))) (otherwise diff --git a/vm-native.lisp b/vm-native.lisp index 71cca63..e6fbf36 100644 --- a/vm-native.lisp +++ b/vm-native.lisp @@ -1,6 +1,7 @@ (defpackage #:maclina.vm-native (:use #:cl) (:local-nicknames (#:m #:maclina.machine) + (#:vm #:maclina.vm-shared) (#:arg #:maclina.argparse)) (:export #:initialize-vm) (:export #:*trace*)) @@ -64,7 +65,6 @@ (logior x (- (mask-field (byte 1 (1- size)) x)))) (defstruct (cell (:constructor make-cell (value))) value) -(defstruct (unbound-marker (:constructor make-unbound-marker))) (defvar *trace* nil) @@ -72,10 +72,9 @@ (defstruct (entry-dynenv (:include dynenv) (:constructor make-entry-dynenv (fun))) (fun (error "missing arg") :type function)) -(defstruct (sbind-dynenv (:include dynenv) - (:constructor make-sbind-dynenv ()))) (defun instruction-trace (bytecode literals stack ip bp sp frame-size) + (declare (ignorable stack bp sp frame-size)) (fresh-line *trace-output*) (let ((*standard-output* *trace-output*)) (maclina.machine:display-instruction bytecode literals ip)) @@ -159,63 +158,10 @@ ((0) (call nargs)) (t (mapcar #'spush (subseq (multiple-value-list (call nargs)) 0 mvals))))) - (mv-call () (call (spop))) - (check-arg-count-<= (n) - (unless (<= (vm-arg-count vm) n) - (error 'arg:wrong-number-of-arguments - :given-nargs (vm-arg-count vm) - :max-nargs n))) - (check-arg-count->= (n) - (unless (>= (vm-arg-count vm) n) - (error 'arg:wrong-number-of-arguments - :given-nargs (vm-arg-count vm) - :max-nargs n))) - (check-arg-count-= (n) - (unless (= (vm-arg-count vm) n) - (error 'arg:wrong-number-of-arguments - :given-nargs (vm-arg-count vm) - :max-nargs n))) - (bind-required-args (nargs) - ;; Use memcpy for this. - (let* ((args (vm-args vm)) - (args-end (+ args nargs))) - (do ((arg-index args (1+ arg-index)) - (frame-slot 0 (1+ frame-slot))) - ((>= arg-index args-end)) - (setf (local frame-slot) (stack arg-index))))) - (bind-optional-args (required-count optional-count) - (let* ((args (vm-args vm)) - (optional-start (+ args required-count)) - (args-end (+ args (vm-arg-count vm))) - (end (+ optional-start optional-count)) - (optional-frame-offset required-count) - (optional-frame-end (+ optional-frame-offset optional-count))) - (if (<= args-end end) - ;; Could be coded as memcpy in C. - (do ((arg-index optional-start (1+ arg-index)) - (frame-slot optional-frame-offset (1+ frame-slot))) - ((>= arg-index args-end) - ;; memcpy or similar. (blit bit - ;; pattern?) - (do ((frame-slot frame-slot (1+ frame-slot))) - ((>= frame-slot optional-frame-end)) - (setf (local frame-slot) (make-unbound-marker)))) - (setf (local frame-slot) (stack arg-index))) - ;; Could also be coded as memcpy. - (do ((arg-index optional-start (1+ arg-index)) - (frame-slot optional-frame-offset (1+ frame-slot))) - ((>= arg-index end)) - (setf (local frame-slot) (stack arg-index)))))) - (listify-rest-args (nfixed) - (setf (local nfixed) - (loop for index from nfixed below (vm-arg-count vm) - collect (stack (+ (vm-args vm) index)))))) + (mv-call () (call (spop)))) (declare (inline stack (setf stack) spush spop bind code next-code next-long constant closure - call mv-call call-fixed - check-arg-count-<= check-arg-count->= - check-arg-count-= bind-required-args bind-optional-args - listify-rest-args)) + call mv-call call-fixed)) (loop with end = (length bytecode) with trace = *trace* until (eql ip end) @@ -281,66 +227,42 @@ ((#.m:jump-if-16) (incf ip (if (spop) (next-code-signed-16) 3))) ((#.m:jump-if-24) (incf ip (if (spop) (next-code-signed-24) 4))) ((#.m:check-arg-count-<=) - (check-arg-count-<= (next-code)) (incf ip)) + (vm:check-arg-count-<= (vm-arg-count vm) (next-code)) + (incf ip)) ((#.m:check-arg-count->=) - (check-arg-count->= (next-code)) (incf ip)) + (vm:check-arg-count->= (vm-arg-count vm) (next-code)) + (incf ip)) ((#.m:check-arg-count-=) - (check-arg-count-= (next-code)) (incf ip)) + (vm:check-arg-count-= (vm-arg-count vm) (next-code)) + (incf ip)) ((#.m:jump-if-supplied-8) - (incf ip (if (typep (local (next-code)) 'unbound-marker) + (incf ip (if (typep (local (next-code)) 'vm:unbound-marker) 2 (1- (next-code-signed))))) ((#.m:jump-if-supplied-16) - (incf ip (if (typep (local (next-code)) 'unbound-marker) + (incf ip (if (typep (local (next-code)) 'vm:unbound-marker) 3 (1- (next-code-signed-16))))) ((#.m:bind-required-args) - (bind-required-args (next-code)) (incf ip)) + (vm:bind-required-args (next-code) stack bp (vm-args vm)) + (incf ip)) ((#.m:bind-optional-args) - (bind-optional-args (next-code) (next-code)) (incf ip)) + (vm:bind-optional-args (next-code) (next-code) + stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) ((#.m:listify-rest-args) - (listify-rest-args (next-code)) (incf ip)) + (vm:listify-rest-args + (next-code) stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) ((#.m:parse-key-args) - (let* ((args (vm-args vm)) - (end (+ args (vm-arg-count vm))) - (more-start (+ args (next-code))) - (key-count-info (next-code)) - (key-count (logand key-count-info #x7f)) - (key-literal-start (next-code)) - (key-literal-end (+ key-literal-start key-count)) - (key-frame-start (next-code)) - (unknown-keys nil) - (allow-other-keys-p nil)) - ;; Initialize all key values to # - (loop for index from key-frame-start below (+ key-frame-start key-count) - do (setf (local index) (make-unbound-marker))) - (when (> end more-start) - (do ((arg-index (- end 1) (- arg-index 2))) - ((< arg-index more-start) - (cond ((= arg-index (1- more-start))) - ((= arg-index (- more-start 2)) - (error 'arg:odd-keywords)) - (t - (error "BUG! This can't happen!")))) - (let ((key (stack (1- arg-index)))) - (when (eq key :allow-other-keys) - (setf allow-other-keys-p (stack arg-index))) - (loop for key-index from key-literal-start - below key-literal-end - for offset of-type (unsigned-byte 16) - from key-frame-start - do (when (eq (constant key-index) key) - (setf (local offset) (stack arg-index)) - (return)) - finally (unless (or allow-other-keys-p - ;; aok is always allowed - (eq key :allow-other-keys)) - (push key unknown-keys)))))) - (when (and (not (or (logbitp 7 key-count-info) - allow-other-keys-p)) - unknown-keys) - (error 'arg:unrecognized-keyword-argument - :unrecognized-keywords unknown-keys))) + (let ((nfixed (next-code)) (key-count-info (next-code)) + (key-literal-start (next-code)) + (key-frame-start (next-code))) + (vm:parse-key-args + nfixed + (logand key-count-info #x7f) (logbitp 7 key-count-info) + key-literal-start key-frame-start + stack bp (vm-arg-count vm) (vm-args vm) constants)) (incf ip)) ((#.m:save-sp) (setf (local (next-code)) sp) @@ -513,64 +435,44 @@ (#.m:bind (bind (next-long) (next-long)) (incf ip)) (#.m:set (setf (local (next-long)) (spop)) (incf ip)) (#.m:bind-required-args - (bind-required-args (next-long)) (incf ip)) + (vm:bind-required-args (next-long) + stack bp (vm-args vm)) + (incf ip)) (#.m:bind-optional-args - (bind-optional-args (next-long) (next-long)) (incf ip)) + (vm:bind-optional-args + (next-long) (next-long) + stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) (#.m:listify-rest-args - (listify-rest-args (next-long)) (incf ip)) + (vm:listify-rest-args + (next-long) stack bp (vm-args vm) (vm-arg-count vm)) + (incf ip)) (#.m:parse-key-args - (let* ((args (vm-args vm)) - (end (+ args (vm-arg-count vm))) - (more-start (+ args (next-long))) - (key-count-info (next-long)) - (key-count (logand key-count-info #x7fff)) - (key-literal-start (next-long)) - (key-literal-end (+ key-literal-start key-count)) - (key-frame-start (next-long)) - (unknown-keys nil) - (allow-other-keys-p nil)) - ;; Initialize all key values to # - (loop for index from key-frame-start below (+ key-frame-start key-count) - do (setf (local index) (make-unbound-marker))) - (when (> end more-start) - (do ((arg-index (- end 1) (- arg-index 2))) - ((< arg-index more-start) - (cond ((= arg-index (1- more-start))) - ((= arg-index (- more-start 2)) - (error 'arg:odd-keywords)) - (t - (error "BUG! This can't happen!")))) - (let ((key (stack (1- arg-index)))) - (when (eq key :allow-other-keys) - (setf allow-other-keys-p (stack arg-index))) - (loop for key-index from key-literal-start - below key-literal-end - for offset of-type (unsigned-byte 16) - from key-frame-start - do (when (eq (constant key-index) key) - (setf (local offset) (stack arg-index)) - (return)) - finally (unless (or allow-other-keys-p - ;; aok is always allowed - (eq key :allow-other-keys)) - (push key unknown-keys)))))) - (when (and (not (or (logbitp 15 key-count-info) - allow-other-keys-p)) - unknown-keys) - (error 'arg:unrecognized-keyword-argument - :unrecognized-keywords unknown-keys)))) + (let ((nfixed (next-long)) (key-count-info (next-long)) + (key-literal-start (next-long)) + (key-frame-start (next-long))) + (vm:parse-key-args + nfixed + (logand key-count-info #x7fff) + (logbitp 15 key-count-info) + key-literal-start key-frame-start + stack bp (vm-arg-count vm) (vm-args vm) constants)) + (incf ip)) (#.m:check-arg-count-<= - (check-arg-count-<= (next-long)) (incf ip)) + (vm:check-arg-count-<= (vm-arg-count vm) (next-long)) + (incf ip)) (#.m:check-arg-count->= - (check-arg-count->= (next-long)) (incf ip)) + (vm:check-arg-count->= (vm-arg-count vm) (next-long)) + (incf ip)) (#.m:check-arg-count-= - (check-arg-count-= (next-long)) (incf ip)) + (vm:check-arg-count-= (vm-arg-count vm) (next-long)) + (incf ip)) (#.m:jump-if-supplied-8 - (incf ip (if (typep (local (next-long)) 'unbound-marker) + (incf ip (if (typep (local (next-long)) 'vm:unbound-marker) 2 (- (next-code-signed) 3)))) (#.m:jump-if-supplied-16 - (incf ip (if (typep (local (next-long)) 'unbound-marker) + (incf ip (if (typep (local (next-long)) 'vm:unbound-marker) 3 (- (next-code-signed-16) 3)))) (otherwise diff --git a/vm-shared.lisp b/vm-shared.lisp new file mode 100644 index 0000000..99eb68f --- /dev/null +++ b/vm-shared.lisp @@ -0,0 +1,131 @@ +(defpackage #:maclina.vm-shared + (:use #:cl) + (:local-nicknames (#:arg #:maclina.argparse)) + (:export #:unbound-marker #:make-unbound-marker) + (:export #:check-arg-count-<= #:check-arg-count->= #:check-arg-count-=) + (:export #:bind-required-args #:bind-optional-args + #:listify-rest-args #:parse-key-args)) + +(in-package #:maclina.vm-shared) + +(defstruct (unbound-marker (:constructor make-unbound-marker ()))) + +(declaim (inline stack (setf stack))) +(defun stack (stack index) + (declare (type simple-vector stack) (type (and unsigned-byte fixnum) index)) + (svref stack index)) +(defun (setf stack) (new stack index) + (declare (type simple-vector stack) (type (and unsigned-byte fixnum) index)) + (setf (svref stack index) new)) + +(declaim (inline local (setf local))) +(defun local (stack bp index) + (declare (type simple-vector stack) + (type (and unsigned-byte fixnum) bp index)) + (svref stack (+ bp index))) +(defun (setf local) (new stack bp index) + (declare (type simple-vector stack) + (type (and unsigned-byte fixnum) bp index)) + (setf (svref stack (+ bp index)) new)) + +(defun constant (constants index) + (declare (type simple-vector constants) (type (unsigned-byte 16) index)) + (svref constants index)) + +(declaim (inline check-arg-count-<= check-arg-count->= check-arg-count-=)) +(defun check-arg-count-<= (nargs expected) + (declare (type (unsigned-byte 16) nargs expected)) + (unless (<= nargs expected) + (error 'arg:wrong-number-of-arguments + :given-nargs nargs :max-nargs expected))) +(defun check-arg-count->= (nargs expected) + (declare (type (unsigned-byte 16) nargs expected)) + (unless (>= nargs expected) + (error 'arg:wrong-number-of-arguments + :given-nargs nargs :min-nargs expected))) +(defun check-arg-count-= (nargs expected) + (declare (type (unsigned-byte 16) nargs expected)) + (unless (= nargs expected) + (error 'arg:wrong-number-of-arguments + :given-nargs nargs :min-nargs expected :max-nargs expected))) + +(declaim (inline bind-required-args)) +(defun bind-required-args (nreq stack bp argsi) + ;; Use memcpy for this. + (let ((args-end (+ argsi nreq))) + (do ((arg-index argsi (1+ arg-index)) + (frame-slot 0 (1+ frame-slot))) + ((>= arg-index args-end)) + (setf (local stack bp frame-slot) (stack stack arg-index))))) + +(declaim (inline bind-optional-args)) +(defun bind-optional-args (required-count optional-count stack bp argsi nargs) + (let* ((optional-start (+ argsi required-count)) + (args-end (+ argsi nargs)) + (end (+ optional-start optional-count)) + (optional-frame-offset required-count) + (optional-frame-end (+ optional-frame-offset optional-count))) + (if (<= args-end end) + ;; Could be coded as memcpy in C. + (do ((arg-index optional-start (1+ arg-index)) + (frame-slot optional-frame-offset (1+ frame-slot))) + ((>= arg-index args-end) + ;; memcpy or similar. (blit bit + ;; pattern?) + (do ((frame-slot frame-slot (1+ frame-slot))) + ((>= frame-slot optional-frame-end)) + (setf (local stack bp frame-slot) (make-unbound-marker)))) + (setf (local stack bp frame-slot) (stack stack arg-index))) + ;; Could also be coded as memcpy. + (do ((arg-index optional-start (1+ arg-index)) + (frame-slot optional-frame-offset (1+ frame-slot))) + ((>= arg-index end)) + (setf (local stack bp frame-slot) (stack stack arg-index)))))) + +(declaim (inline parse-key-args)) +(defun listify-rest-args (nfixed stack bp argsi nargs) + (setf (local stack bp nfixed) + (loop for index from nfixed below nargs + collect (stack stack (+ argsi index))))) + +(declaim (inline parse-key-args)) +(defun parse-key-args (nfixed key-count ll-aok-p key-literal-start key-frame-start + stack bp nargs argsi constants) + (declare (type (unsigned-byte 16) nfixed key-count + key-literal-start key-frame-start nargs) + (type (simple-array t (*)) stack) + (type (and unsigned-byte fixnum) bp argsi)) + (let* ((end (+ argsi nargs)) + (more-start (+ argsi nfixed)) + (key-literal-end (+ key-literal-start key-count)) + (unknown-keys nil) + (allow-other-keys-p nil)) + ;; Initialize all key values to # + (loop for index from key-frame-start below (+ key-frame-start key-count) + do (setf (local stack bp index) (make-unbound-marker))) + (when (> end more-start) + (do ((arg-index (- end 1) (- arg-index 2))) + ((< arg-index more-start) + (cond ((= arg-index (1- more-start))) + ((= arg-index (- more-start 2)) + (error 'arg:odd-keywords)) + (t + (error "BUG! This can't happen!")))) + (let ((key (stack stack (1- arg-index)))) + (when (eq key :allow-other-keys) + (setf allow-other-keys-p (stack stack arg-index))) + (loop for key-index from key-literal-start + below key-literal-end + for offset of-type (unsigned-byte 16) + from key-frame-start + do (when (eq (constant constants key-index) key) + (setf (local stack bp offset) (stack stack arg-index)) + (return)) + finally (unless (or allow-other-keys-p + ;; aok is always allowed + (eq key :allow-other-keys)) + (push key unknown-keys)))))) + (when (and (not (or ll-aok-p allow-other-keys-p)) + unknown-keys) + (error 'arg:unrecognized-keyword-argument + :unrecognized-keywords unknown-keys)))) From 76b9fce62cce89d38464d759e3713bb610237f1e Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Wed, 3 Jul 2024 14:05:35 -0400 Subject: [PATCH 2/4] Improve some type declarations & optimize cross VM This is enough to shut up SBCL. I haven't actually tested for any performance gain, and in any case suspect that there are lower hanging fruit in the actual instruction design (e.g. the treatment of multiple values). --- vm-cross.lisp | 9 +++++++-- vm-native.lisp | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/vm-cross.lisp b/vm-cross.lisp index f7669e4..5431802 100644 --- a/vm-cross.lisp +++ b/vm-cross.lisp @@ -254,7 +254,7 @@ (declare (type (simple-array (unsigned-byte 8) (*)) bytecode) (type (simple-array t (*)) closure constants) (type (unsigned-byte 16) frame-size) - (optimize debug #+(or) speed)) + (optimize speed)) (let* ((vm *vm*) (stack (vm-stack vm)) (ip (vm-pc vm)) @@ -274,6 +274,7 @@ (local (index) (svref stack (+ bp index))) ((setf local) (object index) + (declare (type (unsigned-byte 16) index)) (setf (svref stack (+ bp index)) object)) (spush (object) (prog1 (setf (stack sp) object) (incf sp))) @@ -326,7 +327,9 @@ (mv-call () (call (spop)))) (declare (inline stack (setf stack) local (setf local) spush spop bind code next-code next-long constant closure - call mv-call)) + call mv-call call-fixed + next-code-signed next-code-signed-16 + next-code-signed-24)) (prog ((end (length bytecode)) (trace *trace*) ;; KLUDGE: we can't use bp directly since catch uses eq. @@ -458,6 +461,7 @@ (dest-tag tag) (tag (spop)) (de (make-catch-dynenv tag dest-tag target))) + (declare (type (and unsigned-byte fixnum) target)) (push de (vm-dynenv-stack vm)) (incf ip 2))) ((#.m:catch-16) @@ -465,6 +469,7 @@ (dest-tag tag) (tag (spop)) (de (make-catch-dynenv tag dest-tag target))) + (declare (type (and unsigned-byte fixnum) target)) (push de (vm-dynenv-stack vm)) (incf ip 3))) ((#.m:throw) (throw-to vm (spop))) diff --git a/vm-native.lisp b/vm-native.lisp index e6fbf36..4afacc0 100644 --- a/vm-native.lisp +++ b/vm-native.lisp @@ -109,6 +109,7 @@ (local (index) (svref stack (+ bp index))) ((setf local) (object index) + (declare (type (unsigned-byte 16) index)) (setf (svref stack (+ bp index)) object)) (spush (object) (prog1 (setf (stack sp) object) (incf sp))) @@ -161,7 +162,9 @@ (mv-call () (call (spop)))) (declare (inline stack (setf stack) spush spop bind code next-code next-long constant closure - call mv-call call-fixed)) + call mv-call call-fixed + next-code-signed next-code-signed-16 + next-code-signed-24)) (loop with end = (length bytecode) with trace = *trace* until (eql ip end) @@ -407,7 +410,8 @@ (m:make-bytecode-closure m:*client* template (coerce (gather envsize) 'simple-vector))))) - (declare (type function cleanup-thunk)) + (declare (type (unsigned-byte 16) envsize) + (type function cleanup-thunk)) (incf ip) (unwind-protect (vm bytecode closure constants frame-size) From db37cf877ef32eb649dbbdd374d9a72a59741f54 Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Wed, 3 Jul 2024 15:03:56 -0400 Subject: [PATCH 3/4] Define timeout mechanism This may be useful when using Maclina as a sandbox. --- maclina.asd | 1 + test/timeout.lisp | 25 +++++++++++++++++++++++++ vm-cross.lisp | 22 ++++++++++++++++++++-- 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 test/timeout.lisp diff --git a/maclina.asd b/maclina.asd index e76ef6a..b983e7a 100644 --- a/maclina.asd +++ b/maclina.asd @@ -116,6 +116,7 @@ (:file "externalize"))) (:file "cleanliness" :depends-on ("suites" "rt" "packages")) (:file "cooperation" :depends-on ("suites" "rt" "packages")) + (:file "timeout" :depends-on ("suites" "rt" "packages")) (:file "long" :depends-on ("suites" "rt" "packages")) (:module "compiler-conditions" :depends-on ("suites" "rt" "packages") diff --git a/test/timeout.lisp b/test/timeout.lisp new file mode 100644 index 0000000..d119376 --- /dev/null +++ b/test/timeout.lisp @@ -0,0 +1,25 @@ +(in-package #:maclina.test) + +;;;; Tests of the timeout mechanism. + +(5am:def-suite timeout :in maclina-cross) +(5am:in-suite timeout) + +(5am:test timeout + (let ((spinner + (ceval '#'(lambda (i) + (block nil + (tagbody + loop + (if (= 0 i) (return)) + (setq i (- i 1)) + (go loop))))))) + (5am:finishes (maclina.vm-cross:with-timeout (1000000) + ;; It is unlikely that any future revisions will be + ;; dumb enough that a loop iteration takes 100k instructions. + (funcall spinner 10))) + (5am:signals maclina.vm-cross:timeout + (maclina.vm-cross:with-timeout (10) + (funcall spinner 1000))) + ;; make sure that without a timeout, we can run as long as we want. + (5am:finishes (funcall spinner 1000)))) diff --git a/vm-cross.lisp b/vm-cross.lisp index 5431802..407b20b 100644 --- a/vm-cross.lisp +++ b/vm-cross.lisp @@ -7,7 +7,8 @@ (:export #:client) (:export #:initialize-vm) (:export #:*trace*) - (:export #:make-variable-access-closures)) + (:export #:make-variable-access-closures) + (:export #:with-timeout #:timeout)) (in-package #:maclina.vm-cross) @@ -45,6 +46,19 @@ (defstruct (cell (:constructor make-cell (value))) value) (defvar *unbound* (vm:make-unbound-marker)) +(defvar *timeout* nil) +(declaim (type (or null (and fixnum unsigned-byte)) *timeout*)) +(defvar *odometer* nil) +(declaim (type (or null (and fixnum unsigned-byte)) *odometer*)) + +(defmacro with-timeout ((n) &body body) + `(let ((*timeout* ,n) (*odometer* 0)) ,@body)) + +(define-condition timeout (error) + ((%timeout :initarg :timeout :reader timeout)) + (:report (lambda (condition stream) + (format stream "VM timeout at ~d steps" (timeout condition))))) + (defstruct dynenv) (defstruct (entry-dynenv (:include dynenv) (:constructor make-entry-dynenv (tag))) @@ -259,7 +273,8 @@ (stack (vm-stack vm)) (ip (vm-pc vm)) (sp (vm-stack-top vm)) - (bp (vm-frame-pointer vm))) + (bp (vm-frame-pointer vm)) + (timeout *timeout*)) (declare (type (simple-array t (*)) stack) (type (and unsigned-byte fixnum) ip sp bp)) (labels ((stack (index) @@ -337,6 +352,9 @@ loop (when (>= ip end) (error "Invalid bytecode: Reached end")) + (when timeout + (when (> (incf *odometer*) timeout) + (error 'timeout :timeout timeout))) (when trace (instruction-trace bytecode constants stack ip bp sp frame-size)) ;; The catch is for NLX. Without NLX, a (go loop) at the From 59429a23e9932f83d6d34134d480e0c09e3f6c23 Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Wed, 3 Jul 2024 16:52:32 -0400 Subject: [PATCH 4/4] Talk about sandboxing and extrinsicl in README --- README.md | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/README.md b/README.md index 25cfdd3..74be998 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,85 @@ The `maclina/vm-cross` subsystem allows Maclina to be used for compiling and run ;;; And of course, the host *READTABLE* and + are unaffected. ``` +## Sandboxing + +A more complete CL experience requires a richer environment. The [Extrinsicl](https://github.com/s-expressionists/Extrinsicl) project can be used to construct such an environment, but more generally you can just fill a Clostrum environment. Extrinsicl can additionally be configured to provide functions like `eval` through Maclina. + +For real sandboxing of untrusted code, you will need a "safe" environment lacking any undesirable operators. What is undesirable depends on your application, but might include, for instance, file I/O. If your environment is well constructed, you don't need to worry about functions that carry out evaluation or introspection in themselves, because they will only operate with respect to your safe environment. + +Another danger of untrusted code is it not halting, which can be a denial of service attack. The cross VM has a `with-timeout` macro that can be used to abort evaluation after executing some number of VM instructions. This covers all evaluations within the cross VM, including indirectly as from calls to VM functions. Note that computations outside of the VM are not tracked, so for example there will be no abort if the untrusted code calls a non-VM function that does not halt. + +Here is an example of a basic sandbox: + +```lisp +(ql:quickload '(:clostrum-basic :extrinsicl :extrinsicl/maclina :maclina)) + +;;; Set up Maclina. +(setf maclina.machine:*client* (make-instance 'maclina.vm-cross:client)) +(maclina.vm-cross:initialize-vm 20000) + +;;; Create the (empty) environment. +(defvar *rte* (make-instance 'clostrum-basic:run-time-environment)) +(defvar *env* (make-instance 'clostrum-basic:compilation-environment + :parent *rte*)) + +;;; Install most of CL. +(extrinsicl:install-cl maclina.machine:*client* *rte*) +(extrinsicl.maclina:install-eval maclina.machine:*client* *rte*) + +;;; Uninstall filesystem access. +(loop for f in '(open directory probe-file ensure-directories-exist truename + file-author file-write-date rename-file delete-file) + do (clostrum:fmakunbound maclina.machine:*client* *rte* f)) + +;;; Also add a trap. +(setf (clostrum:fdefinition maclina.machine:*client* *rte* 'o) + (lambda (&rest args) (apply #'open args))) + +;;; Try it out. +(maclina.compile:eval '(+ 2 7) *env*) ;=> 9 +(defparameter *fib* + (maclina.compile:compile + '(lambda (n) + (loop for a = 0 for b = 1 + repeat n + do (psetf a b b (+ a b)) + finally (return a))) + *rte*)) +(funcall *fib* 37) ;=> big number + +;;; But we can't access the filesystem. +(maclina.compile:eval '(open "/tmp/hello.txt") *env*) +;=> error: UNDEFINED-FUNCTION OPEN + +;;; Tricky stuff is available but doesn't help escape. +(maclina.compile:eval '(eval 'pi) *env*) ;=> pi +(maclina.compile:eval `(funcall ,*fib* 37) *env*) ;=> big number +(maclina.compile:eval '(find-symbol "OPEN") *env*) ;=> OPEN +(maclina.compile:eval '(eval (list (find-symbol "OPEN") "/tmp/hello.txt")) *env*) +;=> error: UNDEFINED-FUNCTION OPEN + +;;; Whoops, we forgot WITH-OPEN-FILE. But that's okay. +(maclina.compile:eval '(with-open-file (s "/tmp/hello.txt")) *env*) +;=> error: UNDEFINED-FUNCTION OPEN + +;;; But the VM can't intercept a function call within a host function. +(maclina.compile:eval '(o "/tmp/hello.txt") *env*) ;=> actually opens + +;;; DoS denied. +(maclina.vm-cross:with-timeout (1000000) + (maclina.compile:eval '(loop) *env*)) +;=> error: TIMEOUT + +;;; Watch out for more exotic DoS outside of the VM, though. +(maclina.vm-cross:with-timeout (100000) + (maclina.compile:compile '(lambda () (progn . #1=(nil . #1#))) *env*)) +; => compiler hangs +(maclina.vm-cross:with-timeout (100000) + (maclina.compile:eval '(typep 17 '#1=(not #1#)) *env*)) +; => hang or stack overflow +``` + # Subsystems Maclina defines a variety of subsystems that can be loaded independently. It's set up this way so that you can, for example, load one of the VM definitions and run bytecode compiled elsewhere, without needing to load any of the compiler's multitudinous dependencies. @@ -109,6 +188,7 @@ Maclina defines a variety of subsystems that can be loaded independently. It's s * `maclina/base` is the base system. Everything depends on `maclina/base`. `maclina/base` defines various shared conditions, the MOP magic that lets bytecode functions be run in a host Lisp,the names of instructions, and the disassembler. * `maclina/compile` turns Lisp forms into bytecode. You need it in order to compile or evaluate forms. But this alone won't let you run bytecode; you'll need one of the VM systems for that. And Lisp compilation frequently involves evaluation, so you'll probably need to load a VM before you can compile much of anything. * `maclina/compile-file` implements the file compiler. It depends on the compiler in `maclina/compile` to do that. +* `maclina/vm-shared` is an internal system containing some code shared by the VM implementations. * `maclina/vm-native` is the "native" implementation of the VM, which is to say that it operates entirely in the host Lisp's normal global environment. This is simple but a bit inflexible. * `maclina/vm-cross` is an implementation of the VM that operates relative to a Clostrum environment. This is what you want to do anything first-class-environment-related. * `maclina/load` loads FASL files created by `maclina/compile-file`. `maclina/load` and one of the VMs is sufficient to load and run FASLs.