Dec 13, 2012, 6:27:55 AM (8 years ago)

Support using the "coding" option in a file's file options line (a
line at the start of a text file that contains name:value pairs
separated by semicolons bracketed by -*- sequences) to determine a
file's character encoding. Specifically:

  • OPEN now allows an external-format of :INFERRED; previously, this was shorthand for an external-format whose line-termination was inferred and whose character encoding was based on *DEFAULT-FILE-CHARACTER-ENCODING*. When an input file whose external-format is specified as :INFERRED is opened, its file options are parsed and the value of the "coding" option is used if such an option is found (and if the value is something that CCL supports.) If a supported "coding" option isn't found, *DEFAULT-FILE-CHARACTER-ENCODING* is used as before.
  • In the Cocoa IDE, the Hemlock command "Ensure File Options Line" (bound to Control-Meta-M by default) ensures that the first line in the current buffer is a file options line and fills in some plausible values for the "Mode", "Package", and "Coding" options. The "Process File Options" command (Control-Meta-m) can be used to process the file options line after it's been edited. (The file options line is always processed when the file is first opened; changes to the "coding" option affect how the file will be saved.)

When a Lisp source file is opened in the IDE editor, the following
character encodings are tried in this order until one of them

  • if the "Open ..." panel was used to open the file and an encoding other than "Automatic" - which is now the default - is selected, that encoding is tried.
  • if a "coding" option is found, that encoding is tried.
  • the value of *DEFAULT-FILE-CHARACTER-ENCODING* is tried.
  • iso-8859-1 is tried. All files can be decoded in iso-8859-1.

This is all supposed to be what Emacs does and I think that it's
pretty close in practice.

A file that caused problems for Paul Krueger a few days ago
because its encoding (ISO-8859-1) wasn't guessed correctly
now has an explicit "coding" option and serves as a test case.

1 edited


  • trunk/source/cocoa-ide/cocoa-editor.lisp

    r15502 r15536  
    18511851    (when doc
    18521852      (document-invalidate-modeline doc))))
     1854;;; Process a file's "coding" file-option.
     1855(defun hemlock-ext:set-buffer-external-format (buffer string)
     1856  (let* ((ef (ccl::process-file-coding-option string (or (hi::buffer-line-termination buffer) :unix)))
     1857         (encoding-val (nsstring-encoding-for-external-format ef)))
     1858    (cond (encoding-val
     1859           (setf (hi::buffer-line-termination buffer)
     1860                 (external-format-line-termination ef))
     1861           (let* ((doc (hi::buffer-document buffer)))
     1862             (when doc
     1863               (with-slots (encoding) doc
     1864                 (setq encoding encoding-val))))
     1865           (hemlock-ext:invalidate-modeline buffer))
     1866          (t
     1867           (hi:loud-message "Can't parse coding option ~a." string)))))
    18541870(def-cocoa-default *text-pane-margin-width* :float 0.0f0 "width of indented margin around text pane")
    23372353                               :crlf
    23382354                               :cr)
    2339                              :lf))
     2355                             :unix))
    23402356         (hemlock-string (case line-termination
    23412357                           (:crlf (remove #\return string))
    26662682      (hemlock-view frame))))
     2685(defun nsstring-encoding-for-character-encoding-name (name)
     2686  (let* ((string (string name))
     2687         (len (length string)))
     2688    (with-cstrs ((cstr string))
     2689      (with-nsstr (nsstr cstr len)
     2690        (let* ((cf (#_CFStringConvertIANACharSetNameToEncoding nsstr)))
     2691          (if (= cf #$kCFStringEncodingInvalidId)
     2692            (setq cf (#_CFStringGetSystemEncoding)))
     2693          (let* ((ns (#_CFStringConvertEncodingToNSStringEncoding cf)))
     2694            (if (= ns #$kCFStringEncodingInvalidId)
     2695              (#/defaultCStringEncoding ns:ns-string)
     2696              ns)))))))
     2698(defun nsstring-encoding-for-external-format (ef)
     2699  (and ef (nsstring-encoding-for-character-encoding-name
     2700           (ccl:external-format-character-encoding ef))))
    26682702;;; Map *default-file-character-encoding* to an :<NSS>tring<E>ncoding
    26692703(defun get-default-encoding ()
    26722706    (when (and (typep file-encoding 'keyword)
    26732707               (lookup-character-encoding file-encoding))
    2674       (let* ((string (string file-encoding))
    2675              (len (length string)))
    2676         (with-cstrs ((cstr string))
    2677           (with-nsstr (nsstr cstr len)
    2678             (let* ((cf (#_CFStringConvertIANACharSetNameToEncoding nsstr)))
    2679               (if (= cf #$kCFStringEncodingInvalidId)
    2680                 (setq cf (#_CFStringGetSystemEncoding)))
    2681               (let* ((ns (#_CFStringConvertEncodingToNSStringEncoding cf)))
    2682                 (if (= ns #$kCFStringEncodingInvalidId)
    2683                   (#/defaultCStringEncoding ns:ns-string)
    2684                   ns)))))))))
     2708      (nsstring-encoding-for-character-encoding-name file-encoding))))
    26862710(defclass hemlock-document-controller (ns:ns-document-controller)
    28962920    buffer))
     2922;;; Try to read the URL's contents into an NSString which can be
     2923;;; used to initialize the document's Hemlock buffer and related
     2924;;; data structures.  First, try to use the encoding specified
     2925;;; in the last call to the document controller's "open" panel;
     2926;;; if that wasn't specified (was 0, "automatic") or if the string
     2927;;; couldn't be initialized in that encoding, try to use the
     2928;;; encoding specified in the "coding:" file option if that's present.
     2929;;; If that wasn't specified or fails, fall back to the default
     2930;;; encoding (based on CCL:*DEFAULT-FILE-CHARACTER-ENCODING*), and
     2931;;; if that fails, try using :iso-8859-1 (which should always win
     2932;;; but which may misinterpret some characters.)
     2933;;; We should only lose because of a filesystem or permissions
     2934;;; problem or because of a severe low-memory condition or something
     2935;;; equally catastrophic.
     2936;;; We should be careful to zero out the encoding from the last call
     2937;;; to the "open" panel so that leftover value doesn't affect anything
     2938;;; but the next call to this method, and if an encoding selected
     2939;;; explicitly (via the "open" panel or the file-options line) didn't
     2940;;; work, it'd be nice to (somehow) let the user know that.
     2941;;; Whatever encoding works here is remembered as the document's
     2942;;; encoding; that may be overridden when the file-options are parsed.
    28982943(objc:defmethod (#/readFromURL:ofType:error: :<BOOL>)
    28992944    ((self hemlock-editor-document) url type (perror (:* :id)))
    29002945  (declare (ignorable type))
    29012946  (with-callback-context "readFromURL"
    2902     (rlet ((pused-encoding :<NSS>tring<E>ncoding 0))
    2903       (let* ((pathname
    2904               (lisp-string-from-nsstring
    2905                (if (#/isFileURL url)
    2906                  (#/path url)
    2907                  (#/absoluteString url))))
    2908              (buffer (or (hemlock-buffer self)
    2909                          (make-buffer-for-document self pathname)))
    2910              (selected-encoding (slot-value (#/sharedDocumentController (find-class 'hemlock-document-controller)) 'last-encoding))
    2911              (string
    2912               (if (zerop selected-encoding)
    2913                 (#/stringWithContentsOfURL:usedEncoding:error:
    2914                  ns:ns-string
    2915                  url
    2916                  pused-encoding
    2917                  perror)
    2918                 +null-ptr+)))
    2920         (if (%null-ptr-p string)
    2921           (progn
    2922             (if (zerop selected-encoding)
    2923               (setq selected-encoding (or (get-default-encoding) #$NSISOLatin1StringEncoding)))
    2924             (setq string (#/stringWithContentsOfURL:encoding:error:
    2925                           ns:ns-string
    2926                           url
    2927                           selected-encoding
    2928                           perror)))
    2929           (setq selected-encoding (pref pused-encoding :<NSS>tring<E>ncoding)))
     2947    (let* ((data (#/dataWithContentsOfURL:options:error:
     2948                  ns:ns-data url 0 perror))
     2949           (bytes (#/bytes data))
     2950           (length (#/length data))
     2951           (pathname
     2952            (lisp-string-from-nsstring
     2953             (if (#/isFileURL url)
     2954                   (#/path url)
     2955               (#/absoluteString url))))
     2956           (buffer (or (hemlock-buffer self)
     2957                       (make-buffer-for-document self pathname)))
     2958           (document-controller (#/sharedDocumentController (find-class 'hemlock-document-controller)))
     2959           (string +null-ptr+))
     2960      (flet ((try-encoding (encoding)
     2961               (setq string
     2962                     (if (or (null encoding)
     2963                             (zerop encoding))
     2964                       +null-ptr+
     2965                       (make-instance ns:ns-string
     2966                                      :with-bytes-no-copy bytes
     2967                                      :length length
     2968                                      :encoding encoding
     2969                                      :free-when-done nil)))
     2970               (unless (%null-ptr-p string)
     2971                 (setf (slot-value self 'encoding) encoding)
     2972                 t)))
     2973        (unless (try-encoding (with-slots (last-encoding) document-controller
     2974                                (prog1 last-encoding
     2975                                  (setq last-encoding 0))))
     2976          (unless (try-encoding (nsstring-encoding-for-external-format (ccl::external-format-from-octet-buffer bytes length)))
     2977            (unless (try-encoding (get-default-encoding))
     2978              (try-encoding #$NSISOLatin1StringEncoding))))
    29302979        (unless (%null-ptr-p string)
    2931           (with-slots (encoding) self (setq encoding selected-encoding))
    29332980          ;; ** TODO: Argh.  How about we just let hemlock insert it.
    29342981          (let* ((textstorage (slot-value self 'textstorage))
    29482995                 (hi::note-modeline-change buffer)
    29492996                 (setf (hi::buffer-modified buffer) nil))))
    2950           t)))))
     2997            t)))))
    30143061(objc:defmethod (#/prepareSavePanel: :<BOOL>) ((self hemlock-editor-document)
    30153062                                               panel)
    3016   (with-slots (encoding) self
    3017     (let* ((popup (build-encodings-popup (#/sharedDocumentController hemlock-document-controller) encoding)))
     3063  (let* ((popup (build-encodings-popup (#/sharedDocumentController hemlock-document-controller))))
    30183064      (#/setAction: popup (@selector #/noteEncodingChange:))
    30193065      (#/setTarget: popup self)
    3020       (#/setAccessoryView: panel popup)))
     3066      (#/setAccessoryView: panel popup))
    30213067  (#/setExtensionHidden: panel nil)
    30223068  (#/setCanSelectHiddenExtension: panel nil)
    33733419;;; user isn't interested in.)
    33743420(defmethod build-encodings-popup ((self hemlock-document-controller)
    3375                                   &optional (preferred-encoding (get-default-encoding)))
     3421                                  &optional preferred-encoding)
     3422  (declare (ignorable preferred-encoding))
    33763423  (let* ((id-list (supported-nsstring-encodings))
    33773424         (popup (make-instance 'ns:ns-pop-up-button)))
    33823429      (#/addItemWithTitle: popup (nsstring-for-nsstring-encoding id))
    33833430      (#/setTag: (#/lastItem popup) (nsstring-encoding-to-nsinteger id)))
    3384     (when preferred-encoding
    3385       (#/selectItemWithTag: popup (nsstring-encoding-to-nsinteger preferred-encoding)))
     3431    (#/selectItemWithTag: popup (if preferred-encoding (nsstring-encoding-to-nsinteger preferred-encoding) 0))
    33863432    (#/sizeToFit popup)
    33873433    popup))
Note: See TracChangeset for help on using the changeset viewer.