;; Based on select-safe-coding-system in mule-cmds.el of Emacs 20.7.

(require 'unicode)

(defun charref-regeon (from to &optional default-coding-system)
  "Convert characters of unsafe coding-system into XML character references.

Scan the region between FROM and TO for non-ASCII characters whose
charset is reported as unsafe for DEFAULT-CODING-SYSTEM (which
defaults to `buffer-file-coding-system') and replace each of them
with a decimal character reference of the form \"&#NNNN;\", where
NNNN is the value returned by `char-to-ucs'.  Characters for which
`char-to-ucs' does not return an integer are left untouched.

FROM and TO may be integers or markers, in either order.  If FROM is
a string, only the safety check is performed and nothing is modified.

Return DEFAULT-CODING-SYSTEM when no conversion is attempted (the
text is already safe for it, multibyte characters are disabled, or
the coding system is `no-conversion'); otherwise return nil."
  (interactive "r")
  (or default-coding-system
      (setq default-coding-system buffer-file-coding-system))
  (let* ((charsets (if (stringp from)
                       (find-charset-string from)
                     (find-charset-region from to)))
         (safe-coding-systems (find-coding-systems-for-charsets charsets)))
    (if (or (not enable-multibyte-characters)
            (eq (car safe-coding-systems) 'undecided)
            (eq default-coding-system 'no-conversion)
            (and default-coding-system
                 (memq (coding-system-base default-coding-system)
                       safe-coding-systems)))
        ;; Nothing to do: every character can already be encoded.
        default-coding-system
      ;; Alist of (CHARSET COUNT CHAR ...) for the charsets found in
      ;; the text that are not listed as safe for the coding system.
      (let ((non-safe-chars
             (find-multibyte-characters
              from to 3
              (and default-coding-system
                   (coding-system-get default-coding-system
                                      'safe-charsets)))))
        (when (integer-or-marker-p from)
          (let ((start (min from to))
                ;; Plain integer bound, adjusted by hand after every
                ;; replacement so the scan never leaves the region.
                (end (max from to)))
            (save-excursion
              (goto-char start)
              (while (re-search-forward "[^\000-\177]" end t)
                (let ((char (preceding-char)))
                  (when (assq (char-charset char) non-safe-chars)
                    (let ((ucs (char-to-ucs char)))
                      ;; Without a UCS code point there is no valid
                      ;; reference to emit; keep the character as is.
                      (when (integerp ucs)
                        (let ((ref (format "&#%d;" ucs)))
                          (delete-char -1)
                          (insert ref)
                          ;; One character removed, (length ref)
                          ;; inserted: the region grew by the
                          ;; difference, not by the full length.
                          (setq end (+ end (1- (length ref)))))))))))))
        ;; The conversion path has no meaningful value to return.
        nil))))