Commit f9421834 authored by MORIOKA Tomohiko's avatar MORIOKA Tomohiko
Browse files

New file.

parent afb32e0b
;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: CHISE, RDF, Turtle, WWW
;; This file is part of CHISET (CHISE/Turtle).
;; XEmacs CHISE is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or (at
;; your option) any later version.
;; XEmacs CHISE is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with XEmacs CHISE; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;; Code:
(defun decode-uri-string (string &optional coding-system)
(if (> (length string) 0)
(let ((i 0)
dest)
(setq string
(mapconcat (lambda (char)
(if (eq char ?+)
" "
(char-to-string char)))
string ""))
(while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
(setq dest (concat dest
(substring string i (match-beginning 0))
(char-to-string
(int-char
(string-to-int (match-string 1 string) 16))))
i (match-end 0)))
(decode-coding-string
(concat dest (substring string i))
coding-system))))
;;; @ URI representation
;;;
(defun est-uri-decode-feature-name-body (uri-feature)
(let ((len (length uri-feature))
(i 0)
ch dest)
(while (< i len)
(setq dest
(concat
dest
(if (eq (aref uri-feature i) ?\.)
(if (and (< (+ i 2) len)
(eq (aref uri-feature (+ i 2)) ?\.))
(prog1
(cond
((eq (setq ch (aref uri-feature (1+ i))) ?\.)
"/")
((eq ch ?-)
"*")
(t
(substring uri-feature i (+ i 3))
))
(setq i (+ i 3)))
(setq i (1+ i))
".")
(prog1
(char-to-string (aref uri-feature i))
(setq i (1+ i)))))))
dest))
(defun est-uri-encode-feature-name-body (feature)
(mapconcat (lambda (c)
(cond ((eq c ?*)
".-.")
((eq c ?/)
"...")
(t (char-to-string c))))
feature ""))
(defun www-uri-decode-feature-name (uri-feature)
(let (feature)
(setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
(cond
((string-match "^from\\." uri-feature)
(intern (format "<-%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^to\\." uri-feature)
(intern (format "->%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^rep\\." uri-feature)
(intern (format "=%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^rep[2i]\\." uri-feature)
(intern (format "===%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^g\\." uri-feature)
(intern (format "=>>%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^g[i2]\\." uri-feature)
(intern (format "==%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^gi\\([0-9]+\\)\\." uri-feature)
(intern (format "=>>%s%s"
(make-string (string-to-int
(match-string 1 uri-feature))
?>)
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^o\\." uri-feature)
(intern (format "=+>%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^a\\." uri-feature)
(intern (format "=>%s"
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((string-match "^a\\([0-9]+\\)\\." uri-feature)
(intern (format "%s>%s"
(make-string (string-to-int
(match-string 1 uri-feature))
?=)
(est-uri-decode-feature-name-body
(substring uri-feature (match-end 0)))))
)
((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
(setq feature (intern (format "=>%s" uri-feature)))
(find-charset feature))
feature)
((and (setq feature (intern (format "=>>%s" uri-feature)))
(find-charset feature))
feature)
((and (setq feature (intern (format "=>>>%s" uri-feature)))
(find-charset feature))
feature)
((and (setq feature (intern (format "=%s" uri-feature)))
(find-charset feature))
feature)
(t (intern uri-feature)))))
(defun www-uri-encode-feature-name (feature-name)
(setq feature-name (symbol-name feature-name))
(cond
((string-match "^=\\+>\\([^=>]+\\)" feature-name)
(concat "o."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^=\\([^=>]+\\)" feature-name)
(concat "rep."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^==\\([^=>]+\\)" feature-name)
(concat "g2."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^===\\([^=>]+\\)" feature-name)
(concat "repi."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^=>>\\([^=>]+\\)" feature-name)
(concat "g."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^=>>>\\([^=>]+\\)" feature-name)
(concat "gi."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^=>>\\(>+\\)" feature-name)
(format "gi%d.%s"
(length (match-string 1 feature-name))
(est-uri-encode-feature-name-body
(substring feature-name (match-end 1))))
)
((string-match "^=>\\([^=>]+\\)" feature-name)
(concat "a."
(est-uri-encode-feature-name-body
(substring feature-name (match-beginning 1))))
)
((string-match "^\\(=+\\)>" feature-name)
(format "a%d.%s"
(length (match-string 1 feature-name))
(est-uri-encode-feature-name-body
(substring feature-name (match-end 0))))
)
((string-match "^->" feature-name)
(concat "to."
(est-uri-encode-feature-name-body
(substring feature-name (match-end 0))))
)
((string-match "^<-" feature-name)
(concat "from."
(est-uri-encode-feature-name-body
(substring feature-name (match-end 0))))
)
(t (est-uri-encode-feature-name-body feature-name))))
(defvar chise-turtle-ccs-prefix-alist nil)
(defun chise-turtle-uri-decode-feature-name (uri-feature)
(cond ((string= "a.ucs" uri-feature)
'=ucs)
((string= "a.big5" uri-feature)
'=big5)
(t
(www-uri-decode-feature-name uri-feature))))
(defun chise-turtle-uri-encode-ccs-name (feature-name)
(cond
((eq '=ucs feature-name)
"a.ucs")
((eq '=big5 feature-name)
"a.big5")
((eq '==>ucs@bucs feature-name)
"bucs")
(t
(mapconcat (lambda (c)
(cond
((eq c ?@)
"_")
((eq c ?+)
"._.")
((eq c ?=)
".:.")
((eq c ?|)
"._cmp_.")
(t
(char-to-string c))))
(www-uri-encode-feature-name feature-name)
""))))
(defun charset-code-point-format-spec (ccs)
(cond ((memq ccs '(=ucs))
"0x%04X")
(t
(let ((ccs-name (symbol-name ccs)))
(cond
((string-match
"\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
ccs-name)
"%04d")
((string-match
"\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
ccs-name)
"%05d")
((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
"%06d")
((string-match "hanyo-denshi/tk" ccs-name)
"%08d")
(t
"0x%X"))))))
(defun chise-turtle-format-ccs-code-point (ccs code-point)
(let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
(unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
(setq chise-turtle-ccs-prefix-alist
(cons (cons ccs-uri ccs)
chise-turtle-ccs-prefix-alist)))
(format "%s:%s"
ccs-uri
(format (charset-code-point-format-spec ccs)
code-point))))
(defun chise-turtle-encode-char (object)
(let (spec cell dest
ccs ret ret2)
(if (setq ret (encode-char object '=ucs))
(chise-turtle-format-ccs-code-point '=ucs ret)
(setq spec (char-attribute-alist object))
(while (and spec
(setq cell (pop spec)))
(if (and (find-charset (car cell))
(setq ret (cdr cell)))
(setq dest (cons cell dest))))
(setq ret (car (sort dest (lambda (a b)
(char-attribute-name< (car a)(car b)))))
ccs (car ret)
ret (cdr ret))
(cond (ret
(chise-turtle-format-ccs-code-point ccs ret)
)
((and (setq ccs (car (split-char object)))
(setq ret (encode-char object ccs)))
(chise-turtle-format-ccs-code-point ccs ret)
)
((setq ret (get-char-attribute object 'ideographic-combination))
(format "ideocomb:%s"
(mapconcat (lambda (cell)
(cond ((characterp cell)
(char-to-string cell)
)
((setq ret2 (find-char cell))
(char-to-string ret2)
)
(t
(format "%S" cell)
)))
ret ""))
)
(t
(format "system-char-id:0x%X"
(encode-char object 'system-char-id))
)))))
;;; @ end
;;;
(provide 'chiset-common)
;;; chiset-common.el ends here
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment