Commit 2742d479 authored by MORIOKA Tomohiko's avatar MORIOKA Tomohiko
Browse files

(chise-json-dump-directory): New variable.

(chise-json-make-ja-romaji-spec): Fix problem to treat representations
of conjugations.
(chise-json-other-spec-get-sound): Fixed.
(chise-json-make-char-id): Fixed.
(chise-json-dump-get-path): New function.
(chise-json-char-get-info): New optional variable `without-CCS-spec';
if `without-CCS-spec' is nil, output `unify' block.
(chise-json-dump-char): New function.
(chise-json-dump-ccs): New function.
(chise-json-dump-ucs-basic): New function.
(chise-json-dump-big5-cdp): New function.
(chise-json-api-character-get-info): Look for a pre-made JSON-dump
file in `chise-json-dump-directory' and use it if found.
parent fb0a4d71
......@@ -4,6 +4,8 @@
(require 'ids-find)
(require 'ja-romaji-to-kana)
(defvar chise-json-dump-directory "/usr/local/share/chise-dump-json/")
(setq file-name-coding-system 'utf-8-mcs-er)
(make-coding-system
......@@ -1386,15 +1388,74 @@
rest)))
(defun chise-json-make-ja-romaji-spec (romaji)
(list (cons 'ja-romaji romaji)
(cons 'ja-kana (historical-roman-to-modern-kana-translate-string romaji))
(cons 'ja-kana-zion (historical-roman-to-historical-kana-translate-string romaji))))
(let (romaji-for-kana)
(cond
((string-match "-\\([a-z]\\)\\+" romaji)
(setq romaji-for-kana (list (substring romaji 0 (match-beginning 0))
(concat (match-string 1 romaji)
(substring romaji (match-end 0)))))
)
((string-match "-\\([a-z]+\\)\\+\\([a-z]\\)5" romaji)
(setq romaji-for-kana (list (substring romaji 0 (match-beginning 0))
(concat
(match-string 1 romaji)
(match-string 2 romaji)
"u")
(concat
(match-string 2 romaji)
"a行5\u6BB5")))
)
((string-match "-\\([a-z]+\\)\\+" romaji)
(setq romaji-for-kana (list (substring romaji 0 (match-beginning 0))
(concat
(match-string 1 romaji)
(substring romaji (match-end 0)))))
)
((string-match "-" romaji)
(setq romaji-for-kana (list (substring romaji 0 (match-beginning 0))
(substring romaji (match-end 0))))
)
(t
(setq romaji-for-kana (list romaji))))
(list (cons 'ja-romaji romaji)
(cons 'ja-kana
(concat
(historical-roman-to-modern-kana-translate-string
(car romaji-for-kana))
(if (nth 1 romaji-for-kana)
(concat "\u2010"
(historical-roman-to-modern-kana-translate-string
(nth 1 romaji-for-kana)))
"")
(if (nth 2 romaji-for-kana)
(concat "\u3014"
(japanese-katakana
(historical-roman-to-modern-kana-translate-string
(nth 2 romaji-for-kana)))
"\u3015")
"")))
(cons 'ja-kana-zion
(concat
(historical-roman-to-historical-kana-translate-string
(car romaji-for-kana))
(if (nth 1 romaji-for-kana)
(concat "\u2010"
(historical-roman-to-historical-kana-translate-string
(nth 1 romaji-for-kana)))
"")
(if (nth 2 romaji-for-kana)
(concat "\u3014"
(japanese-katakana
(historical-roman-to-historical-kana-translate-string
(nth 2 romaji-for-kana)))
"\u3015")
""))))))
(defun chise-json-other-spec-get-sound (other-spec)
(let (domain-sound-alist
rest ret domain
kan-list go-list on-list kan go
kan-spec go-spec i len)
kan-spec go-spec i len len-kan len-go)
(dolist (cell other-spec)
(cond
((eq (caar cell) 'sound)
......@@ -1406,16 +1467,20 @@
(cons (cons 'chise:domain/ja/on
(mapvector (lambda (item)
(cdr (assq 'body item)))
(cdr cell)))
(cdr (assq 'body (cdr cell)))))
domain-sound-alist))
(setq kan-list (cdr (assq 'body (cdr cell)))
go-list (cdr (assq 'body (cdr ret)))
on-list nil)
(setq i 0
len (max (length kan-list)(length go-list)))
len-kan (length kan-list)
len-go (length go-list)
len (max len-kan len-go))
(while (< i len)
(setq kan (cdr (assq 'body (aref kan-list i)))
go (cdr (assq 'body (aref go-list i)))
(setq kan (if (< i len-kan)
(cdr (assq 'body (aref kan-list i))))
go (if (< i len-go)
(cdr (assq 'body (aref go-list i))))
i (1+ i))
(setq kan-spec
(if kan
......@@ -1438,9 +1503,9 @@
))
((eq domain 'ja/on/go)
)
((eq domain 'ja/on/conventional)
((memq domain '(ja/on ja/on/conventional ja/kun ja/kun/name))
(setq domain-sound-alist
(cons (cons 'chise:domain/ja/on/conventional
(cons (cons (intern (format "'chise:domain/%s" domain))
(mapvector (lambda (item)
(nconc
(chise-json-make-ja-romaji-spec
......@@ -1490,13 +1555,13 @@
(defun chise-json-make-char-id (ccs code-point)
(let ((ccs-spec (chise-split-ccs-name ccs))
ccs granularity domain)
(setq ccs (pop ccs-spec)
ccs-base granularity domain)
(setq ccs-base (pop ccs-spec)
granularity (pop ccs-spec)
domain (pop ccs-spec))
(format "chise:ccs/%s/%s/%s/%s"
ccs
(chise-ccs-format-code-point ccs code-point "0x")
ccs-base
(chise-ccs-format-code-point ccs-base code-point "0x" ccs)
granularity
(or domain "common"))))
......@@ -2044,8 +2109,58 @@
(chise-json-make-char-ref-object (chise-json-char-get-id char)
(chise-json-char-get-title-info char)))
(defun chise-json-char-get-info (char &optional subnode)
(let ((char-info (chise-json-separate-normalized-spec
(defun chise-json-dump-get-path (id)
(let (len path ccs code ccs-obj dim)
(setq path
(if (string-match "^chise:" id)
(substring id (match-end 0))
id))
(cond
((string-match "^ccs/\\(.*\\)/0x\\([0-9A-F]+\\)/" path)
(setq ccs (match-string 1 path)
code (match-string 2 path))
(setq dim
(if (setq ccs-obj (or (find-charset (intern ccs))
(find-charset (intern (concat "=" ccs)))))
(charset-dimension ccs-obj)
2))
(setq len (length code))
(cond
((eq dim 1)
path)
((eq dim 2)
(setq path (format "ccs/%s/SB0x%02s/%02s/%s"
ccs
(substring code 0 (max (- len 2) 0))
(substring code (max (- len 2) 0) len)
(substring path (match-end 0))))
)
(t
(setq path (format "ccs/%s/ST0x%01s/%02s/%02s/%s"
ccs
(substring code 0 (max (- len 4) 0))
(substring code (max (- len 4) 0) (max (- len 2) 0))
(substring code (max (- len 2) 0) len)
(substring path (match-end 0))))
))
)
((string-match "^ccs/\\(daikanwa\\|mj\\|koseki\\|gt\\|gt-k\\)/\\([0-9]+\\)/" path)
(setq ccs (match-string 1 path)
code (match-string 2 path))
(setq len (length code))
(setq path (format "ccs/%s/SB_%02s/%03s/%s"
ccs
(substring code 0 (max (- len 3) 0))
(substring code (max (- len 3) 0) len)
(substring path (match-end 0))))
)
(t
path))))
(defun chise-json-char-get-info (char &optional subnode without-CCS-spec)
(let ((json-encoding-pretty-print t)
(coding-system-for-write 'utf-8-mcs-er)
(char-info (chise-json-separate-normalized-spec
(chise-json-char-get-normalized-spec char subnode)
subnode))
(title-info (chise-json-char-get-title-info char))
......@@ -2059,6 +2174,7 @@
sound-spec
id
ret i
node-id
anno-id
domain code
kangxi-radical kangxi-strokes total-strokes
......@@ -2097,7 +2213,8 @@
(mapvector (lambda (item)
(chise-json-char-get-info
(cdr (assq 'body item))
'subnode))
'subnode
without-CCS-spec))
ret)
(mapvector (lambda (item)
(chise-json-char-get-ref-info
......@@ -2187,26 +2304,31 @@
dest))
))
)
;; (if CCS-spec
;; (setq dest
;; (cons
;; (cons 'unify
;; (mapcar
;; (lambda (cell)
;; (setq code (cdr (assq 'body (cdr cell))))
;; (setq ret (chise-split-ccs-name (car cell)))
;; (cons (car cell)
;; (list*
;; (cons '@id (chise-json-make-char-id (car cell) code))
;; (cons 'CCS (format "ccs:%s" (car ret)))
;; (cons 'granularity (format "chise:glyph-granularity/%s"
;; (nth 1 ret)))
;; (cons 'domain (format "chise:domain/%s"
;; (or (nth 2 ret) "common")))
;; (cons 'code-point code)
;; (del-alist 'body (cdr cell)))))
;; CCS-spec))
;; dest)))
(if (and (not without-CCS-spec)
CCS-spec)
(setq dest
(cons
(cons 'unify
(mapcar
(lambda (cell)
(setq code (cdr (assq 'body (cdr cell))))
(setq ret (chise-split-ccs-name (car cell)))
(setq node-id (chise-json-make-char-id (car cell) code))
(setq ret
(list*
(cons 'CCS (format "ccs:%s" (car ret)))
(cons 'granularity (format "chise:glyph-granularity/%s"
(nth 1 ret)))
(cons 'domain (format "chise:domain/%s"
(or (nth 2 ret) "common")))
(cons 'code-point code)
(del-alist 'body (cdr cell))))
(cons (car cell)
(cons (cons '@id node-id)
ret))
)
CCS-spec))
dest)))
(if sound-spec
(setq dest
(cons (cons 'sound sound-spec)
......@@ -2270,12 +2392,100 @@
(setq dest
(append (chise-json-make-char-ref-object id title-info)
dest))
;; (vector structure-spec relation-spec dest)
))
dest))
(defun chise-json-dump-char (char &optional ccs code-point force-rewrite)
(let ((json-encoding-pretty-print t)
(coding-system-for-write 'utf-8-mcs-er)
id char-CCS-spec
path-id
root-char root-id root-info path dir file ret)
(unless (setq char-CCS-spec (chise-json-char-get-CCS-spec char))
(cond ((and (setq ret (split-char char))
(memq (char-feature-property (car ret) 'mother) '(ucs =ucs =ucs@JP/hanazono))
(setq ret (encode-char char '=ucs)))
(put-char-attribute char '=ucs ret)
)
(t
(setq char (define-char (list ret)))
))
(setq char-CCS-spec (chise-json-char-get-CCS-spec char)))
(setq id (chise-json-make-char-id (caar char-CCS-spec)(cdar char-CCS-spec)))
(setq path-id (if (and ccs code-point)
(chise-json-make-char-id ccs code-point)
id))
(setq root-char (chise-json-char-get-root char))
(setq path (chise-json-dump-get-path path-id))
(setq dir (expand-file-name path chise-json-dump-directory))
(with-temp-buffer
(unless (file-exists-p dir)
(make-directory dir t))
(setq file (expand-file-name "index.jsonld" dir))
(when (or force-rewrite
(not (file-exists-p file)))
(insert
(json-encode
(list* '(@context . "http://api.chise.org/contexts/v1/chise.jsonld")
(chise-json-char-get-info char))))
(write-region (point-min)(point-max) file))
(when (and (not (eq char root-char))
(setq file (expand-file-name "index_with-root.jsonld" dir))
(or force-rewrite
(not (file-exists-p file))))
(setq root-info (chise-json-char-get-info root-char))
(erase-buffer)
(insert
(json-encode
(list* '(@context . "http://api.chise.org/contexts/v1/chise.jsonld")
(cons '$target id)
root-info)))
(write-region (point-min)(point-max) file)
(setq root-char (chise-json-char-get-root root-char))
(setq root-id (chise-json-char-get-id root-char))
(setq path (chise-json-dump-get-path root-id))
(setq dir (expand-file-name path chise-json-dump-directory))
(unless (file-exists-p dir)
(make-directory dir t))
(setq file (expand-file-name "index.jsonld" dir))
(when (or force-rewrite
(not (file-exists-p file)))
(insert
(json-encode
(list* '(@context . "http://api.chise.org/contexts/v1/chise.jsonld")
root-info)))
(write-region (point-min)(point-max) file))))))
(defun chise-json-dump-ccs (ccs)
(map-char-attribute
(lambda (char val)
;; (chise-json-dump-char char ccs val)
(chise-json-dump-char char)
nil)
ccs))
(defun chise-json-dump-ucs-basic ()
(let ((code #x4E00)
char)
(while (<= code #x9FEA)
(when (setq char (decode-char '=ucs code))
(chise-json-dump-char char))
(setq code (1+ code)))))
(defun chise-json-dump-big5-cdp ()
(map-char-attribute
(lambda (char val)
(if (and (<= #x854B val)(<= val #x8DFE))
;; (chise-json-dump-char char ccs val)
(chise-json-dump-char char))
nil)
'=big5-cdp))
(defun chise-json-api-character-get-info (args)
(let (char id char-CCS-spec
root-char dest ret)
(let ((coding-system-for-read 'utf-8-mcs-er)
char id char-CCS-spec
root-char dest ret
path dir file)
(when (and (setq char (assoc "character" args))
(setq char (cdr char))
(setq char (or (chise-rdf-iri-decode-object char)
......@@ -2291,18 +2501,30 @@
))
(setq char-CCS-spec (chise-json-char-get-CCS-spec char)))
(setq id (chise-json-make-char-id (caar char-CCS-spec)(cdar char-CCS-spec)))
(setq root-char (chise-json-char-get-root char))
(setq dest (chise-json-char-get-info root-char))
(unless (eq char root-char)
(setq dest (cons (cons '$target id)
dest)))
(setq dest
(cons '(@context . "http://api.chise.org/contexts/v1/chise.jsonld")
dest))
(cons (encode-coding-string (json-encode dest) 'utf-8-mcs-er)
"application/ld+json")
(setq path (chise-json-dump-get-path id))
(setq dir (expand-file-name path chise-json-dump-directory))
(cons
(if (or (file-exists-p
(setq file (expand-file-name "index_with-root.jsonld" dir)))
(file-exists-p
(setq file (expand-file-name "index.jsonld" dir))))
(with-temp-buffer
(insert-file-contents-literally file)
(buffer-substring (point-min)(point-max))
;; (encode-coding-string (buffer-substring (point-min)(point-max))
;; 'utf-8-mcs-er)
)
(setq root-char (chise-json-char-get-root char))
(setq dest (chise-json-char-get-info root-char nil 'without-CCS-spec))
(unless (eq char root-char)
(setq dest (cons (cons '$target id)
dest)))
(setq dest
(cons '(@context . "http://api.chise.org/contexts/v1/chise.jsonld")
dest))
(encode-coding-string (json-encode dest) 'utf-8-mcs-er))
"application/ld+json")
)))
(defun chise-batch-character-api ()
(setq debug-on-error t)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment