Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Roll back the statistics simplification: remove the table that the up
-- migration created
DROP TABLE IF EXISTS statistics_documentstatistic;
--;;
-- and give the table that the up migration set aside its original name back
ALTER TABLE statistics_documentstatistic_old
    RENAME TO statistics_documentstatistic;
17 changes: 17 additions & 0 deletions resources/migrations/20210803135039-simplify-statistics.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Set the existing statistics table aside under an `_old` suffix so that its
-- rows stay available and the down migration can restore it
ALTER TABLE statistics_documentstatistic
    RENAME TO statistics_documentstatistic_old;
--;;
-- Statistics table with at most one row per document, which is enforced by
-- the unique key on document_id
CREATE TABLE statistics_documentstatistic (
    id          INT(11)          NOT NULL AUTO_INCREMENT,
    date        DATETIME         NOT NULL,
    document_id INT(11)          NOT NULL,
    total       INT(10) UNSIGNED NOT NULL,
    unknown     INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (id),
    UNIQUE KEY (document_id),
    CONSTRAINT FOREIGN KEY (document_id) REFERENCES documents_document (id)
);
--;;
-- Carry over exactly one row per document from the old table: the row on
-- which the number of unknown words was highest, the lowest id winning a tie.
-- The row is copied whole, so id, date and total all stem from the same
-- measurement as the unknown count.
-- The former SELECT DISTINCT with GROUP BY document_id listed id, date and
-- total without aggregating them. Such a statement is rejected when
-- ONLY_FULL_GROUP_BY is enabled and otherwise takes these three values from
-- an arbitrary row of each group.
-- NOTE(review): assumes that unknown is never NULL in the old table, confirm
-- against the old schema.
INSERT INTO statistics_documentstatistic (id, date, document_id, total, unknown)
SELECT
    s.id,
    s.date,
    s.document_id,
    s.total,
    s.unknown
FROM statistics_documentstatistic_old AS s
WHERE NOT EXISTS (
    -- any other row of the same document that ranks before s
    SELECT 1
    FROM statistics_documentstatistic_old AS o
    WHERE o.document_id = s.document_id
      AND (
          o.unknown > s.unknown
          OR (o.unknown = s.unknown AND o.id < s.id)
      )
);

13 changes: 13 additions & 0 deletions resources/sql/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,19 @@ AND (((:grade IN (0,2)) AND l.contracted IS NULL) OR ((:grade IN (0,1)) AND l.un
ORDER BY untranslated
LIMIT :limit OFFSET :offset

-----------------------------
-- Unknown word statistics --
-----------------------------

-- :name insert-unknown-words-stats :! :n
-- :doc Insert or update the statistical data about the number of words and unknown words for a given `document-id`. The `total` must be given. The number of unknown words is calculated based on the local words for the document. This assumes that all unknown words have been inserted in the local word table and that they have not yet been confirmed, i.e. moved to the global words table.
INSERT INTO statistics_documentstatistic (date, document_id, total, unknown)
VALUES (
    NOW(),
    :document-id,
    :total,
    (SELECT COUNT(*) FROM dictionary_localword WHERE document_id = :document-id)
)
ON DUPLICATE KEY UPDATE
    total = VALUES(total),
    unknown = VALUES(unknown),
    date = VALUES(date)

-----------------------
-- Confirmable words --
-----------------------
Expand Down
12 changes: 12 additions & 0 deletions src/clj/daisyproducer2/words/statistics.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
(ns daisyproducer2.words.statistics
(:require [daisyproducer2.db.core :as db]
[daisyproducer2.documents :as docs]
[daisyproducer2.words.unknown :as unknown]))

(defn put-statistics
  "Persist statistics about the total number of words and the number of
  unknown words for a given document `document-id` to the database.

  The total is the number of items that `unknown/get-new-words` returns
  for the latest version of the document. The number of unknown words is
  not computed here, it is derived by the `insert-unknown-words-stats`
  query."
  [document-id]
  ;; NOTE(review): `docs/get-latest-version` is still called without
  ;; arguments. It presumably needs `document-id`, and its result may have
  ;; to be read or parsed before it can serve as `xml`. Confirm against
  ;; `daisyproducer2.documents`.
  (let [xml (docs/get-latest-version)
        ;; `get-new-words` takes `[xml document-id]` and returns a sequence,
        ;; whereas the query needs a number for `:total`
        total (count (unknown/get-new-words xml document-id))]
    (db/insert-unknown-words-stats {:document-id document-id :total total})))
15 changes: 10 additions & 5 deletions src/clj/daisyproducer2/words/unknown.clj
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,18 @@
tuples (map (fn [w] [w 0 "" document-id]) all-words)]
tuples))

(defn get-new-words
  "Collect the results of `get-names`, `get-places`, `get-homographs` and
  `get-plain` for the given `xml` and `document-id` into one sequence, in
  that order"
  [xml document-id]
  ;; `juxt` invokes the extractors right away and in the listed order,
  ;; `concat` then joins their results lazily
  (let [extract (juxt get-names get-places get-homographs get-plain)]
    (apply concat (extract xml document-id))))

(defn get-words
[xml document-id grade limit offset]
(let [new-words (concat
(get-names xml document-id)
(get-places xml document-id)
(get-homographs xml document-id)
(get-plain xml document-id))]
(let [new-words (get-new-words xml document-id)]
(if (empty? new-words)
[] ; if there are no new words there are no unknown words
(conman/with-transaction [db/*db*]
Expand Down