utils.cljs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. (ns microtables-frontend.utils
  2. (:require
  3. ["mathjs" :as mathjs]))
  4. ; to add an npm package to shadow-cljs:
  5. ; https://clojureverse.org/t/guide-on-how-to-use-import-npm-modules-packages-in-clojurescript/2298
  6. ; https://shadow-cljs.github.io/docs/UsersGuide.html#npm
  7. (defn highest-col
  8. "Return the highest column (letter) for which there is a non-empty cell"
  9. [data]
  10. ; choose the "max" (alphabetical order) value among the longest keys
  11. (apply max (val (apply max-key key (group-by #(.-length %) (keys data))))))
  12. (defn highest-row
  13. "Return the highest row (number) for which there is a non-empty cell"
  14. [data]
  15. ; get all the row keys from all the column objects (and flatten), then pick the max
  16. (apply max (flatten (map keys (vals data)))))
  17. (defn increment-letter-code [s]
  18. (let [l (last s)]
  19. (cond
  20. (empty? s) [65]
  21. (= l 90) (conj (increment-letter-code (subvec s 0 (dec (count s)))) 65)
  22. :else (conj (subvec s 0 (dec (count s))) (inc l)))))
  23. (defn next-letter [lc]
  24. (apply str (map char (increment-letter-code (mapv #(.charCodeAt % 0) lc)))))
  25. (def col-letters (iterate next-letter "A"))
  26. (defn order-two-cols
  27. "Accepts two column names (letters) and returns them in order."
  28. [col1 col2]
  29. (cond
  30. (> (.-length col1) (.-length col2)) [col2 col1]
  31. (> (.-length col2) (.-length col1)) [col1 col2]
  32. (= (max col1 col2) col1) [col2 col1]
  33. :else [col1 col2]))
  34. ; the order goes top to bottom, then left to right - that makes the most sense to me
  35. ; I don't know why a different order would be important, or even in what situation order is important at all
  36. (defn parse-range
  37. "Converts a range in \"A1:B2\" notation to a comma-separated list of cells: \"A1,A2,B1,B2\"."
  38. [range-string]
  39. (let [col1 (second (re-find #"\(\s*([A-Z]+)" range-string))
  40. col2 (second (re-find #":\s*([A-Z]+)" range-string))
  41. row1 (.parseInt js/window (second (re-find #"([0-9]+)\s*:" range-string)))
  42. row2 (.parseInt js/window (second (re-find #"([0-9]+)\s*\)" range-string)))
  43. [start-col end-col] (order-two-cols col1 col2)
  44. start-row (min row1 row2)
  45. end-row (max row1 row2)]
  46. (for [col (take-while #(not= (next-letter end-col) %) (iterate next-letter start-col))
  47. row (range start-row (inc end-row))]
  48. {:col col :row row})))
  49. (def range->commalist
  50. "Converts a range in \"A1:B2\" notation to a comma-separated list of cells: \"A1,A2,B1,B2\"."
  51. (memoize (fn [range-string]
  52. (let [cell-list (parse-range range-string)
  53. strings (map #(str (:col %) (:row %)) cell-list)]
  54. (str "(" (clojure.string/join "," strings) ")")))))
  55. (def replace-ranges-in-expression
  56. "Receives an expression string, and replaces all ranges in colon notation (\"A1:B2\") into a comma-separated list of cells (\"A1,A2,B1,B2\")."
  57. (memoize (fn [expression]
  58. (clojure.string/replace expression #"\(\s*[A-Z]+[0-9]+\s*:\s*[A-Z]+[0-9]+\s*\)" parse-range))))
  59. (defn formula?
  60. "Determines if a value is a fomula. If it is, it returns it (without the leading equals sign. If not, it returns nil."
  61. [value]
  62. (if (= (first value) "=")
  63. (subs value 1)
  64. nil))
  65. (def parse-variables (memoize (fn [expression]
  66. (as-> (js->clj (.parse mathjs (replace-ranges-in-expression expression))) $
  67. (.filter $ #(true? (.-isSymbolNode %)))
  68. (map #(.-name %) $)
  69. (map #(.toUpperCase %) $)
  70. (filter #(re-matches #"[A-Z]+[0-9]+" %) $)))))
  71. (def str->rc (memoize (fn [s]
  72. (let [c (re-find #"^[A-Z]+" s)
  73. r (.parseInt js/window (re-find #"[0-9]+$" s))]
  74. {:row r :col c}))))
  75. ; leave in the :inbound references, since they probably have not have changed
  76. (defn add-references
  77. "Parses the expression in the value of a datum, and adds refs as necessary"
  78. [datum]
  79. (let [formula (formula? (:value datum))]
  80. (if formula
  81. (let [vars (parse-variables formula)
  82. refs (map str->rc vars)]
  83. (-> datum
  84. (assoc :refs refs)
  85. (dissoc :error)))
  86. (-> datum
  87. (dissoc :refs)
  88. (dissoc :display)
  89. (dissoc :error)))))
  90. ; the references in the data are a set of disconnected, doubly-linked trees
  91. ;TODO: rather than denotify all, then re-notify all, maybe use a diff? maybe on small scales it's not worth it?
  92. (defn denotify-references
  93. "Remove references in all cells formerly referenced by this cell"
  94. [data origin refs]
  95. (if (empty? refs)
  96. data
  97. (let [target (first refs)
  98. de-notified (update-in data [(:col target) (:row target) :inbound] (partial filter #(not= % origin)))]
  99. (recur de-notified origin (rest refs)))))
  100. (defn notify-references
  101. "Update references in all cells referenced by this cell"
  102. [data origin refs]
  103. (if (empty? refs)
  104. data
  105. (let [target (first refs)
  106. notified (update-in data [(:col target) (:row target) :inbound] conj origin)]
  107. (recur notified origin (rest refs)))))
  108. (defn create-all-references
  109. "Starting from a clean slate, add in all references. This wipes any references that may have been present."
  110. [data]
  111. (reduce-kv
  112. (fn [columns c curr-column]
  113. (assoc columns c (reduce-kv
  114. (fn [rows r datum]
  115. (assoc rows r (add-references (dissoc (dissoc datum :refs) :inbound))))
  116. {}
  117. curr-column)))
  118. {}
  119. data))
  120. ;TODO: re-write create-all-references to use walk-modify-data instead
  121. (defn walk-modify-data
  122. "Walks through the data map and updates each datum by applying f (a function accepting col, row, datum)."
  123. [data f]
  124. (reduce-kv
  125. (fn [columns c curr-column]
  126. (assoc columns c (reduce-kv
  127. (fn [rows r datum]
  128. (assoc rows r (f c r datum)))
  129. {}
  130. curr-column)))
  131. {}
  132. data))
  133. (defn walk-get-refs
  134. "Walks through the data map and returns a list of :col/:row maps for each cell which satisfies the predicate (a function accepting col, row, datum)."
  135. [data pred]
  136. (reduce-kv (fn [l c column] (concat l (map (fn [[r _]] {:col c :row r}) (filter (fn [[r datum]] (pred c r datum)) column)))) '() data))
  137. ; proposed alternative (the beginning of one) to walk-get-refs
  138. ;(defn col-map? [m] (and (map? m) (every? #(and (string? %) (re-matches #"[A-Z]+" %)) (keys m))))
  139. ;(defn row-map? [m] (and (map? m) (every? #(and (integer? %) (pos? %)) (keys m))))
  140. ;(defn get-all-cells [data] (filter #(not (or (col-map? %) (row-map? %))) (tree-seq #(and (map? %) (or (col-map? %) (row-map? %))) vals data)))
  141. (defn create-all-back-references
  142. "Assuming all references have been added, insert all back references."
  143. [data]
  144. (loop [data data
  145. formulas (walk-get-refs data #(formula? (:value %3)))]
  146. (if (empty? formulas)
  147. data
  148. (let [origin (first formulas)
  149. refs (get-in data [(:col origin) (:row origin) :refs])
  150. updated-one (notify-references data origin refs)]
  151. (recur updated-one (rest formulas))))))
  152. (defn set-dirty-flags
  153. "Sets the target cell to \"dirty\" and recursively repeat with its back-references all the way up. Returns the new data set."
  154. ([data c r]
  155. (set-dirty-flags data (list {:col c :row r})))
  156. ([data queue]
  157. (if (empty? queue)
  158. data
  159. (let [cur (first queue)
  160. c (:col cur)
  161. r (:row cur)
  162. datum (get-in data [c r])]
  163. (if (true? (:dirty datum))
  164. (recur data (rest queue))
  165. (let [new-data (assoc-in data [c r :dirty] true)
  166. new-queue (concat (rest queue) (:inbound datum))]
  167. (recur new-data new-queue)))))))
  168. (defn change-datum-value
  169. "Modify the value of a datum in the table, and update all applicable references"
  170. [data c r value]
  171. (let [datum (get-in data [c r])
  172. updated (assoc datum :value value)]
  173. (-> data
  174. (assoc-in [c r :value] value)
  175. (set-dirty-flags c r))))
  176. (defn reset-references
  177. "If there has been a change to which cells are referenced by this cell, then change the necessary back-references to this cell."
  178. [data c r]
  179. (let [datum (get-in data [c r])
  180. parsed (add-references datum)]
  181. (if (= (:refs datum) (:refs parsed))
  182. data
  183. (-> data
  184. (assoc-in [c r] parsed)
  185. (denotify-references {:col c :row r} (:refs datum))
  186. (notify-references {:col c :row r} (:refs parsed))))))
  187. (defn remove-valueless-range-elements
  188. "Remove nil values specifically from ranges (to solve issues with some functions like average)."
  189. [variables var-list]
  190. (println "remove-valueless-range-elements" variables var-list (first var-list))
  191. (let [l (clojure.string/split (clojure.string/replace (first var-list) #"[()]" "") #",")
  192. has-values (filter #(not (nil? (variables %))) l)]
  193. (str "(" (clojure.string/join "," has-values) ")")))
  194. (defn preprocess-expression
  195. "Handle range cases, rename certain functions (to work with math.js), prepare expression and variables for processing."
  196. [expression variables]
  197. (let [renamed-expression (clojure.string/replace expression #"\baverage\(" "mean(")
  198. new-expression (clojure.string/replace renamed-expression #"\(([A-Z]+[0-9]+,)*[A-Z]+[0-9]+\)" (partial remove-valueless-range-elements variables))
  199. new-variables (reduce-kv #(assoc %1 %2 (if (nil? %3) "0" %3)) {} variables)]
  200. (println "PREPROCESS" {:expression new-expression :variables new-variables})
  201. {:expression new-expression
  202. :variables new-variables}))
  203. (def evaluate-expression
  204. "Convert (via mathjs) an expression string to a final answer (also a string). A map of variables must also be provided. If there is an error, it will return :calc-error."
  205. (memoize (fn [expression variables]
  206. (let [range-replaced (replace-ranges-in-expression expression)
  207. {ready-expression :expression ready-variables :variables} (preprocess-expression range-replaced variables)]
  208. (try
  209. (.evaluate mathjs ready-expression (clj->js ready-variables))
  210. (catch js/Error e
  211. (println "mathjs evaluation error" (.-message e) e)
  212. :calc-error))))))
  213. ;TODO: deal with lowercase cell references
  214. ;TODO: memoize dynamically? probably not worth memoizing directly, and could take up too much memory over time
  215. ; https://stackoverflow.com/a/13123571/8172807
  216. (defn find-cycle
  217. "Accepts the data and a datum, and peforms a depth-first search to find reference cycles, following back-references."
  218. ([data c r] (find-cycle data c r #{}))
  219. ([data c r ancest]
  220. (let [datum (get-in data [c r])
  221. current {:col c :row r}
  222. this-and-above (conj ancest current)
  223. inbound (:inbound datum)
  224. found-repeat (not (empty? (clojure.set/intersection this-and-above (set inbound))))]
  225. (if found-repeat
  226. :cycle-error
  227. (some #(find-cycle data (:col %) (:row %) this-and-above) inbound)))))
  228. (defn gather-variables-and-evaluate-cell
  229. "Assumes that all the cell's immediate references have been resolved. Collects the final values from them, then evaluates the current cell's expression. Returns the new data map."
  230. [data c r]
  231. (let [datum (dissoc (dissoc (get-in data [c r]) :dirty) :display) ; get rid of the dirty flag right away (it must be included with the returned data to have effect)
  232. refs (:refs datum)
  233. value (:value datum)
  234. formula (formula? value)
  235. resolved-refs (map #(merge % (get-in data [(:col %) (:row %)])) refs)
  236. evaluated-refs (map #(if (formula? (:value %)) (:display %) (:value %)) resolved-refs)
  237. invalid-refs (some nil? resolved-refs)
  238. dirty-refs (some :dirty resolved-refs)
  239. error-refs (some #(= (:display %) :error) resolved-refs)
  240. ;unevaluated-refs (some nil? evaluated-refs)
  241. cycle-refs (some #(= (:display %) :cycle-error) resolved-refs)
  242. disqualified? (or invalid-refs dirty-refs error-refs)]
  243. (cond
  244. (not formula) (assoc-in data [c r] datum) ; if it's not a formula, then return as is (with the dirty flag removed)
  245. cycle-refs (-> data ; if one of its references has a reference cycle, then this one is "poisoned" as well
  246. (assoc-in [c r] datum)
  247. (assoc-in [c r :display] :cycle-error))
  248. ;unevaluated-refs (assoc-in data [c r :display] :insufficient-data) ; do not un-mark as "dirty", since it has not been evaluated yet
  249. disqualified? (-> data ; some other error is present
  250. (assoc-in [c r] datum)
  251. (assoc-in [c r :display] :error))
  252. (empty? refs) (-> data
  253. (assoc-in [c r] datum)
  254. (assoc-in [c r :display] (evaluate-expression (subs value 1) {})))
  255. :else (let [variables (zipmap (map #(str (:col %) (:row %)) refs) evaluated-refs)
  256. evaluated-value (evaluate-expression (subs value 1) variables)
  257. new-datum (assoc datum :display evaluated-value)]
  258. (assoc-in data [c r] new-datum)))))
  259. ; THE NEW EVALUATE FUNCTION
  260. ; - check for cycles in the back references, starting from the target cell (if any, use another function to mark it and its back references with :cycle-error and remove :dirty)
  261. ; - if any of the forward references are dirty, mark the cell (and recurse up) with an error (and set a TODO to think about this further)
  262. ; - evaluate (using forward references if necessary)
  263. ; - add all back-references to the queue
  264. ; - recurse
  265. ; - TODO: consider initialization case
  266. ; - TODO: consider multiple cells modified simultaneously
  267. (defn evaluate-from-cell
  268. "Evaluate the final value of a cell, and recursively re-evaluate all the cells that reference it."
  269. [data c r]
  270. (let [cycles? (find-cycle data c r)
  271. new-data (if cycles?
  272. (-> data ; if there are cycles, mark :cycle-error and remove :dirty (rathan than evaluate) - still need to recurse up the tree to mark dependents with :cycle-error
  273. (update-in [c r] dissoc :dirty)
  274. (assoc-in [c r :display] :cycle-error))
  275. (gather-variables-and-evaluate-cell data c r))] ; if there are no cycles, evaluate the cell
  276. (loop [data new-data
  277. queue (get-in new-data [c r :inbound])]
  278. (if (empty? queue)
  279. data ; if the queue is empty, we're done
  280. (let [current (first queue)
  281. cc (:col current)
  282. cr (:row current)
  283. dirty? (get-in data [cc cr :dirty])
  284. re-evaluated-data (if dirty?
  285. (gather-variables-and-evaluate-cell data cc cr)
  286. data)
  287. sufficient? (not= (get-in re-evaluated-data [cc cr :display]) :insufficient-data)
  288. new-queue (if dirty?
  289. (if sufficient?
  290. (concat (rest queue) (get-in re-evaluated-data [cc cr :inbound])) ; if all is well, then add the back-references onto the queue
  291. (concat (rest queue) (list current))) ; if the current cell's dependencies are not satisfied, re-add to the end of the queue
  292. (rest queue))] ; if the current cell is not marked as dirty, then it has already been processed
  293. (recur re-evaluated-data new-queue))))))
  294. ;TODO: does this need a cycle check?
  295. (defn evaluate-all
  296. "Evaluates all cells marked as \"dirty\". Generally reserved for the initialization."
  297. ([data]
  298. (evaluate-all data (walk-get-refs data #(:dirty %3))))
  299. ([data queue]
  300. (if (empty? queue)
  301. data
  302. (let [cur (first queue)
  303. cc (:col cur)
  304. cr (:row cur)
  305. dirty? (get-in data [cc cr :dirty])]
  306. (if dirty?
  307. (let [evaluated (evaluate-from-cell data (:col cur) (:row cur))
  308. result (get-in evaluated [cc cr :display])]
  309. (if (= result :insufficient-data)
  310. (recur data (concat (rest queue) (list cur)))
  311. (recur evaluated (rest queue))))
  312. (recur data (rest queue)))))))