Commit
Author: Kevin Schoon [kevinschoon@gmail.com]
Hash: f18f27749ec1685bc370bd5ddb63761eb6b1c25a
Timestamp: Tue, 29 Sep 2020 15:27:48 +0000 (4 years ago)

1 file changed, 39 insertions(+), 51 deletions(-) [browse]
clean up word counting
1diff --git a/lib/note.ml b/lib/note.ml
2index edb619f..7c817b8 100644
3--- a/lib/note.ml
4+++ b/lib/note.ml
5 @@ -45,56 +45,6 @@ let get_tags t =
6
7 let get_path t = Slug.get_path t.slug
8
9- let tokenize t =
10- let rec _tokenize markdown =
11- List.fold
12- ~init:([] : string list)
13- ~f:(fun accm entry ->
14- match entry with
15- | Omd.Text text -> accm @ String.split_on_chars ~on:[ ' ' ] text
16- | Omd.H1 header -> accm @ _tokenize header
17- | Omd.H2 header -> accm @ _tokenize header
18- | Omd.H3 header -> accm @ _tokenize header
19- | Omd.H4 header -> accm @ _tokenize header
20- | Omd.H5 header -> accm @ _tokenize header
21- | Omd.H6 header -> accm @ _tokenize header
22- | Omd.Paragraph paragraph -> accm @ _tokenize paragraph
23- | Omd.Emph text -> accm @ _tokenize text
24- | Omd.Bold text -> accm @ _tokenize text
25- | Omd.Ul markdown_list ->
26- let inner =
27- List.fold ~init:[]
28- ~f:(fun accm entry -> accm @ _tokenize entry)
29- markdown_list
30- in
31- accm @ inner
32- | Omd.Ol markdown_list ->
33- let inner =
34- List.fold ~init:[]
35- ~f:(fun accm entry -> accm @ _tokenize entry)
36- markdown_list
37- in
38- accm @ inner
39- | Omd.Ulp markdown_list ->
40- let inner =
41- List.fold ~init:[]
42- ~f:(fun accm entry -> accm @ _tokenize entry)
43- markdown_list
44- in
45- accm @ inner
46- | Omd.Olp markdown_list ->
47- let inner =
48- List.fold ~init:[]
49- ~f:(fun accm entry -> accm @ _tokenize entry)
50- markdown_list
51- in
52- accm @ inner
53- | _ -> accm)
54- markdown
55- in
56-
57- _tokenize t.markdown
58-
59 let get_data t =
60 let data =
61 List.filter_map
62 @@ -159,6 +109,43 @@ let of_string ~data slug =
63 let markdown = Omd.of_string data in
64 { frontmatter; markdown; slug }
65
66+ module Util = struct
67+ let split_words str =
68+ List.filter_map
69+ ~f:(fun x ->
70+ match String.strip ~drop:(fun x -> Char.equal x ' ') x with
71+ | "" -> None
72+ | _ -> Some x)
73+ (String.split ~on:' ' str)
74+
75+ let rec to_words markdown =
76+ match markdown with
77+ | [] -> []
78+ | hd :: tl ->
79+ ( match hd with
80+ | Omd.Text s -> split_words s
81+ | Omd.H1 v
82+ | Omd.H2 v
83+ | Omd.H3 v
84+ | Omd.H4 v
85+ | Omd.H5 v
86+ | Omd.H6 v
87+ | Omd.Blockquote v
88+ | Omd.Bold v
89+ | Omd.Emph v
90+ | Omd.Paragraph v ->
91+ to_words v
92+ | Omd.Url (_, inner, title) -> split_words title @ to_words inner
93+ | Omd.Ref (_, _, title, _) -> split_words title
94+ | Omd.Ol l | Omd.Olp l | Omd.Ul l | Omd.Ulp l ->
95+ List.fold
96+ ~init:([] : string list)
97+ ~f:(fun accm elem -> accm @ to_words elem)
98+ l
99+ | _ -> [] )
100+ @ to_words tl
101+ end
102+
103 module Encoding = struct
104 let to_string ~style t =
105 match style with
106 @@ -227,7 +214,8 @@ module Display = struct
107 let title = (get_title note, [ Reset ]) in
108 let tags = (String.concat ~sep:"|" (get_tags note), [ Reset ]) in
109 let word_count =
110- (Core.sprintf "%d" (List.length (tokenize note)), [ Reset ])
111+ ( Core.sprintf "%d" (List.length (Util.to_words note.markdown)),
112+ [ Reset ] )
113 in
114 let slug = (Slug.to_string note.slug, [ Reset ]) in
115 accm @ [ [ title; tags; word_count; slug ] ])