|
| 1 | +package encode |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/json" |
| 5 | + "fmt" |
| 6 | + "io" |
| 7 | + "net/http" |
| 8 | + "os" |
| 9 | + "path/filepath" |
| 10 | + "sort" |
| 11 | + "unicode" |
| 12 | +) |
| 13 | + |
| 14 | +/* |
| 15 | +
|
| 16 | + 0 ' ' 32 224434 9 |
| 17 | + 1 ',' 44 115891 15 |
| 18 | + 2 'e' 101 90954 19 |
| 19 | + 3 '0' 48 88983 23 |
| 20 | + 4 '\n' 10 79910 26 |
| 21 | + 5 't' 116 77762 30 |
| 22 | + 6 '2' 50 74192 33 |
| 23 | + 7 'i' 105 68819 36 |
| 24 | + 8 '\t' 9 65162 39 |
| 25 | + 9 '3' 51 64568 42 |
| 26 | + 10 's' 115 64344 45 |
| 27 | + 11 'n' 110 61169 47 |
| 28 | + 12 '1' 49 60069 50 |
| 29 | + 13 'r' 114 56912 53 |
| 30 | + 14 'a' 97 56274 55 |
| 31 | + 15 'l' 108 48418 57 |
| 32 | + 16 '5' 53 46883 59 |
| 33 | + 17 'o' 111 46422 61 |
| 34 | + 18 'c' 99 45621 64 |
| 35 | + 19 '4' 52 44040 65 |
| 36 | + 20 'd' 100 41524 67 |
| 37 | + 21 '6' 54 40700 69 |
| 38 | + 22 'f' 102 40551 71 |
| 39 | + 23 '_' 95 37853 73 |
| 40 | + 24 '8' 56 36760 74 |
| 41 | + 25 '7' 55 35389 76 |
| 42 | + 26 '9' 57 34599 77 |
| 43 | + 27 'u' 117 30030 79 |
| 44 | + 28 ')' 41 29131 80 |
| 45 | + 29 '(' 40 29079 81 |
| 46 | + 30 'p' 112 28625 83 |
| 47 | + 31 '=' 61 26891 84 |
| 48 | + 32 'x' 120 25945 85 |
| 49 | + 33 ';' 59 25008 86 |
| 50 | + 34 '*' 42 24708 87 |
| 51 | + 35 'm' 109 23479 88 |
| 52 | + 36 'h' 104 21306 89 |
| 53 | + 37 '.' 46 17628 90 |
| 54 | + 38 'b' 98 17628 91 |
| 55 | + 39 'g' 103 16661 91 |
| 56 | + 40 '-' 45 16113 92 |
| 57 | + 41 '\x00' 0 14173 93 |
| 58 | + 42 'y' 121 11987 93 |
| 59 | + 43 '/' 47 11130 94 |
| 60 | + 44 '>' 62 9455 94 |
| 61 | + 45 'v' 118 9052 95 |
| 62 | + 46 'w' 119 9034 95 |
| 63 | + 47 '+' 43 8586 95 |
| 64 | + 48 '#' 35 7752 96 |
| 65 | + 49 'k' 107 7298 96 |
| 66 | + 50 '{' 123 7285 96 |
| 67 | + 51 '}' 125 7281 97 |
| 68 | + 52 '"' 34 5979 97 |
| 69 | + 53 '<' 60 5758 97 |
| 70 | + 54 'z' 122 5574 98 |
| 71 | + 55 '\\' 92 5341 98 |
| 72 | + 56 '[' 91 5131 98 |
| 73 | + 57 ']' 93 5129 98 |
| 74 | + 58 '&' 38 4880 98 |
| 75 | + 59 ':' 58 3563 99 |
| 76 | + 60 '\'' 39 3369 99 |
| 77 | + 61 '|' 124 3135 99 |
| 78 | + 62 '%' 37 3087 99 |
| 79 | + 63 'q' 113 2212 99 |
| 80 | + 64 '!' 33 2083 99 |
| 81 | + 65 'j' 106 1850 99 |
| 82 | + 66 '$' 36 1618 99 |
| 83 | + 67 '?' 63 601 99 |
| 84 | + 68 '^' 94 573 99 |
| 85 | + 69 '@' 64 519 99 |
| 86 | + 70 '~' 126 260 99 |
| 87 | + 71 '\x01' 1 138 99 |
| 88 | + 72 '\b' 8 133 99 |
| 89 | + 73 '`' 96 123 99 |
| 90 | + 74 '\x04' 4 71 99 |
| 91 | + 75 '\x06' 6 58 99 |
| 92 | + 76 '\x05' 5 40 99 |
| 93 | + 77 '\x02' 2 34 99 |
| 94 | + 78 '\x03' 3 32 99 |
| 95 | + 79 '\a' 7 31 99 |
| 96 | + 80 '\x10' 16 25 99 |
| 97 | + 81 '\v' 11 9 99 |
| 98 | + 82 '\x14' 20 4 99 |
| 99 | + 83 '\f' 12 4 99 |
| 100 | + 84 '\r' 13 3 99 |
| 101 | + 85 '\x1d' 29 2 99 |
| 102 | + 86 '\x1b' 27 2 99 |
| 103 | + 87 '\x1f' 31 2 99 |
| 104 | + 88 '\x1a' 26 2 99 |
| 105 | + 89 '\x0f' 15 1 99 |
| 106 | + 90 '\x15' 21 1 99 |
| 107 | + 91 '\x18' 24 1 99 |
| 108 | + 92 '\x11' 17 1 99 |
| 109 | + 93 '\x16' 22 1 99 |
| 110 | + 94 '\x19' 25 1 100 |
| 111 | +
|
| 112 | +
|
| 113 | +
|
| 114 | +
|
| 115 | + 0 ' ' 32 15827074 17 |
| 116 | + 1 'e' 101 8521608 26 |
| 117 | + 2 't' 116 5995059 33 |
| 118 | + 3 'a' 97 5634185 39 |
| 119 | + 4 'o' 111 4959336 44 |
| 120 | + 5 'n' 110 4620222 49 |
| 121 | + 6 'h' 104 4470892 54 |
| 122 | + 7 'i' 105 4354779 59 |
| 123 | + 8 's' 115 4164566 63 |
| 124 | + 9 'r' 114 4047746 68 |
| 125 | + 10 'd' 100 3386714 72 |
| 126 | + 11 'l' 108 2931460 75 |
| 127 | + 12 'u' 117 1862109 77 |
| 128 | + 13 'w' 119 1662748 79 |
| 129 | + 14 'm' 109 1623024 80 |
| 130 | + 15 'g' 103 1581262 82 |
| 131 | + 16 'c' 99 1525031 84 |
| 132 | + 17 'f' 102 1410620 85 |
| 133 | + 18 'y' 121 1278989 87 |
| 134 | + 19 '\x00' 0 1242402 88 |
| 135 | + 20 ',' 44 1172181 89 |
| 136 | + 21 '.' 46 1122071 91 |
| 137 | + 22 'b' 98 1053480 92 |
| 138 | + 23 'p' 112 1048503 93 |
| 139 | + 24 '\n' 10 901311 94 |
| 140 | + 25 '\r' 13 805623 95 |
| 141 | + 26 'k' 107 687854 95 |
| 142 | + 27 'v' 118 596802 96 |
| 143 | + 28 '"' 34 589817 97 |
| 144 | + 29 '\'' 39 266935 97 |
| 145 | + 30 '-' 45 146654 97 |
| 146 | + 31 '_' 95 137890 97 |
| 147 | + 32 'x' 120 125445 97 |
| 148 | + 33 '1' 49 108131 98 |
| 149 | + 34 'z' 122 103677 98 |
| 150 | + 35 '0' 48 99303 98 |
| 151 | + 36 '?' 63 93497 98 |
| 152 | + 37 '\x01' 1 90709 98 |
| 153 | + 38 '2' 50 86841 98 |
| 154 | + 39 'j' 106 86222 98 |
| 155 | + 40 '\t' 9 78048 98 |
| 156 | + 41 '3' 51 70327 98 |
| 157 | + 42 ':' 58 67016 98 |
| 158 | + 43 ';' 59 61595 98 |
| 159 | + 44 '!' 33 61315 99 |
| 160 | + 45 'q' 113 55944 99 |
| 161 | + 46 '5' 53 54857 99 |
| 162 | + 47 '4' 52 51946 99 |
| 163 | + 48 '6' 54 48144 99 |
| 164 | + 49 '8' 56 44632 99 |
| 165 | + 50 '7' 55 44525 99 |
| 166 | + 51 '9' 57 43274 99 |
| 167 | + 52 '(' 40 37249 99 |
| 168 | + 53 '=' 61 36726 99 |
| 169 | + 54 ')' 41 35620 99 |
| 170 | + 55 '*' 42 33342 99 |
| 171 | + 56 '>' 62 29395 99 |
| 172 | + 57 '<' 60 25531 99 |
| 173 | + 58 '\x02' 2 23263 99 |
| 174 | + 59 '$' 36 21742 99 |
| 175 | + 60 '\x05' 5 16554 99 |
| 176 | + 61 '\x03' 3 16129 99 |
| 177 | + 62 '/' 47 14509 99 |
| 178 | + 63 '\x04' 4 14109 99 |
| 179 | + 64 '\b' 8 12232 99 |
| 180 | + 65 '\x13' 19 11834 99 |
| 181 | + 66 '#' 35 10697 99 |
| 182 | + 67 '+' 43 10663 99 |
| 183 | + 68 '\x10' 16 10592 99 |
| 184 | + 69 '@' 64 9604 99 |
| 185 | + 70 '}' 125 9503 99 |
| 186 | + 71 '{' 123 9050 99 |
| 187 | + 72 '\\' 92 8428 99 |
| 188 | + 73 ']' 93 8132 99 |
| 189 | + 74 '\x12' 18 7844 99 |
| 190 | + 75 '\x06' 6 7762 99 |
| 191 | + 76 '[' 91 7626 99 |
| 192 | + 77 '&' 38 7145 99 |
| 193 | + 78 '\x0e' 14 7035 99 |
| 194 | + 79 '|' 124 6258 99 |
| 195 | + 80 '\v' 11 6181 99 |
| 196 | + 81 '\x0f' 15 5676 99 |
| 197 | + 82 '%' 37 5664 99 |
| 198 | + 83 '`' 96 5166 99 |
| 199 | + 84 '\a' 7 4880 99 |
| 200 | + 85 '^' 94 4647 99 |
| 201 | + 86 '\x14' 20 4531 99 |
| 202 | + 87 '\x18' 24 4347 99 |
| 203 | + 88 '\x1c' 28 4162 99 |
| 204 | + 89 '\x1a' 26 3624 99 |
| 205 | + 90 '\x11' 17 3385 99 |
| 206 | + 91 '\x19' 25 3336 99 |
| 207 | +
|
| 208 | +*/ |
| 209 | + |
| 210 | +// Count frequencies with phonetic grouping |
| 211 | +func countASCII(root string) map[byte]int { |
| 212 | + freq := make(map[byte]int) |
| 213 | + |
| 214 | + _ = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { |
| 215 | + if err != nil { |
| 216 | + fmt.Println("Error:", err) |
| 217 | + return nil |
| 218 | + } |
| 219 | + if info.IsDir() { |
| 220 | + return nil |
| 221 | + } |
| 222 | + file, err := os.Open(path) |
| 223 | + if err != nil { |
| 224 | + fmt.Println("Error opening file:", err) |
| 225 | + return nil |
| 226 | + } |
| 227 | + defer file.Close() |
| 228 | + |
| 229 | + buf := make([]byte, 4096) |
| 230 | + for { |
| 231 | + n, err := file.Read(buf) |
| 232 | + if err != nil && err != io.EOF { |
| 233 | + break |
| 234 | + } |
| 235 | + if n == 0 { |
| 236 | + break |
| 237 | + } |
| 238 | + for _, b := range buf[:n] { |
| 239 | + if b < 128 { // ASCII only |
| 240 | + char := unicode.ToLower(rune(b)) |
| 241 | + |
| 242 | + freq[ASCII32Index[char]]++ |
| 243 | + } |
| 244 | + } |
| 245 | + } |
| 246 | + return nil |
| 247 | + }) |
| 248 | + |
| 249 | + return freq |
| 250 | +} |
| 251 | + |
| 252 | +func sortedMap() map[string]int { |
| 253 | + root := "./" |
| 254 | + frequencies := countASCII(root) |
| 255 | + |
| 256 | + // Convert map to slice for sorting |
| 257 | + type kv struct { |
| 258 | + Char string |
| 259 | + Count int |
| 260 | + } |
| 261 | + var sortedFreq []kv |
| 262 | + |
| 263 | + totalChar := 0 |
| 264 | + |
| 265 | + for k, v := range frequencies { |
| 266 | + sortedFreq = append(sortedFreq, kv{Char: fmt.Sprintf("%2d %8q", k, string(SEC32KeyMap[k])), Count: v}) |
| 267 | + totalChar += v |
| 268 | + } |
| 269 | + |
| 270 | + // Sort by frequency (descending) |
| 271 | + sort.Slice(sortedFreq, func(i, j int) bool { |
| 272 | + return sortedFreq[i].Count > sortedFreq[j].Count |
| 273 | + }) |
| 274 | + |
| 275 | + // Convert back to a sorted map for JSON response |
| 276 | + sortedMap := make(map[string]int) |
| 277 | + |
| 278 | + charCount := 0 |
| 279 | + for i, kv := range sortedFreq { |
| 280 | + sortedMap[kv.Char] = kv.Count |
| 281 | + |
| 282 | + charCount += kv.Count |
| 283 | + |
| 284 | + fmt.Printf("%4d %6v %8v %4d %4d\n", i, kv.Char, kv.Count, 100*charCount/totalChar, 100*kv.Count/totalChar) |
| 285 | + } |
| 286 | + return sortedMap |
| 287 | +} |
| 288 | + |
| 289 | +func frequencyHandler(w http.ResponseWriter, r *http.Request) { |
| 290 | + w.Header().Set("Content-Type", "application/json") |
| 291 | + json.NewEncoder(w).Encode(sortedMap()) |
| 292 | +} |
| 293 | + |
| 294 | +func main() { |
| 295 | + sortedMap() |
| 296 | + |
| 297 | + fs := http.FileServer(http.Dir("./")) // Serve static files |
| 298 | + http.Handle("/", fs) |
| 299 | + http.HandleFunc("/frequency", frequencyHandler) // API endpoint |
| 300 | + |
| 301 | + fmt.Println("Server running at http://localhost:8080") |
| 302 | + http.ListenAndServe(":8080", nil) |
| 303 | +} |
| 304 | + |
// html is a single-page client for the /frequency endpoint: it fetches the JSON
// object of label -> count and draws it as a Chart.js bar chart, most frequent
// label first.
//
// The script must not contain a backtick, because that character would end
// this Go raw string literal; JavaScript template literals are therefore
// avoided.
//
// NOTE(review): nothing in the visible part of this file references html -
// confirm whether it is served from elsewhere or mirrors an index.html on disk.
var html = `<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Character Frequency</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>

<body>
    <h2>Character Frequency Graph</h2>
    <canvas id="charChart"></canvas>
    <script>
        async function fetchData() {
            const response = await fetch('/frequency');
            if (!response.ok) {
                throw new Error('Request for /frequency failed with status ' + response.status);
            }
            const data = await response.json();

            // A JSON object has no meaningful order, so rank the bars here.
            const entries = Object.entries(data).sort((a, b) => b[1] - a[1]);
            const labels = entries.map(([ch]) => (ch === " " ? "Space" : ch));
            const values = entries.map(([, count]) => count);

            const ctx = document.getElementById('charChart').getContext('2d');
            new Chart(ctx, {
                type: 'bar',
                data: {
                    labels: labels,
                    datasets: [{
                        label: 'Character Frequency',
                        data: values,
                        backgroundColor: 'rgba(54, 162, 235, 0.6)',
                        borderColor: 'rgba(54, 162, 235, 1)',
                        borderWidth: 1
                    }]
                },
                options: {
                    scales: {
                        y: { beginAtZero: true }
                    }
                }
            });
        }
        fetchData().catch((err) => {
            console.error('Could not load character frequencies:', err);
        });
    </script>
</body>

</html>`
0 commit comments