Generate a “graphical” Zipf distribution for the entire text of Moby Dick

The program first removes HTML tags from a version of the text found on the web (the `Regex.Replace` call). It then splits the entire text into words (the `Split` call) and continues with a LINQ grouping expression that tallies distinct words into an anonymous type. After deriving a scaling factor for the graph, the `Console.WriteLine` call inside the loop prints an ASCII histogram bar for each of the top 35 words.

moby_dick.html

using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

/// <summary>
/// Prints an ASCII histogram of the most frequent words found in
/// <c>moby_dick.html</c>, illustrating the Zipf distribution of the text.
/// </summary>
class MainClass
{
    /// <summary>
    /// The top tally is integer-divided by this value to obtain the scale,
    /// so the longest bar is about this many asterisks wide.
    /// </summary>
    private const int MaxBarWidth = 60;

    /// <summary>Number of top-ranked words that are printed.</summary>
    private const int WordsToShow = 35;

    /// <summary>
    /// Characters treated as word separators. Tab and carriage return are
    /// included so that CRLF line endings do not end up glued to words.
    /// </summary>
    private static readonly char[] Separators = " \t\r\n\",.;-!?".ToCharArray();

    /// <summary>
    /// Reads <c>moby_dick.html</c> from the current directory, strips its HTML
    /// tags, tallies the words case-insensitively and writes one histogram
    /// line per word to the console, most frequent first.
    /// Prints nothing when the text contains no words.
    /// </summary>
    static void Main()
    {
        String text;
        using (var reader = new StreamReader("moby_dick.html"))
        {
            text = reader.ReadToEnd();
        }

        // Remove HTML tags; "(.|\n)" lets a single tag span several lines.
        text = Regex.Replace(text, "<(.|\n)*?>", String.Empty);

        // Materialise the query once: it is read both for the scale and for
        // printing, and a deferred query would regroup and resort each time.
        var tallies = text
            .Split(Separators, StringSplitOptions.RemoveEmptyEntries)
            .GroupBy(w => w.ToLowerInvariant())
            .Select(g => new { g.Key, Tally = g.Count() })
            .OrderByDescending(e => e.Tally)
            .Take(WordsToShow)
            .ToList();

        if (tallies.Count == 0)
        {
            return;
        }

        // Clamp to 1 so that short texts (top tally below MaxBarWidth) do not
        // produce a zero scale and a division by zero below.
        int scale = Math.Max(1, tallies[0].Tally / MaxBarWidth);
        foreach (var tally in tallies)
        {
            Console.WriteLine("{0,6} {1}", tally.Key, new String('*', tally.Tally / scale));
        }
    }
}
Output.
01  the ************************************************************
02   of ***************************
03  and **************************
04   to *******************
05    a *******************
06   in *****************
07 that ************
08  his **********
09   it **********
10    i ********
11  but *******
12   he *******
13   as *******
14 with *******
15   is *******
16  was ******
17  for ******
18  all ******
19 this *****
20   at *****
21   by ****
22  not ****
23 from ****
24  him ****
25   so ****
26   on ****
27whale ****
28   be ****
29  one ***
30  you ***
31there ***
32  now ***
33  had ***
34 have ***
35   or **