SHARE
TWEET
ClippingsToAnki
a guest
Feb 2nd, 2013
47
Never
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Text;
- using System.Windows.Forms;
- using System.IO;
- using System.Text.RegularExpressions;
- namespace ClippingsToAnki
- {
- public partial class ClippingsToAnki : Form
- {
// Matches ruby annotations written between 《 and 》 (stripped as "aozora formatting" before matching highlights).
private static readonly Regex RubyRegex = new Regex("《.*?》", RegexOptions.Compiled);
// Matches a complete HTML <rt>...</rt> element, including its text.
private static readonly Regex HTMLRubyRegex = new Regex("<rt>.*?</rt>", RegexOptions.Compiled);
// Matches any single HTML tag (the shortest run from '<' to '>').
private static readonly Regex HTMLRegex = new Regex("<.*?>", RegexOptions.Compiled);
/// <summary>
/// Creates the form, applies the window icon from the embedded
/// "ClippingsToAnki.Cube.ico" resource and, in DEBUG builds, shows the debug text box
/// and makes the window 300 pixels taller.
/// </summary>
public ClippingsToAnki()
{
    InitializeComponent();

    // GetManifestResourceStream returns null when the resource cannot be found; in that
    // case keep the default icon instead of failing while the form is being constructed.
    // The stream is disposed once the icon has been created from it.
    using(Stream iconStream = System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream("ClippingsToAnki.Cube.ico")) {
        if(iconStream != null) {
            this.Icon = new System.Drawing.Icon(iconStream);
        }
    }
#if DEBUG
    this.txtDebug.Visible = true;
    this.Height += 300;
#endif
}
/// <summary>
/// Puts the controls into their initial state and, when a Kindle drive is found,
/// pre-fills the clippings path and loads its book list. Otherwise the user is asked
/// to pick the clippings file manually.
/// </summary>
private void InitializeForm()
{
    cboBookName.Enabled = false;
    cboEncoding.SelectedIndex = 0;

    string driveRoot = GetKindleDrive();
    if(driveRoot == string.Empty) {
        // No Kindle detected: tell the user and leave the path empty.
        MessageBox.Show("Your kindle doesn't appear to be plugged in. Please manually select your \"My Clippings.txt\" file.", "", MessageBoxButtons.OK, MessageBoxIcon.Information);
        return;
    }

    txtClippingsFile.Text = Path.Combine(driveRoot, "documents\\My Clippings.txt");
    LoadBookList();
}
/// <summary>
/// Looks for a ready, removable drive whose volume label is "Kindle" (case-insensitive).
/// </summary>
/// <returns>The root directory of the first matching drive, or an empty string when none is found.</returns>
private string GetKindleDrive()
{
    foreach(DriveInfo di in DriveInfo.GetDrives()) {
        // Filter on type and readiness BEFORE touching VolumeLabel: reading the label of a
        // drive that is not ready (empty card reader, optical drive without a disc) throws.
        if(di.DriveType != DriveType.Removable || !di.IsReady) {
            continue;
        }
        try {
            if(string.Equals(di.VolumeLabel, "Kindle", StringComparison.InvariantCultureIgnoreCase)) {
                return di.RootDirectory.FullName;
            }
        } catch(IOException) {
            // The drive went away between the IsReady check and the label read; skip it.
        } catch(UnauthorizedAccessException) {
            // The label of this drive may not be read by the current user; skip it.
        }
    }
    return string.Empty;
}
/// <summary>
/// Refills the book drop-down with the distinct book titles found in the clippings file
/// and selects the first one.
/// </summary>
/// <returns>
/// true when at least one title was loaded; false when the clippings could not be read
/// or contained no usable entries.
/// </returns>
private bool PopulateBookTitleDropdown()
{
    try {
        this.cboBookName.Items.Clear();
        foreach(Clipping clipping in this.GetClippings()) {
            string bookName = clipping.BookTitle;
            if(!this.cboBookName.Items.Contains(bookName)) {
                this.cboBookName.Items.Add(bookName);
            }
        }

        // Report "nothing to select" explicitly instead of letting SelectedIndex = 0
        // throw on an empty list and relying on the catch below.
        if(this.cboBookName.Items.Count == 0) {
            return false;
        }

        this.cboBookName.SelectedIndex = 0;
        return true;
    } catch(Exception) {
        // Best effort by design: any failure while reading or parsing the file is
        // reported to the caller as "could not load".
        return false;
    }
}
/// <summary>
/// Reads the clippings file named in the clippings text box, splits it into entries on
/// the "==========" separator line and returns the non-empty highlights in sorted order.
/// </summary>
/// <returns>The sorted list of highlight clippings.</returns>
private List<Clipping> GetClippings()
{
    // Drop byte-order marks and carriage returns so that entries split cleanly on "\n".
    string raw = File.ReadAllText(this.txtClippingsFile.Text);
    string normalized = raw.Replace("\xFEFF", "").Replace("\r", "");
    string[] entries = normalized.Split(new string[] { "==========\n" }, StringSplitOptions.RemoveEmptyEntries);

    List<Clipping> highlights = new List<Clipping>();
    for(int entryIndex = 0; entryIndex < entries.Length; entryIndex++) {
        // The entry's position in the file is handed to the clipping together with its text.
        Clipping candidate = new Clipping(entries[entryIndex], entryIndex);
        if(candidate.Type != eClippingType.Highlight) {
            continue;
        }
        if(candidate.Content.Trim() == string.Empty) {
            continue;
        }
        highlights.Add(candidate);
    }

    highlights.Sort();
    return highlights;
}
/// <summary>Reloads the book list when focus leaves the clippings-file text box.</summary>
private void txtClippingsFile_Leave(object sender, EventArgs e)
{
    LoadBookList();
}
/// <summary>
/// Repopulates the book drop-down from the current clippings file. On failure an error
/// is shown, the clippings path is cleared and the drop-down is disabled.
/// </summary>
private void LoadBookList()
{
    bool loaded = this.PopulateBookTitleDropdown();
    this.cboBookName.Enabled = loaded;

    if(!loaded) {
        // Show the message first: it quotes the path that is cleared right after.
        MessageBox.Show("Could not read specified \"My Clippings.txt\" file (" + this.txtClippingsFile.Text + ")", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        this.txtClippingsFile.Text = string.Empty;
    }

    // Refresh the Generate button last, so it reflects the new selection and the
    // possibly cleared path rather than the state from before the reload.
    this.UpdateGenerateButtonState();
}
/// <summary>Shows an open-file dialog restricted to existing files.</summary>
/// <param name="allowHTML">
/// true to offer text and HTML files (*.txt, *.html, *.htm); false to offer text files only.
/// </param>
/// <returns>The chosen file name, or null when the dialog was not confirmed with OK.</returns>
private string SelectFile(bool allowHTML)
{
    // The dialog is a disposable component; release it as soon as the choice is read.
    using(OpenFileDialog ofd = new OpenFileDialog()) {
        ofd.CheckFileExists = true;
        if(allowHTML) {
            ofd.Filter = "Text and HTML Files|*.txt;*.html;*.htm";
        } else {
            ofd.Filter = "Text Files|*.txt";
        }
        if(ofd.ShowDialog() == System.Windows.Forms.DialogResult.OK) {
            return ofd.FileName;
        }
        return null;
    }
}
/// <summary>
/// Lets the user pick the book file (text or HTML) and, if one was chosen, stores its
/// path and refreshes the Generate button.
/// </summary>
private void btnBrowseBook_Click(object sender, EventArgs e)
{
    string chosenPath = SelectFile(true);
    if(chosenPath == null) {
        return;
    }
    txtBookFile.Text = chosenPath;
    UpdateGenerateButtonState();
}

/// <summary>
/// Lets the user pick the clippings file (text only) and, if one was chosen, stores its
/// path and reloads the book list.
/// </summary>
private void btnBrowseClippingsFile_Click(object sender, EventArgs e)
{
    string chosenPath = SelectFile(false);
    if(chosenPath == null) {
        return;
    }
    txtClippingsFile.Text = chosenPath;
    LoadBookList();
}
/// <summary>
/// Builds an HTML page that lists, for every highlight belonging to the selected book,
/// the sentence of the book text in which it occurs (highlight wrapped in a "hl" span,
/// preceded by a Google search link), and writes it to "clippingstoanki.html" in the
/// temp directory.
/// </summary>
/// <param name="bookEncoding">Encoding used to read the book file.</param>
/// <param name="errorMessage">
/// Empty on clean success; a warning listing unmatched highlights when some were not
/// found; an error description when the method returns false.
/// </param>
/// <param name="outputFilename">Path of the written HTML file, or empty on failure.</param>
/// <returns>
/// true when the file was written; false when no highlight could be matched (including
/// the case of no highlights at all) or an exception occurred.
/// </returns>
private bool GenerateHTML(Encoding bookEncoding, out string errorMessage, out string outputFilename)
{
    errorMessage = string.Empty;
    outputFilename = string.Empty;
    try {
        // Read the selection once. With nothing selected no clipping matches, which ends
        // in the "none found" error below instead of a NullReferenceException.
        object selectedItem = this.cboBookName.SelectedItem;
        string selectedBook = selectedItem == null ? null : selectedItem.ToString();

        //Strip aozora formatting
        string bookText = RubyRegex.Replace(File.ReadAllText(this.txtBookFile.Text, bookEncoding), "").Replace("|", "");
        //Strip html formatting, convert </p> to new lines for end-of-sentence detection purposes
        bookText = HTMLRegex.Replace(HTMLRubyRegex.Replace(bookText.Replace("</p>", "\n"), ""), "");

        StringBuilder output = new StringBuilder();
        // The file is written as UTF-8 without a byte-order mark, so declare the charset
        // explicitly; otherwise a browser has to guess the encoding of a local file.
        output.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">");
        output.Append("<title>ClippingsToAnki Result</title>");
        output.Append("<style type=\"text/css\">" +
            "body {background:beige;color:#000;font-size:18px;}" +
            ".hl {color:#F00;font-weight:bold;}" +
            "a {color: #00aaff; text-decoration: none; font-weight:bold;}" +
            "</style>");
        output.Append("</head><body>");

        int currentTextIndex = 0;
        int firstIndexForCurrentLocation = 0;
        int lastLocation = -1;
        int highlightCount = 0;
        List<string> unmatchedWords = new List<string>();

        foreach(Clipping clipping in this.GetClippings()) {
            if(clipping.Type != eClippingType.Highlight || clipping.BookTitle != selectedBook) {
                continue;
            }
            highlightCount++;
            string content = clipping.Content;

            if(lastLocation != clipping.Location) {
                // First highlight of a new Kindle location: remember the TEXT index at
                // which its search starts. (The Kindle location number itself is not an
                // index into bookText.)
                firstIndexForCurrentLocation = currentTextIndex;
                lastLocation = clipping.Location;
            }

            // Ordinal search: the highlight must match the book text character for character.
            int matchIndex = bookText.IndexOf(content, currentTextIndex, StringComparison.Ordinal);
            if(matchIndex == -1) {
                //Try again from the start of the current location in case file order of highlights doesn't match text order
                matchIndex = bookText.IndexOf(content, firstIndexForCurrentLocation, StringComparison.Ordinal);
                if(matchIndex == -1) {
                    //Still can't find the word, add it as a word that wasn't matched, reset index to start of current location
                    unmatchedWords.Add(content);
                    currentTextIndex = firstIndexForCurrentLocation;
                    continue;
                }
            }
            currentTextIndex = matchIndex;

            string sentence = this.GetSentence(bookText, matchIndex);
            int highlightStart = sentence.IndexOf(content, StringComparison.Ordinal);
            if(highlightStart == -1) {
                // The highlight is not fully inside the extracted sentence (it crosses an
                // end-of-sentence character), so show the highlight itself.
                sentence = content;
                highlightStart = 0;
            }
            // Wrap the highlight in one step. Searching for it a second time after the
            // opening tag was inserted could hit the tag text itself (e.g. highlight "span").
            sentence = sentence.Substring(0, highlightStart)
                + "<span class=\"hl\">" + content + "</span>"
                + sentence.Substring(highlightStart + content.Length);

            string encodeSearchString = Uri.EscapeDataString("\"" + content + "\"");
            output.Append("<a target=\"_blank\" href=\"https://www.google.co.jp/search?q=" + encodeSearchString + "&lr=lang_ja\">◎</a> ");
            output.Append("<span style=\"display:none;\">。</span>");
            output.Append("<span>" + sentence + "</span><br/><br/>");
        }
        output.Append("</body></html>");

        if(unmatchedWords.Count == highlightCount) {
            errorMessage = "None of the words highlighted could be found in the specified book file.\n\nSelect the correct book/encoding and try again.";
            return false;
        } else if(unmatchedWords.Count > 0) {
            StringBuilder warning = new StringBuilder("The following words/phrases were not found in the specified book file:");
            // Start the list on its own line instead of gluing the first entry to the colon.
            warning.Append(Environment.NewLine);
            foreach(string word in unmatchedWords) {
                warning.Append(word).Append(Environment.NewLine);
            }
            errorMessage = warning.ToString();
        }

        outputFilename = Path.Combine(Path.GetTempPath(), "clippingstoanki.html");
        File.WriteAllText(outputFilename, output.ToString());
        return true;
    } catch(Exception ex) {
        // Do not hand back a file name for a file that may not have been written.
        outputFilename = string.Empty;
        errorMessage = ex.Message + "\n\n" + ex.StackTrace;
        return false;
    }
}
/// <summary>
/// Generates the HTML result with the encoding chosen in the encoding drop-down, shows
/// any error or warning message, and opens the generated file on success.
/// </summary>
/// <remarks>
/// Index 0 of the drop-down tries code page 932 (Shift-JIS) first and falls back to
/// UTF-8; index 1 uses code page 932; any other index uses UTF-8.
/// </remarks>
private void btnGenerate_Click(object sender, EventArgs e)
{
    bool result;
    string message;
    string outputFilename;

    if(this.cboEncoding.SelectedIndex == 0) {
        //Try Shift-JIS first, then UTF8 if Shift-JIS finds no word matches
        result = this.GenerateHTML(Encoding.GetEncoding(932), out message, out outputFilename);
        if(!result) {
            result = this.GenerateHTML(Encoding.UTF8, out message, out outputFilename);
        }
    } else {
        Encoding bookEncoding;
        if(this.cboEncoding.SelectedIndex == 1) {
            bookEncoding = Encoding.GetEncoding(932);
        } else {
            bookEncoding = Encoding.UTF8;
        }
        // Pass the encoding that was actually selected (previously UTF-8 was always used here).
        result = this.GenerateHTML(bookEncoding, out message, out outputFilename);
    }

    if(message != string.Empty) {
        if(!result) {
            MessageBox.Show(message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        } else {
            MessageBox.Show(message, "Warning", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        }
    }

    if(result) {
        System.Diagnostics.Process.Start(outputFilename);
    }
}
/// <summary>Tells whether a character terminates a sentence.</summary>
/// <param name="chr">Character to test.</param>
/// <returns>
/// true for '.', '。', '…', line feed and carriage return; false for anything else.
/// </returns>
private bool IsEndOfSentence(char chr)
{
    switch(chr) {
        case '.':
        case '。':
        case '…':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Extracts the sentence surrounding a position in the text. The sentence starts just
/// after the closest preceding end-of-sentence character (or at the start of the text)
/// and runs up to and including the next end-of-sentence character at or after
/// <paramref name="index"/> (or to the end of the text).
/// </summary>
/// <param name="text">Text to extract from.</param>
/// <param name="index">Position inside the wanted sentence; clamped to the text bounds.</param>
/// <returns>The trimmed sentence, or an empty string when the text is null or empty.</returns>
private string GetSentence(string text, int index)
{
    if(string.IsNullOrEmpty(text)) {
        return string.Empty;
    }

    int len = text.Length;
    if(index < 0) {
        index = 0;
    } else if(index >= len) {
        index = len - 1;
    }

    // Walk back while the PREVIOUS character is not a terminator, so startPos ends on
    // the first character of the sentence and never goes below zero.
    int startPos = index;
    while(startPos > 0 && !this.IsEndOfSentence(text[startPos - 1])) {
        startPos--;
    }

    // Check the bound before indexing: a sentence without a terminator at the very end
    // of the text must stop on the last character instead of reading past it.
    int endPos = index;
    while(endPos < len - 1 && !this.IsEndOfSentence(text[endPos])) {
        endPos++;
    }

    return text.Substring(startPos, endPos - startPos + 1).Trim();
}
/// <summary>
/// Refreshes the Generate button when the selected book changes. In DEBUG builds it also
/// lists every clipping of the selected book in the debug text box.
/// </summary>
private void cboBookName_SelectedIndexChanged(object sender, EventArgs e)
{
    this.UpdateGenerateButtonState();
#if DEBUG
    // With nothing selected there is nothing to list (and no item to call ToString on).
    object selectedItem = this.cboBookName.SelectedItem;
    if(selectedItem == null) {
        this.txtDebug.Text = string.Empty;
        return;
    }

    string selectedBook = selectedItem.ToString();
    StringBuilder dump = new StringBuilder();
    foreach(Clipping clipping in this.GetClippings()) {
        if(clipping.BookTitle == selectedBook) {
            dump.Append(clipping.ToString()).Append(Environment.NewLine);
        }
    }
    // Assign once instead of appending to the control's Text on every iteration.
    this.txtDebug.Text = dump.ToString();
#endif
}
/// <summary>
/// Enables the Generate button only when a book is selected and both the book file and
/// the clippings file exist.
/// </summary>
private void UpdateGenerateButtonState()
{
    btnGenerate.Enabled = true;

    // Evaluated left to right, stopping at the first requirement that is not met.
    bool ready = cboBookName.SelectedIndex != -1
        && File.Exists(txtBookFile.Text)
        && File.Exists(txtClippingsFile.Text);

    if(!ready) {
        btnGenerate.Enabled = false;
    }
}
/// <summary>Refreshes the Generate button when focus leaves the book-file text box.</summary>
private void txtBookFile_Leave(object sender, EventArgs e)
{
    UpdateGenerateButtonState();
}

/// <summary>Refreshes the Generate button when the main layout panel is clicked.</summary>
private void tlpMain_Click(object sender, EventArgs e)
{
    UpdateGenerateButtonState();
}
/// <summary>Opens the book-file browser when the still-blank book path box is clicked.</summary>
private void txtBookFile_Click(object sender, EventArgs e)
{
    bool pathIsBlank = txtBookFile.Text.Trim().Length == 0;
    if(pathIsBlank) {
        btnBrowseBook_Click(null, null);
    }
}

/// <summary>Opens the clippings-file browser when the still-blank clippings path box is clicked.</summary>
private void txtClippingsFile_Click(object sender, EventArgs e)
{
    bool pathIsBlank = txtClippingsFile.Text.Trim().Length == 0;
    if(pathIsBlank) {
        btnBrowseClippingsFile_Click(null, null);
    }
}
/// <summary>Runs the form's initial setup when the Shown event is raised.</summary>
private void ClippingsToAnki_Shown(object sender, EventArgs e)
{
    InitializeForm();
}
- }
/// <summary>Kind of entry in a clippings file.</summary>
public enum eClippingType
{
    /// <summary>The entry kind could not be determined.</summary>
    Unknown = 0,

    /// <summary>A highlighted passage.</summary>
    Highlight = 1,

    /// <summary>A note.</summary>
    Note = 2
}
/// <summary>
/// One entry of a clippings file: a title line, a metadata line and the clipped text.
/// Instances sort by location and then by their position in the file.
/// </summary>
public class Clipping : IComparable
{
    // Captures the first number of a "123 |" or "123-456 |" location field.
    private static readonly Regex LocationRegex = new Regex("\\s([0-9]+)-{0,1}[0-9]*\\s\\|", RegexOptions.Compiled);

    private readonly string _firstLine = string.Empty;
    private readonly string _secondLine = string.Empty;
    private readonly string _remainingLines = string.Empty;
    private readonly int _filePos;
    // Parsed once in the constructor: sorting reads Location for every comparison.
    private readonly int _location;

    /// <summary>Parses one clippings entry.</summary>
    /// <param name="clippingContent">
    /// Raw text of the entry, lines separated by "\n". Entries with fewer than three
    /// non-empty lines leave title, metadata and content empty.
    /// </param>
    /// <param name="filePos">Position of the entry within the file; used as the sort tie-breaker.</param>
    public Clipping(string clippingContent, int filePos)
    {
        if(clippingContent == null) {
            clippingContent = string.Empty;
        }

        string[] lines = clippingContent.Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);
        if(lines.Length > 2) {
            this._firstLine = lines[0].Trim();
            this._secondLine = lines[1].Trim();

            StringBuilder remaining = new StringBuilder();
            for(int i = 2; i < lines.Length; i++) {
                // A separator is only added once some text has been collected.
                if(remaining.Length > 0) {
                    remaining.Append("\n");
                }
                remaining.Append(lines[i].Trim());
            }
            this._remainingLines = remaining.ToString();
        }

        this._filePos = filePos;
        this._location = ParseLocation(this._secondLine);
    }

    /// <summary>First line of the entry (title and author).</summary>
    public string BookTitle
    {
        get
        {
            //Return first line for Title + Author
            return this._firstLine;
        }
    }

    /// <summary>
    /// Location number parsed from the metadata line; 0 when no location can be parsed.
    /// </summary>
    public int Location
    {
        get
        {
            return this._location;
        }
    }

    /// <summary>Entry kind, detected from localized keywords in the metadata line.</summary>
    public eClippingType Type
    {
        get
        {
            //En, Jap, Fr, Spanish, Chinese, German, Italian, Portuguese
            if(this._secondLine.ContainsAny("Highlight", "ハイライト", "Surlignement", "Subrayado", "标注", "Markierung", "Evidenziazione", "Destaque")) {
                return eClippingType.Highlight;
            } else if(this._secondLine.ContainsAny("Note", "メモ", "Note", "Nota", "笔记", "Notiz", "Note", "Nota")) {
                return eClippingType.Note;
            }
            return eClippingType.Unknown;
        }
    }

    /// <summary>Clipped text: the third and following lines, each trimmed, joined with "\n".</summary>
    public string Content
    {
        get
        {
            return this._remainingLines;
        }
    }

    /// <summary>
    /// Orders clippings by <see cref="Location"/> and then by file position.
    /// </summary>
    /// <param name="obj">Object to compare with.</param>
    /// <returns>
    /// A negative value, zero or a positive value when this clipping sorts before, equal
    /// to or after <paramref name="obj"/>; 0 when <paramref name="obj"/> is not a Clipping.
    /// </returns>
    public int CompareTo(object obj)
    {
        Clipping other = obj as Clipping;
        if(other == null) {
            return 0;
        }

        int byLocation = this._location.CompareTo(other._location);
        if(byLocation != 0) {
            return byLocation;
        }
        // int.CompareTo yields 0 for equal positions, so an item compares equal to itself,
        // as the sort routines require.
        return this._filePos.CompareTo(other._filePos);
    }

    /// <summary>Returns "Type - Location - Content".</summary>
    public override string ToString()
    {
        return this.Type.ToString() + " - " + this.Location.ToString() + " - " + this.Content;
    }

    // Extracts the location number from a metadata line; 0 when there is none.
    private static int ParseLocation(string metadataLine)
    {
        string line = metadataLine;
        if(line.CountOf('|') > 1) {
            //When both a page and a location are given, strip the page info
            line = line.Substring(line.IndexOf("|"));
        }

        Match match = LocationRegex.Match(line);
        int location;
        if(match.Success && Int32.TryParse(match.Groups[1].Value, out location)) {
            return location;
        }
        // Int32.TryParse zeroes its out argument on failure, so 0 has always been the
        // value produced for "no location"; keep it for existing callers.
        return 0;
    }
}
/// <summary>Small string helpers used when parsing clippings.</summary>
public static class StringExtension
{
    /// <summary>
    /// Tells whether the string contains at least one of the given substrings, ignoring case.
    /// </summary>
    /// <param name="str">String to search; null contains nothing.</param>
    /// <param name="needles">Substrings to look for; null entries are skipped.</param>
    /// <returns>true when any needle occurs in <paramref name="str"/>; otherwise false.</returns>
    public static bool ContainsAny(this string str, params string[] needles)
    {
        if(str == null || needles == null) {
            return false;
        }
        foreach(string needle in needles) {
            // Case-insensitive ordinal search: no lower-cased copies are allocated and the
            // result does not depend on any culture.
            if(needle != null && str.IndexOf(needle, StringComparison.OrdinalIgnoreCase) >= 0) {
                return true;
            }
        }
        return false;
    }

    /// <summary>Counts the occurrences of a character in the string.</summary>
    /// <param name="str">String to scan; null counts as empty.</param>
    /// <param name="needle">Character to count.</param>
    /// <returns>Number of times <paramref name="needle"/> occurs in <paramref name="str"/>.</returns>
    public static int CountOf(this string str, char needle)
    {
        if(str == null) {
            return 0;
        }
        int count = 0;
        foreach(char c in str) {
            if(c == needle) {
                count++;
            }
        }
        return count;
    }
}
- }
RAW Paste Data
