using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Text.RegularExpressions;
namespace ClippingsToAnki
{
public partial class ClippingsToAnki : Form
{
//Text enclosed in 《 》 brackets (non-greedy, so each bracketed run is matched on its own)
private static readonly Regex RubyRegex = new Regex("《.*?》", RegexOptions.Compiled);
//An HTML <rt>...</rt> element together with its content
private static readonly Regex HTMLRubyRegex = new Regex("<rt>.*?</rt>", RegexOptions.Compiled);
//Any single HTML tag; '.' does not match newlines, so a tag split across lines is left untouched
private static readonly Regex HTMLRegex = new Regex("<.*?>", RegexOptions.Compiled);
/// <summary>
/// Builds the form, assigns the window icon from the embedded "ClippingsToAnki.Cube.ico"
/// resource and, in DEBUG builds, reveals the debug text box and enlarges the window.
/// </summary>
public ClippingsToAnki()
{
    InitializeComponent();
    //GetManifestResourceStream returns null when the resource is missing; skip the icon
    //in that case instead of failing construction, and always release the stream.
    using(Stream iconStream = System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream("ClippingsToAnki.Cube.ico")) {
        if(iconStream != null) {
            this.Icon = new System.Drawing.Icon(iconStream);
        }
    }
#if DEBUG
    this.txtDebug.Visible = true;
    this.Height += 300;
#endif
}
/// <summary>
/// Puts the form into its initial state: book list disabled, first encoding entry selected,
/// and the clippings path pre-filled from a detected Kindle drive when there is one.
/// When no Kindle drive is found the user is asked to pick the file manually.
/// </summary>
private void InitializeForm()
{
    this.cboBookName.Enabled = false;
    this.cboEncoding.SelectedIndex = 0;

    string kindleRoot = this.GetKindleDrive();
    if(kindleRoot == string.Empty) {
        MessageBox.Show("Your kindle doesn't appear to be plugged in. Please manually select your \"My Clippings.txt\" file.", "", MessageBoxButtons.OK, MessageBoxIcon.Information);
        return;
    }

    //Point at the clippings file below the drive root and load its book titles
    this.txtClippingsFile.Text = Path.Combine(kindleRoot, "documents\\My Clippings.txt");
    this.LoadBookList();
}
/// <summary>
/// Looks for a removable drive whose volume label is "Kindle" (case-insensitive).
/// </summary>
/// <returns>The root directory path of the first matching drive, or an empty string when none is found.</returns>
private string GetKindleDrive()
{
    foreach(DriveInfo di in DriveInfo.GetDrives()) {
        //Reading VolumeLabel throws on drives that are not ready (empty card readers,
        //optical drives without media), so filter on type and readiness first.
        if(di.DriveType != DriveType.Removable || !di.IsReady) {
            continue;
        }
        try {
            if(string.Equals(di.VolumeLabel, "Kindle", StringComparison.InvariantCultureIgnoreCase)) {
                return di.RootDirectory.FullName;
            }
        } catch(IOException) {
            //Drive became unavailable between the IsReady check and the label read; try the next one
        } catch(UnauthorizedAccessException) {
            //Label not readable with the current permissions; try the next one
        }
    }
    return string.Empty;
}
/// <summary>
/// Refills the book drop-down with the distinct book titles found in the clippings file
/// and selects the first entry.
/// </summary>
/// <returns>
/// True when the list was filled and the first entry selected; false when anything in that
/// process threw (every exception is swallowed and reported only through the return value).
/// </returns>
private bool PopulateBookTitleDropdown()
{
    try {
        ComboBox.ObjectCollection titles = this.cboBookName.Items;
        titles.Clear();
        foreach(Clipping entry in this.GetClippings()) {
            if(titles.Contains(entry.BookTitle)) {
                continue;
            }
            titles.Add(entry.BookTitle);
        }
        //NOTE(review): presumably selecting index 0 on an empty list throws, which would make
        //a readable file without highlights report failure as well - confirm
        this.cboBookName.SelectedIndex = 0;
    } catch {
        return false;
    }
    return true;
}
/// <summary>
/// Reads the clippings file named in the clippings text box and returns its highlights.
/// </summary>
/// <returns>
/// The entries whose type is Highlight and whose content is not blank, sorted with the
/// Clipping type's own ordering. Each clipping receives its zero-based position among the
/// entries of the file (counting entries that are filtered out as well).
/// </returns>
private List<Clipping> GetClippings()
{
    string rawText = File.ReadAllText(this.txtClippingsFile.Text);
    //Drop byte-order marks and carriage returns so that entries can be split on "\n" only
    string normalized = rawText.Replace("\xFEFF", "").Replace("\r", "");
    string[] entries = normalized.Split(new string[] { "==========\n" }, StringSplitOptions.RemoveEmptyEntries);

    List<Clipping> highlights = new List<Clipping>();
    for(int entryIndex = 0; entryIndex < entries.Length; entryIndex++) {
        Clipping candidate = new Clipping(entries[entryIndex], entryIndex);
        if(candidate.Type != eClippingType.Highlight) {
            continue;
        }
        if(candidate.Content.Trim() == string.Empty) {
            continue;
        }
        highlights.Add(candidate);
    }
    highlights.Sort();
    return highlights;
}
/// <summary>
/// Reloads the book list when focus leaves the clippings file text box.
/// </summary>
private void txtClippingsFile_Leave(object sender, EventArgs e)
{
    LoadBookList();
}
/// <summary>
/// Reloads the book drop-down from the current clippings file. On failure an error is shown,
/// the clippings path is cleared and the drop-down is disabled; on success it is enabled.
/// </summary>
private void LoadBookList()
{
    bool loaded = this.PopulateBookTitleDropdown();
    if(!loaded) {
        MessageBox.Show("Could not read specified \"My Clippings.txt\" file (" + this.txtClippingsFile.Text + ")", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        this.txtClippingsFile.Text = string.Empty;
    }
    this.cboBookName.Enabled = loaded;
    //Refresh the Generate button only after the list and the path have settled; doing it
    //before the reload could leave the button enabled for a file that was just rejected.
    this.UpdateGenerateButtonState();
}
/// <summary>
/// Shows an open-file dialog restricted to existing files.
/// </summary>
/// <param name="allowHTML">When true, .html/.htm files are offered in addition to .txt files.</param>
/// <returns>The chosen file name, or null when the dialog was not confirmed with OK.</returns>
private string SelectFile(bool allowHTML)
{
    //The dialog is IDisposable; release it as soon as the choice has been read
    using(OpenFileDialog ofd = new OpenFileDialog()) {
        ofd.CheckFileExists = true;
        ofd.Filter = allowHTML ? "Text and HTML Files|*.txt;*.html;*.htm" : "Text Files|*.txt";
        if(ofd.ShowDialog() == System.Windows.Forms.DialogResult.OK) {
            return ofd.FileName;
        }
        return null;
    }
}
/// <summary>
/// Lets the user pick the book file (text or HTML); on a confirmed choice the path is stored
/// in the book text box and the Generate button state is refreshed.
/// </summary>
private void btnBrowseBook_Click(object sender, EventArgs e)
{
    string chosenPath = this.SelectFile(true);
    if(chosenPath == null) {
        return;
    }
    this.txtBookFile.Text = chosenPath;
    this.UpdateGenerateButtonState();
}
/// <summary>
/// Lets the user pick the clippings file (text only); on a confirmed choice the path is stored
/// in the clippings text box and the book list is reloaded.
/// </summary>
private void btnBrowseClippingsFile_Click(object sender, EventArgs e)
{
    string chosenPath = this.SelectFile(false);
    if(chosenPath == null) {
        return;
    }
    this.txtClippingsFile.Text = chosenPath;
    this.LoadBookList();
}
/// <summary>
/// Builds an HTML page listing, for every highlight of the selected book, the sentence of the
/// book text that contains it (highlight wrapped in a span) plus a search link, and writes the
/// page to "clippingstoanki.html" in the temp directory.
/// </summary>
/// <param name="bookEncoding">Encoding used to read the book file.</param>
/// <param name="errorMessage">
/// Empty on full success; a warning listing unmatched highlights when only some were found;
/// an error description when the method returns false.
/// </param>
/// <param name="outputFilename">Path of the written HTML file when the method returns true.</param>
/// <returns>
/// False when no highlight at all could be located in the book text or when an exception
/// occurred; true otherwise.
/// </returns>
private bool GenerateHTML(Encoding bookEncoding, out string errorMessage, out string outputFilename)
{
    errorMessage = string.Empty;
    outputFilename = string.Empty;
    try {
        //Strip aozora formatting
        string bookText = RubyRegex.Replace(File.ReadAllText(this.txtBookFile.Text, bookEncoding), "").Replace("|", "");
        //Strip html formatting, convert </p> to new lines for end-of-sentence detection purposes
        bookText = HTMLRegex.Replace(HTMLRubyRegex.Replace(bookText.Replace("</p>", "\n"), ""), "");

        StringBuilder output = new StringBuilder();
        //Declare the charset explicitly: the file is written as UTF-8 and browsers may otherwise misdetect it
        output.Append("<html><head><meta charset=\"utf-8\"><title>ClippingsToAnki Result</title>");
        output.Append("<style type=\"text/css\">" +
            "body {background:beige;color:#000;font-size:18px;}" +
            ".hl {color:#F00;font-weight:bold;}" +
            "a {color: #00aaff; text-decoration: none; font-weight:bold;}" +
            "</style>");
        output.Append("</head><body>");

        //A missing selection simply matches no clipping and ends in the "none found" error below
        object selectedBook = this.cboBookName.SelectedItem;
        string selectedTitle = selectedBook == null ? null : selectedBook.ToString();

        int currentTextIndex = 0;
        int firstIndexForCurrentLocation = 0;
        int lastLocation = -1;
        int highlightCount = 0;
        List<string> unmatchedWords = new List<string>();
        foreach(Clipping clipping in this.GetClippings()) {
            if(clipping.Type != eClippingType.Highlight || clipping.BookTitle != selectedTitle) {
                continue;
            }
            highlightCount++;
            if(lastLocation != clipping.Location) {
                //Entering a new Kindle location: remember the text position reached so far.
                //This must be an index into bookText, not the Kindle location number itself.
                firstIndexForCurrentLocation = currentTextIndex;
                lastLocation = clipping.Location;
            }

            //Ordinal search, so that the match has exactly clipping.Content.Length characters
            int matchIndex = bookText.IndexOf(clipping.Content, currentTextIndex, StringComparison.Ordinal);
            if(matchIndex == -1) {
                //Try again from the start of the current location in case file order of highlights doesn't match text order
                matchIndex = bookText.IndexOf(clipping.Content, firstIndexForCurrentLocation, StringComparison.Ordinal);
                if(matchIndex == -1) {
                    //Still can't find the word, add it as a word that wasn't matched, reset index to start of current location
                    unmatchedWords.Add(clipping.Content);
                    currentTextIndex = firstIndexForCurrentLocation;
                    continue;
                }
            }
            currentTextIndex = matchIndex;

            string sentence = this.GetSentence(bookText, currentTextIndex);
            int highlightStart = sentence.IndexOf(clipping.Content, StringComparison.Ordinal);
            if(highlightStart == -1) {
                //The highlight is not fully inside the extracted sentence (e.g. it spans a
                //sentence terminator); show the highlighted text itself rather than failing.
                sentence = clipping.Content;
                highlightStart = 0;
            }
            //Wrap the highlight in one step so the search cannot hit the inserted markup
            sentence = sentence.Substring(0, highlightStart)
                + "<span class=\"hl\">" + clipping.Content + "</span>"
                + sentence.Substring(highlightStart + clipping.Content.Length);

            string encodeSearchString = Uri.EscapeDataString("\"" + clipping.Content + "\"");
            output.Append("<a target=\"_blank\" href=\"https://www.google.co.jp/search?q=").Append(encodeSearchString).Append("&lr=lang_ja\">◎</a> ");
            output.Append("<span style=\"display:none;\">。</span>");
            output.Append("<span>").Append(sentence).Append("</span><br/><br/>");
        }
        output.Append("</body></html>");

        if(unmatchedWords.Count == highlightCount) {
            errorMessage = "None of the words highlighted could be found in the specified book file.\n\nSelect the correct book/encoding and try again.";
            return false;
        } else if(unmatchedWords.Count > 0) {
            //Header on its own line, then one unmatched entry per line
            errorMessage = "The following words/phrases were not found in the specified book file:"
                + Environment.NewLine
                + string.Join(Environment.NewLine, unmatchedWords.ToArray());
        }

        outputFilename = Path.Combine(Path.GetTempPath(), "clippingstoanki.html");
        File.WriteAllText(outputFilename, output.ToString());
        return true;
    } catch(Exception ex) {
        errorMessage = ex.Message + "\n\n" + ex.StackTrace;
        return false;
    }
}
/// <summary>
/// Generates the HTML result with the encoding chosen in the encoding drop-down, reports any
/// error or warning message, and opens the generated file on success.
/// Encoding index 0 tries code page 932 (Shift-JIS) first and falls back to UTF-8;
/// index 1 uses code page 932 only; any other index uses UTF-8 only.
/// </summary>
private void btnGenerate_Click(object sender, EventArgs e)
{
    bool result;
    string message;
    string outputFilename;
    if(this.cboEncoding.SelectedIndex == 0) {
        //Try Shift-JIS first, then UTF8 if Shift-JIS finds no word matches
        result = this.GenerateHTML(Encoding.GetEncoding(932), out message, out outputFilename);
        if(!result) {
            result = this.GenerateHTML(Encoding.UTF8, out message, out outputFilename);
        }
    } else {
        Encoding bookEncoding = this.cboEncoding.SelectedIndex == 1 ? Encoding.GetEncoding(932) : Encoding.UTF8;
        //Honour the explicit choice; always passing UTF-8 here made the Shift-JIS option a no-op
        result = this.GenerateHTML(bookEncoding, out message, out outputFilename);
    }

    if(message != string.Empty) {
        //A message together with a successful result is only a warning (some highlights unmatched)
        if(result) {
            MessageBox.Show(message, "Warning", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        } else {
            MessageBox.Show(message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    if(result) {
        System.Diagnostics.Process.Start(outputFilename);
    }
}
/// <summary>
/// Tells whether a character is treated as a sentence boundary: an ASCII full stop,
/// an ideographic full stop, a horizontal ellipsis, or a line feed / carriage return.
/// </summary>
/// <param name="chr">Character to classify.</param>
/// <returns>True for one of the boundary characters, false for anything else.</returns>
private bool IsEndOfSentence(char chr)
{
    switch(chr) {
        case '.':
        case '。':
        case '…':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Extracts the sentence surrounding a position in a text. The sentence starts right after
/// the previous end-of-sentence character (or at the start of the text) and runs up to and
/// including the next end-of-sentence character (or the end of the text).
/// </summary>
/// <param name="text">Text to extract from.</param>
/// <param name="index">Position inside <paramref name="text"/> that the sentence must contain.</param>
/// <returns>
/// The trimmed sentence; an empty string when the text is null/empty or the index lies
/// outside the text.
/// </returns>
private string GetSentence(string text, int index)
{
    if(string.IsNullOrEmpty(text) || index < 0 || index >= text.Length) {
        return string.Empty;
    }
    int lastPos = text.Length - 1;

    //Walk back while the preceding character still belongs to this sentence. Looking at
    //startPos - 1 keeps the character at index itself, even when it is a terminator.
    int startPos = index;
    while(startPos > 0 && !this.IsEndOfSentence(text[startPos - 1])) {
        startPos--;
    }

    //If the position sits on terminator characters (e.g. a highlight starting with an
    //ellipsis), step over them so the text that follows is included as well.
    int endPos = index;
    while(endPos < lastPos && this.IsEndOfSentence(text[endPos])) {
        endPos++;
    }
    //Bounds are checked before the character is read, so a final sentence without a
    //terminator ends at the last character instead of indexing past the text.
    while(endPos < lastPos && !this.IsEndOfSentence(text[endPos])) {
        endPos++;
    }
    return text.Substring(startPos, endPos - startPos + 1).Trim();
}
/// <summary>
/// Refreshes the Generate button when the selected book changes. In DEBUG builds the debug
/// text box is additionally filled with the clippings of the selected book, one per line.
/// </summary>
private void cboBookName_SelectedIndexChanged(object sender, EventArgs e)
{
    this.UpdateGenerateButtonState();
#if DEBUG
    this.txtDebug.Text = string.Empty;
    //No selection: nothing to list, and SelectedItem must not be dereferenced
    object selectedBook = this.cboBookName.SelectedItem;
    if(selectedBook != null) {
        string selectedTitle = selectedBook.ToString();
        StringBuilder debugText = new StringBuilder();
        foreach(Clipping clipping in this.GetClippings()) {
            if(clipping.BookTitle == selectedTitle) {
                debugText.Append(clipping.ToString()).Append(Environment.NewLine);
            }
        }
        //Assign once instead of growing the text box content clipping by clipping
        this.txtDebug.Text = debugText.ToString();
    }
#endif
}
/// <summary>
/// Enables the Generate button only when a book is selected and both the book file and the
/// clippings file exist on disk.
/// </summary>
private void UpdateGenerateButtonState()
{
    //Single assignment: the button is never switched on for a moment before the checks run
    this.btnGenerate.Enabled = this.cboBookName.SelectedIndex != -1
        && File.Exists(this.txtBookFile.Text)
        && File.Exists(this.txtClippingsFile.Text);
}
/// <summary>
/// Re-evaluates the Generate button when focus leaves the book file text box.
/// </summary>
private void txtBookFile_Leave(object sender, EventArgs e)
{
    UpdateGenerateButtonState();
}
/// <summary>
/// Re-evaluates the Generate button when the main layout panel is clicked.
/// </summary>
private void tlpMain_Click(object sender, EventArgs e)
{
    UpdateGenerateButtonState();
}
/// <summary>
/// Opens the book file browser when the (blank) book file text box is clicked;
/// does nothing once the box contains text.
/// </summary>
private void txtBookFile_Click(object sender, EventArgs e)
{
    bool hasPath = this.txtBookFile.Text.Trim().Length != 0;
    if(hasPath) {
        return;
    }
    this.btnBrowseBook_Click(null, null);
}
/// <summary>
/// Opens the clippings file browser when the (blank) clippings text box is clicked;
/// does nothing once the box contains text.
/// </summary>
private void txtClippingsFile_Click(object sender, EventArgs e)
{
    bool hasPath = this.txtClippingsFile.Text.Trim().Length != 0;
    if(hasPath) {
        return;
    }
    this.btnBrowseClippingsFile_Click(null, null);
}
/// <summary>
/// Runs the form's initial setup once the form has been shown.
/// </summary>
private void ClippingsToAnki_Shown(object sender, EventArgs e)
{
    InitializeForm();
}
}
/// <summary>
/// Kind of an entry in the clippings file, as derived by Clipping.Type from keywords on the
/// entry's second line.
/// </summary>
public enum eClippingType
{
//Neither a highlight keyword nor a note keyword was found
Unknown = 0,
//The second line contains one of the highlight keywords
Highlight = 1,
//The second line contains one of the note keywords (and no highlight keyword)
Note = 2
}
/// <summary>
/// One entry of a clippings file: first line (book title and author), second line (metadata
/// with the entry kind and location) and the remaining lines (the clipped text).
/// Instances order by location, then by their position in the file.
/// </summary>
public class Clipping : IComparable, IComparable<Clipping>
{
    //A number, optionally followed by "-" and more digits, surrounded by whitespace and followed by "|"
    private static readonly Regex LocationRegex = new Regex("\\s([0-9]+)-{0,1}[0-9]*\\s\\|", RegexOptions.Compiled);
    private readonly string _firstLine = string.Empty;
    private readonly string _secondLine = string.Empty;
    private readonly string _remainingLines = string.Empty;
    private readonly int _filePos;
    //Parsed once: the value is needed for every comparison made while sorting
    private readonly int _location;

    /// <summary>
    /// Parses one clippings entry.
    /// </summary>
    /// <param name="clippingContent">
    /// Text of the entry with "\n" line breaks. Entries with fewer than three non-empty lines
    /// (and null) yield a clipping whose title, metadata and content are all empty.
    /// </param>
    /// <param name="filePos">Position of the entry within the file; used as sort tie-breaker.</param>
    public Clipping(string clippingContent, int filePos)
    {
        string[] lines = (clippingContent ?? string.Empty).Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);
        if(lines.Length > 2) {
            this._firstLine = lines[0].Trim();
            this._secondLine = lines[1].Trim();
            StringBuilder content = new StringBuilder();
            for(int i = 2; i < lines.Length; i++) {
                //Separator only once something has been written, so leading blank lines vanish
                if(content.Length > 0) {
                    content.Append("\n");
                }
                content.Append(lines[i].Trim());
            }
            this._remainingLines = content.ToString();
        }
        this._filePos = filePos;
        this._location = ParseLocation(this._secondLine);
    }

    /// <summary>Title and author line (the first line of the entry).</summary>
    public string BookTitle
    {
        get
        {
            //Return first line for Title + Author
            return this._firstLine;
        }
    }

    /// <summary>
    /// Location number taken from the metadata line (the first number of a range),
    /// or -1 when no location could be parsed.
    /// </summary>
    public int Location
    {
        get
        {
            return this._location;
        }
    }

    /// <summary>Entry kind derived from localized keywords on the metadata line.</summary>
    public eClippingType Type
    {
        get
        {
            //En, Jap, Fr, Spanish, Chinese, German, Italian, Portuguese
            if(this._secondLine.ContainsAny("Highlight", "ハイライト", "Surlignement", "Subrayado", "标注", "Markierung", "Evidenziazione", "Destaque")) {
                return eClippingType.Highlight;
            } else if(this._secondLine.ContainsAny("Note", "メモ", "Nota", "笔记", "Notiz")) {
                return eClippingType.Note;
            }
            return eClippingType.Unknown;
        }
    }

    /// <summary>The clipped text: all lines after the metadata line, trimmed and joined with "\n".</summary>
    public string Content
    {
        get
        {
            return this._remainingLines;
        }
    }

    /// <summary>
    /// Orders by location, then by file position. Returns 0 only for clippings that share
    /// both, which keeps the ordering consistent (an instance compares equal to itself).
    /// </summary>
    /// <param name="other">Clipping to compare with; null sorts before any instance.</param>
    public int CompareTo(Clipping other)
    {
        if(other == null) {
            return 1;
        }
        int byLocation = this._location.CompareTo(other._location);
        if(byLocation != 0) {
            return byLocation;
        }
        return this._filePos.CompareTo(other._filePos);
    }

    /// <summary>
    /// Non-generic comparison; anything that is not a Clipping (including null) compares as equal.
    /// </summary>
    public int CompareTo(object obj)
    {
        Clipping other = obj as Clipping;
        if(other == null) {
            return 0;
        }
        return this.CompareTo(other);
    }

    public override string ToString()
    {
        return this.Type.ToString() + " - " + this.Location.ToString() + " - " + this.Content;
    }

    //Extracts the location number from a metadata line; -1 when there is none
    private static int ParseLocation(string metadataLine)
    {
        string line = metadataLine;
        if(line.CountOf('|') > 1) {
            //When both a page and a location are given, strip the page info
            line = line.Substring(line.IndexOf('|'));
        }
        Match match = LocationRegex.Match(line);
        int location;
        //TryParse writes 0 into its out argument on failure, so the -1 sentinel has to be
        //returned explicitly rather than pre-assigned
        if(match.Success && Int32.TryParse(match.Groups[1].Value, out location)) {
            return location;
        }
        return -1;
    }
}
/// <summary>
/// Small string helpers used when parsing clippings.
/// </summary>
public static class StringExtension
{
    /// <summary>
    /// Tells whether the string contains at least one of the given needles, comparing the
    /// invariant-lowercased forms of both sides.
    /// </summary>
    /// <param name="str">String to search in.</param>
    /// <param name="needles">Candidate substrings; with no candidates the result is false.</param>
    public static bool ContainsAny(this string str, params string[] needles)
    {
        string haystack = str.ToLowerInvariant();
        for(int i = 0; i < needles.Length; i++) {
            string candidate = needles[i].ToLowerInvariant();
            if(haystack.Contains(candidate)) {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Counts how often a character occurs in the string.
    /// </summary>
    /// <param name="str">String to scan.</param>
    /// <param name="needle">Character to count.</param>
    public static int CountOf(this string str, char needle)
    {
        int occurrences = 0;
        for(int pos = 0; pos < str.Length; pos++) {
            occurrences += str[pos] == needle ? 1 : 0;
        }
        return occurrences;
    }
}
}