Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Xml;
- using System.Xml.Linq;
- using System.Xml.Serialization;
- using System.IO;
- using System.Text;
- using System.Threading;
- using System.Threading.Tasks;
- using System.Xml.Schema;
- using HtmlAgilityPack;
- namespace XML_Template_Generator
- {
- class Program
- {
/// <summary>
/// Program-wide settings: the folders that are scanned and the search
/// patterns used to pick files out of them.
/// </summary>
public class Global
{
    // Folder locations
    public static string ArchiveDirectory = @"D:\Journals\";
    public static string Directory = @"D:\Journals\1-1";

    // Search patterns handed to the directory/file enumeration calls
    public static string AllFilesExtension = "*.*";
    public static string HtmlExtension = "*.html";

    // Image search patterns
    public static string JpgExtension = "*.jpg";
    public static string GifExtension = "*.gif";
    public static string PngExtension = "*.png";
}
/// <summary>
/// Name and e-mail address of an author.
/// </summary>
/// <remarks>
/// All three properties are static, so there is a single shared set of
/// values for the whole program. Constructing an instance overwrites them.
/// </remarks>
public class AuthorData
{
    /// <summary>Shared first-name value.</summary>
    public static string FirstName { get; set; }

    /// <summary>Shared last-name value.</summary>
    public static string LastName { get; set; }

    /// <summary>Shared e-mail value.</summary>
    public static string Email { get; set; }

    /// <summary>
    /// Stores the supplied values in the shared static properties.
    /// </summary>
    /// <param name="firstName">Value assigned to <see cref="FirstName"/>.</param>
    /// <param name="lastName">Value assigned to <see cref="LastName"/>.</param>
    /// <param name="email">Value assigned to <see cref="Email"/>.</param>
    public AuthorData(string firstName, string lastName, string email)
    {
        AuthorData.FirstName = firstName;
        AuthorData.LastName = lastName;
        AuthorData.Email = email;
    }
}
/// <summary>
/// Builds the issue/article import XML from the static fields of this class.
/// </summary>
/// <remarks>
/// The class is stateful: callers assign <see cref="Title"/>,
/// <see cref="Description"/> and the author fields, then call
/// <see cref="XmlConstructor"/> to render the document.
/// </remarks>
public class XmlGenerator
{
    private const string XsiNamespaceUri = "http://www.w3.org/2001/XMLSchema-instance";
    private const string SchemaLocation = "http://dev.openjournal.tld native.xsd";
    private static readonly XNamespace Xsi = XsiNamespaceUri;

    // Evaluated once, during type initialization: the HTML files found
    // directly in Global.Directory.
    public static string[] htmlFilelist = Directory.GetFiles(Global.Directory, Global.HtmlExtension);

    // Issue and article specific strings
    public static string Title = "";
    public static string Description = "";

    // Snapshots of AuthorData taken during type initialization; later changes
    // to AuthorData are not reflected here. The generated XML reads AuthorData
    // directly and does not use these three fields.
    public static string AuthorsFirstname = AuthorData.FirstName;
    public static string AuthorsLastname = AuthorData.LastName;
    public static string AuthorEmail = AuthorData.Email;

    // Assigned by callers; not written to the generated XML.
    public static string Publisher = "";

    public static string PrimaryAuthorFirstname = "";
    public static string PrimaryAuthorLastname = "";
    public static string PrimaryAuthorEmail = "info@email.se";

    /// <summary>
    /// Renders the import document for the current field values.
    /// </summary>
    /// <returns>
    /// The XML declaration followed by the indented document text.
    /// </returns>
    public static string XmlConstructor()
    {
        XDocument xmldocument = new XDocument(
            new XDeclaration("1.0", "utf-8", "yes"),
            new XComment("Creating the issues and articles tree for import"),
            new XElement("issues",
                new XElement("issue",
                    XsiPrefix(),
                    new XAttribute("published", true),
                    new XAttribute("current", false),
                    new XAttribute("access_status", "1"),
                    XsiSchemaLocation(),
                    new XElement("id",
                        new XAttribute("type", "internal"),
                        new XAttribute("advice", "ignore"),
                        "4"),
                    new XElement("description",
                        new XAttribute("locale", "en_US"),
                        Description),
                    new XElement("issue_identification",
                        new XElement("volume", 1),
                        new XElement("number", 1),
                        new XElement("year", 1995),
                        // Was "en-us"; every other locale in this document is "en_US".
                        new XElement("title",
                            new XAttribute("locale", "en_US"),
                            Title)),
                    new XElement("date_published", "2018-05-16"),
                    new XElement("last_modified", "2018-05-16"),
                    BuildSections(),
                    BuildIssueCovers(),
                    BuildIssueGalleys(),
                    BuildArticles())));

        // XDocument.ToString() leaves the XML declaration out, so it is
        // prepended explicitly; otherwise the XDeclaration above is never emitted.
        return xmldocument.Declaration.ToString() + Environment.NewLine + xmldocument.ToString();
    }

    // Declares the xsi prefix on the element it is added to.
    private static XAttribute XsiPrefix()
    {
        return new XAttribute(XNamespace.Xmlns + "xsi", XsiNamespaceUri);
    }

    // Builds the xsi:schemaLocation attribute shared by several elements.
    private static XAttribute XsiSchemaLocation()
    {
        return new XAttribute(Xsi + "schemaLocation", SchemaLocation);
    }

    // Builds <sections> with its single "ART" section.
    private static XElement BuildSections()
    {
        // id/abbrev/title are nested inside <section>. Previously a misplaced
        // parenthesis closed <section> early and made them its siblings.
        // NOTE(review): believed to match the OJS native import schema - confirm against native.xsd.
        return new XElement("sections",
            new XElement("section",
                new XAttribute("ref", "ART"),
                new XAttribute("seq", 0),
                new XAttribute("editor_restricted", 0),
                new XAttribute("meta_indexed", 1),
                new XAttribute("abstracts_not_required", 0),
                new XAttribute("hide_title", 0),
                new XAttribute("hide_author", 0),
                new XAttribute("abstract_word_count", 0),
                new XElement("id",
                    new XAttribute("type", "internal"),
                    new XAttribute("advice", "ignore")),
                new XElement("abbrev",
                    new XAttribute("locale", "en_US"),
                    "ART"),
                new XElement("title",
                    new XAttribute("locale", "en_US"),
                    "Artiklar")));
    }

    // Builds <issue_covers> with one cover entry.
    private static XElement BuildIssueCovers()
    {
        // cover_image and cover_image_alt_text are nested inside <cover>;
        // previously <cover> was closed early and they became its siblings.
        // NOTE(review): believed to match the OJS native import schema - confirm against native.xsd.
        return new XElement("issue_covers",
            new XElement("cover",
                new XAttribute("locale", "en_US"),
                new XElement("cover_image", "cover_issue_4_en_US.jpg"),
                new XElement("cover_image_alt_text")));
    }

    // Builds <issue_galleys> with one <issue_file> per entry in htmlFilelist.
    private static XElement BuildIssueGalleys()
    {
        return new XElement("issue_galleys",
            XsiPrefix(),
            XsiSchemaLocation(),
            new XElement("issue_galley",
                new XAttribute("locale", "en_US"),
                new XElement("label", "Paper1"),
                htmlFilelist.Select(path => BuildIssueFile(path))));
    }

    // Builds one <issue_file> entry for the given HTML file path.
    private static XElement BuildIssueFile(string path)
    {
        // file_size is emitted once; the original wrote the element twice.
        return new XElement("issue_file",
            new XElement("file_name", path),
            new XElement("file_type", "text/html"),
            new XElement("file_size", "FILE SIZE"),
            new XElement("content_type", 1),
            new XElement("original_file_name", "FILE NAME"),
            new XElement("date_uploaded", "2018-05-16"),
            new XElement("date_modified", "2018-05-16"));
    }

    // Builds <articles> with a single <article> for the current Title/Description.
    private static XElement BuildArticles()
    {
        // Invariant culture keeps the yyyy-MM-dd date independent of the
        // calendar of the machine's current culture.
        string submitted = DateTime.Now.ToString(
            "yyyy-MM-dd",
            System.Globalization.CultureInfo.InvariantCulture);

        return new XElement("articles",
            XsiPrefix(),
            XsiSchemaLocation(),
            new XElement("article",
                XsiPrefix(),
                new XAttribute("locale", "en_US"),
                new XAttribute("date_submitted", submitted),
                new XAttribute("stage", "production"),
                new XAttribute("date_published", "1995-01-01"),
                new XAttribute("section_ref", "ART"),
                new XAttribute("seq", 1),
                new XAttribute("access_status", 0),
                new XElement("id",
                    new XAttribute("type", "internal"),
                    new XAttribute("advice", "ignore"),
                    5),
                new XElement("title",
                    new XAttribute("locale", "en_US"),
                    Title),
                new XElement("abstract",
                    new XAttribute("locale", "en_US"),
                    Description),
                new XElement("licenseUrl", "http://creativecommons.org/licenses/by-nc-nd/4.0"),
                new XElement("copyrightHolder",
                    new XAttribute("locale", "en_US"),
                    "INSERT NAME OF COPYRIGHT HOLDER HERE"),
                new XElement("copyrightYear", "INSERT YEAR HERE"),
                // Several keywords can be produced by passing a sequence of
                // XElement("keyword", ...) here, the same way the issue_file
                // elements are produced from htmlFilelist.
                new XElement("keywords",
                    new XAttribute("locale", "en_US"),
                    new XElement("keyword", "HOW DO I GET MULTIPLE KEYWORDS?")),
                BuildAuthors(),
                BuildSubmissionFile()));
    }

    // Builds <authors>: the primary contact first, then the AuthorData entry.
    private static XElement BuildAuthors()
    {
        return new XElement("authors",
            XsiPrefix(),
            XsiSchemaLocation(),
            new XElement("author",
                new XAttribute("primary_contact", "true"),
                new XAttribute("include_in_browser", "true"),
                new XAttribute("user_group_ref", "Authors"),
                new XElement("firstname", PrimaryAuthorFirstname),
                new XElement("lastname", PrimaryAuthorLastname),
                new XElement("email", PrimaryAuthorEmail)),
            new XElement("author",
                new XAttribute("primary_contact", "false"),
                new XAttribute("include_in_browser", "true"),
                new XAttribute("user_group_ref", "Authors"),
                new XElement("firstname", AuthorData.FirstName),
                new XElement("lastname", AuthorData.LastName),
                new XElement("email", AuthorData.Email)));
    }

    // Builds the <submission_file> element; its values are still placeholders.
    private static XElement BuildSubmissionFile()
    {
        return new XElement("submission_file",
            XsiPrefix(),
            new XAttribute("id", "INSERT ID HERE"),
            XsiSchemaLocation(),
            new XElement("revision",
                new XAttribute("number", "1"),
                new XAttribute("genre", "ARTIKELTEXT"),
                new XAttribute("filename", "INSERT FILENAME HERE"),
                new XAttribute("date_uploaded", "INSERT DATE FOR UPLOAD HERE"),
                new XAttribute("date_modified", "INSERT DATE FOR LAST MODIFICATION HERE"),
                new XAttribute("filesize", "INSERT FILE SIZE HERE (MIGHT BE OPTIONAL)"),
                new XAttribute("filetype", "INSERT TYPE OF FILE text/html OR PICTURES"),
                new XAttribute("user_group", "Authors"),
                new XAttribute("uploader", "INSERT FIRST AUTHOR AS UPLOADER HERE"),
                new XElement("name",
                    new XAttribute("locale", "en_US"),
                    "AUTHORSNAME, USER_GROUP, FILENAME"),
                new XElement("href",
                    new XAttribute("src", "http://localhost/importfolder/papers.html"),
                    new XAttribute("mime_type", "text/url"))));
    }
}
/// <summary>
/// Entry point: reads the Dublin Core meta tags of every HTML file in
/// <c>XmlGenerator.htmlFilelist</c>, prints what it found, then prints some
/// file counts and the generated XML.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Sub-directories directly below the archive root; only counted for now.
    string[] dirs = Directory.GetDirectories(Global.ArchiveDirectory, Global.AllFilesExtension, SearchOption.TopDirectoryOnly);

    // List all files and types in individual lists
    string[] htmlFilelist = XmlGenerator.htmlFilelist;
    var jpgFilelist = Getfile(Global.Directory, Global.JpgExtension);
    var gifFilelist = Getfile(Global.Directory, Global.GifExtension);
    var pngFilelist = Getfile(Global.Directory, Global.PngExtension);

    // Read every file in the list and pull the metadata out with XPath.
    // XmlGenerator's fields are overwritten on every iteration, so the XML
    // printed after the loop reflects the last file processed.
    foreach (string file in htmlFilelist)
    {
        HtmlDocument doc = new HtmlDocument();
        doc.Load(file);
        HtmlNodeNavigator navigator = (HtmlNodeNavigator) doc.CreateNavigator();

        XmlGenerator.Title = ReadMetaContent(navigator, "DC.Title");
        XmlGenerator.Description = ReadMetaContent(navigator, "DC.Description");
        XmlGenerator.Publisher = ReadMetaContent(navigator, "DC.Publisher");

        // DC.Creator holds a comma-separated list; trim each entry so that
        // "A, B" does not yield " B", and drop empty entries.
        List<string> authors = ReadMetaContent(navigator, "DC.Creator")
            .Split(',')
            .Select(name => name.Trim())
            .Where(name => name.Length > 0)
            .ToList();

        // Split the first author's name into first and last name and hand it
        // to the generator; previously it was only printed, so the XML's
        // primary author was always empty.
        string[] authorName = SplitAuthorName(authors.FirstOrDefault());
        XmlGenerator.PrimaryAuthorFirstname = authorName[0];
        XmlGenerator.PrimaryAuthorLastname = authorName[1];

        // Test for co-authors names
        AuthorData.FirstName = "Kalle";
        AuthorData.LastName = "Andersson";
        AuthorData.Email = "authors@email.com";

        // Test with output to console
        Console.WriteLine("*** Authors ***");
        foreach (var a in authors)
        {
            Console.WriteLine(a);
        }
        Console.WriteLine("File: {0}", file);
        Console.WriteLine("Title: {0}", XmlGenerator.Title);
        Console.WriteLine();
        Console.WriteLine("Description: {0}", XmlGenerator.Description);
        Console.WriteLine();
        Console.WriteLine("Primary Author: {0} {1}", authorName[0], authorName[1]);
        Console.WriteLine("Primary Author Email: {0}", XmlGenerator.PrimaryAuthorEmail);
        Console.WriteLine("Publisher: {0}", XmlGenerator.Publisher);
        Console.WriteLine("********");
    }

    // Output to console for testing
    Console.WriteLine("Total number of directories: {0}", dirs.Length);
    Console.WriteLine("Number of html-files: {0}", htmlFilelist.Length);
    Console.WriteLine("Number of jpg-files: {0}", jpgFilelist.Count());
    Console.WriteLine("Number of gif-files: {0}", gifFilelist.Count());
    Console.WriteLine("Number of png-files: {0}", pngFilelist.Count());
    Console.WriteLine("*** XML ***");
    Console.WriteLine(XmlGenerator.XmlConstructor());
    Console.ReadKey();
}

// Returns the content attribute of the <meta> tag with the given name, or an
// empty string when the document has no such tag (instead of throwing).
private static string ReadMetaContent(HtmlNodeNavigator navigator, string metaName)
{
    var node = navigator.SelectSingleNode("//meta[@name='" + metaName + "']/@content");
    if (node == null || node.Value == null)
    {
        return string.Empty;
    }
    return node.Value;
}

// Splits "First Last ..." into { first, rest }. Always returns two entries so
// that single-word or missing names cannot cause an out-of-range index.
private static string[] SplitAuthorName(string fullName)
{
    if (string.IsNullOrWhiteSpace(fullName))
    {
        return new[] { string.Empty, string.Empty };
    }

    string[] parts = fullName.Trim().Split(new[] { ' ' }, 2, StringSplitOptions.RemoveEmptyEntries);
    string lastName = parts.Length > 1 ? parts[1].Trim() : string.Empty;
    return new[] { parts[0], lastName };
}
/// <summary>
/// Collects the files below <paramref name="fdir"/> (sub-directories
/// included) whose names match <paramref name="ext"/>.
/// </summary>
/// <param name="fdir">Directory to search.</param>
/// <param name="ext">Search pattern, e.g. <c>*.jpg</c>.</param>
/// <returns>
/// The matching paths. If the search throws, the exception message is written
/// to the console and an empty sequence is returned.
/// </returns>
private static IEnumerable<string> Getfile(string fdir, string ext)
{
    var found = new List<string>();

    try
    {
        string[] matches = Directory.GetFiles(fdir, ext, SearchOption.AllDirectories);
        found.AddRange(matches);
    }
    catch (Exception error)
    {
        // Best effort: report the problem and fall through with what we have.
        Console.WriteLine(error.Message);
    }

    return found;
}
/// <summary>
/// Returns the immediate sub-directories of <paramref name="dir"/> as a list.
/// </summary>
/// <param name="dir">Directory whose sub-directories are listed.</param>
/// <returns>
/// A new list holding the paths reported by <c>Directory.GetDirectories</c>,
/// in the order they were reported.
/// </returns>
static List<string> build_directory_list(string dir)
{
    // The list constructor copies the array in order, as the manual loop did.
    return new List<string>(Directory.GetDirectories(dir));
}
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement