Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- namespace cscrawl
- {
- using Contentstack.Core;
- using Contentstack.Core.Configuration;
- using Contentstack.Core.Internals;
- using Contentstack.Core.Models;
- using Newtonsoft.Json.Linq;
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Net;
/// <summary>
/// Console crawler that walks every content type and entry in a Contentstack
/// stack and, for each entry that has a site-relative URL, saves the rendered
/// page (HTML downloaded from the site) and the entry's JSON side by side
/// under the export directory.
/// </summary>
public class Program
{
    // Number of records requested per page of content types or entries.
    private const int PageSize = 100;

    /// <summary>
    /// Entry point: exports all entries, then moves pages that turned out to
    /// have children into index files of their own directory.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string output = "C:\\temp\\export"; //TODO: config
        string urlPrefix = "https://localhost:44342"; //TODO: config
        ContentstackClient stack = new ContentstackClient(
            new ContentstackOptions()
            {
                ApiKey = "//TODO: config", //TODO: config
                DeliveryToken = "//TODO: config", //TODO: config
                Environment = "//TODO: config" //TODO: config
            });

        // Create the export root up front so the final RenameFiles pass always
        // has a directory to enumerate, even when nothing was exported.
        Directory.CreateDirectory(output);

        // One WebClient serves the whole (sequential) crawl and is disposed at the end.
        using (WebClient webClient = new WebClient())
        {
            // total number of content type records seen so far (the paging offset)
            int contentTypesProcessed = 0;
            // number of content type records in the page just read
            int contentTypesInPage;

            // iterate pages of content type records
            do
            {
                Dictionary<string, object> contentTypePageParameters = new Dictionary<string, object>();
                contentTypePageParameters["skip"] = contentTypesProcessed;
                contentTypePageParameters["limit"] = PageSize;
                contentTypesInPage = 0;

                try
                {
                    foreach (JObject contentTypeJson in
                        stack.GetContentTypes(contentTypePageParameters).Result)
                    {
                        // count every record, not every page, so that "skip" advances correctly
                        contentTypesProcessed++;
                        contentTypesInPage++;

                        ContentType contentType = stack.ContentType(contentTypeJson["uid"].ToString());
                        ExportEntries(contentType, output, urlPrefix, webClient);
                    }
                }
                catch (Exception ex)
                {
                    // report as much detail as possible, then let the failure propagate
                    LogException(ex);
                    throw;
                }
            }
            // a short (or empty) page means there are no more pages
            while (contentTypesInPage == PageSize);
        }

        RenameFiles(new DirectoryInfo(output));
    }

    // Pages through every entry of one content type and exports each of them.
    private static void ExportEntries(ContentType contentType, string output, string urlPrefix, WebClient webClient)
    {
        // total number of entries of this content type seen so far (the paging offset)
        int entriesProcessed = 0;
        // number of entries in the page just read
        int entriesInPage;

        do
        {
            Query query = contentType.Query().Skip(entriesProcessed).Limit(PageSize);
            entriesInPage = 0;

            foreach (Entry entry in query.Find<Entry>().Result)
            {
                entriesProcessed++;
                entriesInPage++;
                ExportEntry(entry, output, urlPrefix, webClient);
            }
        }
        // a short (or empty) page means there are no more pages
        while (entriesInPage == PageSize);
    }

    // Writes <path>.html (downloaded from urlPrefix + url) and <path>.json for
    // one entry; entries without a usable site-relative URL are skipped.
    private static void ExportEntry(Entry entry, string output, string urlPrefix, WebClient webClient)
    {
        // ignore entries that do not have URLs (missing key or null value)
        object urlValue;
        if (!entry.Object.TryGetValue("url", out urlValue) || urlValue == null)
        {
            return;
        }

        // ignore entries with URLs that do not appear to be file paths
        string url = urlValue.ToString();
        if (String.IsNullOrWhiteSpace(url)
            || !url.StartsWith("/", StringComparison.Ordinal))
        {
            return;
        }

        // path of the files to create, without extension
        string basePath = output + url.Replace("/", "\\");

        // A URL that ends in a slash, or that maps onto a directory that already
        // exists, denotes that directory's home page: store it as "index".
        if (basePath.EndsWith("\\", StringComparison.Ordinal) || Directory.Exists(basePath))
        {
            basePath = basePath.TrimEnd('\\') + "\\index";
        }

        FileInfo fileInfo = new FileInfo(basePath);

        // CreateDirectory does nothing when the directory already exists
        Directory.CreateDirectory(fileInfo.Directory.FullName);

        // download the page, write the HTML and JSON to files
        File.WriteAllBytes(fileInfo.FullName + ".html", webClient.DownloadData(urlPrefix + url));
        File.WriteAllText(fileInfo.FullName + ".json", entry.ToJson().ToString());
    }

    // Writes an exception chain to the console, outermost first. Stops at the
    // first WebException that carries a response (its body is printed) or the
    // first ContentstackException (its error details are printed).
    private static void LogException(Exception exception)
    {
        for (Exception ex = exception; ex != null; ex = ex.InnerException)
        {
            WebException wex = ex as WebException;
            if (wex != null && wex.Response != null)
            {
                using (var stream = wex.Response.GetResponseStream())
                using (var reader = new StreamReader(stream))
                {
                    Console.WriteLine(wex.GetType() + " : " + wex.Message);
                    Console.WriteLine(reader.ReadToEnd());
                    Console.WriteLine(wex.StackTrace);
                }

                return;
            }

            ContentstackException cex = ex as ContentstackException;
            if (cex != null)
            {
                Console.WriteLine(cex.GetType() + " : " + cex.Message);
                Console.WriteLine("Error code: " + cex.ErrorCode);
                Console.WriteLine("Error message: " + cex.ErrorMessage);
                Console.WriteLine("Status code: " + cex.StatusCode);
                if (cex.Errors != null)
                {
                    foreach (string key in cex.Errors.Keys)
                    {
                        Console.WriteLine("Error : " + key + " : " + cex.Errors[key]);
                    }
                }

                Console.WriteLine(cex.StackTrace);
                return;
            }

            // report the exception at the current level of the chain
            Console.WriteLine(ex.GetType() + " : " + ex.Message);
            Console.WriteLine(ex.StackTrace);
        }
    }

    /// <summary>
    /// Recursive: for a directory (/) and its descendants (/child, /child/grandchild, etc),
    /// if sibling files named after the directory exist (/child.html and /child.json),
    /// moves them into the directory as its index files (/child/index.html and
    /// /child/index.json), overwriting any index files already there.
    /// </summary>
    /// <param name="directory">Directory at which to start.</param>
    private static void RenameFiles(DirectoryInfo directory)
    {
        // Always build paths from FullName so the files that are tested for
        // existence are exactly the files that get moved.
        string htmlFile = directory.FullName + ".html";
        string jsonFile = directory.FullName + ".json";

        if (File.Exists(htmlFile) && File.Exists(jsonFile))
        {
            File.Move(htmlFile, directory.FullName + "\\index.html", true /*overwrite*/ );
            File.Move(jsonFile, directory.FullName + "\\index.json", true /*overwrite*/ );
        }

        foreach (DirectoryInfo subdirectory in directory.GetDirectories())
        {
            RenameFiles(subdirectory);
        }
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement