Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections;
- using System.Collections.Generic;
- using System.Globalization;
- using System.IO;
- using System.Linq;
- using System.Text;
- namespace IgaStats
- {
/// <summary>
/// Mutable pair of counters that accumulates the view and click totals
/// recorded for a single ad key.
/// </summary>
public class IgaEntry
{
    /// <summary>Sum of the view counts added to this entry so far.</summary>
    public int mViewCount = 0;

    /// <summary>Sum of the click counts added to this entry so far.</summary>
    public int mClickCount = 0;
}
/// <summary>
/// Composite key made of a resource id, an edition id and a location id.
/// Two keys are equal when all three ids match; keys sort by resource id,
/// then edition id, then location id.
/// </summary>
public class IgaAdKey : IEquatable<IgaAdKey>, IComparable<IgaAdKey>
{
    // NOTE: these fields feed Equals/GetHashCode. Do not modify them while the
    // key is stored in a Dictionary or HashSet, or the entry becomes unreachable.
    public int ResourceId;
    public int EditionId;
    public int LocationId;

    /// <summary>Creates a key from its three component ids.</summary>
    /// <param name="resourceId">Value stored in <see cref="ResourceId"/>.</param>
    /// <param name="editionId">Value stored in <see cref="EditionId"/>.</param>
    /// <param name="locationId">Value stored in <see cref="LocationId"/>.</param>
    public IgaAdKey(int resourceId, int editionId, int locationId)
    {
        ResourceId = resourceId;
        EditionId = editionId;
        LocationId = locationId;
    }

    /// <summary>
    /// Returns true when <paramref name="obj"/> is a key of exactly the same
    /// runtime type carrying the same three ids.
    /// </summary>
    public override bool Equals(object obj)
    {
        return Equals(obj as IgaAdKey);
    }

    /// <summary>
    /// Combines the three ids into a hash code. The combination is
    /// order-sensitive so that keys whose ids are permutations of each other
    /// (for example 1/2/3 and 3/2/1) do not systematically collide.
    /// </summary>
    public override int GetHashCode()
    {
        unchecked // overflow simply wraps; that is fine for a hash
        {
            int hash = 17;
            hash = hash * 31 + ResourceId;
            hash = hash * 31 + EditionId;
            hash = hash * 31 + LocationId;
            return hash;
        }
    }

    /// <summary>Returns the three ids in a human-readable form.</summary>
    public override string ToString()
    {
        return string.Format("ResourceId={0}, EditionId={1}, LocationId={2}", ResourceId, EditionId, LocationId);
    }

    #region IEquatable<IgaAdKey> Members

    /// <summary>
    /// Typed equality: same reference, or same runtime type with identical ids.
    /// </summary>
    /// <param name="other">Key to compare with; may be null.</param>
    /// <returns>True when both keys are equal, otherwise false.</returns>
    public bool Equals(IgaAdKey other)
    {
        if (ReferenceEquals(other, null))
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        // Exact type match keeps equality symmetric if the class is ever subclassed.
        if (other.GetType() != GetType())
        {
            return false;
        }

        return ResourceId == other.ResourceId
            && EditionId == other.EditionId
            && LocationId == other.LocationId;
    }

    #endregion

    #region IComparable<IgaAdKey> Members

    /// <summary>
    /// Orders keys by resource id, then edition id, then location id.
    /// </summary>
    /// <param name="other">Key to compare with; may be null.</param>
    /// <returns>
    /// A negative value when this key sorts first, zero when both keys are
    /// equivalent, and a positive value when this key sorts last. Following the
    /// <see cref="IComparable{T}"/> contract, any instance is greater than null.
    /// </returns>
    public int CompareTo(IgaAdKey other)
    {
        if (ReferenceEquals(other, null))
        {
            return 1;
        }

        if (ReferenceEquals(this, other))
        {
            return 0;
        }

        int diff = ResourceId.CompareTo(other.ResourceId);
        if (diff != 0)
        {
            return diff;
        }

        diff = EditionId.CompareTo(other.EditionId);
        if (diff != 0)
        {
            return diff;
        }

        return LocationId.CompareTo(other.LocationId);
    }

    #endregion
}
/// <summary>
/// Console tool that merges ad statistics CSV files into a single summary CSV.
/// Input files are grouped by the date encoded at the start of their file name
/// (yyyyMMdd); each input line holds ';'-separated items of the form
/// "locationId,resourceId,editionId,viewCount,clickCount".
/// </summary>
public class IgaStats
{
    private const string DateFormat = "yyyyMMdd";
    private const string FileNameFormat = "yyyyMMdd_HHmmss";
    private const char ItemSeparator = ';';
    private const char FieldSeparator = ',';

    // An item needs this many fields before its resource id (index 1) is read.
    private const int MinResourceFields = 3;

    // An item needs this many fields to be merged as a full statistics entry.
    private const int MinEntryFields = 5;

    // Placeholder written for a resource without data on a given day:
    // four empty value cells plus the spacer cell that sits under the id header.
    private const string EmptyColumnGroup = ",,,,,";

    private const string DefaultInputDirectory = @"E:\Ads";
    private const string DefaultOutputFileName = "results.csv";

    /// <summary>
    /// Entry point. Optional arguments: args[0] is the directory holding the
    /// input CSV files, args[1] is the output file. Without arguments the
    /// historical defaults (E:\Ads and E:\Ads\results.csv) are used.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    private static void Main(string[] args)
    {
        int argCount = args == null ? 0 : args.Length;
        string path = argCount > 0 ? args[0] : DefaultInputDirectory;
        if (String.IsNullOrEmpty(path))
        {
            Usage();
            return;
        }

        string file;
        string[] files;
        try
        {
            file = argCount > 1 ? args[1] : Path.Combine(path, DefaultOutputFileName);

            // Never feed a previous result file back in as input.
            string outputFullPath = Path.GetFullPath(file);
            files = Directory.GetFiles(path, "*.csv", SearchOption.TopDirectoryOnly)
                .Where(f => !String.Equals(Path.GetFullPath(f), outputFullPath, StringComparison.OrdinalIgnoreCase))
                .ToArray();
        }
        catch (Exception e)
        {
            Console.WriteLine("Exception " + e.Message);
            return;
        }

        var allResources = new HashSet<int>();
        var resourcesDate = new Dictionary<string, HashSet<int>>();

        // Dictionary enumeration order is unspecified; emit the rows chronologically.
        List<KeyValuePair<DateTime, List<string>>> days =
            GetAllCsvFilesByDate(files).OrderBy(day => day.Key).ToList();

        try
        {
            // Scan the inputs before creating the output so that a read failure
            // does not leave a truncated result file behind.
            foreach (KeyValuePair<DateTime, List<string>> day in days)
            {
                GetAllResourcesID(day.Value, allResources);
                GetAllResourcesByDate(day.Key, day.Value, resourcesDate);
            }

            using (StreamWriter stream = File.CreateText(file))
            {
                // Header: one five-cell group per resource, in ascending id order
                // so that it lines up with the rows written by MergeFilesForDay.
                foreach (int id in allResources.OrderBy(r => r))
                {
                    stream.Write(id.ToString(CultureInfo.InvariantCulture) + ",");
                    stream.Write("Location ID" + ",");
                    stream.Write("Edition ID" + ",");
                    stream.Write("Click Count" + ",");
                    stream.Write("View Count" + ",");
                }
                stream.Write("\n");

                // One row per day.
                foreach (KeyValuePair<DateTime, List<string>> day in days)
                {
                    MergeFilesForDay(day.Key, day.Value, stream, allResources, resourcesDate);
                    stream.Write("\n");
                }
            }
        }
        catch (IOException ex)
        {
            Console.WriteLine("File is in use or inaccessible: " + ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            Console.WriteLine("File is in use or inaccessible: " + ex.Message);
        }
    }

    /// <summary>
    /// Records, under the key date formatted as yyyyMMdd, every resource id
    /// found in the given files.
    /// </summary>
    /// <param name="date">Day the files belong to.</param>
    /// <param name="files">Input files of that day.</param>
    /// <param name="resourcesDate">Map from yyyyMMdd to resource ids; updated in place.</param>
    private static void GetAllResourcesByDate(DateTime date, List<string> files, Dictionary<string, HashSet<int>> resourcesDate)
    {
        string dateKey = date.ToString(DateFormat, CultureInfo.InvariantCulture);
        foreach (string[] fields in ReadItems(files))
        {
            if (fields.Length < MinResourceFields)
            {
                continue; // skip invalid data
            }

            int resourceId;
            if (!TryParseField(fields[1], out resourceId))
            {
                continue; // an unparsable id must not be recorded as resource 0
            }

            HashSet<int> resources;
            if (!resourcesDate.TryGetValue(dateKey, out resources))
            {
                resources = new HashSet<int>();
                resourcesDate.Add(dateKey, resources);
            }
            resources.Add(resourceId);
        }
    }

    /// <summary>
    /// Groups the given files by the date encoded in the first eight characters
    /// of their file name. Files whose name (without extension) is shorter than
    /// "yyyyMMdd_HHmmss" or does not start with a valid yyyyMMdd date are ignored.
    /// </summary>
    /// <param name="files">Paths of candidate files.</param>
    /// <returns>Map from date to the files carrying that date, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="files"/> is null.</exception>
    public static Dictionary<DateTime, List<string>> GetAllCsvFilesByDate(string[] files)
    {
        if (files == null)
        {
            throw new ArgumentNullException("files");
        }

        var dataFiles = new Dictionary<DateTime, List<string>>();
        foreach (string file in files)
        {
            string fn = Path.GetFileNameWithoutExtension(file);
            if (fn == null || fn.Length < FileNameFormat.Length)
            {
                continue;
            }

            // Only the date part is needed; the time part is ignored.
            string datePart = fn.Substring(0, DateFormat.Length);
            DateTime date;
            if (!DateTime.TryParseExact(datePart, DateFormat, DateTimeFormatInfo.InvariantInfo,
                DateTimeStyles.None, out date))
            {
                continue;
            }

            List<string> sameDay;
            if (!dataFiles.TryGetValue(date, out sameDay))
            {
                sameDay = new List<string>();
                dataFiles.Add(date, sameDay);
            }
            sameDay.Add(file);
        }
        return dataFiles;
    }

    /// <summary>
    /// Collects every resource id found in the given files so that they can be
    /// laid out in a single header row.
    /// </summary>
    /// <param name="files">Input files to scan.</param>
    /// <param name="resourcesSet">Set receiving the ids; updated in place.</param>
    private static void GetAllResourcesID(List<string> files, HashSet<int> resourcesSet)
    {
        foreach (string[] fields in ReadItems(files))
        {
            if (fields.Length < MinResourceFields)
            {
                continue; // skip invalid data
            }

            int resourceId;
            if (TryParseField(fields[1], out resourceId))
            {
                resourcesSet.Add(resourceId);
            }
        }
    }

    /// <summary>
    /// Merges all files of one day: sums view and click counts per
    /// (resource, edition, location) key and writes one row fragment
    /// "date," followed by one "location,edition,clicks,views,," group per entry.
    /// Groups are emitted in ascending resource id order; a resource listed in
    /// <paramref name="allResources"/> without data for the day gets an empty
    /// group so that later columns stay under their header.
    /// </summary>
    /// <remarks>
    /// NOTE(review): a resource with several edition/location combinations on
    /// the same day still produces several groups and shifts the columns after
    /// it; the header layout has a single group per resource.
    /// </remarks>
    /// <param name="date">Day being merged.</param>
    /// <param name="files">Input files of that day.</param>
    /// <param name="streamWriter">Destination of the row; no line break is written.</param>
    /// <param name="allResources">Resource ids that have a header column.</param>
    /// <param name="resourcesDate">Resource ids per day; currently not consulted.</param>
    private static void MergeFilesForDay(DateTime date, List<string> files, StreamWriter streamWriter,
        HashSet<int> allResources, Dictionary<string, HashSet<int>> resourcesDate)
    {
        var enteries = new Dictionary<IgaAdKey, IgaEntry>();
        foreach (string[] fields in ReadItems(files))
        {
            if (fields.Length < MinEntryFields)
            {
                continue; // skip invalid data
            }

            int locationId, resourceId, editionId, viewCount, clickCount;
            if (!TryParseField(fields[0], out locationId)
                || !TryParseField(fields[1], out resourceId)
                || !TryParseField(fields[2], out editionId)
                || !TryParseField(fields[3], out viewCount)
                || !TryParseField(fields[4], out clickCount))
            {
                continue; // skip items with any non-numeric field
            }

            var key = new IgaAdKey(resourceId, editionId, locationId);
            IgaEntry entry;
            if (!enteries.TryGetValue(key, out entry))
            {
                entry = new IgaEntry();
                enteries.Add(key, entry);
            }
            entry.mClickCount += clickCount;
            entry.mViewCount += viewCount;
        }

        streamWriter.Write(date.ToString(DateFormat, CultureInfo.InvariantCulture) + ",");

        // Entries sorted by key, then bucketed per resource (order inside a bucket is kept).
        ILookup<int, KeyValuePair<IgaAdKey, IgaEntry>> byResource =
            enteries.OrderBy(e => e.Key).ToLookup(e => e.Key.ResourceId);

        // Union with the merged ids so that no entry is dropped even if the
        // caller's set is incomplete.
        IEnumerable<int> knownIds = allResources ?? Enumerable.Empty<int>();
        IEnumerable<int> columnIds = knownIds.Union(byResource.Select(g => g.Key)).OrderBy(r => r);

        foreach (int columnId in columnIds)
        {
            if (!byResource.Contains(columnId))
            {
                streamWriter.Write(EmptyColumnGroup);
                continue;
            }

            foreach (KeyValuePair<IgaAdKey, IgaEntry> item in byResource[columnId])
            {
                WriteCell(streamWriter, item.Key.LocationId);
                WriteCell(streamWriter, item.Key.EditionId);
                WriteCell(streamWriter, item.Value.mClickCount);
                WriteCell(streamWriter, item.Value.mViewCount);
                streamWriter.Write(","); // spacer cell under the next resource id header
            }
        }
    }

    /// <summary>
    /// Returns the value of the first "name=value" argument whose name equals
    /// <paramref name="parameter"/>, or null when there is none. Arguments that
    /// do not split into exactly two parts on '=' are ignored.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    /// <param name="parameter">Parameter name to look for (case-sensitive).</param>
    private string GetCmdParameterValue(string[] args, string parameter)
    {
        return args.Select(s => s.Split('='))
            .Where(p => p.Length == 2 && p[0].Equals(parameter))
            .Select(p => p[1])
            .FirstOrDefault();
    }

    /// <summary>Prints the command-line syntax to standard error.</summary>
    private static void Usage()
    {
        Console.Error.WriteLine("Usage: IgaStats ads_path output_path \n eg. IgaStats E:\\Ads E:\\Ads\\results.csv");
    }

    /// <summary>
    /// Reads the given files and yields the comma-split fields of every
    /// ';'-separated item on every line.
    /// </summary>
    private static IEnumerable<string[]> ReadItems(IEnumerable<string> files)
    {
        foreach (string fn in files)
        {
            foreach (string line in File.ReadAllLines(fn))
            {
                foreach (string item in line.Split(ItemSeparator))
                {
                    yield return item.Split(FieldSeparator);
                }
            }
        }
    }

    /// <summary>Parses one field as an integer, independent of the current culture.</summary>
    private static bool TryParseField(string field, out int value)
    {
        return int.TryParse(field.Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out value);
    }

    /// <summary>Writes one integer cell followed by the field separator.</summary>
    private static void WriteCell(StreamWriter writer, int value)
    {
        writer.Write(value.ToString(CultureInfo.InvariantCulture) + ",");
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement