2017年7月23日日曜日

GetFileSizeGroup

PowerShellで作成したCSVファイルを元に
ファイル名が近いものを探してグループ化して
合計サイズを降順で表示するプログラムです。

まだ、結果がいまいちかもしれません。
もう少し改良できそうです。

2017-07-25:
コマンドライン引数の取り込みを修正。
(ワイルドカード(*)が指定されたら展開する)
時間の掛かる比較処理を並列化。

/**
* Copyright (c) 2017 Yuki Ono
* Licensed under the MIT License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.IO;
using Microsoft.VisualBasic.FileIO;
namespace GetFileSizeGroup {
class Program {
// https://discuss.leetcode.com/topic/17639/20ms-detailed-explained-c-solutions-o-n-space
/// <summary>
/// Computes the edit (Levenshtein) distance between two strings while keeping
/// only a single row of the dynamic-programming table in memory.
/// </summary>
/// <param name="w1">First string; its length determines the row width.</param>
/// <param name="w2">Second string; one pass over the row is made per character.</param>
/// <returns>
/// The minimum number of single-character insertions, deletions and
/// substitutions needed to turn <paramref name="w1"/> into <paramref name="w2"/>.
/// </returns>
static int MinDistance(string w1, string w2) {
    int width = w1.Length;
    int height = w2.Length;

    // row[i] = distance between the first i chars of w1 and the prefix of w2 processed so far.
    var row = new int[width + 1];
    for (int i = 0; i <= width; i++) {
        row[i] = i;
    }

    for (int j = 1; j <= height; j++) {
        // "diagonal" carries the value of the previous row at column i - 1.
        int diagonal = row[0];
        row[0] = j;
        for (int i = 1; i <= width; i++) {
            int above = row[i];
            if (w1[i - 1] == w2[j - 1]) {
                row[i] = diagonal;
            } else {
                int cheapest = Math.Min(diagonal, Math.Min(above, row[i - 1]));
                row[i] = cheapest + 1;
            }
            diagonal = above;
        }
    }
    return row[width];
}
/// <summary>
/// Lazily reads the data rows of a comma-separated file such as the one
/// produced by PowerShell's Export-Csv.
/// </summary>
/// <param name="csvPath">Path of the CSV file to read.</param>
/// <returns>
/// One array of field values per data row. Lines starting with "#" and the
/// column-header row are not returned.
/// </returns>
static IEnumerable<string[]> ReadCSV(string csvPath) {
    using (var tfp = new TextFieldParser(csvPath)) {
        tfp.CommentTokens = new string[] { "#" };
        tfp.SetDelimiters(new string[] { "," });
        tfp.HasFieldsEnclosedInQuotes = true;

        // ReadFields skips comment lines on its own, so an optional
        // "#TYPE ..." line is ignored and the first parsed row is always the
        // column header. Skipping two raw lines unconditionally would drop
        // the first data row when the "#TYPE" line is absent.
        if (!tfp.EndOfData) {
            tfp.ReadFields();
        }

        while (!tfp.EndOfData) {
            yield return tfp.ReadFields();
        }
    }
}
/// <summary>
/// Formats a byte count as a whole number of megabytes (1024 * 1024 bytes,
/// truncated by integer division) followed by the suffix "MB".
/// </summary>
/// <param name="size">Size in bytes.</param>
/// <returns>Text such as "12MB".</returns>
static string GetSizeMB(long size) {
    long kilobytes = size / 1024;
    long megabytes = kilobytes / 1024;
    return $"{megabytes}MB";
}
/// <summary>
/// Adds up the sizes (second tuple element) of every entry in a group.
/// </summary>
/// <param name="fileList">Entries of the group as (path, size) pairs.</param>
/// <returns>The sum of all sizes; 0 for an empty list.</returns>
static long GetGroupSize(List<(string, long)> fileList) {
    return fileList.Sum(entry => entry.Item2);
}
// Matches the separator characters '_' and '-' that are treated like spaces when comparing names.
static readonly Regex SPACE_REG = new Regex(@"[_-]", RegexOptions.Compiled);
/// <summary>
/// Builds the normalized name used for similarity comparison: the file name
/// without directory and extension, lower-cased, with '_' and '-' replaced
/// by spaces.
/// </summary>
/// <param name="path">File path or file name.</param>
/// <returns>The normalized comparison name.</returns>
static string GetCompName(string path) {
    var name = Path.GetFileNameWithoutExtension(path);
    // Invariant lower-casing keeps the result independent of the machine's
    // current culture (e.g. Turkish maps 'I' to a dotless 'ı').
    return SPACE_REG.Replace(name.ToLowerInvariant(), " ");
}
// https://stackoverflow.com/questions/381366/is-there-a-wildcard-expansion-option-for-net-apps
/// <summary>
/// Expands a command-line path that may contain wildcards in its file-name
/// part into the list of matching files.
/// </summary>
/// <param name="path">Path whose last segment is used as the search pattern.</param>
/// <returns>The files in the path's directory that match the pattern.</returns>
static string[] ExpandPath(string path) {
    var folder = Path.GetDirectoryName(path);
    if (String.IsNullOrEmpty(folder)) {
        // No directory part given: search the current directory.
        folder = "./";
    }
    var pattern = Path.GetFileName(path);
    return Directory.GetFiles(folder, pattern);
}
/// <summary>
/// Expands every command-line argument (wildcards included) and keeps only
/// the resulting paths that have a ".csv" extension.
/// </summary>
/// <param name="args">Command-line arguments, each a path or wildcard pattern.</param>
/// <returns>All matching CSV file paths, in argument order.</returns>
static string[] GetCSVFiles(string[] args) {
    // Ordinal, case-insensitive match: a wildcard such as "*.csv" can match
    // "LIST.CSV" on case-insensitive file systems, and the extension check
    // must neither reject it nor depend on the current culture.
    return args.SelectMany(ExpandPath)
        .Where(p => p.EndsWith(".csv", StringComparison.OrdinalIgnoreCase))
        .ToArray();
}
/// <summary>
/// Decides whether two CSV rows refer to similarly named files.
/// </summary>
/// <param name="x">First row; element 0 is used as the file path.</param>
/// <param name="y">Second row; element 0 is used as the file path.</param>
/// <returns>
/// True when the edit distance between the two normalized names is below 5.
/// </returns>
static bool CompareName(string[] x, string[] y) {
    const int threshold = 5;
    var left = GetCompName(x[0]);
    var right = GetCompName(y[0]);
    var distance = MinDistance(left, right);
    return distance < threshold;
}
/// <summary>
/// Entry point: reads the CSV files named on the command line, groups rows
/// whose file names are similar, and prints the groups ordered by total size
/// (largest first).
/// </summary>
/// <param name="args">Paths or wildcard patterns of the CSV files to read.</param>
static void Main(string[] args) {
    var csvFiles = GetCSVFiles(args);
    if (csvFiles.Length == 0) {
        Console.WriteLine("Usage:");
        Console.WriteLine(@"Get-ChildItem C:\ -rec | where {!$_.PSIsContainer} | select-object FullName, Length | export-csv -encoding utf8 -path C:\file-list.csv");
        Console.WriteLine("GetFileSizeGroup *.csv > fsg-list.txt");
        return;
    }

    // Converts a CSV row (path, size) into a typed tuple.
    (string, long) makeTuple(string[] fields) =>
        (fields[0], long.Parse(fields[1]));

    var remaining = csvFiles.SelectMany(fn => ReadCSV(fn)).ToList();
    var fileGroups = new List<(List<(string, long)>, long)>();
    while (remaining.Count != 0) {
        // The first remaining row seeds a new group.
        var x = remaining[0];

        // Classify every other row in parallel. AsOrdered keeps the input
        // order, so the members of a group are listed in the same order on
        // every run instead of depending on thread scheduling.
        var classified = remaining.Skip(1)
            .AsParallel().AsOrdered()
            .Select(y => (Row: y, IsMatch: CompareName(x, y)))
            .ToArray();

        var group = new List<(string, long)> { makeTuple(x) };
        group.AddRange(classified.Where(c => c.IsMatch).Select(c => makeTuple(c.Row)));

        // Rows that did not match stay available for later groups; rebuilding
        // the list in one pass avoids a linear search per removed row.
        remaining = classified.Where(c => !c.IsMatch).Select(c => c.Row).ToList();

        fileGroups.Add((group, GetGroupSize(group)));
    }

    var rank = 0;
    foreach (var (fileGroup, groupSize) in fileGroups.OrderByDescending(fg => fg.Item2)) {
        rank++;
        Console.WriteLine($"{rank}: {GetSizeMB(groupSize)}");
        foreach (var entry in fileGroup) {
            Console.WriteLine($"{entry.Item1}: {entry.Item2}");
        }
        Console.WriteLine();
        Console.WriteLine();
    }
}
}
}
view raw Program.cs hosted with ❤ by GitHub

0 件のコメント:

コメントを投稿