ファイル名が近いものを探してグループ化して
合計サイズを降順で表示するプログラムです。
まだ、結果がいまいちかもしれません。
もう少し改良できそうです。
2017-07-25:
コマンドライン引数の取り込みを修正。
(ワイルドカード(*)が指定されたら展開する)
時間の掛かる比較処理を並列化。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Copyright (c) 2017 Yuki Ono
 * Licensed under the MIT License.
 */
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
using System.IO; | |
using Microsoft.VisualBasic.FileIO; | |
namespace GetFileSizeGroup { | |
class Program { | |
// Computes the edit (Levenshtein) distance between w1 and w2: the minimum
// number of single-character insertions, deletions and substitutions needed
// to turn one string into the other. Only one row of the DP table is kept.
// Approach taken from:
// https://discuss.leetcode.com/topic/17639/20ms-detailed-explained-c-solutions-o-n-space
static int MinDistance(string w1, string w2) {
    int len1 = w1.Length;
    int len2 = w2.Length;

    // row[i] = distance between the first i chars of w1 and the prefix of w2
    // processed so far; initially that prefix is empty, so row[i] = i.
    var row = new int[len1 + 1];
    for (int i = 0; i <= len1; i++) {
        row[i] = i;
    }

    for (int j = 1; j <= len2; j++) {
        // 'diagonal' carries the value of the previous row at column i - 1.
        int diagonal = row[0];
        row[0] = j;
        for (int i = 1; i <= len1; i++) {
            int above = row[i];
            if (w1[i - 1] == w2[j - 1]) {
                row[i] = diagonal;
            } else {
                int insertOrDelete = Math.Min(above + 1, row[i - 1] + 1);
                row[i] = Math.Min(diagonal + 1, insertOrDelete);
            }
            diagonal = above;
        }
    }
    return row[len1];
}
// Lazily yields the data rows of one comma-separated file as string arrays.
// The file is expected to look like PowerShell Export-Csv output: an optional
// "#TYPE ..." line, one header row, then one row per file.
//   csvPath: path of the CSV file to read.
//   returns: one string[] of fields per data row (header excluded).
static IEnumerable<string[]> ReadCSV(string csvPath) {
    using (var tfp = new TextFieldParser(csvPath)) {
        tfp.CommentTokens = new string[] { "#" };
        tfp.SetDelimiters(new string[] { "," });
        tfp.HasFieldsEnclosedInQuotes = true;

        // ReadFields/EndOfData skip lines starting with a comment token, so a
        // leading "#TYPE" line is ignored automatically. Only the header row
        // has to be consumed explicitly. Skipping two raw lines unconditionally
        // would drop the first data row when no "#TYPE" line is present.
        if (!tfp.EndOfData)
            tfp.ReadFields();

        while (!tfp.EndOfData)
            yield return tfp.ReadFields();
    }
}
// Formats a byte count as a whole number of megabytes (1 MB = 1024 * 1024
// bytes, fractional part truncated by integer division) followed by "MB".
static string GetSizeMB(long size) {
    long megabytes = size / 1024 / 1024;
    return $"{megabytes}MB";
}
// Returns the total of the size components (Item2) of every (path, size)
// entry in fileList; an empty list yields 0.
static long GetGroupSize(List<(string, long)> fileList) {
    return fileList.Sum(entry => entry.Item2);
}
// Matches the separator characters '_' and '-', which are replaced by a
// space when file names are normalized for comparison.
static readonly Regex SPACE_REG = new Regex(@"[_-]", RegexOptions.Compiled);

// Builds the normalized key used to compare file names: the file name of
// 'path' without directory and extension, lower-cased, with '_' and '-'
// replaced by spaces.
static string GetCompName(string path) {
    var name = Path.GetFileNameWithoutExtension(path);
    // Lower-case with the invariant culture so the key does not depend on the
    // machine's current culture (e.g. Turkish maps 'I' to a dotless 'ı').
    return SPACE_REG.Replace(name.ToLowerInvariant(), " ");
}
// Expands a command-line path whose file-name part may contain wildcards
// (such as "*.csv") into the matching files of its directory. When the path
// has no directory part, "./" is searched.
// Idea from:
// https://stackoverflow.com/questions/381366/is-there-a-wildcard-expansion-option-for-net-apps
static string[] ExpandPath(string path) {
    string folder = Path.GetDirectoryName(path);
    if (String.IsNullOrEmpty(folder)) {
        folder = "./";
    }
    string pattern = Path.GetFileName(path);
    return Directory.GetFiles(folder, pattern);
}
// Expands every command-line argument with ExpandPath and keeps only the
// resulting paths that end in ".csv".
static string[] GetCSVFiles(string[] args) {
    // Ordinal, case-insensitive match: the suffix test is not linguistic, and
    // a name such as "LIST.CSV" must not be dropped just because of its case.
    return args.SelectMany(ExpandPath)
        .Where(p => p.EndsWith(".csv", StringComparison.OrdinalIgnoreCase))
        .ToArray();
}
// Decides whether two CSV rows belong to the same group: true when the edit
// distance between their normalized file names (field 0 of each row, passed
// through GetCompName) is below the threshold.
static bool CompareName(string[] x, string[] y) {
    // Names match when their edit distance is strictly below this value.
    const int DistanceLimit = 5;

    var w1 = GetCompName(x[0]);
    var w2 = GetCompName(y[0]);

    // The edit distance is never smaller than the difference in length, so
    // such pairs can be rejected without running the O(M*N) computation.
    if (Math.Abs(w1.Length - w2.Length) >= DistanceLimit)
        return false;

    return MinDistance(w1, w2) < DistanceLimit;
}
// Entry point. Reads every CSV file named on the command line (wildcards
// allowed), greedily groups rows whose file names are similar according to
// CompareName, and prints the groups to standard output ordered by total
// size, largest first. With no usable CSV file, prints a usage message.
static void Main(string[] args) {
    var csvFiles = GetCSVFiles(args);
    if (csvFiles.Length == 0) {
        Console.WriteLine("Usage:");
        Console.WriteLine(@"Get-ChildItem C:\ -rec | where {!$_.PSIsContainer} | select-object FullName, Length | export-csv -encoding utf8 -path C:\file-list.csv");
        Console.WriteLine("GetFileSizeGroup *.csv > fsg-list.txt");
        return;
    }

    // The size column is machine-written data, so parse it independently of
    // the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var integerStyle = System.Globalization.NumberStyles.Integer;

    // Keep only rows that have a name and a parsable size. A malformed row is
    // reported on stderr (stdout is normally redirected to the result file)
    // instead of aborting the whole run later with an exception.
    var fileList = new LinkedList<string[]>();
    foreach (var fields in csvFiles.SelectMany(fn => ReadCSV(fn))) {
        if (fields != null && fields.Length >= 2
                && long.TryParse(fields[1], integerStyle, invariant, out _))
            fileList.AddLast(fields);
        else
            Console.Error.WriteLine("Skipping malformed row: " + String.Join(",", fields ?? new string[0]));
    }

    (string, long) makeTuple(string[] fields) =>
        (fields[0], long.Parse(fields[1], integerStyle, invariant));

    var fileGroups = new List<(List<(string, long)>, long)>();
    while (fileList.Count != 0) {
        // Take the first remaining row as the seed of a new group.
        var x = fileList.First.Value;
        fileList.RemoveFirst();
        var group = new List<(string, long)> { makeTuple(x) };

        // The comparison is the expensive part, so it runs in parallel.
        // AsOrdered keeps the matches in list order, which makes the output
        // reproducible between runs. ToArray materializes the matches before
        // the list is modified below.
        var matches = fileList.AsParallel().AsOrdered()
            .Where(y => CompareName(x, y))
            .ToArray();
        foreach (var f in matches) {
            group.Add(makeTuple(f));
            fileList.Remove(f);
        }
        fileGroups.Add((group, GetGroupSize(group)));
    }

    var ranked = fileGroups
        .OrderByDescending(fg => fg.Item2)
        .Select((f, i) => (f.Item1, f.Item2, i));
    foreach (var (fileGroup, groupSize, index) in ranked) {
        Console.WriteLine($"{index + 1}: {GetSizeMB(groupSize)}");
        foreach (var fg in fileGroup)
            Console.WriteLine($"{fg.Item1}: {fg.Item2}");
        Console.WriteLine();
        Console.WriteLine();
    }
}
} | |
} |
0 件のコメント:
コメントを投稿