此示例演示如何对 .csv 文件的列执行诸如 Sum、Average、Min 和 Max 等聚合计算。 此处所示的示例原则可以应用于其他类型的结构化文本。
创建源文件
- 将下面这些行复制到名为 scores.csv 的文件中,并将此文件保存到您的解决方案文件夹。 假定第一列表示学员 ID,后面几列表示四次考试的分数。 - 111, 97, 92, 81, 60 112, 75, 84, 91, 39 113, 88, 94, 65, 91 114, 97, 89, 85, 82 115, 35, 72, 91, 70 116, 99, 86, 90, 94 117, 93, 92, 80, 87 118, 92, 90, 83, 78 119, 68, 79, 88, 92 120, 99, 82, 81, 79 121, 96, 85, 91, 60 122, 94, 92, 91, 91
示例
    Class SumColumns
        Public Shared Sub Main()
            Dim lines As String() = System.IO.File.ReadAllLines("../../../scores.csv")
            ' Specifies the column to compute
            ' This value could be passed in at runtime.
            Dim exam = 3
            ' Spreadsheet format:
            ' Student ID    Exam#1  Exam#2  Exam#3  Exam#4
            ' 111,          97,     92,     81,     60
            ' one is added to skip over the first column
            ' which holds the student ID.
            SumColumn(lines, exam + 1)
            Console.WriteLine()
            MultiColumns(lines)
            ' Keep the console window open in debug mode.
            Console.WriteLine("Press any key to exit...")
            Console.ReadKey()
        End Sub
        Shared Sub SumColumn(ByVal lines As IEnumerable(Of String), ByVal col As Integer)
            ' This query performs two steps:
            ' split the string into a string array
            ' convert the specified element to
            ' integer and select it.
            Dim columnQuery = From line In lines 
                               Let x = line.Split(",") 
                               Select Convert.ToInt32(x(col))
            ' Execute and cache the results for performance.
            ' Only needed with very large files.
            Dim results = columnQuery.ToList()
            ' Perform aggregate calculations 
            ' on the column specified by col.
            Dim avgScore = Aggregate score In results Into Average(score)
            Dim minScore = Aggregate score In results Into Min(score)
            Dim maxScore = Aggregate score In results Into Max(score)
            Console.WriteLine("Single Column Query:")
            Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}", 
                         col, avgScore, maxScore, minScore)
        End Sub
        Shared Sub MultiColumns(ByVal lines As IEnumerable(Of String))
            Console.WriteLine("Multi Column Query:")
            ' Create the query. It will produce nested sequences. 
            ' multiColQuery performs these steps:
            ' 1) convert the string to a string array
            ' 2) skip over the "Student ID" column and take the rest
            ' 3) convert each field to an int and select that 
            '    entire sequence as one row in the results.
            Dim multiColQuery = From line In lines 
                                Let fields = line.Split(",") 
                                Select From str In fields Skip 1 
                                            Select Convert.ToInt32(str)
            Dim results = multiColQuery.ToList()
            ' Find out how many columns we have.
            Dim columnCount = results(0).Count()
            ' Perform aggregate calculations on each column.            
            ' One loop for each score column in scores.
            ' We can use a for loop because we have already
            ' executed the multiColQuery in the call to ToList.
            For j As Integer = 0 To columnCount - 1
                Dim column = j
                Dim res2 = From row In results 
                           Select row.ElementAt(column)
                ' Perform aggregate calculations 
                ' on the column specified by col.
                Dim avgScore = Aggregate score In res2 Into Average(score)
                Dim minScore = Aggregate score In res2 Into Min(score)
                Dim maxScore = Aggregate score In res2 Into Max(score)
                ' Add 1 to column numbers because exams in this course start with #1
                Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}", 
                                  column + 1, avgScore, maxScore, minScore)
            Next
        End Sub
    End Class
    ' Output:
    ' Single Column Query:
    ' Exam #4: Average:76.92 High Score:94 Low Score:39
    ' Multi Column Query:
    ' Exam #1 Average: 86.08 High Score: 99 Low Score: 35
    ' Exam #2 Average: 86.42 High Score: 94 Low Score: 72
    ' Exam #3 Average: 84.75 High Score: 91 Low Score: 65
    ' Exam #4 Average: 76.92 High Score: 94 Low Score: 39
class SumColumns
{
    static void Main(string[] args)
    {
        string[] lines = System.IO.File.ReadAllLines(@"../../../scores.csv");
        // Specifies the column to compute
        int exam = 3;
        // Spreadsheet format:
        // Student ID    Exam#1  Exam#2  Exam#3  Exam#4
        // 111,          97,     92,     81,     60
        // one is added to skip over the first column
        // which holds the student ID.
        SingleColumn(lines, exam + 1);
        Console.WriteLine();
        MultiColumns(lines);
        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }
    static void SingleColumn(IEnumerable<string> strs, int examNum)
    {
        Console.WriteLine("Single Column Query:");
        // examNum specifies the column to run the 
        // calculations on. This could also be
        // passed in dynamically at runtime.             
        // columnQuery is a IEnumerable<int>
        // This query performs two steps:
        // 1) split the string into a string[]
        // 2) convert the specified element to
        //    int and select it.
        var columnQuery =
            from line in strs
            let x = line.Split(',')
            select Convert.ToInt32(x[examNum]);
        // Execute and cache the results for performance.
        // Only needed with very large files.
        var results = columnQuery.ToList();
        // Perform aggregate calculations 
        // on the column specified by examNum.
        double average = results.Average();
        int max = results.Max();
        int min = results.Min();
        Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}",
                 examNum, average, max, min);
    }
    static void MultiColumns(IEnumerable<string> strs)
    {
        Console.WriteLine("Multi Column Query:");
        // Create the columnQuery. Explicit typing is used
        // to make clear that the columnQuery will produce 
        // nested sequences. You can also just use 'var'.
        // The columnQuery performs these steps:
        // 1) convert the string to a string[]
        // 2) skip over the "Student ID" column and take the rest
        // 3) convert each string to an int and select that 
        //    entire sequence as one row in the results.
        IEnumerable<IEnumerable<int>> query =
            from line in strs
            let x = line.Split(',')
            let y = x.Skip(1)
            select (from str in y
                    select Convert.ToInt32(str));
        // Execute and cache the results for performance.
        // ToArray could also be used here.
        var results = query.ToList();
        // Find out how many columns we have.
        int columnCount = results[0].Count();
        // Perform aggregate calculations on each column.            
        // One loop for each score column in scores.
        // We can use a for loop because we have already
        // executed the columnQuery in the call to ToList.
        for (int column = 0; column < columnCount; column++)
        {
            var res2 = from row in results
                       select row.ElementAt(column);
            double average = res2.Average();
            int max = res2.Max();
            int min = res2.Min();
            // 1 is added to column because Exam numbers
            // begin with 1
            Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}",
                          column + 1, average, max, min);
        }
    }
}
/* Output:
    Single Column Query:
    Exam #4: Average:76.92 High Score:94 Low Score:39
    Multi Column Query:
    Exam #1 Average: 86.08 High Score: 99 Low Score: 35
    Exam #2 Average: 86.42 High Score: 94 Low Score: 72
    Exam #3 Average: 84.75 High Score: 91 Low Score: 65
    Exam #4 Average: 76.92 High Score: 94 Low Score: 39
 */
查询的工作原理是使用 Split 方法将每一行文本转换为数组。 每个数组元素表示一列。 最后,每一列中的文本都转换为其数字表示形式。 如果文件是制表符分隔文件,只需将 Split 方法中的参数更新为 \t。
编译代码
- 创建一个面向 .NET Framework 3.5 版的 Visual Studio 项目。 默认情况下,该项目具有对 System.Core.dll 的引用以及针对 System.Linq 命名空间的 using 指令 (C#) 或 Imports 语句 (Visual Basic)。 
- 将此代码复制到您的项目。 
- 按 F5 编译并运行程序。 
- 按任意键退出控制台窗口。