I cannot re-use a workable code in 2021 to predict from LbfgsMaximumEntropy taining

DJamin 386 Reputation points
2024-10-04T12:14:17.69+00:00

Hello this piece of code is not working anymore, it was running well in 2021.

Current setup :

Microsoft Visual Studio Professional 2022 (64-bit) - LTSC 17.4

Version 17.4.21

Windows 10 Entreprise LTSC 21H2

crash when use Training code at ligne 223 :

System.ArgumentOutOfRangeException: 'Could not find input column 'Label'

Nom du paramètre : inputSchema'

codes and input csv file :

////////////////////////////////////////////////

// Predict

////////////////////////////////////////////////

using Microsoft.ML.Data;

using Microsoft.ML;

using System;

using System.Collections.Generic;

using System.IO;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

namespace PredictCategorie

{

public class InputObject

{

    [LoadColumn(0)]

    public string CatSWM { get; set; }

    [LoadColumn(1)]

    public string ArticleFR { get; set; }

    [LoadColumn(2)]

    [ColumnName("Label")]

    public float IDCategorie { get; set; }

}

/////////////// from Example BEGIN

public class InputObjectDataView : IDataView

{

    private readonly IEnumerable<InputObject> _data;

    public IEnumerable<InputObject> Data

    {

        get

        {

            return _data;

        }

    }

    public DataViewSchema Schema { get; }

    public bool CanShuffle => false;

    public InputObjectDataView(IEnumerable<InputObject> data)

    {

        _data = data;

        var builder = new DataViewSchema.Builder();

        builder.AddColumn("CatSWM", TextDataViewType.Instance);

        builder.AddColumn("ArticleFR", TextDataViewType.Instance);

        builder.AddColumn("IDCategorie", NumberDataViewType.Single);

        Schema = builder.ToSchema();

    }

    public long? GetRowCount() => null;

    public DataViewRowCursor GetRowCursor(

        IEnumerable<DataViewSchema.Column> columnsNeeded,

        Random rand = null)

        => new Cursor(this, columnsNeeded.Any(c => c.Index == 0),

            columnsNeeded.Any(c => c.Index == 1), columnsNeeded.Any(c => c.Index == 2));

    public DataViewRowCursor[] GetRowCursorSet(

        IEnumerable<DataViewSchema.Column> columnsNeeded, int n,

        Random rand = null)

        => new[] { GetRowCursor(columnsNeeded, rand) };

    public class Cursor : DataViewRowCursor

    {

        private bool _disposed;

        private long _position;

        private readonly IEnumerator<InputObject> _enumerator;

        private readonly Delegate[] _getters;

        public override long Position => _position;

        public override long Batch => 0;

        public override DataViewSchema Schema { get; }

        public Cursor(InputObjectDataView parent, bool wantsCatSWM, bool wantsArticleFR, bool wantsIDCategorie)

        {

            Schema = parent.Schema;

            _position = -1;

            _enumerator = parent.Data.GetEnumerator();

            _getters = new Delegate[]

            {

                    wantsCatSWM ?

                        (ValueGetter<ReadOnlyMemory<char>>)

                        Text2GetterImplementation : null,

                    wantsArticleFR ?

                        (ValueGetter<ReadOnlyMemory<char>>)

                        Text1GetterImplementation : null,

                    wantsIDCategorie ?

                        (ValueGetter<float>)

                        Text3GetterImplementation : null

            };

        }

        protected override void Dispose(bool disposing)

        {

            if (_disposed)

                return;

            if (disposing)

            {

                _enumerator.Dispose();

                _position = -1;

            }

            _disposed = true;

            base.Dispose(disposing);

        }

        private void Text2GetterImplementation(ref ReadOnlyMemory<char> value)

            => value = _enumerator.Current.CatSWM.AsMemory();

        private void Text1GetterImplementation(ref ReadOnlyMemory<char> value)

            => value = _enumerator.Current.ArticleFR.AsMemory();

        private void Text3GetterImplementation(ref float value)

            => value = _enumerator.Current.IDCategorie;

        private void IdGetterImplementation(ref DataViewRowId id)

            => id = new DataViewRowId((ulong)_position, 0);

        public override ValueGetter<TValue> GetGetter<TValue>(

            DataViewSchema.Column column)

        {

            if (!IsColumnActive(column))

                throw new ArgumentOutOfRangeException(nameof(column));

            return (ValueGetter<TValue>)_getters[column.Index];

        }

        public override ValueGetter<DataViewRowId> GetIdGetter()

            => IdGetterImplementation;

        public override bool IsColumnActive(DataViewSchema.Column column)

            => _getters[column.Index] != null;

        public override bool MoveNext()

        {

            if (_disposed)

                return false;

            if (_enumerator.MoveNext())

            {

                _position++;

                return true;

            }

            Dispose();

            return false;

        }

    }

}

/////////////// from Example END

class Program

{

    static void Main(string[] args)

    {

        Console.WriteLine("On va trouver la catégorie!");

        // path and file location definition

        string file_path = new DirectoryInfo(Environment.CurrentDirectory).Parent.Parent.Parent.Parent.FullName + @"\MLwork\categorie\";

        string file_name = "cat_TestOpenIndexOnly_classification_04.10.2024";

        // file to write

        string filename_out = $"{file_path}{file_name}_out.csv";

        if (File.Exists(filename_out))

        {

            File.Delete(filename_out);

        }

        using StreamWriter sw = File.CreateText(filename_out);

        // the 1st line

        sw.WriteLine("CatSWM;ArticleFR;IDcategorie;LibelleCategorie;SMP");

        // file to open

        Console.WriteLine("Read input file");

        string filename_in = $"{file_path}{file_name}.csv";

        string[] lines = System.IO.File.ReadAllLines(filename_in);

        // read file and store

        List<string> art = new List<string>();

        List<string> cat = new List<string>();

        int count = 0;

        foreach (string line in lines)

        {

            string[] line_elements = line.Split(';');

            if (line_elements[0] != "CatSWM")

            {

                art.Add(line_elements[0]);

                cat.Add(line_elements[1]);

                count++;

            }

        }

        // https://docs.microsoft.com/fr-fr/dotnet/machine-learning/how-to-guides/save-load-machine-learning-models-ml-net

        //Create MLContext

        MLContext mlContext = new MLContext();

        // Define data preparation and trained model schemas

        DataViewSchema dataPrepPipelineSchema, modelSchema;

        // Load data preparation pipeline

        Console.WriteLine("Load data preparation pipeline");

        string data_prep_name = "\\files\\data_preparation_pipeline_categorie";

        string data_prep_file = $"{file_path}{data_prep_name}.zip";

        ITransformer dataPrepPipeline = mlContext.Model.Load(data_prep_file, out dataPrepPipelineSchema);

        // Load Trained Model

        Console.WriteLine("Load Trained Model");

        //string model_name = "\\ML_categorie";

        string model_name = "\\files\\model_lbfgs";

        string model_file = $"{file_path}{model_name}.zip";

        ITransformer trainedModel = mlContext.Model.Load(model_file, out modelSchema);

        Console.WriteLine("Load IDataView");

        List<InputObject> categorieData = new List<InputObject>();

        for (int i = 0; i < count; i++)

        {

            categorieData.Add(new InputObject { CatSWM = cat[i], ArticleFR = art[i] });

        }

        var inputData = new InputObjectDataView(categorieData);

        // Predicted Data

        Console.WriteLine("Predict");

        IDataView predictions = trainedModel.Transform(inputData);

        float[] scoreColumn = predictions.GetColumn<float>("PredictedLabel").ToArray();

        /////////////////////////////

        // output

        Console.WriteLine("Write ouptput");

        string line_out = "";

        for (int i = 0; i < count; i++)

        {

            if (i % 500 == 0) { Console.WriteLine($"{i}/{count} DONE"); }

            line_out = $"{art[i]};{cat[i]};{scoreColumn[i]}";

            //Console.WriteLine(line_out);

            sw.WriteLine(line_out);

        }

    }

}

}

////////////////////////////////////////////////

// Train

////////////////////////////////////////////////

using Microsoft.ML.Data;

using Microsoft.ML.Trainers;

using Microsoft.ML;

using System;

using System.Collections.Generic;

using System.IO;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

namespace TrainCategorie

{

public class InputObject

{

    [LoadColumn(0)]

    public string CatSWM { get; set; }

    [LoadColumn(1)]

    public string ArticleFR { get; set; }

    [LoadColumn(2)]

    [ColumnName("Label")]

    public float IDCategorie { get; set; }

}

class Program

{

    static void Main(string[] args)

    {

        Console.WriteLine("On va entraîner la catégorie!!");

        // path and file location definition

        //string file_path = @"\\int.ofac.ch\OFAC\Collaborateurs\jamin\Visual Studio 2019\MLwork\categorie\files";

        string file_path = new DirectoryInfo(Environment.CurrentDirectory).Parent.Parent.Parent.Parent.FullName + @"\MLwork\categorie\files";

        string file_name = "cat_TrainOpenIndex_classification_04.10.2024";

        // file to open

        Console.WriteLine("Read input file");

        string filename_in = $"{file_path}\\{file_name}.csv";

        // https://docs.microsoft.com/fr-fr/dotnet/machine-learning/how-to-guides/load-data-ml-net

        //Create MLContext

        MLContext mlContext = new MLContext();

        // Load Trained Model

        Console.WriteLine("Load Pipeline");

        IDataView raw_data = mlContext.Data.LoadFromTextFile<InputObject>(filename_in, separatorChar: ';', hasHeader: true);

        //https://docs.microsoft.com/en-us/dotnet/machine-learning/how-to-guides/prepare-data-ml-net

        Console.WriteLine("Convert str->float");

        // 1

        // Define text transform estimator

        var textEstimator1 = mlContext.Transforms.Text.FeaturizeText("ArticleFR");

        var textEstimator2 = mlContext.Transforms.Text.FeaturizeText("CatSWM");

        // Fit data to estimator

        // Fitting generates a transformer that applies the operations of defined by estimator

        ITransformer textTransformer1 = textEstimator1.Fit(raw_data);

        ITransformer textTransformer2 = textEstimator2.Fit(raw_data);

        var fullTransformer = textTransformer1.Append(textTransformer2);

        // Transform data

        IDataView data = fullTransformer.Transform(raw_data);

        //https://docs.microsoft.com/fr-fr/dotnet/machine-learning/how-to-guides/train-machine-learning-model-ml-net

        //DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

        //IDataView trainData = dataSplit.TrainSet;

        //IDataView testData = dataSplit.TestSet;

        IDataView trainData = data;

        IDataView testData = data;

        // Define Data Prep Estimator

        // 1. Concatenate Size and Historical into a single feature vector output to a new column called Features

        // 2. Normalize Features vector

        Console.WriteLine("Concatenate Features");

        IEstimator<ITransformer> dataPrepEstimator =

            mlContext.Transforms.Concatenate("Features", "ArticleFR", "CatSWM")

                .Append(mlContext.Transforms.NormalizeMinMax("Features"));

        // Create data prep transformer

        ITransformer dataPrepTransformer = dataPrepEstimator.Fit(trainData);

        // Apply transforms to data

        Console.WriteLine("Prepare Train Data");

        IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

        Console.WriteLine("Prepare Test Data");

        IDataView transformedTestData = dataPrepTransformer.Transform(testData);

        // tuto trainer

        //Console.WriteLine("Train Sdca");

        //TrainSdca(mlContext, transformedTrainingData, transformedTestData);

        // Lbfgs needed Trainer

        Console.WriteLine("Train Lbfgs");

        TrainLbfgs(mlContext, transformedTrainingData, transformedTestData, file_path);

        // Save Data Prep transformer

        Console.WriteLine("Save Data Prep transformer");

        mlContext.Model.Save(dataPrepTransformer, trainData.Schema, $"{file_path}\\data_preparation_pipeline_categorie.zip");

    }

    private static void TrainSdca(MLContext mlContext, IDataView transformedTrainingData, IDataView transformedTestData)

    {

        // Define StochasticDualCoordinateAscent regression algorithm estimator

        Console.WriteLine("Build ML");

        var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

        // Build machine learning model

        Console.WriteLine("Train ML");

        var trainedModel = sdcaEstimator.Fit(transformedTrainingData);

        // extract model parameters

        //var trainedModelParameters = trainedModel.Model as LinearRegressionModelParameters;

        // Measure trained model performance

        // Use trained model to make inferences on test data

        IDataView testDataPredictions = trainedModel.Transform(transformedTestData);

        // Extract model metrics and get RSquared

        Console.WriteLine("Evaluate Test Data");

        RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);

        double rSquared = trainedModelMetrics.RSquared;

        Console.WriteLine($"rSquared={rSquared}");

        // save model

        //mlContext.Model.Save(trainedModel, data.Schema, $"{file_path}\\testmodel.zip");

    }

    private static void TrainLbfgs(MLContext mlContext, IDataView transformedTrainingData, IDataView transformedTestData, string file_path)

    {

        // https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.standardtrainerscatalog.lbfgslogisticregression?view=ml-dotnet

        // https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.standardtrainerscatalog.lbfgsmaximumentropy?view=ml-dotnet#Microsoft_ML_StandardTrainersCatalog_LbfgsMaximumEntropy_Microsoft_ML_MulticlassClassificationCatalog_MulticlassClassificationTrainers_System_String_System_String_System_String_System_Single_System_Single_System_Single_System_Int32_System_Boolean_

        Console.WriteLine("Build ML");

        var options = new LbfgsMaximumEntropyMulticlassTrainer.Options()

        {

            //LabelColumnName = "IDCategorie",

            //FeatureColumnName = "Features",

            HistorySize = 50,

            L1Regularization = 0.1f,

            NumberOfThreads = 1

        };

        // Define the trainer.

        var pipeline =

            // Convert the string labels into key types.

            mlContext.Transforms.Conversion.MapValueToKey("Label")

            // Apply LbfgsMaximumEntropy multiclass trainer.

            .Append(mlContext.MulticlassClassification.Trainers

            //.LbfgsMaximumEntropy(options));

            .LbfgsMaximumEntropy());

        // Train the model.

        Console.WriteLine("Train ML");

        var trainedModel = pipeline.Fit(transformedTrainingData);

        // Use trained model to make inferences on test data

        Console.WriteLine("transform trained model");

        IDataView testDataPredictions = trainedModel.Transform(transformedTestData);

        // Extract model metrics and get accuracy

        Console.WriteLine("Evaluate Test Data");

        var trainedModelMetrics = mlContext.MulticlassClassification.Evaluate(testDataPredictions);

        double accuracy = trainedModelMetrics.MicroAccuracy;

        Console.WriteLine($"accuracy={accuracy}");

        // Save Trained Model

        Console.WriteLine("Save Trained Model");

        mlContext.Model.Save(trainedModel, transformedTrainingData.Schema, $"{file_path}\\model_lbfgs.zip");

    }

}

}

////////////////////////////////////////////////

// csv file

////////////////////////////////////////////////

CatSWM;ArticleFR;IDCategorie

cat1;name1;1

;name2;2

cat3;nam3;3

Developer technologies | .NET | .NET Machine Learning
Developer technologies | .NET | .NET Machine Learning
.NET: Microsoft Technologies based on the .NET software framework. Machine learning: A type of artificial intelligence focused on enabling computers to use observed data to evolve new behaviors that have not been explicitly programmed.
0 comments No comments
{count} votes

Your answer

Answers can be marked as 'Accepted' by the question author and 'Recommended' by moderators, which helps users know the answer solved the author's problem.