// Development environment: Visual Studio 2022 + .NET Framework 4.8 + OpenCvSharp4 + Sdcb.PaddleInference
using OpenCvSharp;
using Sdcb.PaddleInference;
using Sdcb.PaddleInference.Native;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Windows.Forms;

namespace PaddleInference_OCR文字识别
{
    /// <summary>
    /// Main window of the OCR demo: lets the user pick an image, runs the
    /// recognition model from the <c>ch_PP-OCRv3_rec</c> folder on it through the
    /// Paddle Inference C API, and shows the decoded text, score and elapsed time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Image handed to the recognizer (separate instance from the preview in pictureBox1).
        Bitmap bmp;

        // Open-file dialog filter. "*.tif" was previously missing ("*.tiff" was listed twice).
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";

        // Path of the currently loaded image.
        string img = "";

        // Application start-up directory; model and label files are resolved relative to it.
        string startupPath = "";

        // Input shape descriptor; only its Height is read in this file.
        OcrShape recShape = new OcrShape(3, 320, 48);

        // Recognition predictor, created in Form1_Load and released in OnFormClosed.
        PaddlePredictor rec_predictor;

        /// <summary>Character dictionary, one entry per line of ppocr_keys.txt.</summary>
        public IReadOnlyList<string> Labels;

        /// <summary>
        /// Lets the user choose an image file and shows it in the preview box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                img = ofd.FileName;
            }

            // Release the previously loaded bitmaps; they hold GDI+ handles and
            // keep the source files locked until disposed.
            Image previousPreview = pictureBox1.Image;
            pictureBox1.Image = null;
            if (previousPreview != null)
            {
                previousPreview.Dispose();
            }

            if (bmp != null)
            {
                bmp.Dispose();
                bmp = null;
            }

            bmp = new Bitmap(img);
            pictureBox1.Image = new Bitmap(img);
            textBox1.Text = "";
        }

        /// <summary>
        /// Creates the recognition predictor from the model files next to the
        /// executable and loads the label dictionary.
        /// </summary>
        /// <exception cref="FileNotFoundException">A model file is missing.</exception>
        private unsafe void Form1_Load(object sender, EventArgs e)
        {
            startupPath = Application.StartupPath;

            // The native library receives the paths as byte strings: the ANSI code
            // page is used on Windows and UTF-8 elsewhere.
            Encoding paddleEncoding = Environment.OSVersion.Platform == PlatformID.Win32NT
                ? Encoding.GetEncoding(CultureInfo.CurrentCulture.TextInfo.ANSICodePage)
                : Encoding.UTF8;

            // Recognition model
            string recProgramPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdmodel";
            string recParamsPath = startupPath + "\\ch_PP-OCRv3_rec\\inference.pdiparams";
            if (!File.Exists(recProgramPath))
            {
                throw new FileNotFoundException("Recognition model file not found.", recProgramPath);
            }

            if (!File.Exists(recParamsPath))
            {
                throw new FileNotFoundException("Recognition params file not found.", recParamsPath);
            }

            // The native side reads the paths as C strings, so the buffers must be
            // NUL-terminated; GetBytes alone does not append a terminator.
            byte[] recProgramBytes = paddleEncoding.GetBytes(recProgramPath + "\0");
            byte[] recParamsBytes = paddleEncoding.GetBytes(recParamsPath + "\0");

            IntPtr recConfig = PaddleNative.PD_ConfigCreate();
            fixed (byte* recProgramPtr = recProgramBytes)
            fixed (byte* recParamsPtr = recParamsBytes)
            {
                PaddleNative.PD_ConfigSetModel(recConfig, (IntPtr)recProgramPtr, (IntPtr)recParamsPtr);
            }

            // NOTE(review): PD_PredictorCreate is expected to take ownership of the
            // config, so it is not destroyed here -- confirm against the C API docs.
            rec_predictor = new PaddlePredictor(PaddleNative.PD_PredictorCreate(recConfig));

            // Labels
            string labelsPath = startupPath + "\\ppocr_keys.txt";
            Labels = new List<string>(File.ReadAllLines(labelsPath));
        }

        /// <summary>
        /// Releases the predictor and the loaded bitmap when the form closes.
        /// </summary>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            base.OnFormClosed(e);

            if (rec_predictor != null)
            {
                rec_predictor.Dispose();
                rec_predictor = null;
            }

            if (bmp != null)
            {
                bmp.Dispose();
                bmp = null;
            }
        }

        /// <summary>
        /// Runs text recognition on the loaded image and writes the decoded text,
        /// the average score and the elapsed time to the text box.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The model was not loaded, the predictor failed, or the output tensor
        /// does not have the expected layout.
        /// </exception>
        private void button3_Click(object sender, EventArgs e)
        {
            if (pictureBox1.Image == null || bmp == null)
            {
                return;
            }

            if (rec_predictor == null || Labels == null)
            {
                throw new InvalidOperationException("The recognition model or the label dictionary has not been loaded.");
            }

            Stopwatch stopwatch = Stopwatch.StartNew();

            int modelHeight = recShape.Height;
            int maxWidth;
            int channel;
            float[] inputData;

            using (Mat src = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp))
            using (Mat channel3 = ToThreeChannels(src))
            {
                // Keep the aspect ratio: scale the width so the height matches the model.
                maxWidth = (int)Math.Ceiling(1.0 * src.Width / src.Height * modelHeight);

                using (Mat resized = ResizePadding(channel3, modelHeight, maxWidth))
                using (Mat normalize = Normalize(resized))
                {
                    channel = normalize.Channels();
                    inputData = ExtractMat(normalize, channel, modelHeight, maxWidth);
                }
            }

            using (PaddleTensor input = rec_predictor.GetInputTensor(rec_predictor.InputNames[0]))
            {
                input.Shape = new[] { 1, channel, modelHeight, maxWidth };
                input.SetData(inputData);
            }

            if (!rec_predictor.Run())
            {
                throw new InvalidOperationException("PaddlePredictor(Recognizer) run failed.");
            }

            float[] outputData;
            int[] shape;
            using (PaddleTensor output = rec_predictor.GetOutputTensor(rec_predictor.OutputNames[0]))
            {
                outputData = output.GetData<float>();
                shape = output.Shape;
            }

            // The decoding below indexes the output as [batch, step, label].
            if (shape.Length != 3 || shape[2] <= 0
                || outputData.Length < (long)shape[0] * shape[1] * shape[2])
            {
                throw new InvalidOperationException("Unexpected recognizer output shape: [" + string.Join(", ", shape) + "]");
            }

            int charCount = shape[1];
            int labelCount = shape[2];

            StringBuilder sbInfo = new StringBuilder();
            for (int i = 0; i < shape[0]; i++)
            {
                float score;
                string text = DecodeSequence(outputData, i * charCount * labelCount, charCount, labelCount, out score);
                sbInfo.AppendLine("Text:" + text);
                sbInfo.AppendLine("Score:" + score);
            }

            stopwatch.Stop();
            sbInfo.AppendLine("-----------------------------------\n");
            sbInfo.AppendLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff"));
            sbInfo.AppendLine("耗时:" + stopwatch.Elapsed.TotalMilliseconds + "ms\n");
            textBox1.Text = sbInfo.ToString();
        }

        /// <summary>
        /// Returns a new 3-channel copy of <paramref name="src"/>; the caller owns it.
        /// </summary>
        /// <exception cref="NotSupportedException">The channel count is not 1, 3 or 4.</exception>
        private static Mat ToThreeChannels(Mat src)
        {
            switch (src.Channels())
            {
                case 4:
                    // Bitmap-derived Mats are BGRA; dropping alpha with BGRA2BGR keeps
                    // the same channel order as the 3-channel path (RGBA2BGR swapped R/B).
                    return src.CvtColor(ColorConversionCodes.BGRA2BGR);
                case 3:
                    return src.Clone();
                case 1:
                    return src.CvtColor(ColorConversionCodes.GRAY2RGB);
                default:
                    throw new NotSupportedException("Unexpect src channel: {" + src.Channels() + "}, allow: (1/3/4)");
            }
        }

        /// <summary>
        /// Decodes one sequence of per-step label scores: takes the best label of
        /// every step, skips label 0 and repeats of the previous step's label, and
        /// concatenates the remaining labels.
        /// </summary>
        /// <param name="data">Flat score buffer.</param>
        /// <param name="offset">Index of the first score of this sequence.</param>
        /// <param name="stepCount">Number of steps in the sequence.</param>
        /// <param name="labelCount">Number of scores per step (must be positive).</param>
        /// <param name="averageScore">Mean score of the kept steps, or 0 when none was kept.</param>
        /// <returns>The decoded text (possibly empty).</returns>
        private string DecodeSequence(float[] data, int offset, int stepCount, int labelCount, out float averageScore)
        {
            StringBuilder sb = new StringBuilder();
            int lastIndex = 0;
            int kept = 0;
            float scoreSum = 0;

            for (int n = 0; n < stepCount; ++n)
            {
                int rowStart = offset + n * labelCount;

                // Arg-max of this step; the first maximum wins on ties.
                int best = 0;
                float bestValue = data[rowStart];
                for (int k = 1; k < labelCount; ++k)
                {
                    float value = data[rowStart + k];
                    if (value > bestValue)
                    {
                        bestValue = value;
                        best = k;
                    }
                }

                bool repeatsPrevious = n > 0 && best == lastIndex;
                if (best > 0 && !repeatsPrevious)
                {
                    scoreSum += bestValue;
                    sb.Append(GetLabelByIndex(best));
                    kept++;
                }

                lastIndex = best;
            }

            // Avoid 0/0 (NaN) when nothing was recognised.
            averageScore = kept > 0 ? scoreSum / kept : 0f;
            return sb.ToString();
        }

        /// <summary>
        /// Maps a model output index to its text. Indices 1..Labels.Count map to the
        /// dictionary entries; Labels.Count + 1 is the extra class after the
        /// dictionary.
        /// </summary>
        /// <exception cref="InvalidOperationException">The index is outside both ranges.</exception>
        string GetLabelByIndex(int x)
        {
            if (x > 0 && x <= Labels.Count)
            {
                return Labels[x - 1];
            }

            if (x == Labels.Count + 1)
            {
                // NOTE(review): PP-OCR recognition models append a space class after
                // the dictionary; returning "" here dropped recognised spaces.
                // Confirm against the model's use_space_char setting.
                return " ";
            }

            throw new InvalidOperationException("Unable to GetLabelByIndex: index {" + x + "} out of range {" + Labels.Count + "}, OCR model or labels not matched?");
        }

        /// <summary>
        /// Resizes <paramref name="src"/> to the given height keeping its aspect
        /// ratio and pads it on the right with gray up to <paramref name="targetWidth"/>.
        /// </summary>
        /// <returns>A new Mat of size targetWidth x height; the caller owns it.</returns>
        /// <exception cref="ArgumentException">The source image is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A dimension is not positive.</exception>
        private Mat ResizePadding(Mat src, int height, int targetWidth)
        {
            if (height <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(height));
            }

            if (targetWidth <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(targetWidth));
            }

            OpenCvSharp.Size size = src.Size();
            if (size.Width <= 0 || size.Height <= 0)
            {
                throw new ArgumentException("Source image is empty.", nameof(src));
            }

            // Same double-precision formula the caller uses for the target width, so
            // rounding cannot make the two disagree; clamped so the padding below can
            // never become negative.
            int width = (int)Math.Ceiling(1.0 * size.Width / size.Height * height);
            width = Math.Max(1, Math.Min(width, targetWidth));

            if (width == targetWidth)
            {
                return src.Resize(new OpenCvSharp.Size(width, height));
            }

            using (Mat resized = src.Resize(new OpenCvSharp.Size(width, height)))
            {
                return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
            }
        }

        /// <summary>
        /// Converts the image to 32-bit float, scales it to [0, 1] and applies
        /// per-channel <c>(x - mean) / std</c> normalisation.
        /// </summary>
        /// <returns>A new float Mat; the caller owns it.</returns>
        private Mat Normalize(Mat src)
        {
            float[] scales = new[] { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
            float[] means = new[] { 0.485f, 0.456f, 0.406f };

            Mat[] bgr;
            using (Mat normalized = new Mat())
            {
                src.ConvertTo(normalized, MatType.CV_32FC3, 1.0 / 255);
                bgr = normalized.Split();
            }

            try
            {
                for (int i = 0; i < bgr.Length; ++i)
                {
                    // x * scale + (-mean * scale) == (x - mean) / std
                    bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1.0 * scales[i], (0.0 - means[i]) * scales[i]);
                }

                Mat dest = new Mat();
                try
                {
                    Cv2.Merge(bgr, dest);
                    return dest;
                }
                catch
                {
                    dest.Dispose();
                    throw;
                }
            }
            finally
            {
                foreach (Mat channel in bgr)
                {
                    channel.Dispose();
                }
            }
        }

        /// <summary>
        /// Copies a float image into a flat array, one full plane per channel.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The image does not have the expected channel count, size or depth.
        /// </exception>
        private float[] ExtractMat(Mat mat, int channel, int height, int width)
        {
            if (mat.Channels() != channel)
            {
                throw new ArgumentException($"src channel={mat.Channels()}, expected {channel}");
            }

            float[] result = new float[1 * channel * width * height];
            GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
            try
            {
                CopyChannelPlanes(mat, channel, height, width, resultHandle.AddrOfPinnedObject());
                return result;
            }
            finally
            {
                resultHandle.Free();
            }
        }

        /// <summary>
        /// Copies several float images into one flat array, image after image, each
        /// stored as one full plane per channel.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// An image does not have the expected channel count, size or depth.
        /// </exception>
        private float[] ExtractMat(Mat[] srcs, int channel, int height, int width)
        {
            float[] result = new float[srcs.Length * channel * width * height];
            GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
            try
            {
                long basePtr = resultHandle.AddrOfPinnedObject().ToInt64();
                long imageBytes = (long)channel * height * width * sizeof(float);

                for (int i = 0; i < srcs.Length; ++i)
                {
                    Mat src = srcs[i];
                    if (src.Channels() != channel)
                    {
                        throw new ArgumentException($"src[{i}] channel={src.Channels()}, expected {channel}");
                    }

                    CopyChannelPlanes(src, channel, height, width, new IntPtr(basePtr + i * imageBytes));
                }

                return result;
            }
            finally
            {
                resultHandle.Free();
            }
        }

        /// <summary>
        /// Writes every channel of <paramref name="src"/> as a contiguous
        /// height x width float plane starting at <paramref name="destination"/>.
        /// </summary>
        private static void CopyChannelPlanes(Mat src, int channel, int height, int width, IntPtr destination)
        {
            // The destination Mats wrap caller-owned memory. If the source had another
            // size or depth, OpenCV would reallocate them and the caller's buffer
            // would silently stay zeroed, so reject such input up front.
            if (src.Height != height || src.Width != width)
            {
                throw new ArgumentException($"src size={src.Width}x{src.Height}, expected {width}x{height}");
            }

            if (src.Depth() != MatType.CV_32F)
            {
                throw new ArgumentException("src must be a 32-bit float Mat.");
            }

            long planeBytes = (long)height * width * sizeof(float);
            for (int c = 0; c < channel; ++c)
            {
                IntPtr planePtr = new IntPtr(destination.ToInt64() + c * planeBytes);
                using (Mat dest = new Mat(height, width, MatType.CV_32FC1, planePtr))
                {
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
        }
    }
}