﻿/*
 * Created by SharpDevelop.
 * User: utente
 * Date: 10/7/2016
 * 
 * To change this template use Tools | Options | Coding | Edit Standard Headers.
 */
using System;
using ICSharpCode.SharpZipLib.Core;
using ICSharpCode.SharpZipLib.Zip;
using System.IO;
using System.Text;
using System.Xml;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System.Drawing;
using System.Drawing.Imaging;
using System.Collections;
using System.Collections.Generic;

namespace MainProject.Libs
{
	/// <summary>
	/// Extracts plain text from documents: PDF files through iTextSharp, and the
	/// zip-based ODT, ODS, DOCX, XLSX and XLSM formats by reading the text nodes
	/// of one XML entry inside the archive through SharpZipLib.
	/// </summary>
	public static class TextExtractor
	{
		
		/// <summary>
		/// Returns the text of a document, dispatching on its declared format.
		/// </summary>
		/// <param name="filename">Path of the document to read.</param>
		/// <param name="srcType">Format of the document; selects the extractor.</param>
		/// <param name="pageNumber">
		/// Page to extract. For PDF it is forwarded to iTextSharp as the page to read.
		/// The zip-based formats are not paginated here: they return their whole text
		/// for page 1 and an empty string for any other page.
		/// </param>
		/// <returns>
		/// The extracted text, or an empty string when <paramref name="srcType"/> has no extractor.
		/// </returns>
		public static string GetText(string filename, fileType srcType, int pageNumber = 1)
		{
			switch (srcType) {
				case fileType.Odt: return TextFromOdt(filename, pageNumber);
				case fileType.Ods: return TextFromOds(filename, pageNumber);
				case fileType.Docx: return TextFromDocx(filename, pageNumber);
				case fileType.Xlsx: 
				case fileType.Xlsm: 
					return TextFromXlsx(filename, pageNumber);
				case fileType.Pdf: return TextFromPdf(filename, pageNumber);
			}
			return string.Empty;
		}
		

		/// <summary>
		/// Extracts the text of one page of a PDF file using the simple extraction strategy.
		/// </summary>
		/// <param name="filename">Path of the PDF file.</param>
		/// <param name="pageNumber">Page passed to <c>PdfTextExtractor.GetTextFromPage</c>.</param>
		/// <returns>The text iTextSharp extracted from that page.</returns>
		private static string TextFromPdf(string filename, int pageNumber) {
			// The using block closes and disposes the reader, also when extraction throws.
			using (PdfReader reader = new PdfReader(filename)) {
				return PdfTextExtractor.GetTextFromPage(reader, pageNumber, new SimpleTextExtractionStrategy());
			}
		}

		/// <summary>Extracts the text of an ODT document from its <c>content.xml</c> entry.</summary>
		private static string TextFromOdt(string filename, int pageNumber) {
			return TextFromZippedXml(filename, "content.xml", pageNumber);
		}

		/// <summary>Extracts the text of a DOCX document from its <c>word/document.xml</c> entry.</summary>
		private static string TextFromDocx(string filename, int pageNumber) {
			return TextFromZippedXml(filename, "word/document.xml", pageNumber);
		}

		/// <summary>Extracts the text of an ODS spreadsheet from its <c>content.xml</c> entry.</summary>
		private static string TextFromOds(string filename, int pageNumber) {
			return TextFromZippedXml(filename, "content.xml", pageNumber);
		}
		
		/// <summary>Extracts the text of an XLSX/XLSM workbook from its <c>xl/sharedStrings.xml</c> entry.</summary>
		private static string TextFromXlsx(string filename, int pageNumber) {
			return TextFromZippedXml(filename, "xl/sharedStrings.xml", pageNumber);
		}

		/// <summary>
		/// Opens a zip archive and concatenates every text and whitespace node found
		/// in one of its XML entries, in document order.
		/// </summary>
		/// <param name="filename">Path of the zip-based document.</param>
		/// <param name="entryName">Name of the XML entry inside the archive to read.</param>
		/// <param name="pageNumber">Only page 1 yields text; any other value returns an empty string.</param>
		/// <returns>
		/// The concatenated node values, or an empty string when the page is not 1
		/// or the archive does not contain <paramref name="entryName"/>.
		/// </returns>
		private static string TextFromZippedXml(string filename, string entryName, int pageNumber) {
			if (pageNumber != 1) return string.Empty;

			StringBuilder sb = new StringBuilder();

			// using blocks release the archive and its stream even if parsing throws.
			using (ZipFile zip = new ZipFile(filename)) {
				ZipEntry entry = zip.GetEntry(entryName);
				// GetEntry yields null for a missing entry (e.g. a workbook with no
				// shared strings part); there is no text to read in that case.
				if (entry == null) return string.Empty;

				using (Stream zipStream = zip.GetInputStream(entry))
				using (XmlReader xr = XmlReader.Create(zipStream)) {
					while (xr.Read()) {
						if (xr.Value != string.Empty && IsTextNode(xr.NodeType)) {
							sb.Append(xr.Value);
						}
					}
				}
			}

			return sb.ToString();
		}

		/// <summary>
		/// Tells whether a node of the given type carries content to collect:
		/// text, whitespace or significant whitespace.
		/// </summary>
		private static bool IsTextNode(XmlNodeType nodeType) {
			return nodeType == XmlNodeType.Text
				|| nodeType == XmlNodeType.Whitespace
				|| nodeType == XmlNodeType.SignificantWhitespace;
		}

	}
}
