ExcelReader.pbi

Version: #4 [example improved (thanks to Michael Vogel)] 1 2 3 4  
Created: 18.04.2022 17:27:05
;original idea from Mijikai, thanks for that!
;https://www.purebasic.fr/german/viewtopic.php?p=354120#p354120

;Last update: 18.04.2022

Enumeration
	;
	#ER_ERROR_ID_NOT_FOUND
	#ER_ERROR_SHEET_NOT_FOUND
	#ER_ERROR_NUM_OUT_OF_BOUNDS
	#ER_ERROR_LINE_OUT_OF_BOUNDS
	#ER_ERROR_CANT_EXTRACT_EXCEL_FILE
	#ER_ERROR_CANT_EXAMINE_EXCEL_FILE
	#ER_ERROR_NO_WORKBOOK
	
EndEnumeration

DeclareModule EXCEL
	
	
	UseZipPacker()
	Declare Initialize(ExcelFile.s)                 ;excel file will be initialized, sheets counted, strings collected, relationships loaded
	Declare DeInitialize(ID.i)                      ;deinitialize all stored data for this excel file
	Declare LoadAllContent(ID)                      ;load content of ALL sheets at once
	Declare LoadSheetContent(ID, SheetName.s)       ;load only content of SheetName
	Declare CountSheets(ID.i)                       ;Amount of included sheets
	Declare.s GetSheetName(ID.i, Num.i)             ;Get sheet name
	Declare GetSheetMaxColumns(ID, SheetName.s)     ;Returns max available columns from this sheet.
	Declare CountContentLines(ID, SheetName.s)      ;Count content lines of a sheet
	Declare.s GetContentLine(ID, SheetName.s, Line) ;Get whole line, columns are separated with #ESC$
	Declare GetLasterror()                          ;Get the last error
	Declare.s GetLastErrorText()                    ;Get the last error as text
	
EndDeclareModule

Module EXCEL
	
	EnableExplicit
	Structure _SHEETS_
		rID.s
		MaxColumns.i
		ArraySize.i
		ContentSize.i
		Array Content.s(500)
	EndStructure
	
	Structure _OPENFILES_
		FileName.s
		DataBase.i
		Array Strings.s(0)
		Map Sheets._SHEETS_()
		Map SheetRelationships.s()
	EndStructure
	
	
	Global OpenFiles.i, LastError.l, ErrorMsg.s
	Global NewMap Files._OPENFILES_()
	
	
	Enumeration
		;
		#ER_ERROR_ID_NOT_FOUND
		#ER_ERROR_SHEET_NOT_FOUND
		#ER_ERROR_NUM_OUT_OF_BOUNDS
		#ER_ERROR_LINE_OUT_OF_BOUNDS
		#ER_ERROR_CANT_EXTRACT_EXCEL_FILE
		#ER_ERROR_CANT_EXAMINE_EXCEL_FILE
		#ER_ERROR_NO_WORKBOOK
		
	EndEnumeration
	
	;{ private procedures
	Procedure.s GetCellName_(Column.i, Row.i)     ; get the excel name for a column... e.g. A1 = Column 0, Row 1
		Protected Result.s, k
		
		Result = ""
		k      = Column / 26
		If k > 0
			Result = Chr(64 + k)
			Column - k * 26
		EndIf
		Result + Chr(65 + Column) + Str(Row)
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure GetColumnNum_(Name.s)
		Protected Result, Pos, Row, *C.CHARACTER
		
		Name = UCase(Name)
		*C   = @Name + StringByteLength(Name, #PB_Unicode)
		Pos  = Len(Name)
		While *C\c <> @Name
			If *C\c >= '0' And *C\c <= '9'
				Pos - 1
			Else
				Break
			EndIf
		Wend
		Row  = Val(Mid(Name, Pos))
		Name = Left(Name, Pos - 1)
		
		If Len(Name) > 1
			Result = (Asc(Left(Name, 1)) - 64) * 26
		EndIf
		Result + Asc(Right(Name, 1)) - 64
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure GetSheets_(*CurrentNode)
		Protected text.s, *child, Now, a$, b$
		
		If XMLNodeType(*CurrentNode) = #PB_XML_Normal
			Now = 0
			If GetXMLNodeName(*CurrentNode) = "sheet"
				Now = 1
			EndIf
			
			
			If Now = 1
				a$ = GetXMLAttribute(*CurrentNode, "name")
				b$ = GetXMLAttribute(*CurrentNode, "r:id")
				If a$ And b$
					Files()\Sheets(a$)\rID         = b$
					Files()\Sheets(a$)\ArraySize   = 500
					Files()\Sheets(a$)\ContentSize = 0
				EndIf
			EndIf
			*child = ChildXMLNode(*CurrentNode)
			While *child
				GetSheets_(*child)
				*child = NextXMLNode(*child)
			Wend
		EndIf
	EndProcedure
	
	Procedure GetStrings_(*MainNode)
		Protected Count, *Node, *Node2, i
		
		Count = XMLChildCount(*MainNode)
		ReDim Files()\Strings(Count)
		
		For i = 0 To Count - 1
			*Node              = ChildXMLNode(*MainNode, i + 1)
			*Node2             = ChildXMLNode(*Node, 1)
			Files()\Strings(i) = GetXMLNodeText(*Node2)
		Next i
		
	EndProcedure
	
	Procedure GetRelationships_(*MainNode)
		Protected Count, *Node, *Node2, i, j, Sheet.s
		
		Count = XMLChildCount(*MainNode)
		For i = 0 To Count - 1
			*Node = ChildXMLNode(*MainNode, i + 1)
			Sheet = GetXMLAttribute(*Node, "Target")
			j     = FindString(Sheet, "/sheet")
			If j
				Files()\SheetRelationships(GetXMLAttribute(*Node, "Id")) = Mid(Sheet, j + 1)
			EndIf
		Next i
		
	EndProcedure
	
	Procedure LoadSheetContentFromNode_(*currentNode)
		Protected i, j, k, l, Rows, Columns, Count3, *Node, *Node2, *Node3, *Node4
		Protected a$, b$, c$, MaxColumns, ContentSize, ColumnNum, CurrCellName.s
		
		*Node = XMLNodeFromPath(*currentNode, "/worksheet/sheetData/")
		If *Node
			
			Rows = XMLChildCount(*Node)
			For i = 1 To Rows
        ;rows
				*Node2  = ChildXMLNode(*Node, i)
				Columns = XMLChildCount(*Node2)
				If Columns > Files()\Sheets()\MaxColumns
					Files()\Sheets()\MaxColumns = Columns
				EndIf
				c$ = ""
				k  = 0
				For j = 1 To Columns
					ColumnNum = GetColumnNum_(GetCellName_(k, i))
					If ColumnNum > MaxColumns
						MaxColumns = ColumnNum
					EndIf
          ;cells
					*Node3       = ChildXMLNode(*Node2, j)
					CurrCellName = GetXMLAttribute(*Node3, "r")
					While GetCellName_(k, i) <> CurrCellName
						c$ + #ESC$
						k + 1
					Wend
					Count3 = XMLChildCount(*Node3)
					a$     = GetXMLAttribute(*Node3, "r")
					If XMLChildCount(*Node3) > 0
						*Node4 = ChildXMLNode(*Node3, 1)
						If GetXMLAttribute(*Node3, "t") = "s"
							b$ = Files()\Strings(Val(GetXMLNodeText(*Node4)))
						Else
							b$ = GetXMLNodeText(*Node4)
						EndIf
					Else
						b$ = ""
					EndIf
					c$ + b$ + #ESC$
					k + 1
				Next j
				Files()\Sheets()\MaxColumns           = MaxColumns
				Files()\Sheets()\Content(ContentSize) = c$
				ContentSize + 1
				If ContentSize >= ArraySize(Files()\Sheets()\Content())
					Files()\Sheets()\ArraySize + 500
					ReDim Files()\Sheets()\Content(Files()\Sheets()\ArraySize)
				EndIf
			Next i
			For i = 0 To ArraySize(Files()\Sheets()\Content()) - 1
				If Files()\Sheets()\Content()
					While CountString(Files()\Sheets()\Content(i), #ESC$) < MaxColumns
						Files()\Sheets()\Content(i) + #ESC$
					Wend
				EndIf
			Next i
			
			Files()\Sheets()\ContentSize = ContentSize
		EndIf
		
	EndProcedure
	
	Procedure LoadSheetContentFromrID_(rID.s)
		Protected Pack, worksheet_size, *worksheet, XML, xml_node, Result, a$
		
		Pack = OpenPack(#PB_Any, Files()\FileName)
		If Pack = 0
			LastError = #ER_ERROR_CANT_EXTRACT_EXCEL_FILE
			ErrorMsg  = "Can't extract excel file!"
		Else
			If ExaminePack(Pack) = 0
				LastError = #ER_ERROR_CANT_EXAMINE_EXCEL_FILE
				ErrorMsg  = "Can't examine excel file, wrong format?"
			Else
				a$ = Files()\SheetRelationships(rID)
				While NextPackEntry(Pack)
					If Right(PackEntryName(Pack), Len(a$)) = a$
						worksheet_size = PackEntrySize(Pack, #PB_Packer_UncompressedSize)
						If worksheet_size
							*worksheet = AllocateMemory(worksheet_size)
							If *worksheet
								If UncompressPackMemory(Pack, *worksheet, worksheet_size) = worksheet_size
									XML = CatchXML(#PB_Any, *worksheet, worksheet_size)
									If XML
										Result   = #True
										xml_node = MainXMLNode(XML)
										If xml_node
											LoadSheetContentFromNode_(xml_node)
										EndIf
										FreeXML(XML)
									EndIf
								EndIf
								FreeMemory(*worksheet)
							EndIf
						EndIf
					EndIf
				Wend
				If Result = #False
					LastError = #ER_ERROR_NO_WORKBOOK
					ErrorMsg  = "No Workbook found in excel file?!"
				EndIf
			EndIf
			ClosePack(Pack)
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	;}
	
	;{ public procedures
	Procedure LoadData()
		Protected Pack, worksheet_size, *worksheet, XML, xml_node, Result
		
		Pack = OpenPack(#PB_Any, Files()\FileName)
		If Pack = 0
			LastError = #ER_ERROR_CANT_EXTRACT_EXCEL_FILE
			ErrorMsg  = "Can't extract excel file!"
		Else
			If ExaminePack(Pack) = 0
				LastError = #ER_ERROR_CANT_EXAMINE_EXCEL_FILE
				ErrorMsg  = "Can't examine excel file, wrong format?"
			Else
				While NextPackEntry(Pack)
					If LCase(Right(PackEntryName(Pack), 12)) = "workbook.xml"
						worksheet_size = PackEntrySize(Pack, #PB_Packer_UncompressedSize)
						If worksheet_size
							*worksheet = AllocateMemory(worksheet_size)
							If *worksheet
								If UncompressPackMemory(Pack, *worksheet, worksheet_size) = worksheet_size
									XML = CatchXML(#PB_Any, *worksheet, worksheet_size)
									If XML
										Result   = Result | 1
										xml_node = MainXMLNode(XML)
										If xml_node
											GetSheets_(xml_node)
										EndIf
										FreeXML(XML)
									EndIf
								EndIf
								FreeMemory(*worksheet)
							EndIf
						EndIf
					ElseIf LCase(Right(PackEntryName(Pack), 17)) = "sharedstrings.xml"
						worksheet_size = PackEntrySize(Pack, #PB_Packer_UncompressedSize)
						If worksheet_size
							*worksheet = AllocateMemory(worksheet_size)
							If *worksheet
								If UncompressPackMemory(Pack, *worksheet, worksheet_size) = worksheet_size
									XML = CatchXML(#PB_Any, *worksheet, worksheet_size)
									If XML
										xml_node = MainXMLNode(XML)
										If xml_node
											GetStrings_(xml_node)
										EndIf
										FreeXML(XML)
									EndIf
								EndIf
								FreeMemory(*worksheet)
							EndIf
						EndIf
					ElseIf LCase(Right(PackEntryName(Pack), 17)) = "workbook.xml.rels"
						worksheet_size = PackEntrySize(Pack, #PB_Packer_UncompressedSize)
						If worksheet_size
							*worksheet = AllocateMemory(worksheet_size)
							If *worksheet
								If UncompressPackMemory(Pack, *worksheet, worksheet_size) = worksheet_size
									XML = CatchXML(#PB_Any, *worksheet, worksheet_size)
									If XML
										ClearMap(Files()\SheetRelationships())
										Result   = Result | 2
										xml_node = MainXMLNode(XML)
										If xml_node
											GetRelationships_(xml_node)
										EndIf
										FreeXML(XML)
									EndIf
								EndIf
								FreeMemory(*worksheet)
							EndIf
						EndIf
					EndIf
				Wend
				If Result <> 3
					LastError = #ER_ERROR_NO_WORKBOOK
					ErrorMsg  = "No Workbook found in excel file?!"
					Result    = 0
				EndIf
			EndIf
			ClosePack(Pack)
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure Initialize(ExcelFile.s)
		Protected Result, Found, i
		
		LastError = #Null
		
		ForEach Files()
			If Files()\FileName = ExcelFile
				Found  = #True
				Result = Val(MapKey(Files()))
				Break
			EndIf
		Next
		If Found = 0
			OpenFiles + 1
			AddMapElement(Files(), Str(OpenFiles))
			Files()\FileName = ExcelFile
			LoadData()
			Result = OpenFiles
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure DeInitialize(ID.i)
		Protected Result
		
		If FindMapElement(Files(), Str(ID))
			If MapSize(Files()\Sheets()) > 0
				ReDim Files()\Sheets()\Content(0)
			EndIf
			ReDim Files()\Strings(0)
			ClearMap(Files()\SheetRelationships())
			ClearMap(Files()\Sheets())
			DeleteMapElement(Files())
			If MapSize(Files()) = 0
				OpenFiles = 0
			EndIf
			Result = #True
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure CountSheets(ID.i)
		Protected Result
		
		If FindMapElement(Files(), Str(ID))
			Result = MapSize(Files()\Sheets())
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure.s GetSheetName(ID.i, Num.i)
		Protected Result.s, i
		
		If FindMapElement(Files(), Str(ID))
			If Num >= 0 And Num < MapSize(Files()\Sheets())
				i = 0
				ForEach Files()\Sheets()
					If i = Num
						Break
					EndIf
					i + 1
				Next
				Result = MapKey(Files()\Sheets())
			Else
				LastError = #ER_ERROR_NUM_OUT_OF_BOUNDS
				ErrorMsg  = "Num out of bounds!"
			EndIf
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure LoadSheetContent(ID, SheetName.s)
		Protected Result
		
		
		If FindMapElement(Files(), Str(ID))
			If FindMapElement(Files()\Sheets(), SheetName)
				LoadSheetContentFromrID_(Files()\Sheets()\rID)
				Result = #True
			Else
				LastError = #ER_ERROR_SHEET_NOT_FOUND
				ErrorMsg  = "Sheet not found!"
			EndIf
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure GetSheetMaxColumns(ID, SheetName.s)
		Protected Result
		
		
		If FindMapElement(Files(), Str(ID))
			If FindMapElement(Files()\Sheets(), SheetName)
				Result = Files()\Sheets()\MaxColumns
			Else
				LastError = #ER_ERROR_SHEET_NOT_FOUND
				ErrorMsg  = "Sheet not found!"
			EndIf
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	
	Procedure LoadAllContent(ID)
		Protected Result, a$, Columns, ContentSize
		
		If FindMapElement(Files(), Str(ID))
			ForEach Files()\Sheets()
				LoadSheetContentFromrID_(Files()\Sheets()\rID)
				Result = #True
			Next
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure CountContentLines(ID, SheetName.s)
		Protected Result, Found
		
		If FindMapElement(Files(), Str(ID))
			If FindMapElement(Files()\Sheets(), SheetName)
				Result = Files()\Sheets(SheetName)\ContentSize
			Else
				LastError = #ER_ERROR_SHEET_NOT_FOUND
				ErrorMsg  = "Sheet not found!"
			EndIf
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure.s GetContentLine(ID, SheetName.s, Line)
		Protected Result.s, Found
		
		If FindMapElement(Files(), Str(ID))
			If FindMapElement(Files()\Sheets(), SheetName)
				If Line >= 0 And Line < Files()\Sheets()\ContentSize
					Result = Files()\Sheets()\Content(Line)
				Else
					LastError = #ER_ERROR_LINE_OUT_OF_BOUNDS
					ErrorMsg  = "Line out of bounds!"
				EndIf
			Else
				LastError = #ER_ERROR_SHEET_NOT_FOUND
				ErrorMsg  = "Sheet not found!"
			EndIf
		Else
			LastError = #ER_ERROR_ID_NOT_FOUND
			ErrorMsg  = "ID not found!"
		EndIf
		
		ProcedureReturn Result
	EndProcedure
	
	Procedure GetLastError()
		ProcedureReturn LastError
	EndProcedure
	
	Procedure.s GetLastErrorText()
		ProcedureReturn ErrorMsg
	EndProcedure
	;}
	
EndModule

CompilerIf #PB_Compiler_IsMainFile
	
	Procedure InputExcelColumns(Column.s)
		
		Protected Result
		
		Column = UCase(Column)
		If Len(Column) > 1
			Result = (Asc(Left(Column, 1)) - 64) * 26
		EndIf
		Result + Asc(Right(Column, 1)) - 64
		
		ProcedureReturn Result
		
	EndProcedure
	Procedure.s OutputExcelColumns(Num.i)
		
		Protected Result.s, k
		
		Result = ""
		k      = Num / 26
		If k > 0
			Result = Chr(64 + k)
			Num - k * 26
		EndIf
		Result + Chr(65 + Num)
		
		ProcedureReturn Result
		
	EndProcedure
	Procedure main()
		
		Protected ExcelID, File$, count, i, j, SheetName.s, b$, Line.s, nr.i
		
		File$ = OpenFileRequester("Select Excel File", "Test.xlsx", "Excel (*.xlsx;*.xlsm)|*.xlsx;*.xlsm|Alle Dateien (*.*)|*.*", 0)
		If File$ = ""
			ProcedureReturn
		EndIf
		
		OpenConsole("Excel Reader")
		EnableGraphicalConsole(1)
		
		ExcelID = EXCEL::Initialize(File$)
		
		If ExcelID = 0
			PrintN("Error: " + EXCEL::GetLastErrorText())
			ProcedureReturn
		EndIf
		
		Count = EXCEL::CountSheets(ExcelID) - 1
		PrintN("Select Sheet for some action:")
		
		For i = 0 To Count
			PrintN(OutputExcelColumns(i) + ": " + EXCEL::GetSheetName(ExcelID, i))
		Next i
		PrintN("")
		Print("(A-" + OutputExcelColumns(i - 1) + ") : ")
		SheetName = Input()
		If SheetName = "" : SheetName = "A" : EndIf
		j         = InputExcelColumns(SheetName) - 1
		SheetName = EXCEL::GetSheetName(ExcelID, j)
		PrintN("")
		PrintN(SheetName + " selected!")
		
		If EXCEL::LoadSheetContent(ExcelID, SheetName) = 0
			PrintN("Error: " + EXCEL::GetLastErrorText())
		Else
			count = EXCEL::CountContentLines(ExcelID, SheetName)
			If count = 0
				PrintN("Error! No Content in this Sheet!")
			Else
				PrintN("")
				Repeat
					Print("Select a line, you want to read (1..." + Str(count) + ") : ")
					b$ = Input()
					nr = Val(b$)
					;ClearConsole()
					If b$ <> "" And nr > 0 And nr <= count
						Line = EXCEL::GetContentLine(ExcelID, SheetName, nr - 1)
						Line = ReplaceString(Line, #ESC$, "|")
						PrintN("#" + RSet(Str(nr), 3, "0") + " = " + Line)
						;Debug Line
					Else
						PrintN("")
						PrintN("Illegal line number")
						Break
					EndIf
				ForEver
			EndIf
		EndIf
		EXCEL::DeInitialize(ExcelID)
		
		PrintN("Press Enter to exit")
		Input()
	EndProcedure
	main()
	
CompilerEndIf