|
|
(未显示同一用户的12个中间版本) |
第1行: |
第1行: |
| local ftcsv = {
| | --[[ |
| _VERSION = 'ftcsv 1.4.0', | | Modified by Phlamcenth Sicusa from: |
| _DESCRIPTION = 'CSV library for Lua', | | CSV Library v1 (Author: Michael Lutz, 2022-12-14) |
| _URL = 'https://github.com/FourierTransformer/ftcsv', | | Built on: http://lua-users.org/wiki/LuaCsv |
| _LICENSE = [[ | | |
| The MIT License (MIT)
| | csv.load = function(filename, delimiter, header) |
| | filename := CSV file to load |
| | delimiter := Delimiter (";", ",", "\t", etc..), default = ',' |
| | header := (optional) if first line is a header (true/false), default = false |
| | |
| | automatically removes quotes from text |
| | returns a table |
| | |
| | csv.save = function(filename, delimiter, data, header) |
| | filename := CSV file to write to |
| | delimiter := Delimiter (";", ",", "\t", etc..), default = ',' |
| | data := a Lua table that holds the rows and columns |
| | header := a Lua table that holds the names of the columns e.g. { "Name", "Address", "Email", ... } |
| | --]] |
|
| |
|
| Copyright (c) 2016-2023 Shakil Thakur
| | local unpack = unpack or table.unpack |
|
| |
|
| Permission is hereby granted, free of charge, to any person obtaining a copy
| | local BYTE_QUOTE = string.byte('"') |
| of this software and associated documentation files (the "Software"), to deal
| | local BYTE_ENTER = string.byte('\r') |
| in the Software without restriction, including without limitation the rights
| | local BYTE_NEWLINE = string.byte('\n') |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
| | local BYTE_COMMA = string.byte(',') |
| copies of the Software, and to permit persons to whom the Software is
| |
| furnished to do so, subject to the following conditions:
| |
|
| |
|
| The above copyright notice and this permission notice shall be included in all
| | local function parse_quoted(input, sep, pos, s) |
| copies or substantial portions of the Software.
| | while true do |
| | | local c = string.byte(input, pos) |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
| | if c == BYTE_QUOTE then |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
| | if string.byte(input, pos + 1) == BYTE_QUOTE then |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
| | pos = pos + 1 |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | | s[#s+1] = '"' |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
| |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
| |
| SOFTWARE.
| |
| ]]
| |
| }
| |
| | |
| -- perf
| |
| local sbyte = string.byte | |
| local ssub = string.sub
| |
| | |
| -- luajit/lua compatibility layer
| |
-- Holds the interpreter-dependent helpers used by the rest of the module.
local luaCompatibility = {}
do
    -- `jit` is a table under LuaJIT; `_ENV` is non-nil on Lua 5.2+.
    local modernRuntime = type(jit) == 'table' or _ENV
    if not modernRuntime then
        -- lua 5.1
        luaCompatibility.load = loadstring
    else
        -- luajit and lua 5.2+
        luaCompatibility.load = _G.load
    end
end
| |
| | |
| -- luajit specific speedups
| |
| -- luajit performs faster with iterating over string.byte,
| |
| -- whereas vanilla lua performs faster with string.find
| |
| if type(jit) == 'table' then
| |
| luaCompatibility.LuaJIT = true
| |
| -- finds the end of an escape sequence
| |
| function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
| |
| local currentChar, nextChar = sbyte(inputString, i), nil
| |
| while i <= inputLength do
| |
| nextChar = sbyte(inputString, i+1)
| |
| | |
| -- this one deals with " double quotes that are escaped "" within single quotes "
| |
| -- these should be turned into a single quote at the end of the field
| |
| if currentChar == quote and nextChar == quote then
| |
| doubleQuoteEscape = true | |
| i = i + 2 | |
| currentChar = sbyte(inputString, i)
| |
| | |
| -- identifies the escape toggle
| |
| elseif currentChar == quote and nextChar ~= quote then
| |
| return i-1, doubleQuoteEscape
| |
| else | | else |
| i = i + 1 | | return pos + 1 |
| currentChar = nextChar
| |
| end | | end |
| | elseif c == sep then |
| | return pos + 1 |
| | elseif c == nil then |
| | return pos |
| | else |
| | s[#s+1] = c |
| end | | end |
| | pos = pos + 1 |
| end | | end |
| | end |
|
| |
|
| else
| | local function bytes_to_string(bytes) |
| luaCompatibility.LuaJIT = false
| | if #bytes == 0 then |
| | | return "" |
| -- vanilla lua closing quote finder
| | else |
| function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
| | return string.char(unpack(bytes)) |
| local j, difference
| |
| i, j = inputString:find('"+', i)
| |
| if j == nil then
| |
| return nil
| |
| end
| |
| difference = j - i | |
| if difference >= 1 then doubleQuoteEscape = true end
| |
| if difference % 2 == 1 then
| |
| return luaCompatibility.findClosingQuote(j+1, inputLength, inputString, quote, doubleQuoteEscape)
| |
| end
| |
| return j-1, doubleQuoteEscape
| |
| end | | end |
| end | | end |
|
| |
|
| | | local function parse_row(input, sep, pos) |
| -- determine the real headers as opposed to the header mapping
| | local r = {} |
| local function determineRealHeaders(headerField, fieldsToKeep) | | local s = {} |
| local realHeaders = {} | | while true do |
| local headerSet = {} | | local c = string.byte(input, pos) |
| for i = 1, #headerField do | | if c == sep then |
| if not headerSet[headerField[i]] then | | r[#r+1] = bytes_to_string(s) |
| if fieldsToKeep ~= nil and fieldsToKeep[headerField[i]] then | | s = {} |
| table.insert(realHeaders, headerField[i])
| | pos = pos + 1 |
| headerSet[headerField[i]] = true
| | elseif c == nil then |
| elseif fieldsToKeep == nil then | | if #r ~= 0 or #s ~= 0 then |
| table.insert(realHeaders, headerField[i])
| | r[#r+1] = bytes_to_string(s) |
| headerSet[headerField[i]] = true | |
| end | | end |
| | break |
| | elseif c == BYTE_NEWLINE then |
| | r[#r+1] = bytes_to_string(s) |
| | pos = pos + 1 |
| | break |
| | elseif c == BYTE_ENTER then |
| | pos = pos + 1 |
| | elseif c == BYTE_QUOTE then |
| | pos = parse_quoted(input, sep, pos + 1, s) |
| | else |
| | s[#s+1] = c |
| | pos = pos + 1 |
| end | | end |
| end | | end |
| return realHeaders | | return r, pos |
| end | | end |
|
| |
|
| | local csv = {} |
|
| |
|
| local function determineTotalColumnCount(headerField, fieldsToKeep)
| | csv.parse_row = function(str, delimiter) |
| local totalColumnCount = 0
| | local sep = delimiter and string.byte(delimiter) or BYTE_COMMA |
| local headerFieldSet = {}
| | return parse_row(str, sep, 1) |
| for _, header in pairs(headerField) do
| |
| -- count unique columns and
| |
| -- also figure out if it's a field to keep
| |
| if not headerFieldSet[header] and
| |
| (fieldsToKeep == nil or fieldsToKeep[header]) then
| |
| headerFieldSet[header] = true
| |
| totalColumnCount = totalColumnCount + 1
| |
| end
| |
| end
| |
| return totalColumnCount
| |
| end | | end |
|
| |
|
| local function generateHeadersMetamethod(finalHeaders)
| | csv.parse = function(str, enable_header, delimiter) |
| -- if a header field tries to escape, we will simply return nil
| | local sep = delimiter and string.byte(delimiter) or BYTE_COMMA |
| -- the parser will still parse, but wont get the performance benefit of
| | local pos = 1 |
| -- having headers predefined
| | local csv = {} |
| for _, headers in ipairs(finalHeaders) do
| | local row_mt = nil |
| if headers:find("]") then
| |
| return nil
| |
| end
| |
| end
| |
| local rawSetup = "local t, k, _ = ... \
| |
| rawset(t, k, {[ [[%s]] ]=true})"
| |
| rawSetup = rawSetup:format(table.concat(finalHeaders, "]] ]=true, [ [["))
| |
| return luaCompatibility.load(rawSetup)
| |
| end
| |
| | |
| -- main function used to parse
| |
| local function parseString(inputString, i, options)
| |
| | |
| -- keep track of my chars!
| |
| local inputLength = options.inputLength or #inputString | |
| local currentChar, nextChar = sbyte(inputString, i), nil
| |
| local skipChar = 0
| |
| local field
| |
| local fieldStart = i
| |
| local fieldNum = 1
| |
| local lineNum = 1
| |
| local lineStart = i
| |
| local doubleQuoteEscape, emptyIdentified = false, false
| |
| | |
| local skipIndex
| |
| local charPatternToSkip = "[" .. options.delimiter .. "\r\n]"
| |
| | |
| --bytes
| |
| local CR = sbyte("\r")
| |
| local LF = sbyte("\n")
| |
| local quote = sbyte('"')
| |
| local delimiterByte = sbyte(options.delimiter)
| |
| | |
| -- explode most used options
| |
| local headersMetamethod = options.headersMetamethod | |
| local fieldsToKeep = options.fieldsToKeep | |
| local ignoreQuotes = options.ignoreQuotes
| |
| local headerField = options.headerField
| |
| local endOfFile = options.endOfFile
| |
| local buffered = options.buffered
| |
| | |
| local outResults = {}
| |
| | |
| -- in the first run, the headers haven't been set yet.
| |
| if headerField == nil then | |
| headerField = {}
| |
| -- setup a metatable to simply return the key that's passed in
| |
| local headerMeta = {__index = function(_, key) return key end}
| |
| setmetatable(headerField, headerMeta)
| |
| end
| |
| | |
| if headersMetamethod then
| |
| setmetatable(outResults, {__newindex = headersMetamethod})
| |
| end
| |
| outResults[1] = {}
| |
| | |
| -- totalColumnCount based on unique headers and fieldsToKeep
| |
| local totalColumnCount = options.totalColumnCount or determineTotalColumnCount(headerField, fieldsToKeep)
| |
| | |
| local function assignValueToField()
| |
| if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
| |
| | |
| -- create new field
| |
| if ignoreQuotes == false and sbyte(inputString, i-1) == quote then
| |
| field = ssub(inputString, fieldStart, i-2)
| |
| else
| |
| field = ssub(inputString, fieldStart, i-1)
| |
| end
| |
| if doubleQuoteEscape then
| |
| field = field:gsub('""', '"')
| |
| end
| |
|
| |
|
| -- reset flags
| | if enable_header then |
| doubleQuoteEscape = false
| | local header |
| emptyIdentified = false
| | header, pos = parse_row(str, sep, pos) |
| | csv.header = header |
|
| |
|
| -- assign field in output
| | local head_map = {} |
| if headerField[fieldNum] ~= nil then
| | for i = 1, #header do |
| outResults[lineNum][headerField[fieldNum]] = field
| | head_map[header[i]] = i |
| else
| |
| error('ftcsv: too many columns in row ' .. options.rowOffset + lineNum)
| |
| end
| |
| end | | end |
| end
| |
|
| |
| while i <= inputLength do
| |
| -- go by two chars at a time,
| |
| -- currentChar is set at the bottom.
| |
| nextChar = sbyte(inputString, i+1)
| |
|
| |
|
| -- empty string | | row_mt = { |
| if ignoreQuotes == false and currentChar == quote and nextChar == quote then
| | __index = function (t,k) |
| skipChar = 1
| | local i = head_map[k] |
| fieldStart = i + 2
| | if i then |
| emptyIdentified = true
| | return t[i] |
| | |
| -- escape toggle.
| |
| -- This can only happen if fields have quotes around them
| |
| -- so the current "start" has to be where a quote character is.
| |
| elseif ignoreQuotes == false and currentChar == quote and nextChar ~= quote and fieldStart == i then
| |
| fieldStart = i + 1
| |
| -- if an empty field was identified before assignment, it means
| |
| -- that this is a quoted field that starts with escaped quotes
| |
| -- ex: """a"""
| |
| if emptyIdentified then
| |
| fieldStart = fieldStart - 2
| |
| emptyIdentified = false
| |
| end | |
| skipChar = 1
| |
| i, doubleQuoteEscape = luaCompatibility.findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape)
| |
| | |
| -- create some fields
| |
| elseif currentChar == delimiterByte then
| |
| assignValueToField()
| |
| | |
| -- increaseFieldIndices
| |
| fieldNum = fieldNum + 1
| |
| fieldStart = i + 1
| |
| | |
| -- newline
| |
| elseif (currentChar == LF or currentChar == CR) then
| |
| assignValueToField()
| |
| | |
| -- handle CRLF
| |
| if (currentChar == CR and nextChar == LF) then
| |
| skipChar = 1
| |
| fieldStart = fieldStart + 1 | |
| end
| |
| | |
| -- increment for new line
| |
| if fieldNum < totalColumnCount then
| |
| -- sometimes in buffered mode, the buffer starts with a newline
| |
| -- this skips the newline and lets the parsing continue.
| |
| if buffered and lineNum == 1 and fieldNum == 1 and field == "" then
| |
| fieldStart = i + 1 + skipChar | |
| lineStart = fieldStart
| |
| else
| |
| error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
| |
| end | | end |
| else
| | return nil |
| lineNum = lineNum + 1 | |
| outResults[lineNum] = {}
| |
| fieldNum = 1
| |
| fieldStart = i + 1 + skipChar
| |
| lineStart = fieldStart
| |
| end
| |
| | |
| elseif luaCompatibility.LuaJIT == false then
| |
| skipIndex = inputString:find(charPatternToSkip, i)
| |
| if skipIndex then
| |
| skipChar = skipIndex - i - 1
| |
| end
| |
| | |
| end
| |
| | |
| -- in buffered mode and it can't find the closing quote
| |
| -- it usually means in the middle of a buffer and need to backtrack
| |
| if i == nil then
| |
| if buffered then
| |
| outResults[lineNum] = nil
| |
| return outResults, lineStart
| |
| else
| |
| error("ftcsv: can't find closing quote in row " .. options.rowOffset + lineNum ..
| |
| ". Try running with the option ignoreQuotes=true if the source incorrectly uses quotes.")
| |
| end
| |
| end
| |
| | |
| -- Increment Counter
| |
| i = i + 1 + skipChar
| |
| if (skipChar > 0) then
| |
| currentChar = sbyte(inputString, i)
| |
| else
| |
| currentChar = nextChar
| |
| end
| |
| skipChar = 0
| |
| end
| |
| | |
| if buffered and not endOfFile then
| |
| outResults[lineNum] = nil
| |
| return outResults, lineStart
| |
| end
| |
| | |
| -- create last new field
| |
| assignValueToField()
| |
| | |
| -- remove last field if empty
| |
| if fieldNum < totalColumnCount then
| |
| | |
| -- indicates last field was really just a CRLF,
| |
| -- so, it can be removed
| |
| if fieldNum == 1 and field == "" then
| |
| outResults[lineNum] = nil
| |
| else
| |
| error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
| |
| end
| |
| end
| |
| | |
| return outResults, i, totalColumnCount
| |
| end
| |
| | |
-- Applies the header-related options to a parsed header row.
-- `headerField` is modified in place and also returned.
--   options.headers == false : every header is replaced by its column index
--   options.rename           : table of replacements, looked up by current
--                              header and, for its array part, by position
--   options.headerFunc       : function applied to every resulting header
-- Raises an error when headers are expected and one of them is empty.
local function handleHeaders(headerField, options)
    local renames = options.rename
    local transform = options.headerFunc

    if options.headers ~= false then
        -- make sure a header isn't empty if there are headers
        for _, name in ipairs(headerField) do
            if #name == 0 then
                error('ftcsv: Cannot parse a file which contains empty headers')
            end
        end
    else
        -- for files where there aren't headers!
        for column = 1, #headerField do
            headerField[column] = column
        end
    end

    -- rename fields as needed!
    if renames then
        -- basic rename (["a" = "apple"])
        for column = 1, #headerField do
            local replacement = renames[headerField[column]]
            if replacement then
                headerField[column] = replacement
            end
        end
        -- files without headers, but with a options.rename need to be
        -- handled too: the array part of the rename table is applied by
        -- position (the loop is a no-op when that array part is empty)
        for column = 1, #renames do
            headerField[column] = renames[column]
        end
    end

    -- apply some sweet header manipulation
    if transform then
        for column = 1, #headerField do
            headerField[column] = transform(headerField[column])
        end
    end

    return headerField
end
| |
| | |
-- Opens `textFile` for reading and reads `amount` from it (`amount` is handed
-- straight to file:read, e.g. "*all" or a byte count).
-- Returns the data read and the file handle. The handle is closed before
-- returning only when `amount` is "*all"; otherwise it is left open.
-- Raises an error when the file cannot be opened.
local function loadFile(textFile, amount)
    local handle = io.open(textFile, "r")
    if handle == nil then
        error("ftcsv: File not found at " .. textFile)
    end

    local contents = handle:read(amount)
    if amount == "*all" then handle:close() end

    return contents, handle
end
| |
| | |
-- Resolves the parser input either from a string or from a file on disk.
--   inputFile : the CSV text itself when options.loadFromString is truthy,
--               otherwise the path handed to loadFile
--   options   : parsed options table (only `loadFromString` is read here)
--   amount    : read format forwarded to loadFile ("*all" or a byte count)
-- Returns the input string and, when a file was read, the file handle.
-- Raises 'ftcsv: Cannot parse an empty file' when there is nothing to parse.
local function initializeInputFromStringOrFile(inputFile, options, amount)
    -- handle input via string or file!
    local inputString, file
    if options.loadFromString then
        inputString = inputFile
    else
        inputString, file = loadFile(inputFile, amount)
    end

    -- if they sent in an empty file...
    -- A sized file:read() on an empty file yields nil rather than "", so nil
    -- has to be treated as empty as well; otherwise the parser fails later
    -- with an unrelated string.byte argument error.
    if inputString == nil or inputString == "" then
        -- do not leak a handle that is still open (buffered reads)
        if io.type(file) == "file" then
            file:close()
        end
        error('ftcsv: Cannot parse an empty file')
    end
    return inputString, file
end
| |
| | |
-- Normalises the (delimiter, options) argument pair.
--   * string delimiter : backwards compatible form, both values pass through
--   * table delimiter  : the table is the options table; its `delimiter`
--                        field is used, falling back to ","
--   * anything else    : "," is returned and the options come back as nil
-- Returns the delimiter followed by the options table (or nil).
local function determineArgumentOrder(delimiter, options)
    local kind = type(delimiter)

    -- the new format for parseLine
    if kind == "table" then
        return delimiter.delimiter or ",", delimiter
    end

    -- backwards compatible layer
    if kind == "string" then
        return delimiter, options
    end

    -- if nothing is specified, assume "," delimited and call it a day!
    return ",", nil
end
| |
| | |
| local function parseOptions(delimiter, options, fromParseLine)
| |
| -- delimiter MUST be one character
| |
| assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")
| |
| | |
| local fieldsToKeep = nil
| |
| | |
| if options then
| |
| | |
| if options.headers ~= nil then
| |
| assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
| |
| end
| |
| | |
| if options.rename ~= nil then
| |
| assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
| |
| end
| |
| | |
| if options.fieldsToKeep ~= nil then
| |
| assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
| |
| local ofieldsToKeep = options.fieldsToKeep
| |
| if ofieldsToKeep ~= nil then
| |
| fieldsToKeep = {}
| |
| for j = 1, #ofieldsToKeep do
| |
| fieldsToKeep[ofieldsToKeep[j]] = true
| |
| end | | end |
| end
| |
| if options.headers == false and options.rename == nil then
| |
| error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality")
| |
| end
| |
| end
| |
|
| |
| if options.loadFromString ~= nil then
| |
| assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
| |
| end
| |
|
| |
| if options.headerFunc ~= nil then
| |
| assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
| |
| end
| |
|
| |
| if options.ignoreQuotes == nil then
| |
| options.ignoreQuotes = false
| |
| else
| |
| assert(type(options.ignoreQuotes) == "boolean", "ftcsv only takes a boolean value for optional parameter 'ignoreQuotes'. You passed in '" .. tostring(options.ignoreQuotes) .. "' of type '" .. type(options.ignoreQuotes) .. "'.")
| |
| end
| |
|
| |
| if fromParseLine == true then
| |
| if options.bufferSize == nil then
| |
| options.bufferSize = 2^16
| |
| else
| |
| assert(type(options.bufferSize) == "number", "ftcsv only takes a number value for optional parameter 'bufferSize'. You passed in '" .. tostring(options.bufferSize) .. "' of type '" .. type(options.bufferSize) .. "'.")
| |
| end
| |
|
| |
| else
| |
| if options.bufferSize ~= nil then
| |
| error("ftcsv: bufferSize can only be specified using 'parseLine'. When using 'parse', the entire file is read into memory")
| |
| end
| |
| end
| |
|
| |
| else
| |
| options = {
| |
| ["headers"] = true,
| |
| ["loadFromString"] = true,
| |
| ["ignoreQuotes"] = false,
| |
| ["bufferSize"] = 2^16
| |
| } | | } |
| end | | end |
|
| |
|
| return options, fieldsToKeep | | local row |
| | row, pos = parse_row(str, sep, pos) |
|
| |
|
| end
| | while #row ~= 0 do |
| | | if row_mt then |
| local function findEndOfHeaders(str, entireFile)
| | setmetatable(row, row_mt) |
| local i = 1
| |
| local quote = sbyte('"')
| |
| local newlines = { | |
| [sbyte("\n")] = true, | |
| [sbyte("\r")] = true
| |
| }
| |
| local quoted = false
| |
| local char = sbyte(str, i)
| |
| repeat
| |
| -- this should still work for escaped quotes
| |
| -- ex: " a "" b \r\n " -- there is always a pair around the newline
| |
| if char == quote then
| |
| quoted = not quoted
| |
| end | | end |
| i = i + 1 | | csv[#csv+1] = row |
| char = sbyte(str, i)
| | row, pos = parse_row(str, sep, pos) |
| until (newlines[char] and not quoted) or char == nil
| |
| | |
| if not entireFile and char == nil then
| |
| error("ftcsv: bufferSize needs to be larger to parse this file") | |
| end
| |
| | |
| local nextChar = sbyte(str, i+1)
| |
| if nextChar == sbyte("\n") and char == sbyte("\r") then
| |
| i = i + 1
| |
| end
| |
| return i
| |
| end
| |
| | |
-- Returns the index of the first byte to parse: 4 when the input starts with
-- the three bytes 239, 187, 191 (the BOM the original comment refers to),
-- otherwise 1.
local function determineBOMOffset(inputString)
    -- missing bytes (input shorter than three bytes) come back as nil
    local first, second, third = sbyte(inputString, 1, 3)
    if first == 239 and second == 187 and third == 191 then
        return 4
    end
    return 1
end
| |
| | |
| local function parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, entireFile)
| |
| local startLine = determineBOMOffset(inputString)
| |
| | |
| local endOfHeaderRow = findEndOfHeaders(inputString, entireFile)
| |
| | |
| local parserArgs = {
| |
| delimiter = delimiter,
| |
| headerField = nil,
| |
| fieldsToKeep = nil,
| |
| inputLength = endOfHeaderRow,
| |
| buffered = false,
| |
| ignoreQuotes = options.ignoreQuotes,
| |
| rowOffset = 0
| |
| }
| |
| | |
| local rawHeaders, endOfHeaders = parseString(inputString, startLine, parserArgs)
| |
| | |
| -- manipulate the headers as per the options
| |
| local modifiedHeaders = handleHeaders(rawHeaders[1], options)
| |
| parserArgs.headerField = modifiedHeaders
| |
| parserArgs.fieldsToKeep = fieldsToKeep
| |
| parserArgs.inputLength = nil
| |
| | |
| if options.headers == false then endOfHeaders = startLine end
| |
| | |
| local finalHeaders = determineRealHeaders(modifiedHeaders, fieldsToKeep)
| |
| if options.headers ~= false then
| |
| local headersMetamethod = generateHeadersMetamethod(finalHeaders)
| |
| parserArgs.headersMetamethod = headersMetamethod
| |
| end | | end |
|
| |
|
| return endOfHeaders, parserArgs, finalHeaders | | return csv |
| end | | end |
|
| |
|
| -- runs the show!
| | local function format_csv(str, sep) |
| function ftcsv.parse(inputFile, delimiter, options) | | local str, matches = string.gsub(str or "", '"', '""') |
| local delimiter, options = determineArgumentOrder(delimiter, options) | | if string.find(str, "[%"..sep.."\r\n]") or matches > 0 then |
| | | return '"'..str..'"' |
| local options, fieldsToKeep = parseOptions(delimiter, options, false)
| |
| | |
| local inputString = initializeInputFromStringOrFile(inputFile, options, "*all")
| |
| | |
| local endOfHeaders, parserArgs, finalHeaders = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, true) | |
| | |
| local output = parseString(inputString, endOfHeaders, parserArgs)
| |
| | |
| return output, finalHeaders
| |
| end
| |
| | |
-- Returns the value reported by file:seek("end") for an open file handle.
-- The handle's current position is saved first and restored afterwards.
local function getFileSize (file)
    local savedPosition = file:seek("cur", 0)
    local endPosition = file:seek("end")
    file:seek("set", savedPosition)
    return endPosition
end
| |
| | |
| local function determineAtEndOfFile(file, fileSize)
| |
| if file:seek() >= fileSize then
| |
| return true | |
| else
| |
| return false
| |
| end | | end |
| | return str |
| end | | end |
|
| |
|
| local function initializeInputFile(inputString, options)
| | csv.format_row = function(row, delimiter) |
| if options.loadFromString == true then
| | local r = {} |
| error("ftcsv: parseLine currently doesn't support loading from string") | | local sep = delimiter and string.sub(delimiter,1,1) or ',' |
| end
| | |
| return initializeInputFromStringOrFile(inputString, options, options.bufferSize)
| | for i = 1, #row do |
| | r[#r+1] = format_csv(row[i], sep) |
| | r[#r+1] = sep |
| | end |
| | |
| | r[#r] = nil |
| | return table.concat(r) |
| end | | end |
|
| |
|
| function ftcsv.parseLine(inputFile, delimiter, userOptions) | | csv.format = function(data, header, delimiter) |
| local delimiter, userOptions = determineArgumentOrder(delimiter, userOptions) | | local r = {} |
| local options, fieldsToKeep = parseOptions(delimiter, userOptions, true) | | local sep = delimiter and string.sub(delimiter,1,1) or ',' |
| local inputString, file = initializeInputFile(inputFile, options)
| |
|
| |
|
| | | if header then |
| local fileSize, atEndOfFile = 0, false | | for i = 1, #header do |
| fileSize = getFileSize(file)
| | r[#r+1] = format_csv(header[i], sep) |
| atEndOfFile = determineAtEndOfFile(file, fileSize)
| | r[#r+1] = sep |
| | |
| local endOfHeaders, parserArgs, _ = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, atEndOfFile)
| |
| parserArgs.buffered = true
| |
| parserArgs.endOfFile = atEndOfFile
| |
| | |
| local parsedBuffer, endOfParsedInput, totalColumnCount = parseString(inputString, endOfHeaders, parserArgs)
| |
| parserArgs.totalColumnCount = totalColumnCount
| |
| | |
| inputString = ssub(inputString, endOfParsedInput)
| |
| local bufferIndex, returnedRowsCount = 0, 0
| |
| local currentRow, buffer
| |
| | |
| return function()
| |
| -- check parsed buffer for value | |
| bufferIndex = bufferIndex + 1
| |
| currentRow = parsedBuffer[bufferIndex]
| |
| if currentRow then
| |
| returnedRowsCount = returnedRowsCount + 1
| |
| return returnedRowsCount, currentRow
| |
| end
| |
| | |
| -- read more of the input
| |
| buffer = file:read(options.bufferSize)
| |
| if not buffer then
| |
| file:close() | |
| return nil
| |
| else
| |
| parserArgs.endOfFile = determineAtEndOfFile(file, fileSize)
| |
| end | | end |
| | | r[#r] = "\n" |
| -- appends the new input to what was left over | |
| inputString = inputString .. buffer
| |
| | |
| -- re-analyze and load buffer
| |
| parserArgs.rowOffset = returnedRowsCount
| |
| parsedBuffer, endOfParsedInput = parseString(inputString, 1, parserArgs)
| |
| bufferIndex = 1
| |
| | |
| -- cut the input string down
| |
| inputString = ssub(inputString, endOfParsedInput)
| |
| | |
| if #parsedBuffer == 0 then
| |
| error("ftcsv: bufferSize needs to be larger to parse this file")
| |
| end
| |
| | |
| returnedRowsCount = returnedRowsCount + 1
| |
| return returnedRowsCount, parsedBuffer[bufferIndex]
| |
| end | | end |
| end
| |
|
| |
|
| |
|
| |
| -- The ENCODER code is below here
| |
| -- This could be broken out, but is kept here for portability
| |
|
| |
|
| |
|
| local function delimitField(field)
| | for i = 1, #data do |
| field = tostring(field) | | local v = data[i] |
| if field:find('"') then
| | for j = 1, #v do |
| return field:gsub('"', '""')
| | r[#r+1] = format_csv(v[j], sep) |
| else
| | r[#r+1] = sep |
| return field | |
| end
| |
| end
| |
| | |
| local function generateDelimitAndQuoteField(delimiter) | |
| local generatedFunction = function(field)
| |
| field = tostring(field) | |
| if field:find('"') then
| |
| return '"' .. field:gsub('"', '""') .. '"' | |
| elseif field:find('[\n' .. delimiter .. ']') then
| |
| return '"' .. field .. '"'
| |
| else
| |
| return field
| |
| end | | end |
| | r[#r] = "\n" |
| end | | end |
| return generatedFunction | | |
| end
| | r[#r] = nil |
| | | return table.concat(r) |
-- Escapes header names so they can be embedded inside a double-quoted string
-- literal of generated Lua source (see csvLineGenerator, which splices them
-- into `args.t[i]["<header>"]`).
--   headers : list of header strings
-- Returns a new list with the escaped headers; `headers` is not modified.
--
-- Besides the double quote, backslashes, newlines and carriage returns are
-- escaped as well: left raw, a backslash would be read as the start of an
-- escape sequence and a line break would terminate the literal, so the
-- generated code would either fail to compile or look up the wrong key.
local function escapeHeadersForLuaGenerator(headers)
    local replacements = {
        ['\\'] = '\\\\',
        ['"'] = '\\"',
        ['\n'] = '\\n',
        ['\r'] = '\\r',
    }
    local escapedHeaders = {}
    for i = 1, #headers do
        -- parentheses drop gsub's second result (the substitution count)
        escapedHeaders[i] = (headers[i]:gsub('[\\"\n\r]', replacements))
    end
    return escapedHeaders
end
| |
| | |
| -- a function that compiles some lua code to quickly print out the csv
| |
| local function csvLineGenerator(inputTable, delimiter, headers, options)
| |
| local escapedHeaders = escapeHeadersForLuaGenerator(headers)
| |
| | |
| local outputFunc = [[
| |
| local args, i = ...
| |
| i = i + 1;
| |
| if i > ]] .. #inputTable .. [[ then return nil end;
| |
| return i, '"' .. args.delimitField(args.t[i]["]] ..
| |
| table.concat(escapedHeaders, [["]) .. '"]] ..
| |
| delimiter .. [["' .. args.delimitField(args.t[i]["]]) ..
| |
| [["]) .. '"\r\n']]
| |
| | |
| if options and options.onlyRequiredQuotes == true then
| |
| outputFunc = [[
| |
| local args, i = ...
| |
| i = i + 1;
| |
| if i > ]] .. #inputTable .. [[ then return nil end;
| |
| return i, args.delimitField(args.t[i]["]] ..
| |
| table.concat(escapedHeaders, [["]) .. ']] ..
| |
| delimiter .. [[' .. args.delimitField(args.t[i]["]]) ..
| |
| [["]) .. '\r\n']]
| |
| end
| |
| | |
| local arguments = {}
| |
| arguments.t = inputTable
| |
| -- we want to use the same delimitField throughout,
| |
| -- so we're just going to pass it in
| |
| if options and options.onlyRequiredQuotes == true then
| |
| arguments.delimitField = generateDelimitAndQuoteField(delimiter)
| |
| else
| |
| arguments.delimitField = delimitField
| |
| end
| |
| | |
| return luaCompatibility.load(outputFunc), arguments, 0 | |
| | |
| end
| |
| | |
-- Checks that every requested header exists as a key in the first row of
-- `inputTable`.
--   headers    : list of field names that will be written out
--   inputTable : list of row tables
-- Raises an error naming the first header that is missing from row 1.
-- An empty `inputTable` has no row to validate against, so it is accepted
-- instead of failing with an "attempt to index a nil value" error.
local function validateHeaders(headers, inputTable)
    local firstRow = inputTable[1]
    if firstRow == nil then
        return
    end
    for i = 1, #headers do
        if firstRow[headers[i]] == nil then
            -- tostring keeps the message buildable for non-string headers
            error("ftcsv: the field '" .. tostring(headers[i]) .. "' doesn't exist in the inputTable")
        end
    end
end
| |
| | |
-- Builds the output buffer and seeds it with the header line.
--   escapedHeaders : list of already-escaped header strings
--   delimiter      : field separator
--   options        : optional table; `onlyRequiredQuotes == true` joins the
--                    headers as-is, otherwise each header is wrapped in
--                    double quotes
-- Returns a list whose first element is the header line ending in "\r\n".
local function initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    local minimalQuoting = options and options.onlyRequiredQuotes == true
    local headerLine
    if minimalQuoting then
        headerLine = table.concat(escapedHeaders, delimiter) .. '\r\n'
    else
        local quotedSeparator = '"' .. delimiter .. '"'
        headerLine = '"' .. table.concat(escapedHeaders, quotedSeparator) .. '"\r\n'
    end
    return { headerLine }
end
| |
| | |
-- Escapes every header for the CSV header line.
--   headers   : list of header values
--   delimiter : field separator, used only to build the quoting escaper
--   options   : optional table; with `onlyRequiredQuotes == true` the escaper
--               produced by generateDelimitAndQuoteField is used, otherwise
--               the module-level delimitField
-- Returns a new list holding the escaped headers.
local function escapeHeadersForOutput(headers, delimiter, options)
    local escape = delimitField
    if options and options.onlyRequiredQuotes == true then
        escape = generateDelimitAndQuoteField(delimiter)
    end

    local escapedHeaders = {}
    for i = 1, #headers do
        -- plain assignment keeps only the first value the escaper returns
        escapedHeaders[i] = escape(headers[i])
    end
    return escapedHeaders
end
| |
| | |
-- Collects the keys of the first row of `inputTable` and returns them as a
-- list sorted with table.sort's default ordering (alphabetical for strings).
local function extractHeadersFromTable(inputTable)
    local headers, count = {}, 0
    for key in pairs(inputTable[1]) do
        count = count + 1
        headers[count] = key
    end

    -- lets make the headers alphabetical
    table.sort(headers)
    return headers
end
| |
| | |
-- Returns the list of headers requested through `options.fieldsToKeep`, or
-- nil when no options were given or the option is not set.
-- Raises (via assert) when `fieldsToKeep` is present but is not a table; the
-- message reports the offending `fieldsToKeep` value and its type.
local function getHeadersFromOptions(options)
    if not options or options.fieldsToKeep == nil then
        return nil
    end

    local fieldsToKeep = options.fieldsToKeep
    -- the message previously echoed options.headers, which is unrelated to
    -- the value being validated here
    assert(
        type(fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(fieldsToKeep) .. "' of type '" .. type(fieldsToKeep) .. "'.")
    return fieldsToKeep
end
| |
| | |
-- Prepares everything ftcsv.encode needs before rows are emitted.
--   inputTable : list of row tables to encode
--   delimiter  : field separator; must be a one-character string
--   options    : optional encoder options (fieldsToKeep, onlyRequiredQuotes)
-- Returns the output buffer (already holding the header line) and the list
-- of headers, taken from options.fieldsToKeep or else from the first row.
-- Raises when the delimiter is invalid or a requested header is missing.
local function initializeGenerator(inputTable, delimiter, options)
    -- delimiter MUST be one character
    -- The type is checked first: taking the length of a nil or number
    -- delimiter would raise a raw Lua error before this message is reached.
    assert(type(delimiter) == "string" and #delimiter == 1, "the delimiter must be of string type and exactly one character")

    local headers = getHeadersFromOptions(options)
    if headers == nil then
        headers = extractHeadersFromTable(inputTable)
    end
    validateHeaders(headers, inputTable)

    local escapedHeaders = escapeHeadersForOutput(headers, delimiter, options)
    local output = initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    return output, headers
end
| |
| | |
| -- works really quickly with luajit-2.1, because table.concat life
| |
| function ftcsv.encode(inputTable, delimiter, options)
| |
| local delimiter, options = determineArgumentOrder(delimiter, options)
| |
| local output, headers = initializeGenerator(inputTable, delimiter, options)
| |
| | |
| for i, line in csvLineGenerator(inputTable, delimiter, headers, options) do
| |
| output[i+1] = line
| |
| end
| |
| | |
| -- combine and return final string
| |
| return table.concat(output)
| |
| end | | end |
|
| |
|
| return ftcsv | | return csv |