-
-
Notifications
You must be signed in to change notification settings - Fork 224
Expand file tree
/
Copy pathio.csv.ts
More file actions
221 lines (202 loc) · 7.02 KB
/
io.csv.ts
File metadata and controls
221 lines (202 loc) · 7.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/**
* @license
* Copyright 2022 JsData. All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ==========================================================================
*/
import { CsvInputOptionsBrowser, CsvOutputOptionsBrowser, ArrayType2D } from "../../shared/types"
import { DataFrame, NDframe, Series } from '../../'
import Papa from 'papaparse'
/**
 * Downloads and parses a CSV source with PapaParse and wraps the parsed rows in a DataFrame.
 *
 * Defaults handed to PapaParse are `header: true`, `skipEmptyLines: 'greedy'`, `delimiter: ","` and
 * `dynamicTyping` enabled unless `frameConfig.dtypes` contains `"string"`. Anything in `options`
 * overrides those defaults, while `download`, `error` and `complete` are always set by this function.
 *
 * @param file Source handed to `Papa.parse` (for example a URL of a CSV file).
 * @param options PapaParse parse configuration, plus an optional `frameConfig` that is forwarded
 * to the DataFrame constructor.
 * @returns Promise resolving to a DataFrame built from the parsed rows.
 * @throws {Error} The promise rejects when PapaParse reports an error, when no rows were parsed,
 * or when the DataFrame cannot be constructed from the rows.
 * @example
 * ```
 * const df = await readCSV("https://raw.githubusercontent.com/test.csv")
 * ```
 * @example
 * ```
 * const df = await readCSV("https://raw.githubusercontent.com/test.csv", {
 *     delimiter: ",",
 *     downloadRequestHeaders: {
 *         Accept: "text/csv",
 *         Authorization: "Bearer YWRtaW46YWRtaW4="
 *     }
 * })
 * ```
 */
const $readCSV = async (file: any, options?: CsvInputOptionsBrowser): Promise<DataFrame> => {
    const frameSettings = options?.frameConfig || {}
    // Automatic number/boolean conversion is turned off as soon as any column is declared "string".
    const keepRawStrings = frameSettings.dtypes?.includes("string")

    return new Promise((resolve, reject) => {
        let failed = false;
        const fail = (message: string): void => reject(new Error(message));

        Papa.parse(file, {
            header: true,
            dynamicTyping: !keepRawStrings,
            skipEmptyLines: 'greedy',
            delimiter: ",",
            ...options,
            // The three keys below come after the spread on purpose: callers cannot replace them.
            download: true,
            error: (parseError) => {
                failed = true;
                fail(`Failed to parse CSV: ${parseError.message}`);
            },
            complete: (parsed) => {
                // The error handler has already rejected; nothing left to report.
                if (failed) return;

                const rows = parsed.data;
                if (!rows || rows.length === 0) {
                    fail('No data found in CSV file');
                    return;
                }

                const problems = parsed.errors;
                if (problems && problems.length > 0) {
                    fail(`CSV parsing errors: ${problems.map(problem => problem.message).join(', ')}`);
                    return;
                }

                let frame: DataFrame;
                try {
                    frame = new DataFrame(rows, frameSettings);
                } catch (cause) {
                    const reason = cause instanceof Error ? cause.message : 'Unknown error occurred';
                    fail(`Failed to create DataFrame from CSV: ${reason}`);
                    return;
                }
                resolve(frame);
            }
        });
    });
};
/**
 * Streams a CSV source through PapaParse and hands every parsed row to `callback` as a
 * single-row DataFrame whose index is the zero-based position of the row in the stream.
 *
 * Defaults handed to PapaParse are `header: true`, `download: true`, `delimiter: ","` and
 * `dynamicTyping` enabled unless `frameConfig.dtypes` contains `"string"`. Anything in `options`
 * overrides those defaults, while `step`, `complete` and `error` are always set by this function.
 *
 * @param file Source handed to `Papa.parse` (for example a URL of a CSV file).
 * @param callback Invoked once per parsed row with a single-row DataFrame. Its return value is ignored.
 * @param options Optional PapaParse parse configuration, plus an optional `frameConfig` that is
 * forwarded to the DataFrame constructor.
 * @returns Promise resolving to `null` once the whole source has been processed.
 * @throws {Error} The promise rejects when PapaParse reports an error, or when building the
 * DataFrame or running `callback` throws. In the latter case parsing is aborted.
 * @example
 * ```
 * await streamCSV("https://raw.githubusercontent.com/test.csv", (dfRow) => {
 *     console.log(dfRow.values)
 * })
 * ```
 */
const $streamCSV = async (file: string, callback: (df: DataFrame) => void, options?: CsvInputOptionsBrowser): Promise<null> => {
    const frameConfig = options?.frameConfig || {}
    return new Promise((resolve, reject) => {
        let count = 0
        let hasError = false;
        // Automatic number/boolean conversion is turned off as soon as any column is declared "string".
        const hasStringType = frameConfig.dtypes?.includes("string")
        Papa.parse(file, {
            header: true,
            download: true,
            dynamicTyping: !hasStringType,
            delimiter: ",",
            ...options,
            // step/complete/error come after the spread on purpose: callers cannot replace them.
            step: (results, parser: { abort: () => void }) => {
                if (hasError) return;
                try {
                    const df = new DataFrame([results.data], { ...frameConfig, index: [count++] });
                    callback(df);
                } catch (error) {
                    hasError = true;
                    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
                    reject(new Error(`Failed to process CSV chunk: ${errorMessage}`));
                    // The promise is already rejected; stop fetching and parsing the remaining rows.
                    parser.abort();
                }
            },
            complete: () => {
                // Also reached after an abort, in which case the promise was already rejected.
                if (!hasError) {
                    resolve(null);
                }
            },
            error: (error) => {
                hasError = true;
                reject(new Error(`Failed to parse CSV: ${error.message}`));
            }
        });
    });
};
/**
 * Converts a DataFrame or Series to CSV text and either returns it or downloads it in the browser.
 *
 * A Series becomes a single line of its values joined by the separator (no header, no trailing
 * newline). A DataFrame becomes an optional header line followed by one line per row, each
 * terminated by `\n`. Fields that contain the separator, a double quote or a line break are
 * wrapped in double quotes with embedded quotes doubled, so the output can be parsed back.
 * `null` and `undefined` values are written as empty fields.
 *
 * @param df DataFrame or Series to be converted to CSV.
 * @param options Configuration object. Supports the following options:
 * - `fileName`: Name of the downloaded file. `.csv` is appended when missing. Defaults to `"output.csv"`.
 * - `download`: When `true`, the CSV is downloaded instead of returned. Defaults to `false`.
 * - `header`: Whether to write the column names as the first line (DataFrame only). Defaults to `true`.
 * - `sep`: Separator placed between fields. Defaults to `","`.
 * @returns The CSV text, or nothing when `download` is `true`.
 * @example
 * ```
 * const df = new DataFrame([[1, 2, 3], [4, 5, 6]])
 * const csv = toCSV(df)
 * ```
 * @example
 * ```
 * const df = new DataFrame([[1, 2, 3], [4, 5, 6]])
 * toCSV(df, {
 *     fileName: "sample.csv",
 *     download: true,
 *     header: true,
 *     sep: ";"
 * })
 * ```
 */
const $toCSV = (df: NDframe | DataFrame | Series, options?: CsvOutputOptionsBrowser): string | void => {
    const { fileName, download, sep, header } = { fileName: "output.csv", sep: ",", header: true, download: false, ...options }

    // Renders one value as a CSV field, quoting it only when it would otherwise break the row.
    const escapeField = (value: unknown): string => {
        // Same rendering Array.prototype.join uses: null/undefined become an empty field.
        const text = value === null || value === undefined ? "" : String(value)
        const containsSeparator = typeof sep === "string" && sep !== "" && text.includes(sep)
        const needsQuoting = containsSeparator || text.includes('"') || text.includes("\n") || text.includes("\r")
        return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text
    }
    const toLine = (fields: readonly unknown[]): string => fields.map(escapeField).join(sep)

    let csv: string
    if (df.$isSeries) {
        csv = toLine(df.values as unknown[])
    } else {
        const lines = (df.values as ArrayType2D).map((row) => toLine(row))
        if (header === true) {
            lines.unshift(toLine(df.columns))
        }
        // Every line, including the last one, is newline-terminated.
        csv = lines.map((line) => `${line}\n`).join("")
    }

    if (download) {
        $downloadFileInBrowser(csv, fileName.endsWith(".csv") ? fileName : `${fileName}.csv`)
        return
    }
    return csv
};
/**
 * Internal helper that makes the browser download the given CSV text.
 * It creates an anchor element pointing at a `data:` URI of the content and clicks it.
 * @param content CSV file contents; the value is URI-encoded into the link target.
 * @param fileName The name of the file to be downloaded
 */
const $downloadFileInBrowser = (content: any, fileName: string) => {
    const link = document.createElement('a');
    const dataUri = `data:text/csv;charset=utf-8,${encodeURIComponent(content)}`;

    link.download = fileName;
    link.target = '_blank';
    link.href = dataUri;

    // The anchor is never attached to the page; the click is triggered programmatically.
    link.click();
}
// Exports of this module: the CSV reader, the streaming reader and the CSV writer.
// `$downloadFileInBrowser` is an internal helper and is deliberately left out.
export {
$readCSV,
$streamCSV,
$toCSV,
}