ImportCsv.class.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. <?php
  2. /* vim: set expandtab sw=4 ts=4 sts=4: */
  3. /**
  4. * CSV import plugin for phpMyAdmin
  5. *
  6. * @todo add an option for handling NULL values
  7. * @package PhpMyAdmin-Import
  8. * @subpackage CSV
  9. */
  10. if (! defined('PHPMYADMIN')) {
  11. exit;
  12. }
  13. /* Get the import interface */
  14. require_once 'libraries/plugins/import/AbstractImportCsv.class.php';
  15. /**
  16. * Handles the import for the CSV format
  17. *
  18. * @package PhpMyAdmin-Import
  19. * @subpackage CSV
  20. */
  21. class ImportCsv extends AbstractImportCsv
  22. {
  23. /**
  24. * Whether to analyze tables
  25. *
  26. * @var bool
  27. */
  28. private $_analyze;
  29. /**
  30. * Constructor
  31. */
  32. public function __construct()
  33. {
  34. $this->setProperties();
  35. }
  36. /**
  37. * Sets the import plugin properties.
  38. * Called in the constructor.
  39. *
  40. * @return void
  41. */
  42. protected function setProperties()
  43. {
  44. $this->_setAnalyze(false);
  45. if ($GLOBALS['plugin_param'] !== 'table') {
  46. $this->_setAnalyze(true);
  47. }
  48. $generalOptions = parent::setProperties();
  49. $this->properties->setText('CSV');
  50. $this->properties->setExtension('csv');
  51. if ($GLOBALS['plugin_param'] !== 'table') {
  52. $leaf = new BoolPropertyItem();
  53. $leaf->setName("col_names");
  54. $leaf->setText(
  55. __(
  56. 'The first line of the file contains the table column names'
  57. . ' <i>(if this is unchecked, the first line will become part'
  58. . ' of the data)</i>'
  59. )
  60. );
  61. $generalOptions->addProperty($leaf);
  62. } else {
  63. $hint = new PMA_Message(
  64. __(
  65. 'If the data in each row of the file is not'
  66. . ' in the same order as in the database, list the corresponding'
  67. . ' column names here. Column names must be separated by commas'
  68. . ' and not enclosed in quotations.'
  69. )
  70. );
  71. $leaf = new TextPropertyItem();
  72. $leaf->setName("columns");
  73. $leaf->setText(
  74. __('Column names: ')
  75. . PMA_Util::showHint($hint)
  76. );
  77. $generalOptions->addProperty($leaf);
  78. }
  79. $leaf = new BoolPropertyItem();
  80. $leaf->setName("ignore");
  81. $leaf->setText(__('Do not abort on INSERT error'));
  82. $generalOptions->addProperty($leaf);
  83. }
  84. /**
  85. * This method is called when any PluginManager to which the observer
  86. * is attached calls PluginManager::notify()
  87. *
  88. * @param SplSubject $subject The PluginManager notifying the observer
  89. * of an update.
  90. *
  91. * @return void
  92. */
  93. public function update (SplSubject $subject)
  94. {
  95. }
  96. /**
  97. * Handles the whole import logic
  98. *
  99. * @return void
  100. */
  101. public function doImport()
  102. {
  103. global $db, $table, $csv_terminated, $csv_enclosed, $csv_escaped,
  104. $csv_new_line, $csv_columns, $err_url;
  105. // $csv_replace and $csv_ignore should have been here,
  106. // but we use directly from $_POST
  107. global $error, $timeout_passed, $finished, $message;
  108. $replacements = array(
  109. '\\n' => "\n",
  110. '\\t' => "\t",
  111. '\\r' => "\r",
  112. );
  113. $csv_terminated = strtr($csv_terminated, $replacements);
  114. $csv_enclosed = strtr($csv_enclosed, $replacements);
  115. $csv_escaped = strtr($csv_escaped, $replacements);
  116. $csv_new_line = strtr($csv_new_line, $replacements);
  117. $param_error = false;
  118. if (strlen($csv_terminated) != 1) {
  119. $message = PMA_Message::error(
  120. __('Invalid parameter for CSV import: %s')
  121. );
  122. $message->addParam(__('Columns terminated by'), false);
  123. $error = true;
  124. $param_error = true;
  125. // The default dialog of MS Excel when generating a CSV produces a
  126. // semi-colon-separated file with no chance of specifying the
  127. // enclosing character. Thus, users who want to import this file
  128. // tend to remove the enclosing character on the Import dialog.
  129. // I could not find a test case where having no enclosing characters
  130. // confuses this script.
  131. // But the parser won't work correctly with strings so we allow just
  132. // one character.
  133. } elseif (strlen($csv_enclosed) > 1) {
  134. $message = PMA_Message::error(
  135. __('Invalid parameter for CSV import: %s')
  136. );
  137. $message->addParam(__('Columns enclosed by'), false);
  138. $error = true;
  139. $param_error = true;
  140. } elseif (strlen($csv_escaped) != 1) {
  141. $message = PMA_Message::error(
  142. __('Invalid parameter for CSV import: %s')
  143. );
  144. $message->addParam(__('Columns escaped by'), false);
  145. $error = true;
  146. $param_error = true;
  147. } elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
  148. $message = PMA_Message::error(
  149. __('Invalid parameter for CSV import: %s')
  150. );
  151. $message->addParam(__('Lines terminated by'), false);
  152. $error = true;
  153. $param_error = true;
  154. }
  155. // If there is an error in the parameters entered,
  156. // indicate that immediately.
  157. if ($param_error) {
  158. PMA_Util::mysqlDie($message->getMessage(), '', '', $err_url);
  159. }
  160. $buffer = '';
  161. $required_fields = 0;
  162. if (! $this->_getAnalyze()) {
  163. if (isset($_POST['csv_replace'])) {
  164. $sql_template = 'REPLACE';
  165. } else {
  166. $sql_template = 'INSERT';
  167. if (isset($_POST['csv_ignore'])) {
  168. $sql_template .= ' IGNORE';
  169. }
  170. }
  171. $sql_template .= ' INTO ' . PMA_Util::backquote($table);
  172. $tmp_fields = PMA_DBI_get_columns($db, $table);
  173. if (empty($csv_columns)) {
  174. $fields = $tmp_fields;
  175. } else {
  176. $sql_template .= ' (';
  177. $fields = array();
  178. $tmp = preg_split('/,( ?)/', $csv_columns);
  179. foreach ($tmp as $key => $val) {
  180. if (count($fields) > 0) {
  181. $sql_template .= ', ';
  182. }
  183. /* Trim also `, if user already included backquoted fields */
  184. $val = trim($val, " \t\r\n\0\x0B`");
  185. $found = false;
  186. foreach ($tmp_fields as $field) {
  187. if ($field['Field'] == $val) {
  188. $found = true;
  189. break;
  190. }
  191. }
  192. if (! $found) {
  193. $message = PMA_Message::error(
  194. __(
  195. 'Invalid column (%s) specified! Ensure that columns'
  196. . ' names are spelled correctly, separated by commas'
  197. . ', and not enclosed in quotes.'
  198. )
  199. );
  200. $message->addParam($val);
  201. $error = true;
  202. break;
  203. }
  204. $fields[] = $field;
  205. $sql_template .= PMA_Util::backquote($val);
  206. }
  207. $sql_template .= ') ';
  208. }
  209. $required_fields = count($fields);
  210. $sql_template .= ' VALUES (';
  211. }
  212. // Defaults for parser
  213. $i = 0;
  214. $len = 0;
  215. $line = 1;
  216. $lasti = -1;
  217. $values = array();
  218. $csv_finish = false;
  219. $tempRow = array();
  220. $rows = array();
  221. $col_names = array();
  222. $tables = array();
  223. $col_count = 0;
  224. $max_cols = 0;
  225. while (! ($finished && $i >= $len) && ! $error && ! $timeout_passed) {
  226. $data = PMA_importGetNextChunk();
  227. if ($data === false) {
  228. // subtract data we didn't handle yet and stop processing
  229. $offset -= strlen($buffer);
  230. break;
  231. } elseif ($data === true) {
  232. // Handle rest of buffer
  233. } else {
  234. // Append new data to buffer
  235. $buffer .= $data;
  236. unset($data);
  237. // Do not parse string when we're not at the end
  238. // and don't have new line inside
  239. if (($csv_new_line == 'auto'
  240. && strpos($buffer, "\r") === false
  241. && strpos($buffer, "\n") === false)
  242. || ($csv_new_line != 'auto'
  243. && strpos($buffer, $csv_new_line) === false)
  244. ) {
  245. continue;
  246. }
  247. }
  248. // Current length of our buffer
  249. $len = strlen($buffer);
  250. // Currently parsed char
  251. $ch = $buffer[$i];
  252. while ($i < $len) {
  253. // Deadlock protection
  254. if ($lasti == $i && $lastlen == $len) {
  255. $message = PMA_Message::error(
  256. __('Invalid format of CSV input on line %d.')
  257. );
  258. $message->addParam($line);
  259. $error = true;
  260. break;
  261. }
  262. $lasti = $i;
  263. $lastlen = $len;
  264. // This can happen with auto EOL and \r at the end of buffer
  265. if (! $csv_finish) {
  266. // Grab empty field
  267. if ($ch == $csv_terminated) {
  268. if ($i == $len - 1) {
  269. break;
  270. }
  271. $values[] = '';
  272. $i++;
  273. $ch = $buffer[$i];
  274. continue;
  275. }
  276. // Grab one field
  277. $fallbacki = $i;
  278. if ($ch == $csv_enclosed) {
  279. if ($i == $len - 1) {
  280. break;
  281. }
  282. $need_end = true;
  283. $i++;
  284. $ch = $buffer[$i];
  285. } else {
  286. $need_end = false;
  287. }
  288. $fail = false;
  289. $value = '';
  290. while (($need_end
  291. && ( $ch != $csv_enclosed || $csv_enclosed == $csv_escaped ))
  292. || ( ! $need_end
  293. && ! ( $ch == $csv_terminated
  294. || $ch == $csv_new_line
  295. || ( $csv_new_line == 'auto'
  296. && ( $ch == "\r" || $ch == "\n" ) ) ) )
  297. ) {
  298. if ($ch == $csv_escaped) {
  299. if ($i == $len - 1) {
  300. $fail = true;
  301. break;
  302. }
  303. $i++;
  304. $ch = $buffer[$i];
  305. if ($csv_enclosed == $csv_escaped
  306. && ($ch == $csv_terminated
  307. || $ch == $csv_new_line
  308. || ($csv_new_line == 'auto'
  309. && ($ch == "\r" || $ch == "\n")))
  310. ) {
  311. break;
  312. }
  313. }
  314. $value .= $ch;
  315. if ($i == $len - 1) {
  316. if (! $finished) {
  317. $fail = true;
  318. }
  319. break;
  320. }
  321. $i++;
  322. $ch = $buffer[$i];
  323. }
  324. // unquoted NULL string
  325. if (false === $need_end && $value === 'NULL') {
  326. $value = null;
  327. }
  328. if ($fail) {
  329. $i = $fallbacki;
  330. $ch = $buffer[$i];
  331. break;
  332. }
  333. // Need to strip trailing enclosing char?
  334. if ($need_end && $ch == $csv_enclosed) {
  335. if ($finished && $i == $len - 1) {
  336. $ch = null;
  337. } elseif ($i == $len - 1) {
  338. $i = $fallbacki;
  339. $ch = $buffer[$i];
  340. break;
  341. } else {
  342. $i++;
  343. $ch = $buffer[$i];
  344. }
  345. }
  346. // Are we at the end?
  347. if ($ch == $csv_new_line
  348. || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))
  349. || ($finished && $i == $len - 1)
  350. ) {
  351. $csv_finish = true;
  352. }
  353. // Go to next char
  354. if ($ch == $csv_terminated) {
  355. if ($i == $len - 1) {
  356. $i = $fallbacki;
  357. $ch = $buffer[$i];
  358. break;
  359. }
  360. $i++;
  361. $ch = $buffer[$i];
  362. }
  363. // If everything went okay, store value
  364. $values[] = $value;
  365. }
  366. // End of line
  367. if ($csv_finish
  368. || $ch == $csv_new_line
  369. || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))
  370. ) {
  371. if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
  372. if ($i >= ($len - 2) && ! $finished) {
  373. break; // We need more data to decide new line
  374. }
  375. if ($buffer[$i + 1] == "\n") {
  376. $i++;
  377. }
  378. }
  379. // We didn't parse value till the end of line, so there was
  380. // empty one
  381. if (! $csv_finish) {
  382. $values[] = '';
  383. }
  384. if ($this->_getAnalyze()) {
  385. foreach ($values as $val) {
  386. $tempRow[] = $val;
  387. ++$col_count;
  388. }
  389. if ($col_count > $max_cols) {
  390. $max_cols = $col_count;
  391. }
  392. $col_count = 0;
  393. $rows[] = $tempRow;
  394. $tempRow = array();
  395. } else {
  396. // Do we have correct count of values?
  397. if (count($values) != $required_fields) {
  398. // Hack for excel
  399. if ($values[count($values) - 1] == ';') {
  400. unset($values[count($values) - 1]);
  401. } else {
  402. $message = PMA_Message::error(
  403. __('Invalid column count in CSV input on line %d.')
  404. );
  405. $message->addParam($line);
  406. $error = true;
  407. break;
  408. }
  409. }
  410. $first = true;
  411. $sql = $sql_template;
  412. foreach ($values as $key => $val) {
  413. if (! $first) {
  414. $sql .= ', ';
  415. }
  416. if ($val === null) {
  417. $sql .= 'NULL';
  418. } else {
  419. $sql .= '\''
  420. . PMA_Util::sqlAddSlashes($val)
  421. . '\'';
  422. }
  423. $first = false;
  424. }
  425. $sql .= ')';
  426. /**
  427. * @todo maybe we could add original line to verbose
  428. * SQL in comment
  429. */
  430. PMA_importRunQuery($sql, $sql);
  431. }
  432. $line++;
  433. $csv_finish = false;
  434. $values = array();
  435. $buffer = substr($buffer, $i + 1);
  436. $len = strlen($buffer);
  437. $i = 0;
  438. $lasti = -1;
  439. $ch = $buffer[0];
  440. }
  441. } // End of parser loop
  442. } // End of import loop
  443. if ($this->_getAnalyze()) {
  444. /* Fill out all rows */
  445. $num_rows = count($rows);
  446. for ($i = 0; $i < $num_rows; ++$i) {
  447. for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
  448. $rows[$i][] = 'NULL';
  449. }
  450. }
  451. if (isset($_REQUEST['csv_col_names'])) {
  452. $col_names = array_splice($rows, 0, 1);
  453. $col_names = $col_names[0];
  454. }
  455. if ((isset($col_names) && count($col_names) != $max_cols)
  456. || ! isset($col_names)
  457. ) {
  458. // Fill out column names
  459. for ($i = 0; $i < $max_cols; ++$i) {
  460. $col_names[] = 'COL '.($i+1);
  461. }
  462. }
  463. if (strlen($db)) {
  464. $result = PMA_DBI_fetch_result('SHOW TABLES');
  465. $tbl_name = 'TABLE '.(count($result) + 1);
  466. } else {
  467. $tbl_name = 'TBL_NAME';
  468. }
  469. $tables[] = array($tbl_name, $col_names, $rows);
  470. /* Obtain the best-fit MySQL types for each column */
  471. $analyses = array();
  472. $analyses[] = PMA_analyzeTable($tables[0]);
  473. /**
  474. * string $db_name (no backquotes)
  475. *
  476. * array $table = array(table_name, array() column_names, array()() rows)
  477. * array $tables = array of "$table"s
  478. *
  479. * array $analysis = array(array() column_types, array() column_sizes)
  480. * array $analyses = array of "$analysis"s
  481. *
  482. * array $create = array of SQL strings
  483. *
  484. * array $options = an associative array of options
  485. */
  486. /* Set database name to the currently selected one, if applicable */
  487. if (strlen($db)) {
  488. $db_name = $db;
  489. $options = array('create_db' => false);
  490. } else {
  491. $db_name = 'CSV_DB';
  492. $options = null;
  493. }
  494. /* Non-applicable parameters */
  495. $create = null;
  496. /* Created and execute necessary SQL statements from data */
  497. PMA_buildSQL($db_name, $tables, $analyses, $create, $options);
  498. unset($tables);
  499. unset($analyses);
  500. }
  501. // Commit any possible data in buffers
  502. PMA_importRunQuery();
  503. if (count($values) != 0 && ! $error) {
  504. $message = PMA_Message::error(
  505. __('Invalid format of CSV input on line %d.')
  506. );
  507. $message->addParam($line);
  508. $error = true;
  509. }
  510. }
  511. /* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */
  512. /**
  513. * Returns true if the table should be analyzed, false otherwise
  514. *
  515. * @return bool
  516. */
  517. private function _getAnalyze()
  518. {
  519. return $this->_analyze;
  520. }
  521. /**
  522. * Sets to true if the table should be analyzed, false otherwise
  523. *
  524. * @param bool $analyze status
  525. *
  526. * @return void
  527. */
  528. private function _setAnalyze($analyze)
  529. {
  530. $this->_analyze = $analyze;
  531. }
  532. }