ImportOds.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. <?php
  2. /**
  3. * OpenDocument Spreadsheet import plugin for phpMyAdmin
  4. *
  5. * @todo Pretty much everything
  6. * @todo Importing of accented characters seems to fail
  7. */
  8. declare(strict_types=1);
  9. namespace PhpMyAdmin\Plugins\Import;
  10. use PhpMyAdmin\File;
  11. use PhpMyAdmin\Import;
  12. use PhpMyAdmin\Message;
  13. use PhpMyAdmin\Plugins\ImportPlugin;
  14. use PhpMyAdmin\Properties\Options\Groups\OptionsPropertyMainGroup;
  15. use PhpMyAdmin\Properties\Options\Groups\OptionsPropertyRootGroup;
  16. use PhpMyAdmin\Properties\Options\Items\BoolPropertyItem;
  17. use PhpMyAdmin\Properties\Plugins\ImportPluginProperties;
  18. use SimpleXMLElement;
  19. use const LIBXML_COMPACT;
  20. use function count;
  21. use function implode;
  22. use function libxml_disable_entity_loader;
  23. use function rtrim;
  24. use function simplexml_load_string;
  25. use function strcmp;
  26. use function strlen;
  27. use const PHP_VERSION_ID;
  28. /**
  29. * Handles the import for the ODS format
  30. */
  31. class ImportOds extends ImportPlugin
  32. {
  33. public function __construct()
  34. {
  35. parent::__construct();
  36. $this->setProperties();
  37. }
  38. /**
  39. * Sets the import plugin properties.
  40. * Called in the constructor.
  41. *
  42. * @return void
  43. */
  44. protected function setProperties()
  45. {
  46. $importPluginProperties = new ImportPluginProperties();
  47. $importPluginProperties->setText('OpenDocument Spreadsheet');
  48. $importPluginProperties->setExtension('ods');
  49. $importPluginProperties->setOptionsText(__('Options'));
  50. // create the root group that will be the options field for
  51. // $importPluginProperties
  52. // this will be shown as "Format specific options"
  53. $importSpecificOptions = new OptionsPropertyRootGroup(
  54. 'Format Specific Options'
  55. );
  56. // general options main group
  57. $generalOptions = new OptionsPropertyMainGroup('general_opts');
  58. // create primary items and add them to the group
  59. $leaf = new BoolPropertyItem(
  60. 'col_names',
  61. __(
  62. 'The first line of the file contains the table column names'
  63. . ' <i>(if this is unchecked, the first line will become part'
  64. . ' of the data)</i>'
  65. )
  66. );
  67. $generalOptions->addProperty($leaf);
  68. $leaf = new BoolPropertyItem(
  69. 'empty_rows',
  70. __('Do not import empty rows')
  71. );
  72. $generalOptions->addProperty($leaf);
  73. $leaf = new BoolPropertyItem(
  74. 'recognize_percentages',
  75. __(
  76. 'Import percentages as proper decimals <i>(ex. 12.00% to .12)</i>'
  77. )
  78. );
  79. $generalOptions->addProperty($leaf);
  80. $leaf = new BoolPropertyItem(
  81. 'recognize_currency',
  82. __('Import currencies <i>(ex. $5.00 to 5.00)</i>')
  83. );
  84. $generalOptions->addProperty($leaf);
  85. // add the main group to the root group
  86. $importSpecificOptions->addProperty($generalOptions);
  87. // set the options for the import plugin property item
  88. $importPluginProperties->setOptions($importSpecificOptions);
  89. $this->properties = $importPluginProperties;
  90. }
  91. /**
  92. * Handles the whole import logic
  93. *
  94. * @param array $sql_data 2-element array with sql data
  95. *
  96. * @return void
  97. */
  98. public function doImport(?File $importHandle = null, array &$sql_data = [])
  99. {
  100. global $db, $error, $timeout_passed, $finished;
  101. $i = 0;
  102. $len = 0;
  103. $buffer = '';
  104. /**
  105. * Read in the file via Import::getNextChunk so that
  106. * it can process compressed files
  107. */
  108. while (! ($finished && $i >= $len) && ! $error && ! $timeout_passed) {
  109. $data = $this->import->getNextChunk($importHandle);
  110. if ($data === false) {
  111. /* subtract data we didn't handle yet and stop processing */
  112. $GLOBALS['offset'] -= strlen($buffer);
  113. break;
  114. }
  115. if ($data === true) {
  116. continue;
  117. }
  118. /* Append new data to buffer */
  119. $buffer .= $data;
  120. }
  121. /**
  122. * Disable loading of external XML entities for PHP versions below 8.0.
  123. */
  124. if (PHP_VERSION_ID < 80000) {
  125. // phpcs:ignore Generic.PHP.DeprecatedFunctions.Deprecated
  126. libxml_disable_entity_loader();
  127. }
  128. /**
  129. * Load the XML string
  130. *
  131. * The option LIBXML_COMPACT is specified because it can
  132. * result in increased performance without the need to
  133. * alter the code in any way. It's basically a freebee.
  134. */
  135. $xml = @simplexml_load_string($buffer, 'SimpleXMLElement', LIBXML_COMPACT);
  136. unset($buffer);
  137. if ($xml === false) {
  138. $sheets = [];
  139. $GLOBALS['message'] = Message::error(
  140. __(
  141. 'The XML file specified was either malformed or incomplete.'
  142. . ' Please correct the issue and try again.'
  143. )
  144. );
  145. $GLOBALS['error'] = true;
  146. } else {
  147. /** @var SimpleXMLElement $root */
  148. $root = $xml->children('office', true)->{'body'}->{'spreadsheet'};
  149. if (empty($root)) {
  150. $sheets = [];
  151. $GLOBALS['message'] = Message::error(
  152. __('Could not parse OpenDocument Spreadsheet!')
  153. );
  154. $GLOBALS['error'] = true;
  155. } else {
  156. $sheets = $root->children('table', true);
  157. }
  158. }
  159. [$tables, $rows] = $this->iterateOverTables($sheets);
  160. /**
  161. * Bring accumulated rows into the corresponding table
  162. */
  163. $num_tables = count($tables);
  164. for ($i = 0; $i < $num_tables; ++$i) {
  165. $num_rows = count($rows);
  166. for ($j = 0; $j < $num_rows; ++$j) {
  167. if (strcmp($tables[$i][Import::TBL_NAME], $rows[$j][Import::TBL_NAME])) {
  168. continue;
  169. }
  170. if (! isset($tables[$i][Import::COL_NAMES])) {
  171. $tables[$i][] = $rows[$j][Import::COL_NAMES];
  172. }
  173. $tables[$i][Import::ROWS] = $rows[$j][Import::ROWS];
  174. }
  175. }
  176. /* No longer needed */
  177. unset($rows);
  178. /* Obtain the best-fit MySQL types for each column */
  179. $analyses = [];
  180. $len = count($tables);
  181. for ($i = 0; $i < $len; ++$i) {
  182. $analyses[] = $this->import->analyzeTable($tables[$i]);
  183. }
  184. /**
  185. * string $db_name (no backquotes)
  186. *
  187. * array $table = array(table_name, array() column_names, array()() rows)
  188. * array $tables = array of "$table"s
  189. *
  190. * array $analysis = array(array() column_types, array() column_sizes)
  191. * array $analyses = array of "$analysis"s
  192. *
  193. * array $create = array of SQL strings
  194. *
  195. * array $options = an associative array of options
  196. */
  197. /* Set database name to the currently selected one, if applicable */
  198. [$db_name, $options] = $this->getDbnameAndOptions($db, 'ODS_DB');
  199. /* Non-applicable parameters */
  200. $create = null;
  201. /* Created and execute necessary SQL statements from data */
  202. $this->import->buildSql($db_name, $tables, $analyses, $create, $options, $sql_data);
  203. unset($tables, $analyses);
  204. /* Commit any possible data in buffers */
  205. $this->import->runQuery('', '', $sql_data);
  206. }
  207. /**
  208. * Get value
  209. *
  210. * @param array $cell_attrs Cell attributes
  211. * @param array $text Texts
  212. *
  213. * @return float|string
  214. */
  215. protected function getValue($cell_attrs, $text)
  216. {
  217. if ($_REQUEST['ods_recognize_percentages']
  218. && ! strcmp(
  219. 'percentage',
  220. (string) $cell_attrs['value-type']
  221. )
  222. ) {
  223. return (float) $cell_attrs['value'];
  224. }
  225. if ($_REQUEST['ods_recognize_currency']
  226. && ! strcmp('currency', (string) $cell_attrs['value-type'])
  227. ) {
  228. return (float) $cell_attrs['value'];
  229. }
  230. /* We need to concatenate all paragraphs */
  231. $values = [];
  232. foreach ($text as $paragraph) {
  233. $values[] = (string) $paragraph;
  234. }
  235. return implode("\n", $values);
  236. }
  237. private function iterateOverColumns(
  238. SimpleXMLElement $row,
  239. bool $col_names_in_first_row,
  240. array $tempRow,
  241. array $col_names,
  242. int $col_count
  243. ): array {
  244. $cellCount = $row->count();
  245. $a = 0;
  246. /** @var SimpleXMLElement $cell */
  247. foreach ($row as $cell) {
  248. $a++;
  249. $text = $cell->children('text', true);
  250. $cell_attrs = $cell->attributes('office', true);
  251. if ($text->count() != 0) {
  252. $attr = $cell->attributes('table', true);
  253. $num_repeat = (int) $attr['number-columns-repeated'];
  254. $num_iterations = $num_repeat ?: 1;
  255. for ($k = 0; $k < $num_iterations; $k++) {
  256. $value = $this->getValue($cell_attrs, $text);
  257. if (! $col_names_in_first_row) {
  258. $tempRow[] = $value;
  259. } else {
  260. // MySQL column names can't end with a space
  261. // character.
  262. $col_names[] = rtrim((string) $value);
  263. }
  264. ++$col_count;
  265. }
  266. continue;
  267. }
  268. // skip empty repeats in the last row
  269. if ($a == $cellCount) {
  270. continue;
  271. }
  272. $attr = $cell->attributes('table', true);
  273. $num_null = (int) $attr['number-columns-repeated'];
  274. if ($num_null) {
  275. if (! $col_names_in_first_row) {
  276. for ($i = 0; $i < $num_null; ++$i) {
  277. $tempRow[] = 'NULL';
  278. ++$col_count;
  279. }
  280. } else {
  281. for ($i = 0; $i < $num_null; ++$i) {
  282. $col_names[] = $this->import->getColumnAlphaName(
  283. $col_count + 1
  284. );
  285. ++$col_count;
  286. }
  287. }
  288. } else {
  289. if (! $col_names_in_first_row) {
  290. $tempRow[] = 'NULL';
  291. } else {
  292. $col_names[] = $this->import->getColumnAlphaName(
  293. $col_count + 1
  294. );
  295. }
  296. ++$col_count;
  297. }
  298. }
  299. return [$tempRow, $col_names, $col_count];
  300. }
  301. private function iterateOverRows(
  302. SimpleXMLElement $sheet,
  303. bool $col_names_in_first_row,
  304. array $tempRow,
  305. array $col_names,
  306. int $col_count,
  307. int $max_cols,
  308. array $tempRows
  309. ): array {
  310. /** @var SimpleXMLElement $row */
  311. foreach ($sheet as $row) {
  312. $type = $row->getName();
  313. if (strcmp('table-row', $type)) {
  314. continue;
  315. }
  316. [$tempRow, $col_names, $col_count] = $this->iterateOverColumns(
  317. $row,
  318. $col_names_in_first_row,
  319. $tempRow,
  320. $col_names,
  321. $col_count
  322. );
  323. /* Find the widest row */
  324. if ($col_count > $max_cols) {
  325. $max_cols = $col_count;
  326. }
  327. /* Don't include a row that is full of NULL values */
  328. if (! $col_names_in_first_row) {
  329. if ($_REQUEST['ods_empty_rows'] ?? false) {
  330. foreach ($tempRow as $cell) {
  331. if (strcmp('NULL', $cell)) {
  332. $tempRows[] = $tempRow;
  333. break;
  334. }
  335. }
  336. } else {
  337. $tempRows[] = $tempRow;
  338. }
  339. }
  340. $col_count = 0;
  341. $col_names_in_first_row = false;
  342. $tempRow = [];
  343. }
  344. return [$tempRow, $col_names, $max_cols, $tempRows];
  345. }
  346. /**
  347. * @param array|SimpleXMLElement $sheets Sheets of the spreadsheet.
  348. *
  349. * @return array|array[]
  350. */
  351. private function iterateOverTables($sheets): array
  352. {
  353. $tables = [];
  354. $max_cols = 0;
  355. $col_count = 0;
  356. $col_names = [];
  357. $tempRow = [];
  358. $tempRows = [];
  359. $rows = [];
  360. /** @var SimpleXMLElement $sheet */
  361. foreach ($sheets as $sheet) {
  362. $col_names_in_first_row = isset($_REQUEST['ods_col_names']);
  363. [$tempRow, $col_names, $max_cols, $tempRows] = $this->iterateOverRows(
  364. $sheet,
  365. $col_names_in_first_row,
  366. $tempRow,
  367. $col_names,
  368. $col_count,
  369. $max_cols,
  370. $tempRows
  371. );
  372. /* Skip over empty sheets */
  373. if (count($tempRows) == 0 || count($tempRows[0]) === 0) {
  374. $col_names = [];
  375. $tempRow = [];
  376. $tempRows = [];
  377. continue;
  378. }
  379. /**
  380. * Fill out each row as necessary to make
  381. * every one exactly as wide as the widest
  382. * row. This included column names.
  383. */
  384. /* Fill out column names */
  385. for ($i = count($col_names); $i < $max_cols; ++$i) {
  386. $col_names[] = $this->import->getColumnAlphaName($i + 1);
  387. }
  388. /* Fill out all rows */
  389. $num_rows = count($tempRows);
  390. for ($i = 0; $i < $num_rows; ++$i) {
  391. for ($j = count($tempRows[$i]); $j < $max_cols; ++$j) {
  392. $tempRows[$i][] = 'NULL';
  393. }
  394. }
  395. /* Store the table name so we know where to place the row set */
  396. $tbl_attr = $sheet->attributes('table', true);
  397. $tables[] = [(string) $tbl_attr['name']];
  398. /* Store the current sheet in the accumulator */
  399. $rows[] = [
  400. (string) $tbl_attr['name'],
  401. $col_names,
  402. $tempRows,
  403. ];
  404. $tempRows = [];
  405. $col_names = [];
  406. $max_cols = 0;
  407. }
  408. return [$tables, $rows];
  409. }
  410. }