-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathmysql-convert-latin1-to-utf8.php
266 lines (222 loc) · 7.25 KB
/
mysql-convert-latin1-to-utf8.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
<?php
/**
* mysql-convert-latin1-to-utf8.php
*
* v1.3
*
* Converts incorrect MySQL latin1 columns to UTF8.
*
* NOTE: Look for 'TODO's for things you may need to configure.
*
* Documentation at:
* http://nicj.net/2011/04/17/mysql-converting-an-incorrect-latin1-column-to-utf8
*
* Or, read README.md.
*
* PHP Version 5
*
* @author Nic Jansma <[email protected]>
* @copyright 2013 Nic Jansma
* @link http://www.nicj.net
*/
// TODO: Pretend-mode -- if set to true, no SQL queries will be executed. Instead, they will only be echo'd
// to the console.
$pretend = true;
// TODO: Should SET and ENUM columns be processed?
$processEnums = false;
// TODO: The collation you want to convert the overall database to
$defaultCollation = 'utf8_unicode_ci';
// TODO Convert column collations and table defaults using this mapping
// latin1_swedish_ci is included since that's the MySQL default
$collationMap =
array(
'latin1_bin' => 'utf8_bin',
'latin1_general_ci' => 'utf8_unicode_ci',
'latin1_swedish_ci' => 'utf8_unicode_ci',
);
// TODO: Database information
$dbHost = 'localhost';
$dbName = '';
$dbUser = '';
$dbPass = '';
if (file_exists('config.php')) {
require_once('config.php');
}
if ($dbPass == '') {
echo 'DB password:';
$dbPass = trim(fgets(STDIN));
};
$mapstring = '';
foreach ($collationMap as $s => $t) {
$mapstring .= "'$s',";
}
// Strip trailing comma
$mapstring = substr($mapstring, 0, -1);
echo $mapstring;
// Open a connection to the information_schema database
$infoDB = new mysqli($dbHost, $dbUser, $dbPass);
$infoDB->select_db('information_schema');
// Open a second connection to the target (to be converted) database
$targetDB = new mysqli($dbHost, $dbUser, $dbPass);
$targetDB->select_db($dbName);
//
// TODO: FULLTEXT Indexes
//
// You may need to drop FULLTEXT indexes before the conversion -- execute the drop here.
// eg.
// sqlExec($targetDB, "ALTER TABLE MyTable DROP INDEX `my_index_name`", $pretend);
//
// If so, you should restore the FULLTEXT index after the conversion -- search for 'TODO'
// later in this script.
//
// Get all tables in the specified database
$tables = sqlObjs($infoDB,
"SELECT TABLE_NAME, TABLE_COLLATION
FROM TABLES
WHERE TABLE_SCHEMA = '$dbName'");
foreach ($tables as $table) {
$tableName = $table->TABLE_NAME;
$tableCollation = $table->TABLE_COLLATION;
// Find all columns whose collation is of one of $mapstring's source types
$cols = sqlObjs($infoDB,
"SELECT *
FROM COLUMNS
WHERE TABLE_SCHEMA = '$dbName'
AND TABLE_Name = '$tableName'
AND COLLATION_NAME IN($mapstring)
AND COLLATION_NAME IS NOT NULL");
$intermediateChanges = array();
$finalChanges = array();
foreach ($cols as $col) {
// If this column doesn't use one of the collations we want to handle, skip it
if (!array_key_exists($col->COLLATION_NAME, $collationMap)) {
continue;
} else {
$targetCollation = $collationMap[$col->COLLATION_NAME];
}
// Save current column settings
$colName = $col->COLUMN_NAME;
$colCollation = $col->COLLATION_NAME;
$colType = $col->COLUMN_TYPE;
$colDataType = $col->DATA_TYPE;
$colLength = $col->CHARACTER_OCTET_LENGTH;
$colNull = ($col->IS_NULLABLE === 'NO') ? 'NOT NULL' : '';
$colDefault = '';
if ($col->COLUMN_DEFAULT !== null) {
$colDefault = "DEFAULT '{$col->COLUMN_DEFAULT}'";
}
// Determine the target temporary BINARY type
$tmpDataType = '';
switch (strtoupper($colDataType)) {
case 'CHAR':
$tmpDataType = 'BINARY';
break;
case 'VARCHAR':
$tmpDataType = 'VARBINARY';
break;
case 'TINYTEXT':
$tmpDataType = 'TINYBLOB';
break;
case 'TEXT':
$tmpDataType = 'BLOB';
break;
case 'MEDIUMTEXT':
$tmpDataType = 'MEDIUMBLOB';
break;
case 'LONGTEXT':
$tmpDataType = 'LONGBLOB';
break;
//
// TODO: If your database uses the enum type it is safe to uncomment this block if and only if
// all of the enum possibilities only use characters in the 0-127 ASCII character set.
//
case 'SET':
case 'ENUM':
$tmpDataType = 'SKIP';
if ($processEnums) {
// ENUM data-type isn't using a temporary BINARY type -- just convert its column type directly
$finalChanges[] = "MODIFY `$colName` $colType COLLATE $defaultCollation $colNull $colDefault";
}
break;
default:
$tmpDataType = '';
break;
}
// any data types marked as SKIP were already handled
if ($tmpDataType === 'SKIP') {
continue;
}
if ($tmpDataType === '') {
print "Unknown type! $colDataType\n";
exit;
}
// Change the column definition to the new type
$tempColType = str_ireplace($colDataType, $tmpDataType, $colType);
// Convert the column to the temporary BINARY cousin
$intermediateChanges[] = "MODIFY `$colName` $tempColType $colNull";
// Convert it back to the original type with the correct collation
$finalChanges[] = "MODIFY `$colName` $colType COLLATE $targetCollation $colNull $colDefault";
}
if (array_key_exists($tableCollation, $collationMap)) {
$finalChanges[] = 'DEFAULT COLLATE ' . $collationMap[$tableCollation];
}
// Now run the conversions
if (count($intermediateChanges) > 0) {
sqlExec($targetDB, "ALTER TABLE `$dbName`.`$tableName`\n". implode(",\n", $intermediateChanges), $pretend);
}
if (count($finalChanges) > 0) {
sqlExec($targetDB, "ALTER TABLE `$dbName`.`$tableName`\n". implode(",\n", $finalChanges), $pretend);
}
}
//
// TODO: Restore FULLTEXT indexes here
// eg.
// sqlExec($targetDB, "ALTER TABLE MyTable ADD FULLTEXT KEY `my_index_name` (`mycol1`)", $pretend);
//
// Set the default collation
sqlExec($infoDB, "ALTER DATABASE `$dbName` COLLATE $defaultCollation", $pretend);
// Done!
//
// Functions
//
/**
* Executes the specified SQL
*
* @param object $db Target SQL connection
* @param string $sql SQL to execute
* @param boolean $pretend Pretend mode -- if set to true, don't execute query
*
* @return SQL result
*/
function sqlExec($db, $sql, $pretend = false)
{
echo "$sql;\n";
if ($pretend === false) {
$res = $db->query($sql);
if ($res === false) {
$error = $db->error_list[0]['error'];
print "!!! ERROR: $error\n";
}
}
return $res;
}
/**
* Gets the SQL back as objects
*
* @param object $db Target SQL connection
* @param string $sql SQL to execute
*
* @return SQL objects
*/
function sqlObjs($db, $sql)
{
$res = sqlExec($db, $sql);
$a = array();
if ($res !== false) {
while ($obj = $res->fetch_object()) {
$a[] = $obj;
}
}
return $a;
}
?>