From cd385cf1cdddecdcaee8eb6c88f03702305f18bc Mon Sep 17 00:00:00 2001 From: travelliu Date: Thu, 15 Aug 2024 14:40:17 +0800 Subject: [PATCH] update(mtk): add mssql Latin1 Chinese desc --- .../docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md | 88 ++++++++++++++++++ .../docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md | 89 +++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/product/en/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md b/product/en/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md index 3948de48..639e57b1 100644 --- a/product/en/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md +++ b/product/en/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md @@ -182,6 +182,94 @@ Manually configured dsn added`?encrypt=disable` } ``` +#### Latin1 character set to store Chinese + +### 2.9.8 + +Use the `VARBINARY` type to process the data. + +1. Determine the data character set. Find a column that contains Chinese characters whose exact content you know, then use `cast(xxx as VARBINARY)` to view its hexadecimal encoding + + ```sql + select CAST(column name AS VARBINARY(column length)) from table name + ``` + + Open the website [View Chinese Character Encoding](https://www.qqxiuzi.cn/bianma/zifuji.php), enter the same Chinese characters to view their encoding, and compare it with the result of the database query. For example, if the bytes match the GBK code shown on the website, the data is GBK-encoded + +2. source.parameter configuration + + ```json + "columnNameData": { + "dbo.yp_supplyer.supplyer_name": "CAST({{.ColName}} AS VARBINARY(column length))" + } + ``` + +3. target.connect.clientCharset: set it to the encoding obtained in step 1 +4. target.parameter configuration + + ```json + "columnNameData": { + "dbo.yp_supplyer.supplyer_name": "convert_from({{.ColName}}::bytea,'character set')" + } + ``` + +> Note: the column length may need to be increased + +### 2.9.9 + +#### Solution 1 + +1. Determine the data character set. 
Find a column that contains Chinese characters whose exact content you know, then use `cast(xxx as VARBINARY)` to view its hexadecimal encoding + + ```sql + select CAST(column name AS VARBINARY(column length)) from table name + ``` + + Open the website [View Chinese Character Encoding](https://www.qqxiuzi.cn/bianma/zifuji.php), enter the same Chinese characters to view their encoding, and compare it with the result of the database query. For example, if the bytes match the GBK code shown on the website, the data is GBK-encoded + +2. source.parameter configuration + + ```json + "columnNameData": { + "dbo.yp_supplyer.supplyer_name": "CAST({{.ColName}} AS VARBINARY(column length))" + } + ``` + +3. target.connect.clientCharset: set it to the encoding obtained in step 1 + +#### Solution 2 + +1. Determine the data character set. Find a column that contains Chinese characters whose exact content you know, then use `cast(xxx as VARBINARY)` to view its hexadecimal encoding + + ```sql + select CAST(column name AS VARBINARY(column length)) from table name + ``` + + Open the website [View Chinese Character Encoding](https://www.qqxiuzi.cn/bianma/zifuji.php), enter the same Chinese characters to view their encoding, and compare it with the result of the database query. For example, if the bytes match the GBK code shown on the website, the data is GBK-encoded + +2. View the encoding IDs of the database and of the column + + ```sql + select + schema_name(tab.schema_id) as schema_name, + tab.name as table_name, + c.name as column_name, + collation_name, + SERVERPROPERTY('SqlSortOrder') AS SqlSortOrder + from + sys.columns c + JOIN sys.tables tab ON + tab.object_id = c.object_id + where + tab.name = 'table name' + and c.name = 'column name' + ``` + + Check whether the `Collation` at the database level (the query above does not return it; add `SERVERPROPERTY('Collation') AS Collation` to the select list to see it) and the `collation_name` at the column level are consistent + +3. source.connect.clientCharset: set it to the encoding that corresponds to the `SqlSortOrder` obtained in step 2 +4. target.connect.clientCharset: set it to the encoding obtained in step 1 +
## Part query SQL statement diff --git a/product/zh/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md b/product/zh/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md index 851cc410..6172cfa6 100644 --- a/product/zh/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md +++ b/product/zh/docs-mtk/v2.0/faq/mtk-mssql-to-mogdb.md @@ -183,6 +183,95 @@ date: 2024-05-09 } ``` +#### Latin1 字符集存中文 + +### 2.9.8 + +借助 `VARBINARY` 类型处理数据. + +1. 确定数据字符集编码. 找到存在中文的字段并确切知道中文内容,使用 `cast(xxx as VARBINARY)` 查看16进制编码 + + ```sql + select CAST(列名 AS VARBINARY(列长度)) from 表名 + ``` + + 打开网站[查看汉字编码](https://www.qqxiuzi.cn/bianma/zifuji.php)输入汉字查看编码.和数据库查询出来的结果进行对比. 如上面的是GBK编码 + +2. source.parameter 配置 + + ```json + "columnNameData": { + "dbo.yp_supplyer.supplyer_name": "CAST({{.ColName}} AS VARBINARY(列长度))" + } + ``` + +3. target.connect.clientCharset 配置为步骤 1 得到的编码 +4. target.parameter 配置 + + ```json + "columnNameData": { + "dbo.yp_supplyer.supplyer_name": "convert_from({{.ColName}}::bytea,'字符集')" + } + ``` + +> 注意: 列可能需要扩充长度 + +### 2.9.9 + +#### 方案 1 + +1. 确定数据字符集. 找到存在中文的列并确切知道中文内容,使用 `cast(xxx as VARBINARY)` 查看16进制编码 + + ```sql + select CAST(列名 AS VARBINARY(列长度)) from 表名 + ``` + + 打开网站[查看汉字编码](https://www.qqxiuzi.cn/bianma/zifuji.php)输入汉字查看编码.和数据库查询出来的结果进行对比. 如上面的是GBK编码 + +2. source.parameter 配置 + + ```json + "columnNameData": { + "dbo.yp_supplyer.supplyer_name": "CAST({{.ColName}} AS VARBINARY(列长度))" + } + ``` + +3. target.connect.clientCharset 配置为步骤 1 得到的编码 + +#### 方案 2 + +1. 确定数据字符集. 找到存在中文的字段并确切知道中文内容,使用 `cast(xxx as VARBINARY)` 查看16进制编码 + + ```sql + select CAST(列名 AS VARBINARY(列长度)) from 表名 + ``` + + 打开网站[查看汉字编码](https://www.qqxiuzi.cn/bianma/zifuji.php)输入汉字查看编码.和数据库查询出来的结果进行对比. 如上面的是GBK编码 + +2. 
查看数据库的编码ID和列编码ID + + ```sql + select + schema_name(tab.schema_id) as schema_name, + tab.name as table_name, + c.name as column_name, + collation_name, + SERVERPROPERTY('Collation') AS Collation, + SERVERPROPERTY('SqlSortOrder') AS SqlSortOrder + from + sys.columns c + JOIN sys.tables tab ON + tab.object_id = c.object_id + where + tab.name = '表名' + and c.name = '列名' + ``` + + 确认库级别的 `Collation` 和列级别的 `collation_name` 是否一致 + +3. source.connect.clientCharset 配置为步骤 2 得到的 `SqlSortOrder` 对应的编码 +4. target.connect.clientCharset 配置为步骤 1 得到的编码 + ## 部分SQL语句 ### Schema -- Gitee